      1 /*	$NetBSD: intel_lrc.c,v 1.6 2021/12/19 11:47:40 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright © 2014 Intel Corporation
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the next
     14  * paragraph) shall be included in all copies or substantial portions of the
     15  * Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     23  * IN THE SOFTWARE.
     24  *
     25  * Authors:
     26  *    Ben Widawsky <ben@bwidawsk.net>
     27  *    Michel Thierry <michel.thierry@intel.com>
     28  *    Thomas Daniel <thomas.daniel@intel.com>
     29  *    Oscar Mateo <oscar.mateo@intel.com>
     30  *
     31  */
     32 
     33 /**
     34  * DOC: Logical Rings, Logical Ring Contexts and Execlists
     35  *
     36  * Motivation:
     37  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
     38  * These expanded contexts enable a number of new abilities, especially
     39  * "Execlists" (also implemented in this file).
     40  *
     41  * One of the main differences with the legacy HW contexts is that logical
     42  * ring contexts incorporate many more things to the context's state, like
     43  * PDPs or ringbuffer control registers:
     44  *
     45  * The reason why PDPs are included in the context is straightforward: as
     46  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
     47  * contained there means you don't need to do a ppgtt->switch_mm yourself;
     48  * instead, the GPU will do it for you on the context switch.
     49  *
     50  * But what about the ringbuffer control registers (head, tail, etc.)?
     51  * Shouldn't we just need a set of those per engine command streamer? This is
     52  * where the name "Logical Rings" starts to make sense: by virtualizing the
     53  * rings, the engine cs shifts to a new "ring buffer" with every context
     54  * switch. When you want to submit a workload to the GPU you: A) choose your
     55  * context, B) find its appropriate virtualized ring, C) write commands to it
     56  * and then, finally, D) tell the GPU to switch to that context.
     57  *
     58  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
     59  * to a context is via a context execution list, ergo "Execlists".
     60  *
     61  * LRC implementation:
     62  * Regarding the creation of contexts, we have:
     63  *
     64  * - One global default context.
     65  * - One local default context for each opened fd.
     66  * - One local extra context for each context create ioctl call.
     67  *
     68  * Now that ringbuffers belong per-context (and not per-engine, like before)
     69  * and that contexts are uniquely tied to a given engine (and not reusable,
     70  * like before) we need:
     71  *
     72  * - One ringbuffer per-engine inside each context.
     73  * - One backing object per-engine inside each context.
     74  *
     75  * The global default context starts its life with these new objects fully
     76  * allocated and populated. The local default context for each opened fd is
     77  * more complex, because we don't know at creation time which engine is going
     78  * to use them. To handle this, we have implemented a deferred creation of LR
     79  * contexts:
     80  *
     81  * The local context starts its life as a hollow or blank holder, that only
     82  * gets populated for a given engine once we receive an execbuffer. If later
     83  * on we receive another execbuffer ioctl for the same context but a different
     84  * engine, we allocate/populate a new ringbuffer and context backing object and
     85  * so on.
     86  *
     87  * Finally, regarding local contexts created using the ioctl call: as they are
     88  * only allowed with the render ring, we can allocate & populate them right
     89  * away (no need to defer anything, at least for now).
     90  *
     91  * Execlists implementation:
     92  * Execlists are the new method by which, on gen8+ hardware, workloads are
     93  * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
     94  * This method works as follows:
     95  *
     96  * When a request is committed, its commands (the BB start and any leading or
     97  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
     98  * for the appropriate context. The tail pointer in the hardware context is not
     99  * updated at this time but is instead kept by the driver in the ringbuffer
    100  * structure. A structure representing this request is added to a request queue
    101  * for the appropriate engine: this structure contains a copy of the context's
    102  * tail after the request was written to the ring buffer and a pointer to the
    103  * context itself.
    104  *
    105  * If the engine's request queue was empty before the request was added, the
    106  * queue is processed immediately. Otherwise the queue will be processed during
    107  * a context switch interrupt. In any case, elements on the queue will get sent
    108  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
    109  * globally unique 20-bit submission ID.
    110  *
    111  * When execution of a request completes, the GPU updates the context status
    112  * buffer with a context complete event and generates a context switch interrupt.
    113  * During the interrupt handling, the driver examines the events in the buffer:
    114  * for each context complete event, if the announced ID matches that on the head
    115  * of the request queue, then that request is retired and removed from the queue.
    116  *
    117  * After processing, if any requests were retired and the queue is not empty
    118  * then a new execution list can be submitted. The two requests at the front of
    119  * the queue are next to be submitted but since a context may not occur twice in
    120  * an execution list, if subsequent requests have the same ID as the first then
    121  * the two requests must be combined. This is done simply by discarding requests
    122  * at the head of the queue until either only one request is left (in which case
    123  * we use a NULL second context) or the first two requests have unique IDs.
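 *
 * As a purely illustrative sketch (an editorial addition with hypothetical
 * helper names, not the driver's actual code), that pairing rule amounts to::
 *
 *	elsp[0] = pop(queue);
 *	while (!queue_empty(queue) &&
 *	       same_context(queue_head(queue), elsp[0]))
 *		elsp[0] = pop(queue);	// later tail subsumes the earlier one
 *	elsp[1] = queue_empty(queue) ? NULL : pop(queue);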
    124  *
    125  * By always executing the first two requests in the queue the driver ensures
    126  * that the GPU is kept as busy as possible. In the case where a single context
    127  * completes but a second context is still executing, the request for this second
    128  * context will be at the head of the queue when we remove the first one. This
    129  * request will then be resubmitted along with a new request for a different context,
    130  * which will cause the hardware to continue executing the second request and queue
    131  * the new request (the GPU detects the condition of a context getting preempted
    132  * with the same context and optimizes the context switch flow by not doing
    133  * preemption, but just sampling the new tail pointer).
    134  *
    135  */
    136 #include <sys/cdefs.h>
    137 __KERNEL_RCSID(0, "$NetBSD: intel_lrc.c,v 1.6 2021/12/19 11:47:40 riastradh Exp $");
    138 
    139 #include <linux/interrupt.h>
    140 
    141 #include "i915_drv.h"
    142 #include "i915_perf.h"
    143 #include "i915_trace.h"
    144 #include "i915_vgpu.h"
    145 #include "intel_context.h"
    146 #include "intel_engine_pm.h"
    147 #include "intel_gt.h"
    148 #include "intel_gt_pm.h"
    149 #include "intel_gt_requests.h"
    150 #include "intel_lrc_reg.h"
    151 #include "intel_mocs.h"
    152 #include "intel_reset.h"
    153 #include "intel_ring.h"
    154 #include "intel_workarounds.h"
    155 
    156 #include <linux/nbsd-namespace.h>
    157 
    158 #define RING_EXECLIST_QFULL		(1 << 0x2)
    159 #define RING_EXECLIST1_VALID		(1 << 0x3)
    160 #define RING_EXECLIST0_VALID		(1 << 0x4)
    161 #define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE)
    162 #define RING_EXECLIST1_ACTIVE		(1 << 0x11)
    163 #define RING_EXECLIST0_ACTIVE		(1 << 0x12)
    164 
    165 #define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
    166 #define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
    167 #define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
    168 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
    169 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
    170 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
    171 
    172 #define GEN8_CTX_STATUS_COMPLETED_MASK \
    173 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
    174 
    175 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
    176 
    177 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */
    178 #define GEN12_CTX_SWITCH_DETAIL(csb_dw)	((csb_dw) & 0xF) /* upper csb dword */
    179 #define GEN12_CSB_SW_CTX_ID_MASK		GENMASK(25, 15)
    180 #define GEN12_IDLE_CTX_ID		0x7FF
    181 #define GEN12_CSB_CTX_VALID(csb_dw) \
    182 	(FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
    183 
    184 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
    185 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
    186 #define WA_TAIL_DWORDS 2
    187 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
    188 
    189 struct virtual_engine {
    190 	struct intel_engine_cs base;
    191 	struct intel_context context;
    192 
    193 	/*
    194 	 * We allow only a single request through the virtual engine at a time
    195 	 * (each request in the timeline waits for the completion fence of
    196 	 * the previous before being submitted). By restricting ourselves to
    197 	 * only submitting a single request, each request is placed on to a
    198 	 * physical engine to maximise load spreading (by virtue of the late greedy
    199 	 * scheduling -- each real engine takes the next available request
    200 	 * upon idling).
    201 	 */
    202 	struct i915_request *request;
    203 
    204 	/*
    205 	 * We keep a rbtree of available virtual engines inside each physical
    206 	 * engine, sorted by priority. Here we preallocate the nodes we need
    207 	 * for the virtual engine, indexed by physical_engine->id.
    208 	 */
    209 	struct ve_node {
    210 		struct rb_node rb;
    211 		int prio;
    212 	} nodes[I915_NUM_ENGINES];
    213 
    214 	/*
    215 	 * Keep track of bonded pairs -- restrictions upon our selection
    216 	 * of physical engines any particular request may be submitted to.
    217 	 * If we receive a submit-fence from a master engine, we will only
    218 	 * use one of sibling_mask physical engines.
    219 	 */
    220 	struct ve_bond {
    221 		const struct intel_engine_cs *master;
    222 		intel_engine_mask_t sibling_mask;
    223 	} *bonds;
    224 	unsigned int num_bonds;
    225 
    226 	/* And finally, which physical engines this virtual engine maps onto. */
    227 	unsigned int num_siblings;
    228 	struct intel_engine_cs *siblings[0];
    229 };
    230 
    231 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
    232 {
    233 	GEM_BUG_ON(!intel_engine_is_virtual(engine));
    234 	return container_of(engine, struct virtual_engine, base);
    235 }
    236 
    237 static int __execlists_context_alloc(struct intel_context *ce,
    238 				     struct intel_engine_cs *engine);
    239 
    240 static void execlists_init_reg_state(u32 *reg_state,
    241 				     const struct intel_context *ce,
    242 				     const struct intel_engine_cs *engine,
    243 				     const struct intel_ring *ring,
    244 				     bool close);
    245 static void
    246 __execlists_update_reg_state(const struct intel_context *ce,
    247 			     const struct intel_engine_cs *engine,
    248 			     u32 head);
    249 
    250 static void mark_eio(struct i915_request *rq)
    251 {
    252 	if (i915_request_completed(rq))
    253 		return;
    254 
    255 	GEM_BUG_ON(i915_request_signaled(rq));
    256 
    257 	dma_fence_set_error(&rq->fence, -EIO);
    258 	i915_request_mark_complete(rq);
    259 }
    260 
    261 static struct i915_request *
    262 active_request(const struct intel_timeline * const tl, struct i915_request *rq)
    263 {
    264 	struct i915_request *active = rq;
    265 
    266 	rcu_read_lock();
    267 	list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
    268 		if (i915_request_completed(rq))
    269 			break;
    270 
    271 		active = rq;
    272 	}
    273 	rcu_read_unlock();
    274 
    275 	return active;
    276 }
    277 
    278 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
    279 {
    280 	return (i915_ggtt_offset(engine->status_page.vma) +
    281 		I915_GEM_HWS_PREEMPT_ADDR);
    282 }
    283 
    284 static inline void
    285 ring_set_paused(const struct intel_engine_cs *engine, int state)
    286 {
    287 	/*
    288 	 * We inspect HWS_PREEMPT with a semaphore inside
    289 	 * engine->emit_fini_breadcrumb. If the dword is true,
    290 	 * the ring is paused as the semaphore will busywait
    291 	 * until the dword is false.
    292 	 */
    293 	engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
    294 	if (state)
    295 		wmb();
    296 }
    297 
    298 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
    299 {
    300 	return rb_entry(rb, struct i915_priolist, node);
    301 }
    302 
    303 static inline int rq_prio(const struct i915_request *rq)
    304 {
    305 	return rq->sched.attr.priority;
    306 }
    307 
    308 static int effective_prio(const struct i915_request *rq)
    309 {
    310 	int prio = rq_prio(rq);
    311 
    312 	/*
    313 	 * If this request is special and must not be interrupted at any
    314 	 * cost, so be it. Note we are only checking the most recent request
    315 	 * in the context and so may be masking an earlier vip request. It
    316 	 * is hoped that under the conditions where nopreempt is used, this
    317 	 * will not matter (i.e. all requests to that context will be
    318 	 * nopreempt for as long as desired).
    319 	 */
    320 	if (i915_request_has_nopreempt(rq))
    321 		prio = I915_PRIORITY_UNPREEMPTABLE;
    322 
    323 	/*
    324 	 * On unwinding the active request, we give it a priority bump
    325 	 * if it has completed waiting on any semaphore. If we know that
    326 	 * the request has already started, we can prevent an unwanted
    327 	 * preempt-to-idle cycle by taking that into account now.
    328 	 */
    329 	if (__i915_request_has_started(rq))
    330 		prio |= I915_PRIORITY_NOSEMAPHORE;
    331 
    332 	/* Restrict mere WAIT boosts from triggering preemption */
    333 	BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
    334 	return prio | __NO_PREEMPTION;
    335 }
    336 
    337 static int queue_prio(const struct intel_engine_execlists *execlists)
    338 {
    339 	struct i915_priolist *p;
    340 	struct rb_node *rb;
    341 
    342 	rb = rb_first_cached(&execlists->queue);
    343 	if (!rb)
    344 		return INT_MIN;
    345 
    346 	/*
    347 	 * As the priolist[] are inverted, with the highest priority in [0],
    348 	 * we have to flip the index value to become priority.
    349 	 */
    350 	p = to_priolist(rb);
    351 	return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
    352 }
    353 
    354 static inline bool need_preempt(const struct intel_engine_cs *engine,
    355 				const struct i915_request *rq,
    356 				struct rb_node *rb)
    357 {
    358 	int last_prio;
    359 
    360 	if (!intel_engine_has_semaphores(engine))
    361 		return false;
    362 
    363 	/*
    364 	 * Check if the current priority hint merits a preemption attempt.
    365 	 *
    366 	 * We record the highest value priority we saw during rescheduling
    367 	 * prior to this dequeue, therefore we know that if it is strictly
    368 	 * less than the current tail of ELSP[0], we do not need to force
    369 	 * a preempt-to-idle cycle.
    370 	 *
    371 	 * However, the priority hint is a mere hint that we may need to
    372 	 * preempt. If that hint is stale or we may be trying to preempt
    373 	 * ourselves, ignore the request.
    374 	 *
    375 	 * More naturally we would write
    376 	 *      prio >= max(0, last);
    377 	 * except that we wish to prevent triggering preemption at the same
    378 	 * priority level: the task that is running should remain running
    379 	 * to preserve FIFO ordering of dependencies.
    380 	 */
    381 	last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
    382 	if (engine->execlists.queue_priority_hint <= last_prio)
    383 		return false;
    384 
    385 	/*
    386 	 * Check against the first request in ELSP[1]; it will, thanks to the
    387 	 * power of PI, be the highest priority of that context.
    388 	 */
    389 	if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
    390 	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
    391 		return true;
    392 
    393 	if (rb) {
    394 		struct virtual_engine *ve =
    395 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
    396 		bool preempt = false;
    397 
    398 		if (engine == ve->siblings[0]) { /* only preempt one sibling */
    399 			struct i915_request *next;
    400 
    401 			rcu_read_lock();
    402 			next = READ_ONCE(ve->request);
    403 			if (next)
    404 				preempt = rq_prio(next) > last_prio;
    405 			rcu_read_unlock();
    406 		}
    407 
    408 		if (preempt)
    409 			return preempt;
    410 	}
    411 
    412 	/*
    413 	 * If the inflight context did not trigger the preemption, then maybe
    414 	 * it was the set of queued requests? Pick the highest priority in
    415 	 * the queue (the first active priolist) and see if it deserves to be
    416 	 * running instead of ELSP[0].
    417 	 *
    418 	 * The highest priority request in the queue cannot be either
    419 	 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
    420 	 * context, its priority would not exceed ELSP[0] aka last_prio.
    421 	 */
    422 	return queue_prio(&engine->execlists) > last_prio;
    423 }
    424 
    425 __maybe_unused static inline bool
    426 assert_priority_queue(const struct i915_request *prev,
    427 		      const struct i915_request *next)
    428 {
    429 	/*
    430 	 * Without preemption, the prev may refer to the still active element
    431 	 * which we refuse to let go.
    432 	 *
    433 	 * Even with preemption, there are times when we think it is better not
    434 	 * to preempt and leave an ostensibly lower priority request in flight.
    435 	 */
    436 	if (i915_request_is_active(prev))
    437 		return true;
    438 
    439 	return rq_prio(prev) >= rq_prio(next);
    440 }
    441 
    442 /*
    443  * The context descriptor encodes various attributes of a context,
    444  * including its GTT address and some flags. Because it's fairly
    445  * expensive to calculate, we'll just do it once and cache the result,
    446  * which remains valid until the context is unpinned.
    447  *
    448  * This is what a descriptor looks like, from LSB to MSB::
    449  *
    450  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
    451  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
    452  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
    453  *      bits 53-54:    mbz, reserved for use by hardware
    454  *      bits 55-63:    group ID, currently unused and set to 0
    455  *
    456  * Starting from Gen11, the upper dword of the descriptor has a new format:
    457  *
    458  *      bits 32-36:    reserved
    459  *      bits 37-47:    SW context ID
     460  *      bits 48-53:    engine instance
    461  *      bit 54:        mbz, reserved for use by hardware
    462  *      bits 55-60:    SW counter
    463  *      bits 61-63:    engine class
    464  *
    465  * engine info, SW context ID and SW counter need to form a unique number
    466  * (Context ID) per lrc.
    467  */
    468 static u64
    469 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
    470 {
    471 	u64 desc;
    472 
    473 	desc = INTEL_LEGACY_32B_CONTEXT;
    474 	if (i915_vm_is_4lvl(ce->vm))
    475 		desc = INTEL_LEGACY_64B_CONTEXT;
    476 	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
    477 
    478 	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
    479 	if (IS_GEN(engine->i915, 8))
    480 		desc |= GEN8_CTX_L3LLC_COHERENT;
    481 
    482 	desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
    483 	/*
    484 	 * The following 32 bits are copied into the OA reports (dword 2).
    485 	 * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
    486 	 * anything below.
    487 	 */
    488 	if (INTEL_GEN(engine->i915) >= 11) {
    489 		desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
    490 								/* bits 48-53 */
    491 
    492 		desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
    493 								/* bits 61-63 */
    494 	}
    495 
    496 	return desc;
    497 }
    498 
    499 static inline unsigned int dword_in_page(void *addr)
    500 {
    501 	return offset_in_page(addr) / sizeof(u32);
    502 }
    503 
    504 static void set_offsets(u32 *regs,
    505 			const u8 *data,
    506 			const struct intel_engine_cs *engine,
    507 			bool clear)
    508 #define NOP(x) (BIT(7) | (x))
    509 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
    510 #define POSTED BIT(0)
    511 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
    512 #define REG16(x) \
    513 	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
    514 	(((x) >> 2) & 0x7f)
    515 #define END(x) 0, (x)
    516 {
    517 	const u32 base = engine->mmio_base;
    518 
    519 	while (*data) {
    520 		u8 count, flags;
    521 
    522 		if (*data & BIT(7)) { /* skip */
    523 			count = *data++ & ~BIT(7);
    524 			if (clear)
    525 				memset32(regs, MI_NOOP, count);
    526 			regs += count;
    527 			continue;
    528 		}
    529 
    530 		count = *data & 0x3f;
    531 		flags = *data >> 6;
    532 		data++;
    533 
    534 		*regs = MI_LOAD_REGISTER_IMM(count);
    535 		if (flags & POSTED)
    536 			*regs |= MI_LRI_FORCE_POSTED;
    537 		if (INTEL_GEN(engine->i915) >= 11)
    538 			*regs |= MI_LRI_CS_MMIO;
    539 		regs++;
    540 
    541 		GEM_BUG_ON(!count);
    542 		do {
    543 			u32 offset = 0;
    544 			u8 v;
    545 
    546 			do {
    547 				v = *data++;
    548 				offset <<= 7;
    549 				offset |= v & ~BIT(7);
    550 			} while (v & BIT(7));
    551 
    552 			regs[0] = base + (offset << 2);
    553 			if (clear)
    554 				regs[1] = 0;
    555 			regs += 2;
    556 		} while (--count);
    557 	}
    558 
    559 	if (clear) {
    560 		u8 count = *++data;
    561 
    562 		/* Clear past the tail for HW access */
    563 		GEM_BUG_ON(dword_in_page(regs) > count);
    564 		memset32(regs, MI_NOOP, count - dword_in_page(regs));
    565 
    566 		/* Close the batch; used mainly by live_lrc_layout() */
    567 		*regs = MI_BATCH_BUFFER_END;
    568 		if (INTEL_GEN(engine->i915) >= 10)
    569 			*regs |= BIT(0);
    570 	}
    571 }
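
/*
 * Worked example (editorial illustration only, not part of the driver):
 * in the tables below, the adjacent entries LRI(9, POSTED), REG16(0x3a8)
 * are stored as the bytes { 0x49, 0x81, 0x6a, ... }.  set_offsets() reads
 * 0x49 as MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED (plus
 * MI_LRI_CS_MMIO on gen11+), then rebuilds the first register offset
 * seven bits at a time:
 *
 *	offset = 0x81 & ~BIT(7);		// 0x01, continuation bit was set
 *	offset = (offset << 7) | 0x6a;		// 0xea
 *	regs[0] = engine->mmio_base + (0xea << 2);	// mmio_base + 0x3a8
 */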
    572 
    573 static const u8 gen8_xcs_offsets[] = {
    574 	NOP(1),
    575 	LRI(11, 0),
    576 	REG16(0x244),
    577 	REG(0x034),
    578 	REG(0x030),
    579 	REG(0x038),
    580 	REG(0x03c),
    581 	REG(0x168),
    582 	REG(0x140),
    583 	REG(0x110),
    584 	REG(0x11c),
    585 	REG(0x114),
    586 	REG(0x118),
    587 
    588 	NOP(9),
    589 	LRI(9, 0),
    590 	REG16(0x3a8),
    591 	REG16(0x28c),
    592 	REG16(0x288),
    593 	REG16(0x284),
    594 	REG16(0x280),
    595 	REG16(0x27c),
    596 	REG16(0x278),
    597 	REG16(0x274),
    598 	REG16(0x270),
    599 
    600 	NOP(13),
    601 	LRI(2, 0),
    602 	REG16(0x200),
    603 	REG(0x028),
    604 
    605 	END(80)
    606 };
    607 
    608 static const u8 gen9_xcs_offsets[] = {
    609 	NOP(1),
    610 	LRI(14, POSTED),
    611 	REG16(0x244),
    612 	REG(0x034),
    613 	REG(0x030),
    614 	REG(0x038),
    615 	REG(0x03c),
    616 	REG(0x168),
    617 	REG(0x140),
    618 	REG(0x110),
    619 	REG(0x11c),
    620 	REG(0x114),
    621 	REG(0x118),
    622 	REG(0x1c0),
    623 	REG(0x1c4),
    624 	REG(0x1c8),
    625 
    626 	NOP(3),
    627 	LRI(9, POSTED),
    628 	REG16(0x3a8),
    629 	REG16(0x28c),
    630 	REG16(0x288),
    631 	REG16(0x284),
    632 	REG16(0x280),
    633 	REG16(0x27c),
    634 	REG16(0x278),
    635 	REG16(0x274),
    636 	REG16(0x270),
    637 
    638 	NOP(13),
    639 	LRI(1, POSTED),
    640 	REG16(0x200),
    641 
    642 	NOP(13),
    643 	LRI(44, POSTED),
    644 	REG(0x028),
    645 	REG(0x09c),
    646 	REG(0x0c0),
    647 	REG(0x178),
    648 	REG(0x17c),
    649 	REG16(0x358),
    650 	REG(0x170),
    651 	REG(0x150),
    652 	REG(0x154),
    653 	REG(0x158),
    654 	REG16(0x41c),
    655 	REG16(0x600),
    656 	REG16(0x604),
    657 	REG16(0x608),
    658 	REG16(0x60c),
    659 	REG16(0x610),
    660 	REG16(0x614),
    661 	REG16(0x618),
    662 	REG16(0x61c),
    663 	REG16(0x620),
    664 	REG16(0x624),
    665 	REG16(0x628),
    666 	REG16(0x62c),
    667 	REG16(0x630),
    668 	REG16(0x634),
    669 	REG16(0x638),
    670 	REG16(0x63c),
    671 	REG16(0x640),
    672 	REG16(0x644),
    673 	REG16(0x648),
    674 	REG16(0x64c),
    675 	REG16(0x650),
    676 	REG16(0x654),
    677 	REG16(0x658),
    678 	REG16(0x65c),
    679 	REG16(0x660),
    680 	REG16(0x664),
    681 	REG16(0x668),
    682 	REG16(0x66c),
    683 	REG16(0x670),
    684 	REG16(0x674),
    685 	REG16(0x678),
    686 	REG16(0x67c),
    687 	REG(0x068),
    688 
    689 	END(176)
    690 };
    691 
    692 static const u8 gen12_xcs_offsets[] = {
    693 	NOP(1),
    694 	LRI(13, POSTED),
    695 	REG16(0x244),
    696 	REG(0x034),
    697 	REG(0x030),
    698 	REG(0x038),
    699 	REG(0x03c),
    700 	REG(0x168),
    701 	REG(0x140),
    702 	REG(0x110),
    703 	REG(0x1c0),
    704 	REG(0x1c4),
    705 	REG(0x1c8),
    706 	REG(0x180),
    707 	REG16(0x2b4),
    708 
    709 	NOP(5),
    710 	LRI(9, POSTED),
    711 	REG16(0x3a8),
    712 	REG16(0x28c),
    713 	REG16(0x288),
    714 	REG16(0x284),
    715 	REG16(0x280),
    716 	REG16(0x27c),
    717 	REG16(0x278),
    718 	REG16(0x274),
    719 	REG16(0x270),
    720 
    721 	END(80)
    722 };
    723 
    724 static const u8 gen8_rcs_offsets[] = {
    725 	NOP(1),
    726 	LRI(14, POSTED),
    727 	REG16(0x244),
    728 	REG(0x034),
    729 	REG(0x030),
    730 	REG(0x038),
    731 	REG(0x03c),
    732 	REG(0x168),
    733 	REG(0x140),
    734 	REG(0x110),
    735 	REG(0x11c),
    736 	REG(0x114),
    737 	REG(0x118),
    738 	REG(0x1c0),
    739 	REG(0x1c4),
    740 	REG(0x1c8),
    741 
    742 	NOP(3),
    743 	LRI(9, POSTED),
    744 	REG16(0x3a8),
    745 	REG16(0x28c),
    746 	REG16(0x288),
    747 	REG16(0x284),
    748 	REG16(0x280),
    749 	REG16(0x27c),
    750 	REG16(0x278),
    751 	REG16(0x274),
    752 	REG16(0x270),
    753 
    754 	NOP(13),
    755 	LRI(1, 0),
    756 	REG(0x0c8),
    757 
    758 	END(80)
    759 };
    760 
    761 static const u8 gen9_rcs_offsets[] = {
    762 	NOP(1),
    763 	LRI(14, POSTED),
    764 	REG16(0x244),
    765 	REG(0x34),
    766 	REG(0x30),
    767 	REG(0x38),
    768 	REG(0x3c),
    769 	REG(0x168),
    770 	REG(0x140),
    771 	REG(0x110),
    772 	REG(0x11c),
    773 	REG(0x114),
    774 	REG(0x118),
    775 	REG(0x1c0),
    776 	REG(0x1c4),
    777 	REG(0x1c8),
    778 
    779 	NOP(3),
    780 	LRI(9, POSTED),
    781 	REG16(0x3a8),
    782 	REG16(0x28c),
    783 	REG16(0x288),
    784 	REG16(0x284),
    785 	REG16(0x280),
    786 	REG16(0x27c),
    787 	REG16(0x278),
    788 	REG16(0x274),
    789 	REG16(0x270),
    790 
    791 	NOP(13),
    792 	LRI(1, 0),
    793 	REG(0xc8),
    794 
    795 	NOP(13),
    796 	LRI(44, POSTED),
    797 	REG(0x28),
    798 	REG(0x9c),
    799 	REG(0xc0),
    800 	REG(0x178),
    801 	REG(0x17c),
    802 	REG16(0x358),
    803 	REG(0x170),
    804 	REG(0x150),
    805 	REG(0x154),
    806 	REG(0x158),
    807 	REG16(0x41c),
    808 	REG16(0x600),
    809 	REG16(0x604),
    810 	REG16(0x608),
    811 	REG16(0x60c),
    812 	REG16(0x610),
    813 	REG16(0x614),
    814 	REG16(0x618),
    815 	REG16(0x61c),
    816 	REG16(0x620),
    817 	REG16(0x624),
    818 	REG16(0x628),
    819 	REG16(0x62c),
    820 	REG16(0x630),
    821 	REG16(0x634),
    822 	REG16(0x638),
    823 	REG16(0x63c),
    824 	REG16(0x640),
    825 	REG16(0x644),
    826 	REG16(0x648),
    827 	REG16(0x64c),
    828 	REG16(0x650),
    829 	REG16(0x654),
    830 	REG16(0x658),
    831 	REG16(0x65c),
    832 	REG16(0x660),
    833 	REG16(0x664),
    834 	REG16(0x668),
    835 	REG16(0x66c),
    836 	REG16(0x670),
    837 	REG16(0x674),
    838 	REG16(0x678),
    839 	REG16(0x67c),
    840 	REG(0x68),
    841 
    842 	END(176)
    843 };
    844 
    845 static const u8 gen11_rcs_offsets[] = {
    846 	NOP(1),
    847 	LRI(15, POSTED),
    848 	REG16(0x244),
    849 	REG(0x034),
    850 	REG(0x030),
    851 	REG(0x038),
    852 	REG(0x03c),
    853 	REG(0x168),
    854 	REG(0x140),
    855 	REG(0x110),
    856 	REG(0x11c),
    857 	REG(0x114),
    858 	REG(0x118),
    859 	REG(0x1c0),
    860 	REG(0x1c4),
    861 	REG(0x1c8),
    862 	REG(0x180),
    863 
    864 	NOP(1),
    865 	LRI(9, POSTED),
    866 	REG16(0x3a8),
    867 	REG16(0x28c),
    868 	REG16(0x288),
    869 	REG16(0x284),
    870 	REG16(0x280),
    871 	REG16(0x27c),
    872 	REG16(0x278),
    873 	REG16(0x274),
    874 	REG16(0x270),
    875 
    876 	LRI(1, POSTED),
    877 	REG(0x1b0),
    878 
    879 	NOP(10),
    880 	LRI(1, 0),
    881 	REG(0x0c8),
    882 
    883 	END(80)
    884 };
    885 
    886 static const u8 gen12_rcs_offsets[] = {
    887 	NOP(1),
    888 	LRI(13, POSTED),
    889 	REG16(0x244),
    890 	REG(0x034),
    891 	REG(0x030),
    892 	REG(0x038),
    893 	REG(0x03c),
    894 	REG(0x168),
    895 	REG(0x140),
    896 	REG(0x110),
    897 	REG(0x1c0),
    898 	REG(0x1c4),
    899 	REG(0x1c8),
    900 	REG(0x180),
    901 	REG16(0x2b4),
    902 
    903 	NOP(5),
    904 	LRI(9, POSTED),
    905 	REG16(0x3a8),
    906 	REG16(0x28c),
    907 	REG16(0x288),
    908 	REG16(0x284),
    909 	REG16(0x280),
    910 	REG16(0x27c),
    911 	REG16(0x278),
    912 	REG16(0x274),
    913 	REG16(0x270),
    914 
    915 	LRI(3, POSTED),
    916 	REG(0x1b0),
    917 	REG16(0x5a8),
    918 	REG16(0x5ac),
    919 
    920 	NOP(6),
    921 	LRI(1, 0),
    922 	REG(0x0c8),
    923 
    924 	END(80)
    925 };
    926 
    927 #undef END
    928 #undef REG16
    929 #undef REG
    930 #undef LRI
    931 #undef NOP
    932 
    933 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
    934 {
    935 	/*
    936 	 * The gen12+ lists only have the registers we program in the basic
    937 	 * default state. We rely on the context image using relative
    938 	 * addressing to automatically fix up the register state between the
    939 	 * physical engines for the virtual engine.
    940 	 */
    941 	GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
    942 		   !intel_engine_has_relative_mmio(engine));
    943 
    944 	if (engine->class == RENDER_CLASS) {
    945 		if (INTEL_GEN(engine->i915) >= 12)
    946 			return gen12_rcs_offsets;
    947 		else if (INTEL_GEN(engine->i915) >= 11)
    948 			return gen11_rcs_offsets;
    949 		else if (INTEL_GEN(engine->i915) >= 9)
    950 			return gen9_rcs_offsets;
    951 		else
    952 			return gen8_rcs_offsets;
    953 	} else {
    954 		if (INTEL_GEN(engine->i915) >= 12)
    955 			return gen12_xcs_offsets;
    956 		else if (INTEL_GEN(engine->i915) >= 9)
    957 			return gen9_xcs_offsets;
    958 		else
    959 			return gen8_xcs_offsets;
    960 	}
    961 }
    962 
    963 static struct i915_request *
    964 __unwind_incomplete_requests(struct intel_engine_cs *engine)
    965 {
    966 	struct i915_request *rq, *rn, *active = NULL;
    967 	struct list_head *uninitialized_var(pl);
    968 	int prio = I915_PRIORITY_INVALID;
    969 
    970 	lockdep_assert_held(&engine->active.lock);
    971 
    972 	list_for_each_entry_safe_reverse(rq, rn,
    973 					 &engine->active.requests,
    974 					 sched.link) {
    975 		if (i915_request_completed(rq))
    976 			continue; /* XXX */
    977 
    978 		__i915_request_unsubmit(rq);
    979 
    980 		/*
    981 		 * Push the request back into the queue for later resubmission.
    982 		 * If this request is not native to this physical engine (i.e.
    983 		 * it came from a virtual source), push it back onto the virtual
    984 		 * engine so that it can be moved across onto another physical
    985 		 * engine as load dictates.
    986 		 */
    987 		if (likely(rq->execution_mask == engine->mask)) {
    988 			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
    989 			if (rq_prio(rq) != prio) {
    990 				prio = rq_prio(rq);
    991 				pl = i915_sched_lookup_priolist(engine, prio);
    992 			}
    993 			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
    994 
    995 			list_move(&rq->sched.link, pl);
    996 			set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
    997 
    998 			active = rq;
    999 		} else {
   1000 			struct intel_engine_cs *owner = rq->context->engine;
   1001 
   1002 			/*
   1003 			 * Decouple the virtual breadcrumb before moving it
   1004 			 * back to the virtual engine -- we don't want the
   1005 			 * request to complete in the background and try
   1006 			 * and cancel the breadcrumb on the virtual engine
   1007 			 * (instead of the old engine where it is linked)!
   1008 			 */
   1009 			if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
   1010 				     &rq->fence.flags)) {
   1011 				spin_lock_nested(&rq->lock,
   1012 						 SINGLE_DEPTH_NESTING);
   1013 				i915_request_cancel_breadcrumb(rq);
   1014 				spin_unlock(&rq->lock);
   1015 			}
   1016 			rq->engine = owner;
   1017 			owner->submit_request(rq);
   1018 			active = NULL;
   1019 		}
   1020 	}
   1021 
   1022 	return active;
   1023 }
   1024 
   1025 struct i915_request *
   1026 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
   1027 {
   1028 	struct intel_engine_cs *engine =
   1029 		container_of(execlists, typeof(*engine), execlists);
   1030 
   1031 	return __unwind_incomplete_requests(engine);
   1032 }
   1033 
   1034 static inline void
   1035 execlists_context_status_change(struct i915_request *rq, unsigned long status)
   1036 {
   1037 	/*
   1038 	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
   1039 	 * the compiler should eliminate this function as dead code.
   1040 	 */
   1041 	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
   1042 		return;
   1043 
   1044 	atomic_notifier_call_chain(&rq->engine->context_status_notifier,
   1045 				   status, rq);
   1046 }
   1047 
   1048 static void intel_engine_context_in(struct intel_engine_cs *engine)
   1049 {
   1050 	unsigned long flags;
   1051 
   1052 	if (READ_ONCE(engine->stats.enabled) == 0)
   1053 		return;
   1054 
   1055 	write_seqlock_irqsave(&engine->stats.lock, flags);
   1056 
   1057 	if (engine->stats.enabled > 0) {
   1058 		if (engine->stats.active++ == 0)
   1059 			engine->stats.start = ktime_get();
   1060 		GEM_BUG_ON(engine->stats.active == 0);
   1061 	}
   1062 
   1063 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
   1064 }
   1065 
   1066 static void intel_engine_context_out(struct intel_engine_cs *engine)
   1067 {
   1068 	unsigned long flags;
   1069 
   1070 	if (READ_ONCE(engine->stats.enabled) == 0)
   1071 		return;
   1072 
   1073 	write_seqlock_irqsave(&engine->stats.lock, flags);
   1074 
   1075 	if (engine->stats.enabled > 0) {
   1076 		ktime_t last;
   1077 
   1078 		if (engine->stats.active && --engine->stats.active == 0) {
   1079 			/*
   1080 			 * Decrement the active context count and, in case the GPU
   1081 			 * is now idle, add up to the running total.
   1082 			 */
   1083 			last = ktime_sub(ktime_get(), engine->stats.start);
   1084 
   1085 			engine->stats.total = ktime_add(engine->stats.total,
   1086 							last);
   1087 		} else if (engine->stats.active == 0) {
   1088 			/*
   1089 			 * After turning on engine stats, context out might be
   1090 			 * the first event in which case we account from the
   1091 			 * time stats gathering was turned on.
   1092 			 */
   1093 			last = ktime_sub(ktime_get(), engine->stats.enabled_at);
   1094 
   1095 			engine->stats.total = ktime_add(engine->stats.total,
   1096 							last);
   1097 		}
   1098 	}
   1099 
   1100 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
   1101 }
   1102 
   1103 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
   1104 {
   1105 	if (INTEL_GEN(engine->i915) >= 12)
   1106 		return 0x60;
   1107 	else if (INTEL_GEN(engine->i915) >= 9)
   1108 		return 0x54;
   1109 	else if (engine->class == RENDER_CLASS)
   1110 		return 0x58;
   1111 	else
   1112 		return -1;
   1113 }
   1114 
   1115 static void
   1116 execlists_check_context(const struct intel_context *ce,
   1117 			const struct intel_engine_cs *engine)
   1118 {
   1119 	const struct intel_ring *ring = ce->ring;
   1120 	u32 *regs = ce->lrc_reg_state;
   1121 	bool valid = true;
   1122 	int x;
   1123 
   1124 	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
   1125 		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
   1126 		       engine->name,
   1127 		       regs[CTX_RING_START],
   1128 		       i915_ggtt_offset(ring->vma));
   1129 		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
   1130 		valid = false;
   1131 	}
   1132 
   1133 	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
   1134 	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
   1135 		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
   1136 		       engine->name,
   1137 		       regs[CTX_RING_CTL],
   1138 		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
   1139 		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
   1140 		valid = false;
   1141 	}
   1142 
   1143 	x = lrc_ring_mi_mode(engine);
   1144 	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
   1145 		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
   1146 		       engine->name, regs[x + 1]);
   1147 		regs[x + 1] &= ~STOP_RING;
   1148 		regs[x + 1] |= STOP_RING << 16;
   1149 		valid = false;
   1150 	}
   1151 
   1152 	WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
   1153 }
   1154 
   1155 static void restore_default_state(struct intel_context *ce,
   1156 				  struct intel_engine_cs *engine)
   1157 {
   1158 	u32 *regs = ce->lrc_reg_state;
   1159 
   1160 	if (engine->pinned_default_state)
   1161 		memcpy(regs, /* skip restoring the vanilla PPHWSP */
   1162 		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
   1163 		       engine->context_size - PAGE_SIZE);
   1164 
   1165 	execlists_init_reg_state(regs, ce, engine, ce->ring, false);
   1166 }
   1167 
   1168 static void reset_active(struct i915_request *rq,
   1169 			 struct intel_engine_cs *engine)
   1170 {
   1171 	struct intel_context * const ce = rq->context;
   1172 	u32 head;
   1173 
   1174 	/*
   1175 	 * The executing context has been cancelled. We want to prevent
   1176 	 * further execution along this context and propagate the error on
   1177 	 * to anything depending on its results.
   1178 	 *
   1179 	 * In __i915_request_submit(), we apply the -EIO and remove the
   1180 	 * requests' payloads for any banned requests. But first, we must
   1181 	 * rewind the context back to the start of the incomplete request so
   1182 	 * that we do not jump back into the middle of the batch.
   1183 	 *
   1184 	 * We preserve the breadcrumbs and semaphores of the incomplete
   1185 	 * requests so that inter-timeline dependencies (i.e other timelines)
   1186 	 * remain correctly ordered. And we defer to __i915_request_submit()
   1187 	 * so that all asynchronous waits are correctly handled.
   1188 	 */
   1189 	ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
   1190 		     rq->fence.context, rq->fence.seqno);
   1191 
   1192 	/* On resubmission of the active request, payload will be scrubbed */
   1193 	if (i915_request_completed(rq))
   1194 		head = rq->tail;
   1195 	else
   1196 		head = active_request(ce->timeline, rq)->head;
   1197 	head = intel_ring_wrap(ce->ring, head);
   1198 
   1199 	/* Scrub the context image to prevent replaying the previous batch */
   1200 	restore_default_state(ce, engine);
   1201 	__execlists_update_reg_state(ce, engine, head);
   1202 
   1203 	/* We've switched away, so this should be a no-op, but intent matters */
   1204 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
   1205 }
   1206 
   1207 static inline struct intel_engine_cs *
   1208 __execlists_schedule_in(struct i915_request *rq)
   1209 {
   1210 	struct intel_engine_cs * const engine = rq->engine;
   1211 	struct intel_context * const ce = rq->context;
   1212 
   1213 	intel_context_get(ce);
   1214 
   1215 	if (unlikely(intel_context_is_banned(ce)))
   1216 		reset_active(rq, engine);
   1217 
   1218 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
   1219 		execlists_check_context(ce, engine);
   1220 
   1221 	if (ce->tag) {
   1222 		/* Use a fixed tag for OA and friends */
   1223 		ce->lrc_desc |= (u64)ce->tag << 32;
   1224 	} else {
   1225 		/* We don't need a strict matching tag, just different values */
   1226 		ce->lrc_desc &= ~GENMASK_ULL(47, 37);
   1227 		ce->lrc_desc |=
   1228 			(u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
   1229 			GEN11_SW_CTX_ID_SHIFT;
   1230 		BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
   1231 	}
   1232 
   1233 	__intel_gt_pm_get(engine->gt);
   1234 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
   1235 	intel_engine_context_in(engine);
   1236 
   1237 	return engine;
   1238 }
   1239 
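/*
 * Editorial note: ce->inflight carries the physical engine on which the
 * context is currently live, with the number of ELSP ports the context
 * occupies packed into the low bits of the pointer (hence the
 * ptr_inc()/ptr_dec()/ptr_unmask_bits() manipulation below).  Only the
 * first schedule_in and the last schedule_out do the heavy lifting;
 * intermediate submissions of the same context just adjust that count.
 */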
   1240 static inline struct i915_request *
   1241 execlists_schedule_in(struct i915_request *rq, int idx)
   1242 {
   1243 	struct intel_context * const ce = rq->context;
   1244 	struct intel_engine_cs *old;
   1245 
   1246 	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
   1247 	trace_i915_request_in(rq, idx);
   1248 
   1249 	old = READ_ONCE(ce->inflight);
   1250 	do {
   1251 		if (!old) {
   1252 			WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
   1253 			break;
   1254 		}
   1255 	} while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
   1256 
   1257 	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
   1258 	return i915_request_get(rq);
   1259 }
   1260 
   1261 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
   1262 {
   1263 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
   1264 	struct i915_request *next = READ_ONCE(ve->request);
   1265 
   1266 	if (next && next->execution_mask & ~rq->execution_mask)
   1267 		tasklet_schedule(&ve->base.execlists.tasklet);
   1268 }
   1269 
   1270 static inline void
   1271 __execlists_schedule_out(struct i915_request *rq,
   1272 			 struct intel_engine_cs * const engine)
   1273 {
   1274 	struct intel_context * const ce = rq->context;
   1275 
   1276 	/*
   1277 	 * NB process_csb() is not under the engine->active.lock and hence
   1278 	 * schedule_out can race with schedule_in meaning that we should
   1279 	 * refrain from doing non-trivial work here.
   1280 	 */
   1281 
   1282 	/*
   1283 	 * If we have just completed this context, the engine may now be
   1284 	 * idle and we want to re-enter powersaving.
   1285 	 */
   1286 	if (list_is_last(&rq->link, &ce->timeline->requests) &&
   1287 	    i915_request_completed(rq))
   1288 		intel_engine_add_retire(engine, ce->timeline);
   1289 
   1290 	intel_engine_context_out(engine);
   1291 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
   1292 	intel_gt_pm_put_async(engine->gt);
   1293 
   1294 	/*
   1295 	 * If this is part of a virtual engine, its next request may
   1296 	 * have been blocked waiting for access to the active context.
   1297 	 * We have to kick all the siblings again in case we need to
   1298 	 * switch (e.g. the next request is not runnable on this
   1299 	 * engine). Hopefully, we will already have submitted the next
   1300 	 * request before the tasklet runs and do not need to rebuild
   1301 	 * each virtual tree and kick everyone again.
   1302 	 */
   1303 	if (ce->engine != engine)
   1304 		kick_siblings(rq, ce);
   1305 
   1306 	intel_context_put(ce);
   1307 }
   1308 
   1309 static inline void
   1310 execlists_schedule_out(struct i915_request *rq)
   1311 {
   1312 	struct intel_context * const ce = rq->context;
   1313 	struct intel_engine_cs *cur, *old;
   1314 
   1315 	trace_i915_request_out(rq);
   1316 
   1317 	old = READ_ONCE(ce->inflight);
   1318 	do
   1319 		cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
   1320 	while (!try_cmpxchg(&ce->inflight, &old, cur));
   1321 	if (!cur)
   1322 		__execlists_schedule_out(rq, old);
   1323 
   1324 	i915_request_put(rq);
   1325 }
   1326 
   1327 static u64 execlists_update_context(struct i915_request *rq)
   1328 {
   1329 	struct intel_context *ce = rq->context;
   1330 	u64 desc = ce->lrc_desc;
   1331 	u32 tail, prev;
   1332 
   1333 	/*
   1334 	 * WaIdleLiteRestore:bdw,skl
   1335 	 *
   1336 	 * We should never submit the context with the same RING_TAIL twice
   1337 	 * just in case we submit an empty ring, which confuses the HW.
   1338 	 *
   1339 	 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
   1340 	 * the normal request to be able to always advance the RING_TAIL on
   1341 	 * subsequent resubmissions (for lite restore). Should that fail us,
   1342 	 * and we try and submit the same tail again, force the context
   1343 	 * reload.
   1344 	 *
   1345 	 * If we need to return to a preempted context, we need to skip the
   1346 	 * lite-restore and force it to reload the RING_TAIL. Otherwise, the
   1347 	 * HW has a tendency to ignore us rewinding the TAIL to the end of
   1348 	 * an earlier request.
   1349 	 */
   1350 	tail = intel_ring_set_tail(rq->ring, rq->tail);
   1351 	prev = ce->lrc_reg_state[CTX_RING_TAIL];
   1352 	if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
   1353 		desc |= CTX_DESC_FORCE_RESTORE;
   1354 	ce->lrc_reg_state[CTX_RING_TAIL] = tail;
   1355 	rq->tail = rq->wa_tail;
   1356 
   1357 	/*
   1358 	 * Make sure the context image is complete before we submit it to HW.
   1359 	 *
   1360 	 * Ostensibly, writes (including the WCB) should be flushed prior to
   1361 	 * an uncached write such as our mmio register access; the empirical
   1362 	 * evidence (esp. on Braswell) suggests that the WC write into memory
   1363 	 * may not be visible to the HW prior to the completion of the UC
   1364 	 * register write and that we may begin execution from the context
   1365 	 * before its image is complete leading to invalid PD chasing.
   1366 	 */
   1367 	wmb();
   1368 
   1369 	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
   1370 	return desc;
   1371 }
   1372 
   1373 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
   1374 {
   1375 #ifdef __NetBSD__
   1376 	if (execlists->ctrl_reg) {
   1377 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg + port * 2, lower_32_bits(desc));
   1378 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg + port * 2 + 1, upper_32_bits(desc));
   1379 	} else {
   1380 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg, upper_32_bits(desc));
   1381 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg, lower_32_bits(desc));
   1382 	}
   1383 #else
   1384 	if (execlists->ctrl_reg) {
   1385 		writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
   1386 		writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
   1387 	} else {
   1388 		writel(upper_32_bits(desc), execlists->submit_reg);
   1389 		writel(lower_32_bits(desc), execlists->submit_reg);
   1390 	}
   1391 #endif
   1392 }
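
/*
 * Note (editorial): with the ELSQ (ctrl_reg path, gen11+) each port is a
 * discrete 64bit slot and nothing happens until execlists_submit_ports()
 * writes EL_CTRL_LOAD below.  On the legacy ELSP path the descriptors are
 * streamed through a single register as a sequence of dword writes, upper
 * half before lower, ports in reverse order, and the submission is
 * presumed to take effect once the final (port 0, lower dword) write
 * lands -- hence no separate kick is required.
 */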
   1393 
   1394 static __maybe_unused void
   1395 trace_ports(const struct intel_engine_execlists *execlists,
   1396 	    const char *msg,
   1397 	    struct i915_request * const *ports)
   1398 {
   1399 	const struct intel_engine_cs *engine =
   1400 		const_container_of(execlists, typeof(*engine), execlists);
   1401 
   1402 	if (!ports[0])
   1403 		return;
   1404 
   1405 	ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
   1406 		     ports[0]->fence.context,
   1407 		     ports[0]->fence.seqno,
   1408 		     i915_request_completed(ports[0]) ? "!" :
   1409 		     i915_request_started(ports[0]) ? "*" :
   1410 		     "",
   1411 		     ports[1] ? ports[1]->fence.context : 0,
   1412 		     ports[1] ? ports[1]->fence.seqno : 0);
   1413 }
   1414 
   1415 static __maybe_unused bool
   1416 assert_pending_valid(const struct intel_engine_execlists *execlists,
   1417 		     const char *msg)
   1418 {
   1419 	struct i915_request * const *port, *rq;
   1420 	struct intel_context *ce = NULL;
   1421 
   1422 	trace_ports(execlists, msg, execlists->pending);
   1423 
   1424 	if (!execlists->pending[0]) {
   1425 		GEM_TRACE_ERR("Nothing pending for promotion!\n");
   1426 		return false;
   1427 	}
   1428 
   1429 	if (execlists->pending[execlists_num_ports(execlists)]) {
   1430 		GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
   1431 			      execlists_num_ports(execlists));
   1432 		return false;
   1433 	}
   1434 
   1435 	for (port = execlists->pending; (rq = *port); port++) {
   1436 		unsigned long flags;
   1437 		bool ok = true;
   1438 
   1439 		GEM_BUG_ON(!kref_read(&rq->fence.refcount));
   1440 		GEM_BUG_ON(!i915_request_is_active(rq));
   1441 
   1442 		if (ce == rq->context) {
   1443 			GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
   1444 				      ce->timeline->fence_context,
   1445 				      port - execlists->pending);
   1446 			return false;
   1447 		}
   1448 		ce = rq->context;
   1449 
   1450 		/* Hold tightly onto the lock to prevent concurrent retires! */
   1451 		if (!spin_trylock_irqsave(&rq->lock, flags))
   1452 			continue;
   1453 
   1454 		if (i915_request_completed(rq))
   1455 			goto unlock;
   1456 
   1457 		if (i915_active_is_idle(&ce->active) &&
   1458 		    !intel_context_is_barrier(ce)) {
   1459 			GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
   1460 				      ce->timeline->fence_context,
   1461 				      port - execlists->pending);
   1462 			ok = false;
   1463 			goto unlock;
   1464 		}
   1465 
   1466 		if (!i915_vma_is_pinned(ce->state)) {
   1467 			GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
   1468 				      ce->timeline->fence_context,
   1469 				      port - execlists->pending);
   1470 			ok = false;
   1471 			goto unlock;
   1472 		}
   1473 
   1474 		if (!i915_vma_is_pinned(ce->ring->vma)) {
   1475 			GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
   1476 				      ce->timeline->fence_context,
   1477 				      port - execlists->pending);
   1478 			ok = false;
   1479 			goto unlock;
   1480 		}
   1481 
   1482 unlock:
   1483 		spin_unlock_irqrestore(&rq->lock, flags);
   1484 		if (!ok)
   1485 			return false;
   1486 	}
   1487 
   1488 	return ce;
   1489 }
   1490 
   1491 static void execlists_submit_ports(struct intel_engine_cs *engine)
   1492 {
   1493 	struct intel_engine_execlists *execlists = &engine->execlists;
   1494 	unsigned int n;
   1495 
   1496 	GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
   1497 
   1498 	/*
   1499 	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
   1500 	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
   1501 	 * not be relinquished until the device is idle (see
   1502 	 * i915_gem_idle_work_handler()). As a precaution, we make sure
   1503 	 * that all ELSP are drained i.e. we have processed the CSB,
   1504 	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
   1505 	 */
   1506 	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
   1507 
   1508 	/*
   1509 	 * ELSQ note: the submit queue is not cleared after being submitted
   1510 	 * to the HW so we need to make sure we always clean it up. This is
   1511 	 * currently ensured by the fact that we always write the same number
   1512 	 * of elsq entries, keep this in mind before changing the loop below.
   1513 	 */
   1514 	for (n = execlists_num_ports(execlists); n--; ) {
   1515 		struct i915_request *rq = execlists->pending[n];
   1516 
   1517 		write_desc(execlists,
   1518 			   rq ? execlists_update_context(rq) : 0,
   1519 			   n);
   1520 	}
   1521 
   1522 	/* we need to manually load the submit queue */
   1523 	if (execlists->ctrl_reg)
   1524 #ifdef __NetBSD__
   1525 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->ctrl_reg, EL_CTRL_LOAD);
   1526 #else
   1527 		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
   1528 #endif
   1529 }
   1530 
   1531 static bool ctx_single_port_submission(const struct intel_context *ce)
   1532 {
   1533 	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
   1534 		intel_context_force_single_submission(ce));
   1535 }
   1536 
   1537 static bool can_merge_ctx(const struct intel_context *prev,
   1538 			  const struct intel_context *next)
   1539 {
   1540 	if (prev != next)
   1541 		return false;
   1542 
   1543 	if (ctx_single_port_submission(prev))
   1544 		return false;
   1545 
   1546 	return true;
   1547 }
   1548 
   1549 static bool can_merge_rq(const struct i915_request *prev,
   1550 			 const struct i915_request *next)
   1551 {
   1552 	GEM_BUG_ON(prev == next);
   1553 	GEM_BUG_ON(!assert_priority_queue(prev, next));
   1554 
   1555 	/*
   1556 	 * We do not submit known completed requests. Therefore if the next
   1557 	 * request is already completed, we can pretend to merge it in
   1558 	 * with the previous context (and we will skip updating the ELSP
   1559 	 * and tracking). Thus hopefully keeping the ELSP full with active
   1560 	 * contexts, despite the best efforts of preempt-to-busy to confuse
   1561 	 * us.
   1562 	 */
   1563 	if (i915_request_completed(next))
   1564 		return true;
   1565 
   1566 	if (unlikely((prev->fence.flags ^ next->fence.flags) &
   1567 		     (BIT(I915_FENCE_FLAG_NOPREEMPT) |
   1568 		      BIT(I915_FENCE_FLAG_SENTINEL))))
   1569 		return false;
   1570 
   1571 	if (!can_merge_ctx(prev->context, next->context))
   1572 		return false;
   1573 
   1574 	return true;
   1575 }
   1576 
   1577 static void virtual_update_register_offsets(u32 *regs,
   1578 					    struct intel_engine_cs *engine)
   1579 {
   1580 	set_offsets(regs, reg_offsets(engine), engine, false);
   1581 }
   1582 
   1583 static bool virtual_matches(const struct virtual_engine *ve,
   1584 			    const struct i915_request *rq,
   1585 			    const struct intel_engine_cs *engine)
   1586 {
   1587 	const struct intel_engine_cs *inflight;
   1588 
   1589 	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
   1590 		return false;
   1591 
   1592 	/*
   1593 	 * We track when the HW has completed saving the context image
   1594 	 * (i.e. when we have seen the final CS event switching out of
   1595 	 * the context) and must not overwrite the context image before
   1596 	 * then. This restricts us to only using the active engine
   1597 	 * while the previous virtualized request is inflight (so
   1598 	 * we reuse the register offsets). This is a very small
    1599 	 * hysteresis on the greedy selection algorithm.
   1600 	 */
   1601 	inflight = intel_context_inflight(&ve->context);
   1602 	if (inflight && inflight != engine)
   1603 		return false;
   1604 
   1605 	return true;
   1606 }
   1607 
   1608 static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
   1609 				     struct intel_engine_cs *engine)
   1610 {
   1611 	struct intel_engine_cs *old = ve->siblings[0];
   1612 
   1613 	/* All unattached (rq->engine == old) must already be completed */
   1614 
   1615 	spin_lock(&old->breadcrumbs.irq_lock);
   1616 	if (!list_empty(&ve->context.signal_link)) {
   1617 		list_move_tail(&ve->context.signal_link,
   1618 			       &engine->breadcrumbs.signalers);
   1619 		intel_engine_signal_breadcrumbs(engine);
   1620 	}
   1621 	spin_unlock(&old->breadcrumbs.irq_lock);
   1622 }
   1623 
   1624 static struct i915_request *
   1625 last_active(const struct intel_engine_execlists *execlists)
   1626 {
   1627 	struct i915_request * const *last = READ_ONCE(execlists->active);
   1628 
   1629 	while (*last && i915_request_completed(*last))
   1630 		last++;
   1631 
   1632 	return *last;
   1633 }
   1634 
   1635 #define for_each_waiter(p__, rq__) \
   1636 	list_for_each_entry_lockless(p__, \
   1637 				     &(rq__)->sched.waiters_list, \
   1638 				     wait_link)
   1639 
   1640 static void defer_request(struct i915_request *rq, struct list_head * const pl)
   1641 {
   1642 	LIST_HEAD(list);
   1643 
   1644 	/*
   1645 	 * We want to move the interrupted request to the back of
   1646 	 * the round-robin list (i.e. its priority level), but
    1647 	 * in doing so, we must also move all in-flight requests that were
    1648 	 * waiting on the interrupted request, so that they are run
    1649 	 * after it again.
   1650 	 */
   1651 	do {
   1652 		struct i915_dependency *p;
   1653 
   1654 		GEM_BUG_ON(i915_request_is_active(rq));
   1655 		list_move_tail(&rq->sched.link, pl);
   1656 
   1657 		for_each_waiter(p, rq) {
   1658 			struct i915_request *w =
   1659 				container_of(p->waiter, typeof(*w), sched);
   1660 
   1661 			/* Leave semaphores spinning on the other engines */
   1662 			if (w->engine != rq->engine)
   1663 				continue;
   1664 
   1665 			/* No waiter should start before its signaler */
   1666 			GEM_BUG_ON(i915_request_started(w) &&
   1667 				   !i915_request_completed(rq));
   1668 
   1669 			GEM_BUG_ON(i915_request_is_active(w));
   1670 			if (!i915_request_is_ready(w))
   1671 				continue;
   1672 
   1673 			if (rq_prio(w) < rq_prio(rq))
   1674 				continue;
   1675 
   1676 			GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
   1677 			list_move_tail(&w->sched.link, &list);
   1678 		}
   1679 
   1680 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
   1681 	} while (rq);
   1682 }
   1683 
   1684 static void defer_active(struct intel_engine_cs *engine)
   1685 {
   1686 	struct i915_request *rq;
   1687 
   1688 	rq = __unwind_incomplete_requests(engine);
   1689 	if (!rq)
   1690 		return;
   1691 
   1692 	defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
   1693 }
   1694 
   1695 static bool
   1696 need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
   1697 {
   1698 	int hint;
   1699 
   1700 	if (!intel_engine_has_timeslices(engine))
   1701 		return false;
   1702 
   1703 	if (list_is_last(&rq->sched.link, &engine->active.requests))
   1704 		return false;
   1705 
   1706 	hint = max(rq_prio(list_next_entry(rq, sched.link)),
   1707 		   engine->execlists.queue_priority_hint);
   1708 
   1709 	return hint >= effective_prio(rq);
   1710 }
   1711 
   1712 static int
   1713 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
   1714 {
   1715 	if (list_is_last(&rq->sched.link, &engine->active.requests))
   1716 		return INT_MIN;
   1717 
   1718 	return rq_prio(list_next_entry(rq, sched.link));
   1719 }
   1720 
   1721 static inline unsigned long
   1722 timeslice(const struct intel_engine_cs *engine)
   1723 {
   1724 	return READ_ONCE(engine->props.timeslice_duration_ms);
   1725 }
   1726 
   1727 static unsigned long
   1728 active_timeslice(const struct intel_engine_cs *engine)
   1729 {
   1730 	const struct i915_request *rq = *engine->execlists.active;
   1731 
   1732 	if (!rq || i915_request_completed(rq))
   1733 		return 0;
   1734 
   1735 	if (engine->execlists.switch_priority_hint < effective_prio(rq))
   1736 		return 0;
   1737 
   1738 	return timeslice(engine);
   1739 }
   1740 
   1741 static void set_timeslice(struct intel_engine_cs *engine)
   1742 {
   1743 	if (!intel_engine_has_timeslices(engine))
   1744 		return;
   1745 
   1746 	set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
   1747 }
   1748 
   1749 static void record_preemption(struct intel_engine_execlists *execlists)
   1750 {
   1751 	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
   1752 }
   1753 
   1754 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
   1755 {
   1756 	struct i915_request *rq;
   1757 
   1758 	rq = last_active(&engine->execlists);
   1759 	if (!rq)
   1760 		return 0;
   1761 
   1762 	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
   1763 	if (unlikely(intel_context_is_banned(rq->context)))
   1764 		return 1;
   1765 
   1766 	return READ_ONCE(engine->props.preempt_timeout_ms);
   1767 }
   1768 
   1769 static void set_preempt_timeout(struct intel_engine_cs *engine)
   1770 {
   1771 	if (!intel_engine_has_preempt_reset(engine))
   1772 		return;
   1773 
   1774 	set_timer_ms(&engine->execlists.preempt,
   1775 		     active_preempt_timeout(engine));
   1776 }
   1777 
   1778 static inline void clear_ports(struct i915_request **ports, int count)
   1779 {
   1780 	memset_p((void **)ports, NULL, count);
   1781 }
   1782 
   1783 static void execlists_dequeue(struct intel_engine_cs *engine)
   1784 {
   1785 	struct intel_engine_execlists * const execlists = &engine->execlists;
   1786 	struct i915_request **port = execlists->pending;
   1787 	struct i915_request ** const last_port = port + execlists->port_mask;
   1788 	struct i915_request *last;
   1789 	struct rb_node *rb;
   1790 	bool submit = false;
   1791 
   1792 	/*
   1793 	 * Hardware submission is through 2 ports. Conceptually each port
   1794 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
   1795 	 * static for a context, and unique to each, so we only execute
   1796 	 * requests belonging to a single context from each ring. RING_HEAD
   1797 	 * is maintained by the CS in the context image, it marks the place
   1798 	 * where it got up to last time, and through RING_TAIL we tell the CS
   1799 	 * where we want to execute up to this time.
   1800 	 *
   1801 	 * In this list the requests are in order of execution. Consecutive
   1802 	 * requests from the same context are adjacent in the ringbuffer. We
   1803 	 * can combine these requests into a single RING_TAIL update:
   1804 	 *
   1805 	 *              RING_HEAD...req1...req2
   1806 	 *                                    ^- RING_TAIL
   1807 	 * since to execute req2 the CS must first execute req1.
   1808 	 *
   1809 	 * Our goal then is to point each port to the end of a consecutive
   1810 	 * sequence of requests as being the most optimal (fewest wake ups
   1811 	 * and context switches) submission.
   1812 	 */
   1813 
   1814 	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
   1815 		struct virtual_engine *ve =
   1816 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
   1817 		struct i915_request *rq = READ_ONCE(ve->request);
   1818 
    1819 		if (!rq) { /* lazily clean up after another engine handled rq */
   1820 			rb_erase_cached(rb, &execlists->virtual);
   1821 			RB_CLEAR_NODE(rb);
   1822 			rb = rb_first_cached(&execlists->virtual);
   1823 			continue;
   1824 		}
   1825 
   1826 		if (!virtual_matches(ve, rq, engine)) {
   1827 			rb = rb_next(rb);
   1828 			continue;
   1829 		}
   1830 
   1831 		break;
   1832 	}
   1833 
   1834 	/*
   1835 	 * If the queue is higher priority than the last
   1836 	 * request in the currently active context, submit afresh.
   1837 	 * We will resubmit again afterwards in case we need to split
   1838 	 * the active context to interject the preemption request,
   1839 	 * i.e. we will retrigger preemption following the ack in case
   1840 	 * of trouble.
   1841 	 */
   1842 	last = last_active(execlists);
   1843 	if (last) {
   1844 		if (need_preempt(engine, last, rb)) {
   1845 			ENGINE_TRACE(engine,
   1846 				     "preempting last=%llx:%lld, prio=%d, hint=%d\n",
   1847 				     last->fence.context,
   1848 				     last->fence.seqno,
   1849 				     last->sched.attr.priority,
   1850 				     execlists->queue_priority_hint);
   1851 			record_preemption(execlists);
   1852 
   1853 			/*
   1854 			 * Don't let the RING_HEAD advance past the breadcrumb
   1855 			 * as we unwind (and until we resubmit) so that we do
   1856 			 * not accidentally tell it to go backwards.
   1857 			 */
   1858 			ring_set_paused(engine, 1);
   1859 
   1860 			/*
   1861 			 * Note that we have not stopped the GPU at this point,
   1862 			 * so we are unwinding the incomplete requests as they
   1863 			 * remain inflight and so by the time we do complete
   1864 			 * the preemption, some of the unwound requests may
   1865 			 * complete!
   1866 			 */
   1867 			__unwind_incomplete_requests(engine);
   1868 
   1869 			last = NULL;
   1870 		} else if (need_timeslice(engine, last) &&
   1871 			   timer_expired(&engine->execlists.timer)) {
   1872 			ENGINE_TRACE(engine,
   1873 				     "expired last=%llx:%lld, prio=%d, hint=%d\n",
   1874 				     last->fence.context,
   1875 				     last->fence.seqno,
   1876 				     last->sched.attr.priority,
   1877 				     execlists->queue_priority_hint);
   1878 
   1879 			ring_set_paused(engine, 1);
   1880 			defer_active(engine);
   1881 
   1882 			/*
   1883 			 * Unlike for preemption, if we rewind and continue
   1884 			 * executing the same context as previously active,
   1885 			 * the order of execution will remain the same and
   1886 			 * the tail will only advance. We do not need to
   1887 			 * force a full context restore, as a lite-restore
   1888 			 * is sufficient to resample the monotonic TAIL.
   1889 			 *
   1890 			 * If we switch to any other context, similarly we
   1891 			 * will not rewind TAIL of current context, and
   1892 			 * normal save/restore will preserve state and allow
   1893 			 * us to later continue executing the same request.
   1894 			 */
   1895 			last = NULL;
   1896 		} else {
   1897 			/*
   1898 			 * Otherwise if we already have a request pending
   1899 			 * for execution after the current one, we can
   1900 			 * just wait until the next CS event before
   1901 			 * queuing more. In either case we will force a
   1902 			 * lite-restore preemption event, but if we wait
   1903 			 * we hopefully coalesce several updates into a single
   1904 			 * submission.
   1905 			 */
   1906 			if (!list_is_last(&last->sched.link,
   1907 					  &engine->active.requests)) {
   1908 				/*
   1909 				 * Even if ELSP[1] is occupied and not worthy
   1910 				 * of timeslices, our queue might be.
   1911 				 */
   1912 				if (!execlists->timer.expires &&
   1913 				    need_timeslice(engine, last))
   1914 					set_timer_ms(&execlists->timer,
   1915 						     timeslice(engine));
   1916 
   1917 				return;
   1918 			}
   1919 		}
   1920 	}
   1921 
   1922 	while (rb) { /* XXX virtual is always taking precedence */
   1923 		struct virtual_engine *ve =
   1924 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
   1925 		struct i915_request *rq;
   1926 
   1927 		spin_lock(&ve->base.active.lock);
   1928 
   1929 		rq = ve->request;
   1930 		if (unlikely(!rq)) { /* lost the race to a sibling */
   1931 			spin_unlock(&ve->base.active.lock);
   1932 			rb_erase_cached(rb, &execlists->virtual);
   1933 			RB_CLEAR_NODE(rb);
   1934 			rb = rb_first_cached(&execlists->virtual);
   1935 			continue;
   1936 		}
   1937 
   1938 		GEM_BUG_ON(rq != ve->request);
   1939 		GEM_BUG_ON(rq->engine != &ve->base);
   1940 		GEM_BUG_ON(rq->context != &ve->context);
   1941 
   1942 		if (rq_prio(rq) >= queue_prio(execlists)) {
   1943 			if (!virtual_matches(ve, rq, engine)) {
   1944 				spin_unlock(&ve->base.active.lock);
   1945 				rb = rb_next(rb);
   1946 				continue;
   1947 			}
   1948 
   1949 			if (last && !can_merge_rq(last, rq)) {
   1950 				spin_unlock(&ve->base.active.lock);
   1951 				return; /* leave this for another */
   1952 			}
   1953 
   1954 			ENGINE_TRACE(engine,
   1955 				     "virtual rq=%llx:%lld%s, new engine? %s\n",
   1956 				     rq->fence.context,
   1957 				     rq->fence.seqno,
   1958 				     i915_request_completed(rq) ? "!" :
   1959 				     i915_request_started(rq) ? "*" :
   1960 				     "",
   1961 				     yesno(engine != ve->siblings[0]));
   1962 
   1963 			ve->request = NULL;
   1964 			ve->base.execlists.queue_priority_hint = INT_MIN;
   1965 			rb_erase_cached(rb, &execlists->virtual);
   1966 			RB_CLEAR_NODE(rb);
   1967 
   1968 			GEM_BUG_ON(!(rq->execution_mask & engine->mask));
   1969 			rq->engine = engine;
   1970 
   1971 			if (engine != ve->siblings[0]) {
   1972 				u32 *regs = ve->context.lrc_reg_state;
   1973 				unsigned int n;
   1974 
   1975 				GEM_BUG_ON(READ_ONCE(ve->context.inflight));
   1976 
   1977 				if (!intel_engine_has_relative_mmio(engine))
   1978 					virtual_update_register_offsets(regs,
   1979 									engine);
   1980 
   1981 				if (!list_empty(&ve->context.signals))
   1982 					virtual_xfer_breadcrumbs(ve, engine);
   1983 
   1984 				/*
   1985 				 * Move the bound engine to the top of the list
   1986 				 * for future execution. We then kick this
   1987 				 * tasklet first before checking others, so that
   1988 				 * we preferentially reuse this set of bound
   1989 				 * registers.
   1990 				 */
   1991 				for (n = 1; n < ve->num_siblings; n++) {
   1992 					if (ve->siblings[n] == engine) {
   1993 						swap(ve->siblings[n],
   1994 						     ve->siblings[0]);
   1995 						break;
   1996 					}
   1997 				}
   1998 
   1999 				GEM_BUG_ON(ve->siblings[0] != engine);
   2000 			}
   2001 
   2002 			if (__i915_request_submit(rq)) {
   2003 				submit = true;
   2004 				last = rq;
   2005 			}
   2006 			i915_request_put(rq);
   2007 
   2008 			/*
   2009 			 * Hmm, we have a bunch of virtual engine requests,
   2010 			 * but the first one was already completed (thanks
   2011 			 * preempt-to-busy!). Keep looking at the veng queue
   2012 			 * until we have no more relevant requests (i.e.
   2013 			 * the normal submit queue has higher priority).
   2014 			 */
   2015 			if (!submit) {
   2016 				spin_unlock(&ve->base.active.lock);
   2017 				rb = rb_first_cached(&execlists->virtual);
   2018 				continue;
   2019 			}
   2020 		}
   2021 
   2022 		spin_unlock(&ve->base.active.lock);
   2023 		break;
   2024 	}
   2025 
   2026 	while ((rb = rb_first_cached(&execlists->queue))) {
   2027 		struct i915_priolist *p = to_priolist(rb);
   2028 		struct i915_request *rq, *rn;
   2029 		int i;
   2030 
   2031 		priolist_for_each_request_consume(rq, rn, p, i) {
   2032 			bool merge = true;
   2033 
   2034 			/*
   2035 			 * Can we combine this request with the current port?
   2036 			 * It has to be the same context/ringbuffer and not
   2037 			 * have any exceptions (e.g. GVT saying never to
   2038 			 * combine contexts).
   2039 			 *
   2040 			 * If we can combine the requests, we can execute both
   2041 			 * by updating the RING_TAIL to point to the end of the
   2042 			 * second request, and so we never need to tell the
   2043 			 * hardware about the first.
   2044 			 */
   2045 			if (last && !can_merge_rq(last, rq)) {
   2046 				/*
   2047 				 * If we are on the second port and cannot
   2048 				 * combine this request with the last, then we
   2049 				 * are done.
   2050 				 */
   2051 				if (port == last_port)
   2052 					goto done;
   2053 
   2054 				/*
   2055 				 * We must not populate both ELSP[] with the
   2056 				 * same LRCA, i.e. we must submit 2 different
   2057 				 * contexts if we submit 2 ELSP.
   2058 				 */
   2059 				if (last->context == rq->context)
   2060 					goto done;
   2061 
   2062 				if (i915_request_has_sentinel(last))
   2063 					goto done;
   2064 
   2065 				/*
   2066 				 * If GVT overrides us we only ever submit
   2067 				 * port[0], leaving port[1] empty. Note that we
   2068 				 * also have to be careful that we don't queue
   2069 				 * the same context (even though a different
   2070 				 * request) to the second port.
   2071 				 */
   2072 				if (ctx_single_port_submission(last->context) ||
   2073 				    ctx_single_port_submission(rq->context))
   2074 					goto done;
   2075 
   2076 				merge = false;
   2077 			}
   2078 
   2079 			if (__i915_request_submit(rq)) {
   2080 				if (!merge) {
   2081 					*port = execlists_schedule_in(last, port - execlists->pending);
   2082 					port++;
   2083 					last = NULL;
   2084 				}
   2085 
   2086 				GEM_BUG_ON(last &&
   2087 					   !can_merge_ctx(last->context,
   2088 							  rq->context));
   2089 
   2090 				submit = true;
   2091 				last = rq;
   2092 			}
   2093 		}
   2094 
   2095 		rb_erase_cached(&p->node, &execlists->queue);
   2096 		i915_priolist_free(p);
   2097 	}
   2098 
   2099 done:
   2100 	/*
   2101 	 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
   2102 	 *
   2103 	 * We choose the priority hint such that if we add a request of greater
   2104 	 * priority than this, we kick the submission tasklet to decide on
   2105 	 * the right order of submitting the requests to hardware. We must
   2106 	 * also be prepared to reorder requests as they are in-flight on the
   2107 	 * HW. We derive the priority hint then as the first "hole" in
   2108 	 * the HW submission ports and if there are no available slots,
   2109 	 * the priority of the lowest executing request, i.e. last.
   2110 	 *
   2111 	 * When we do receive a higher priority request ready to run from the
   2112 	 * user, see queue_request(), the priority hint is bumped to that
   2113 	 * request triggering preemption on the next dequeue (or subsequent
   2114 	 * interrupt for secondary ports).
   2115 	 */
   2116 	execlists->queue_priority_hint = queue_prio(execlists);
   2117 
   2118 	if (submit) {
   2119 		*port = execlists_schedule_in(last, port - execlists->pending);
   2120 		execlists->switch_priority_hint =
   2121 			switch_prio(engine, *execlists->pending);
   2122 
   2123 		/*
   2124 		 * Skip if we ended up with exactly the same set of requests,
   2125 		 * e.g. trying to timeslice a pair of ordered contexts
   2126 		 */
   2127 		if (!memcmp(execlists->active, execlists->pending,
   2128 			    (port - execlists->pending + 1) * sizeof(*port))) {
   2129 			do
   2130 				execlists_schedule_out(fetch_and_zero(port));
   2131 			while (port-- != execlists->pending);
   2132 
   2133 			goto skip_submit;
   2134 		}
   2135 		clear_ports(port + 1, last_port - port);
   2136 
   2137 		execlists_submit_ports(engine);
   2138 		set_preempt_timeout(engine);
   2139 	} else {
   2140 skip_submit:
   2141 		ring_set_paused(engine, 0);
   2142 	}
   2143 }
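
/*
 * Illustrative sketch only (not part of the driver): a toy model of the
 * port-coalescing rule described in execlists_dequeue() above.  Requests
 * are represented purely by their context id; consecutive requests from
 * the same context collapse into a single ELSP port (a single RING_TAIL
 * update), and at most two distinct contexts are put in flight.  The real
 * can_merge_rq() additionally rejects merges across NOPREEMPT/SENTINEL
 * fence flags and GVT single-port contexts.  The helper name
 * toy_fill_ports() is hypothetical.
 */
static inline int toy_fill_ports(const int *ctx_ids, int nreq, int ports[2])
{
	int nports = 0;
	int i;

	for (i = 0; i < nreq; i++) {
		/* Same context as the current port: just extend RING_TAIL. */
		if (nports && ports[nports - 1] == ctx_ids[i])
			continue;

		/* Both ports occupied by other contexts: stop dequeuing. */
		if (nports == 2)
			break;

		ports[nports++] = ctx_ids[i];
	}

	return nports; /* e.g. contexts {A, A, B, A} -> ports {A, B} */
}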
   2144 
   2145 static void
   2146 cancel_port_requests(struct intel_engine_execlists * const execlists)
   2147 {
   2148 	struct i915_request * const *port;
   2149 
   2150 	for (port = execlists->pending; *port; port++)
   2151 		execlists_schedule_out(*port);
   2152 	clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
   2153 
   2154 	/* Mark the end of active before we overwrite *active */
   2155 	for (port = xchg(&execlists->active, execlists->pending); *port; port++)
   2156 		execlists_schedule_out(*port);
   2157 	clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
   2158 
   2159 	WRITE_ONCE(execlists->active, execlists->inflight);
   2160 }
   2161 
   2162 static inline void
   2163 invalidate_csb_entries(const u32 *first, const u32 *last)
   2164 {
   2165 	clflush((void *)first);
   2166 	clflush((void *)last);
   2167 }
   2168 
   2169 static inline bool
   2170 reset_in_progress(const struct intel_engine_execlists *execlists)
   2171 {
   2172 	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
   2173 }
   2174 
   2175 /*
   2176  * Starting with Gen12, the status has a new format:
   2177  *
   2178  *     bit  0:     switched to new queue
   2179  *     bit  1:     reserved
   2180  *     bit  2:     semaphore wait mode (poll or signal), only valid when
   2181  *                 switch detail is set to "wait on semaphore"
   2182  *     bits 3-5:   engine class
   2183  *     bits 6-11:  engine instance
   2184  *     bits 12-14: reserved
   2185  *     bits 15-25: sw context id of the lrc the GT switched to
   2186  *     bits 26-31: sw counter of the lrc the GT switched to
   2187  *     bits 32-35: context switch detail
   2188  *                  - 0: ctx complete
   2189  *                  - 1: wait on sync flip
   2190  *                  - 2: wait on vblank
   2191  *                  - 3: wait on scanline
   2192  *                  - 4: wait on semaphore
   2193  *                  - 5: context preempted (not on SEMAPHORE_WAIT or
   2194  *                       WAIT_FOR_EVENT)
   2195  *     bit  36:    reserved
   2196  *     bits 37-43: wait detail (for switch detail 1 to 4)
   2197  *     bits 44-46: reserved
   2198  *     bits 47-57: sw context id of the lrc the GT switched away from
   2199  *     bits 58-63: sw counter of the lrc the GT switched away from
   2200  */
   2201 static inline bool
   2202 gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
   2203 {
   2204 	u32 lower_dw = csb[0];
   2205 	u32 upper_dw = csb[1];
   2206 	bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
   2207 	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
   2208 	bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
   2209 
   2210 	/*
   2211 	 * The context switch detail is not guaranteed to be 5 when a preemption
   2212 	 * occurs, so we can't just check for that. The check below works for
   2213 	 * all the cases we care about, including preemptions of WAIT
   2214 	 * instructions and lite-restore. Preempt-to-idle via the CTRL register
   2215 	 * would require some extra handling, but we don't support that.
   2216 	 */
   2217 	if (!ctx_away_valid || new_queue) {
   2218 		GEM_BUG_ON(!ctx_to_valid);
   2219 		return true;
   2220 	}
   2221 
   2222 	/*
   2223 	 * switch detail = 5 is covered by the case above and we do not expect a
   2224 	 * context switch on an unsuccessful wait instruction since we always
   2225 	 * use polling mode.
   2226 	 */
   2227 	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
   2228 	return false;
   2229 }
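
/*
 * Illustrative sketch only (not part of the driver): extracting the Gen12
 * CSB fields exactly as laid out in the comment above, from the two dwords
 * of one CSB entry.  These helper names are hypothetical; gen12_csb_parse()
 * above only needs GEN12_CSB_CTX_VALID(),
 * GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE and GEN12_CTX_SWITCH_DETAIL().
 */
static inline u64 example_csb_entry(const u32 *csb)
{
	return (u64)csb[1] << 32 | csb[0];
}

static inline u32 example_csb_to_ctx_id(u64 entry)
{
	return (entry >> 15) & 0x7ff;	/* bits 15-25: sw ctx id switched to */
}

static inline u32 example_csb_switch_detail(u64 entry)
{
	return (entry >> 32) & 0xf;	/* bits 32-35: context switch detail */
}

static inline u32 example_csb_away_ctx_id(u64 entry)
{
	return (entry >> 47) & 0x7ff;	/* bits 47-57: sw ctx id switched away from */
}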
   2230 
   2231 static inline bool
   2232 gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
   2233 {
   2234 	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
   2235 }
   2236 
   2237 static void process_csb(struct intel_engine_cs *engine)
   2238 {
   2239 	struct intel_engine_execlists * const execlists = &engine->execlists;
   2240 	const u32 * const buf = execlists->csb_status;
   2241 	const u8 num_entries = execlists->csb_size;
   2242 	u8 head, tail;
   2243 
   2244 	/*
   2245 	 * As we modify our execlists state tracking we require exclusive
   2246 	 * access. Either we are inside the tasklet, or the tasklet is disabled
   2247 	 * and we assume that is only inside the reset paths and so serialised.
   2248 	 */
   2249 	GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
   2250 		   !reset_in_progress(execlists));
   2251 	GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
   2252 
   2253 	/*
   2254 	 * Note that csb_write, csb_status may be either in HWSP or mmio.
   2255 	 * When reading from the csb_write mmio register, we have to be
   2256 	 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
   2257 	 * the low 4bits. As it happens we know the next 4bits are always
    2258 	 * the low 4 bits. As it happens we know the next 4 bits are always
    2259 	 * zero and so we can simply mask off the low u8 of the register
   2260 	 * to use explicit shifting and masking, and probably bifurcating
   2261 	 * the code to handle the legacy mmio read).
   2262 	 */
   2263 	head = execlists->csb_head;
   2264 	tail = READ_ONCE(*execlists->csb_write);
   2265 	ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
   2266 	if (unlikely(head == tail))
   2267 		return;
   2268 
   2269 	/*
   2270 	 * Hopefully paired with a wmb() in HW!
   2271 	 *
   2272 	 * We must complete the read of the write pointer before any reads
   2273 	 * from the CSB, so that we do not see stale values. Without an rmb
   2274 	 * (lfence) the HW may speculatively perform the CSB[] reads *before*
   2275 	 * we perform the READ_ONCE(*csb_write).
   2276 	 */
   2277 	rmb();
   2278 
   2279 	do {
   2280 		bool promote;
   2281 
   2282 		if (++head == num_entries)
   2283 			head = 0;
   2284 
   2285 		/*
   2286 		 * We are flying near dragons again.
   2287 		 *
   2288 		 * We hold a reference to the request in execlist_port[]
   2289 		 * but no more than that. We are operating in softirq
   2290 		 * context and so cannot hold any mutex or sleep. That
    2291 	 * prevents us from stopping the requests we are processing
   2292 		 * in port[] from being retired simultaneously (the
   2293 		 * breadcrumb will be complete before we see the
   2294 		 * context-switch). As we only hold the reference to the
   2295 		 * request, any pointer chasing underneath the request
   2296 		 * is subject to a potential use-after-free. Thus we
   2297 		 * store all of the bookkeeping within port[] as
   2298 		 * required, and avoid using unguarded pointers beneath
   2299 		 * request itself. The same applies to the atomic
   2300 		 * status notifier.
   2301 		 */
   2302 
   2303 		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
   2304 			     head, buf[2 * head + 0], buf[2 * head + 1]);
   2305 
   2306 		if (INTEL_GEN(engine->i915) >= 12)
   2307 			promote = gen12_csb_parse(execlists, buf + 2 * head);
   2308 		else
   2309 			promote = gen8_csb_parse(execlists, buf + 2 * head);
   2310 		if (promote) {
   2311 			struct i915_request * const *old = execlists->active;
   2312 
   2313 			/* Point active to the new ELSP; prevent overwriting */
   2314 			WRITE_ONCE(execlists->active, execlists->pending);
   2315 
   2316 			if (!inject_preempt_hang(execlists))
   2317 				ring_set_paused(engine, 0);
   2318 
   2319 			/* cancel old inflight, prepare for switch */
   2320 			trace_ports(execlists, "preempted", old);
   2321 			while (*old)
   2322 				execlists_schedule_out(*old++);
   2323 
   2324 			/* switch pending to inflight */
   2325 			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
   2326 			WRITE_ONCE(execlists->active,
   2327 				   memcpy(execlists->inflight,
   2328 					  execlists->pending,
   2329 					  execlists_num_ports(execlists) *
   2330 					  sizeof(*execlists->pending)));
   2331 
   2332 			WRITE_ONCE(execlists->pending[0], NULL);
   2333 		} else {
   2334 			GEM_BUG_ON(!*execlists->active);
   2335 
   2336 			/* port0 completed, advanced to port1 */
   2337 			trace_ports(execlists, "completed", execlists->active);
   2338 
   2339 			/*
   2340 			 * We rely on the hardware being strongly
   2341 			 * ordered, that the breadcrumb write is
   2342 			 * coherent (visible from the CPU) before the
   2343 			 * user interrupt and CSB is processed.
   2344 			 */
   2345 			GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
   2346 				   !reset_in_progress(execlists));
   2347 			execlists_schedule_out(*execlists->active++);
   2348 
   2349 			GEM_BUG_ON(execlists->active - execlists->inflight >
   2350 				   execlists_num_ports(execlists));
   2351 		}
   2352 	} while (head != tail);
   2353 
   2354 	execlists->csb_head = head;
   2355 	set_timeslice(engine);
   2356 
   2357 	/*
   2358 	 * Gen11 has proven to fail wrt global observation point between
   2359 	 * entry and tail update, failing on the ordering and thus
   2360 	 * we see an old entry in the context status buffer.
   2361 	 *
   2362 	 * Forcibly evict out entries for the next gpu csb update,
    2363 	 * to increase the odds that we get fresh entries with non-working
    2364 	 * hardware. The cost of doing so comes out mostly in the wash,
    2365 	 * as hardware, working or not, will need to do the
   2366 	 * invalidation before.
   2367 	 */
   2368 	invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
   2369 }
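
/*
 * Illustrative sketch only (not part of the driver): the generic consumption
 * pattern used by process_csb() above for a HW-written ring of entries.
 * Read the producer's write pointer once, issue a read barrier so the
 * entries are not speculatively read before the pointer, then walk
 * head..tail and publish the new head.  The toy structure uses one u32 per
 * entry (the real CSB uses two) and truncates the write pointer to its low
 * byte, mirroring the masking note in process_csb(); names are hypothetical.
 */
struct example_ring {
	const u32 *entries;
	const u32 *write_ptr;	/* updated by the producer (HW) */
	u8 head;
	u8 size;
};

static inline void example_consume(struct example_ring *r, void (*handle)(u32))
{
	u8 head = r->head;
	u8 tail = READ_ONCE(*r->write_ptr);

	if (head == tail)
		return;

	rmb();	/* order the entry reads after the write-pointer read */

	do {
		if (++head == r->size)
			head = 0;
		handle(r->entries[head]);
	} while (head != tail);

	r->head = head;	/* publish the new consumer position */
}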
   2370 
   2371 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
   2372 {
   2373 	lockdep_assert_held(&engine->active.lock);
   2374 	if (!engine->execlists.pending[0]) {
   2375 		rcu_read_lock(); /* protect peeking at execlists->active */
   2376 		execlists_dequeue(engine);
   2377 		rcu_read_unlock();
   2378 	}
   2379 }
   2380 
   2381 static void __execlists_hold(struct i915_request *rq)
   2382 {
   2383 	LIST_HEAD(list);
   2384 
   2385 	do {
   2386 		struct i915_dependency *p;
   2387 
   2388 		if (i915_request_is_active(rq))
   2389 			__i915_request_unsubmit(rq);
   2390 
   2391 		RQ_TRACE(rq, "on hold\n");
   2392 		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
   2393 		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
   2394 		i915_request_set_hold(rq);
   2395 
   2396 		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
   2397 			struct i915_request *w =
   2398 				container_of(p->waiter, typeof(*w), sched);
   2399 
   2400 			/* Leave semaphores spinning on the other engines */
   2401 			if (w->engine != rq->engine)
   2402 				continue;
   2403 
   2404 			if (!i915_request_is_ready(w))
   2405 				continue;
   2406 
   2407 			if (i915_request_completed(w))
   2408 				continue;
   2409 
    2410 			if (i915_request_on_hold(w))
   2411 				continue;
   2412 
   2413 			list_move_tail(&w->sched.link, &list);
   2414 		}
   2415 
   2416 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
   2417 	} while (rq);
   2418 }
   2419 
   2420 static bool execlists_hold(struct intel_engine_cs *engine,
   2421 			   struct i915_request *rq)
   2422 {
   2423 	spin_lock_irq(&engine->active.lock);
   2424 
   2425 	if (i915_request_completed(rq)) { /* too late! */
   2426 		rq = NULL;
   2427 		goto unlock;
   2428 	}
   2429 
   2430 	if (rq->engine != engine) { /* preempted virtual engine */
   2431 		struct virtual_engine *ve = to_virtual_engine(rq->engine);
   2432 
   2433 		/*
   2434 		 * intel_context_inflight() is only protected by virtue
   2435 		 * of process_csb() being called only by the tasklet (or
   2436 		 * directly from inside reset while the tasklet is suspended).
   2437 		 * Assert that neither of those are allowed to run while we
   2438 		 * poke at the request queues.
   2439 		 */
   2440 		GEM_BUG_ON(!reset_in_progress(&engine->execlists));
   2441 
   2442 		/*
   2443 		 * An unsubmitted request along a virtual engine will
   2444 		 * remain on the active (this) engine until we are able
   2445 		 * to process the context switch away (and so mark the
   2446 		 * context as no longer in flight). That cannot have happened
   2447 		 * yet, otherwise we would not be hanging!
   2448 		 */
   2449 		spin_lock(&ve->base.active.lock);
   2450 		GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
   2451 		GEM_BUG_ON(ve->request != rq);
   2452 		ve->request = NULL;
   2453 		spin_unlock(&ve->base.active.lock);
   2454 		i915_request_put(rq);
   2455 
   2456 		rq->engine = engine;
   2457 	}
   2458 
   2459 	/*
   2460 	 * Transfer this request onto the hold queue to prevent it
    2461 	 * being resubmitted to HW (and potentially completed) before we have
   2462 	 * released it. Since we may have already submitted following
   2463 	 * requests, we need to remove those as well.
   2464 	 */
   2465 	GEM_BUG_ON(i915_request_on_hold(rq));
   2466 	GEM_BUG_ON(rq->engine != engine);
   2467 	__execlists_hold(rq);
   2468 
   2469 unlock:
   2470 	spin_unlock_irq(&engine->active.lock);
   2471 	return rq;
   2472 }
   2473 
   2474 static bool hold_request(const struct i915_request *rq)
   2475 {
   2476 	struct i915_dependency *p;
   2477 
   2478 	/*
   2479 	 * If one of our ancestors is on hold, we must also be on hold,
   2480 	 * otherwise we will bypass it and execute before it.
   2481 	 */
   2482 	list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
   2483 		const struct i915_request *s =
   2484 			container_of(p->signaler, typeof(*s), sched);
   2485 
   2486 		if (s->engine != rq->engine)
   2487 			continue;
   2488 
   2489 		if (i915_request_on_hold(s))
   2490 			return true;
   2491 	}
   2492 
   2493 	return false;
   2494 }
   2495 
   2496 static void __execlists_unhold(struct i915_request *rq)
   2497 {
   2498 	LIST_HEAD(list);
   2499 
   2500 	do {
   2501 		struct i915_dependency *p;
   2502 
   2503 		GEM_BUG_ON(!i915_request_on_hold(rq));
   2504 		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
   2505 
   2506 		i915_request_clear_hold(rq);
   2507 		list_move_tail(&rq->sched.link,
   2508 			       i915_sched_lookup_priolist(rq->engine,
   2509 							  rq_prio(rq)));
   2510 		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
   2511 		RQ_TRACE(rq, "hold release\n");
   2512 
   2513 		/* Also release any children on this engine that are ready */
   2514 		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
   2515 			struct i915_request *w =
   2516 				container_of(p->waiter, typeof(*w), sched);
   2517 
   2518 			if (w->engine != rq->engine)
   2519 				continue;
   2520 
    2521 			if (!i915_request_on_hold(w))
    2522 				continue;
    2523 
    2524 			/* Check that no other parents are also on hold */
    2525 			if (hold_request(w))
   2526 				continue;
   2527 
   2528 			list_move_tail(&w->sched.link, &list);
   2529 		}
   2530 
   2531 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
   2532 	} while (rq);
   2533 }
   2534 
   2535 static void execlists_unhold(struct intel_engine_cs *engine,
   2536 			     struct i915_request *rq)
   2537 {
   2538 	spin_lock_irq(&engine->active.lock);
   2539 
   2540 	/*
   2541 	 * Move this request back to the priority queue, and all of its
   2542 	 * children and grandchildren that were suspended along with it.
   2543 	 */
   2544 	__execlists_unhold(rq);
   2545 
   2546 	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
   2547 		engine->execlists.queue_priority_hint = rq_prio(rq);
   2548 		tasklet_hi_schedule(&engine->execlists.tasklet);
   2549 	}
   2550 
   2551 	spin_unlock_irq(&engine->active.lock);
   2552 }
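
/*
 * Illustrative sketch only (not part of the driver): the iterative worklist
 * pattern used by defer_request(), __execlists_hold() and __execlists_unhold()
 * above to propagate a state change through the request dependency graph
 * without recursion.  Each processed node appends its qualifying waiters to a
 * local list, and the loop continues until that list is drained.  The toy
 * types and names below are hypothetical; each node is assumed to already sit
 * on some list so that list_move_tail() is valid.
 */
struct toy_node {
	struct list_head link;		/* position in its current list */
	struct list_head waiters;	/* list of struct toy_edge */
	bool on_hold;
};

struct toy_edge {
	struct list_head wait_link;	/* entry in the signaler's waiters */
	struct toy_node *waiter;
};

static inline void toy_hold(struct toy_node *node, struct list_head *hold_list)
{
	LIST_HEAD(list);

	do {
		struct toy_edge *e;

		/* Put this node on hold and park it on the hold list. */
		node->on_hold = true;
		list_move_tail(&node->link, hold_list);

		/* Queue every waiter not already on hold for the same treatment. */
		list_for_each_entry(e, &node->waiters, wait_link) {
			if (!e->waiter->on_hold)
				list_move_tail(&e->waiter->link, &list);
		}

		node = list_first_entry_or_null(&list, struct toy_node, link);
	} while (node);
}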
   2553 
   2554 struct execlists_capture {
   2555 	struct work_struct work;
   2556 	struct i915_request *rq;
   2557 	struct i915_gpu_coredump *error;
   2558 };
   2559 
   2560 static void execlists_capture_work(struct work_struct *work)
   2561 {
   2562 	struct execlists_capture *cap = container_of(work, typeof(*cap), work);
   2563 	const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
   2564 	struct intel_engine_cs *engine = cap->rq->engine;
   2565 	struct intel_gt_coredump *gt = cap->error->gt;
   2566 	struct intel_engine_capture_vma *vma;
   2567 
   2568 	/* Compress all the objects attached to the request, slow! */
   2569 	vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
   2570 	if (vma) {
   2571 		struct i915_vma_compress *compress =
   2572 			i915_vma_capture_prepare(gt);
   2573 
   2574 		intel_engine_coredump_add_vma(gt->engine, vma, compress);
   2575 		i915_vma_capture_finish(gt, compress);
   2576 	}
   2577 
   2578 	gt->simulated = gt->engine->simulated;
   2579 	cap->error->simulated = gt->simulated;
   2580 
   2581 	/* Publish the error state, and announce it to the world */
   2582 	i915_error_state_store(cap->error);
   2583 	i915_gpu_coredump_put(cap->error);
   2584 
   2585 	/* Return this request and all that depend upon it for signaling */
   2586 	execlists_unhold(engine, cap->rq);
   2587 	i915_request_put(cap->rq);
   2588 
   2589 	kfree(cap);
   2590 }
   2591 
   2592 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
   2593 {
   2594 	const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
   2595 	struct execlists_capture *cap;
   2596 
   2597 	cap = kmalloc(sizeof(*cap), gfp);
   2598 	if (!cap)
   2599 		return NULL;
   2600 
   2601 	cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
   2602 	if (!cap->error)
   2603 		goto err_cap;
   2604 
   2605 	cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
   2606 	if (!cap->error->gt)
   2607 		goto err_gpu;
   2608 
   2609 	cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
   2610 	if (!cap->error->gt->engine)
   2611 		goto err_gt;
   2612 
   2613 	return cap;
   2614 
   2615 err_gt:
   2616 	kfree(cap->error->gt);
   2617 err_gpu:
   2618 	kfree(cap->error);
   2619 err_cap:
   2620 	kfree(cap);
   2621 	return NULL;
   2622 }
   2623 
   2624 static bool execlists_capture(struct intel_engine_cs *engine)
   2625 {
   2626 	struct execlists_capture *cap;
   2627 
   2628 	if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
   2629 		return true;
   2630 
   2631 	/*
   2632 	 * We need to _quickly_ capture the engine state before we reset.
   2633 	 * We are inside an atomic section (softirq) here and we are delaying
   2634 	 * the forced preemption event.
   2635 	 */
   2636 	cap = capture_regs(engine);
   2637 	if (!cap)
   2638 		return true;
   2639 
   2640 	cap->rq = execlists_active(&engine->execlists);
   2641 	GEM_BUG_ON(!cap->rq);
   2642 
   2643 	rcu_read_lock();
   2644 	cap->rq = active_request(cap->rq->context->timeline, cap->rq);
   2645 	cap->rq = i915_request_get_rcu(cap->rq);
   2646 	rcu_read_unlock();
   2647 	if (!cap->rq)
   2648 		goto err_free;
   2649 
   2650 	/*
   2651 	 * Remove the request from the execlists queue, and take ownership
   2652 	 * of the request. We pass it to our worker who will _slowly_ compress
   2653 	 * all the pages the _user_ requested for debugging their batch, after
   2654 	 * which we return it to the queue for signaling.
   2655 	 *
   2656 	 * By removing them from the execlists queue, we also remove the
   2657 	 * requests from being processed by __unwind_incomplete_requests()
   2658 	 * during the intel_engine_reset(), and so they will *not* be replayed
   2659 	 * afterwards.
   2660 	 *
   2661 	 * Note that because we have not yet reset the engine at this point,
    2662 	 * it is possible that the request we have identified as being
    2663 	 * guilty did in fact complete, and we will then hit an arbitration
   2664 	 * point allowing the outstanding preemption to succeed. The likelihood
   2665 	 * of that is very low (as capturing of the engine registers should be
   2666 	 * fast enough to run inside an irq-off atomic section!), so we will
   2667 	 * simply hold that request accountable for being non-preemptible
   2668 	 * long enough to force the reset.
   2669 	 */
   2670 	if (!execlists_hold(engine, cap->rq))
   2671 		goto err_rq;
   2672 
   2673 	INIT_WORK(&cap->work, execlists_capture_work);
   2674 	schedule_work(&cap->work);
   2675 	return true;
   2676 
   2677 err_rq:
   2678 	i915_request_put(cap->rq);
   2679 err_free:
   2680 	i915_gpu_coredump_put(cap->error);
   2681 	kfree(cap);
   2682 	return false;
   2683 }
   2684 
   2685 static noinline void preempt_reset(struct intel_engine_cs *engine)
   2686 {
   2687 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
   2688 	unsigned long *lock = &engine->gt->reset.flags;
   2689 
   2690 	if (i915_modparams.reset < 3)
   2691 		return;
   2692 
   2693 	if (test_and_set_bit(bit, lock))
   2694 		return;
   2695 
   2696 	/* Mark this tasklet as disabled to avoid waiting for it to complete */
   2697 	tasklet_disable_nosync(&engine->execlists.tasklet);
   2698 
   2699 	ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
   2700 		     READ_ONCE(engine->props.preempt_timeout_ms),
   2701 		     jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
   2702 
   2703 	ring_set_paused(engine, 1); /* Freeze the current request in place */
   2704 	if (execlists_capture(engine))
   2705 		intel_engine_reset(engine, "preemption time out");
   2706 	else
   2707 		ring_set_paused(engine, 0);
   2708 
   2709 	tasklet_enable(&engine->execlists.tasklet);
   2710 	clear_and_wake_up_bit(bit, lock);
   2711 }
   2712 
   2713 static bool preempt_timeout(const struct intel_engine_cs *const engine)
   2714 {
   2715 	const struct timer_list *t = &engine->execlists.preempt;
   2716 
   2717 	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
   2718 		return false;
   2719 
   2720 	if (!timer_expired(t))
   2721 		return false;
   2722 
   2723 	return READ_ONCE(engine->execlists.pending[0]);
   2724 }
   2725 
   2726 /*
   2727  * Check the unread Context Status Buffers and manage the submission of new
   2728  * contexts to the ELSP accordingly.
   2729  */
   2730 static void execlists_submission_tasklet(unsigned long data)
   2731 {
   2732 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
   2733 	bool timeout = preempt_timeout(engine);
   2734 
   2735 	process_csb(engine);
   2736 	if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
   2737 		unsigned long flags;
   2738 
   2739 		spin_lock_irqsave(&engine->active.lock, flags);
   2740 		__execlists_submission_tasklet(engine);
   2741 		spin_unlock_irqrestore(&engine->active.lock, flags);
   2742 
   2743 		/* Recheck after serialising with direct-submission */
   2744 		if (timeout && preempt_timeout(engine))
   2745 			preempt_reset(engine);
   2746 	}
   2747 }
   2748 
   2749 static void __execlists_kick(struct intel_engine_execlists *execlists)
   2750 {
   2751 	/* Kick the tasklet for some interrupt coalescing and reset handling */
   2752 	tasklet_hi_schedule(&execlists->tasklet);
   2753 }
   2754 
   2755 #define execlists_kick(t, member) \
   2756 	__execlists_kick(container_of(t, struct intel_engine_execlists, member))
   2757 
   2758 static void execlists_timeslice(struct timer_list *timer)
   2759 {
   2760 	execlists_kick(timer, timer);
   2761 }
   2762 
   2763 static void execlists_preempt(struct timer_list *timer)
   2764 {
   2765 	execlists_kick(timer, preempt);
   2766 }
   2767 
   2768 static void queue_request(struct intel_engine_cs *engine,
   2769 			  struct i915_request *rq)
   2770 {
   2771 	GEM_BUG_ON(!list_empty(&rq->sched.link));
   2772 	list_add_tail(&rq->sched.link,
   2773 		      i915_sched_lookup_priolist(engine, rq_prio(rq)));
   2774 	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
   2775 }
   2776 
   2777 static void __submit_queue_imm(struct intel_engine_cs *engine)
   2778 {
   2779 	struct intel_engine_execlists * const execlists = &engine->execlists;
   2780 
   2781 	if (reset_in_progress(execlists))
   2782 		return; /* defer until we restart the engine following reset */
   2783 
   2784 	if (execlists->tasklet.func == execlists_submission_tasklet)
   2785 		__execlists_submission_tasklet(engine);
   2786 	else
   2787 		tasklet_hi_schedule(&execlists->tasklet);
   2788 }
   2789 
   2790 static void submit_queue(struct intel_engine_cs *engine,
   2791 			 const struct i915_request *rq)
   2792 {
   2793 	struct intel_engine_execlists *execlists = &engine->execlists;
   2794 
   2795 	if (rq_prio(rq) <= execlists->queue_priority_hint)
   2796 		return;
   2797 
   2798 	execlists->queue_priority_hint = rq_prio(rq);
   2799 	__submit_queue_imm(engine);
   2800 }
   2801 
   2802 static bool ancestor_on_hold(const struct intel_engine_cs *engine,
   2803 			     const struct i915_request *rq)
   2804 {
   2805 	GEM_BUG_ON(i915_request_on_hold(rq));
   2806 	return !list_empty(&engine->active.hold) && hold_request(rq);
   2807 }
   2808 
   2809 static void execlists_submit_request(struct i915_request *request)
   2810 {
   2811 	struct intel_engine_cs *engine = request->engine;
   2812 	unsigned long flags;
   2813 
   2814 	/* Will be called from irq-context when using foreign fences. */
   2815 	spin_lock_irqsave(&engine->active.lock, flags);
   2816 
   2817 	if (unlikely(ancestor_on_hold(engine, request))) {
   2818 		list_add_tail(&request->sched.link, &engine->active.hold);
   2819 		i915_request_set_hold(request);
   2820 	} else {
   2821 		queue_request(engine, request);
   2822 
   2823 		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
   2824 		GEM_BUG_ON(list_empty(&request->sched.link));
   2825 
   2826 		submit_queue(engine, request);
   2827 	}
   2828 
   2829 	spin_unlock_irqrestore(&engine->active.lock, flags);
   2830 }
   2831 
   2832 static void __execlists_context_fini(struct intel_context *ce)
   2833 {
   2834 	intel_ring_put(ce->ring);
   2835 	i915_vma_put(ce->state);
   2836 }
   2837 
   2838 static void execlists_context_destroy(struct kref *kref)
   2839 {
   2840 	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
   2841 
   2842 	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
   2843 	GEM_BUG_ON(intel_context_is_pinned(ce));
   2844 
   2845 	if (ce->state)
   2846 		__execlists_context_fini(ce);
   2847 
   2848 	intel_context_fini(ce);
   2849 	intel_context_free(ce);
   2850 }
   2851 
   2852 static void
   2853 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
   2854 {
   2855 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
   2856 		return;
   2857 
   2858 	vaddr += engine->context_size;
   2859 
   2860 	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
   2861 }
   2862 
   2863 static void
   2864 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
   2865 {
   2866 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
   2867 		return;
   2868 
   2869 	vaddr += engine->context_size;
   2870 
   2871 	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
   2872 		dev_err_once(engine->i915->drm.dev,
   2873 			     "%s context redzone overwritten!\n",
   2874 			     engine->name);
   2875 }
   2876 
   2877 static void execlists_context_unpin(struct intel_context *ce)
   2878 {
   2879 	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
   2880 		      ce->engine);
   2881 
   2882 	i915_gem_object_unpin_map(ce->state->obj);
   2883 }
   2884 
   2885 static void
   2886 __execlists_update_reg_state(const struct intel_context *ce,
   2887 			     const struct intel_engine_cs *engine,
   2888 			     u32 head)
   2889 {
   2890 	struct intel_ring *ring = ce->ring;
   2891 	u32 *regs = ce->lrc_reg_state;
   2892 
   2893 	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
   2894 	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
   2895 
   2896 	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
   2897 	regs[CTX_RING_HEAD] = head;
   2898 	regs[CTX_RING_TAIL] = ring->tail;
   2899 
   2900 	/* RPCS */
   2901 	if (engine->class == RENDER_CLASS) {
   2902 		regs[CTX_R_PWR_CLK_STATE] =
   2903 			intel_sseu_make_rpcs(engine->i915, &ce->sseu);
   2904 
   2905 		i915_oa_init_reg_state(ce, engine);
   2906 	}
   2907 }
   2908 
   2909 static int
   2910 __execlists_context_pin(struct intel_context *ce,
   2911 			struct intel_engine_cs *engine)
   2912 {
   2913 	void *vaddr;
   2914 
   2915 	GEM_BUG_ON(!ce->state);
   2916 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
   2917 
   2918 	vaddr = i915_gem_object_pin_map(ce->state->obj,
   2919 					i915_coherent_map_type(engine->i915) |
   2920 					I915_MAP_OVERRIDE);
   2921 	if (IS_ERR(vaddr))
   2922 		return PTR_ERR(vaddr);
   2923 
   2924 	ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
   2925 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
   2926 	__execlists_update_reg_state(ce, engine, ce->ring->tail);
   2927 
   2928 	return 0;
   2929 }
   2930 
   2931 static int execlists_context_pin(struct intel_context *ce)
   2932 {
   2933 	return __execlists_context_pin(ce, ce->engine);
   2934 }
   2935 
   2936 static int execlists_context_alloc(struct intel_context *ce)
   2937 {
   2938 	return __execlists_context_alloc(ce, ce->engine);
   2939 }
   2940 
   2941 static void execlists_context_reset(struct intel_context *ce)
   2942 {
   2943 	CE_TRACE(ce, "reset\n");
   2944 	GEM_BUG_ON(!intel_context_is_pinned(ce));
   2945 
   2946 	/*
   2947 	 * Because we emit WA_TAIL_DWORDS there may be a disparity
   2948 	 * between our bookkeeping in ce->ring->head and ce->ring->tail and
   2949 	 * that stored in context. As we only write new commands from
   2950 	 * ce->ring->tail onwards, everything before that is junk. If the GPU
   2951 	 * starts reading from its RING_HEAD from the context, it may try to
   2952 	 * execute that junk and die.
   2953 	 *
    2954 	 * The contexts that are still pinned on resume belong to the
    2955 	 * kernel, and are local to each engine. All other contexts will
    2956 	 * have their head/tail sanitized upon pinning before use, so they
    2957 	 * will never see garbage.
   2958 	 *
   2959 	 * So to avoid that we reset the context images upon resume. For
   2960 	 * simplicity, we just zero everything out.
   2961 	 */
   2962 	intel_ring_reset(ce->ring, ce->ring->emit);
   2963 
   2964 	/* Scrub away the garbage */
   2965 	execlists_init_reg_state(ce->lrc_reg_state,
   2966 				 ce, ce->engine, ce->ring, true);
   2967 	__execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
   2968 
   2969 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
   2970 }
   2971 
   2972 static const struct intel_context_ops execlists_context_ops = {
   2973 	.alloc = execlists_context_alloc,
   2974 
   2975 	.pin = execlists_context_pin,
   2976 	.unpin = execlists_context_unpin,
   2977 
   2978 	.enter = intel_context_enter_engine,
   2979 	.exit = intel_context_exit_engine,
   2980 
   2981 	.reset = execlists_context_reset,
   2982 	.destroy = execlists_context_destroy,
   2983 };
   2984 
   2985 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
   2986 {
   2987 	u32 *cs;
   2988 
   2989 	GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
   2990 
   2991 	cs = intel_ring_begin(rq, 6);
   2992 	if (IS_ERR(cs))
   2993 		return PTR_ERR(cs);
   2994 
   2995 	/*
   2996 	 * Check if we have been preempted before we even get started.
   2997 	 *
   2998 	 * After this point i915_request_started() reports true, even if
   2999 	 * we get preempted and so are no longer running.
   3000 	 */
   3001 	*cs++ = MI_ARB_CHECK;
   3002 	*cs++ = MI_NOOP;
   3003 
   3004 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
   3005 	*cs++ = i915_request_timeline(rq)->hwsp_offset;
   3006 	*cs++ = 0;
   3007 	*cs++ = rq->fence.seqno - 1;
   3008 
   3009 	intel_ring_advance(rq, cs);
   3010 
   3011 	/* Record the updated position of the request's payload */
   3012 	rq->infix = intel_ring_offset(rq, cs);
   3013 
   3014 	return 0;
   3015 }
   3016 
   3017 static int execlists_request_alloc(struct i915_request *request)
   3018 {
   3019 	int ret;
   3020 
   3021 	GEM_BUG_ON(!intel_context_is_pinned(request->context));
   3022 
   3023 	/*
   3024 	 * Flush enough space to reduce the likelihood of waiting after
   3025 	 * we start building the request - in which case we will just
   3026 	 * have to repeat work.
   3027 	 */
   3028 	request->reserved_space += EXECLISTS_REQUEST_SIZE;
   3029 
   3030 	/*
   3031 	 * Note that after this point, we have committed to using
   3032 	 * this request as it is being used to both track the
   3033 	 * state of engine initialisation and liveness of the
   3034 	 * golden renderstate above. Think twice before you try
   3035 	 * to cancel/unwind this request now.
   3036 	 */
   3037 
   3038 	/* Unconditionally invalidate GPU caches and TLBs. */
   3039 	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
   3040 	if (ret)
   3041 		return ret;
   3042 
   3043 	request->reserved_space -= EXECLISTS_REQUEST_SIZE;
   3044 	return 0;
   3045 }
   3046 
   3047 /*
   3048  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
   3049  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
   3050  * but there is a slight complication as this is applied in WA batch where the
   3051  * values are only initialized once so we cannot take register value at the
   3052  * beginning and reuse it further; hence we save its value to memory, upload a
   3053  * constant value with bit21 set and then we restore it back with the saved value.
   3054  * To simplify the WA, a constant value is formed by using the default value
   3055  * of this register. This shouldn't be a problem because we are only modifying
    3056  * it for a short period and this batch is non-preemptible. We could of course
    3057  * use additional instructions that read the actual value of the register
    3058  * at that time and set our bit of interest, but that makes the WA more complicated.
   3059  *
   3060  * This WA is also required for Gen9 so extracting as a function avoids
   3061  * code duplication.
   3062  */
   3063 static u32 *
   3064 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
   3065 {
   3066 	/* NB no one else is allowed to scribble over scratch + 256! */
   3067 	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
   3068 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
   3069 	*batch++ = intel_gt_scratch_offset(engine->gt,
   3070 					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
   3071 	*batch++ = 0;
   3072 
   3073 	*batch++ = MI_LOAD_REGISTER_IMM(1);
   3074 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
   3075 	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
   3076 
   3077 	batch = gen8_emit_pipe_control(batch,
   3078 				       PIPE_CONTROL_CS_STALL |
   3079 				       PIPE_CONTROL_DC_FLUSH_ENABLE,
   3080 				       0);
   3081 
   3082 	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
   3083 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
   3084 	*batch++ = intel_gt_scratch_offset(engine->gt,
   3085 					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
   3086 	*batch++ = 0;
   3087 
   3088 	return batch;
   3089 }
   3090 
   3091 /*
    3092  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
    3093  * initialized at the beginning and shared across all contexts, but this field
    3094  * helps us to have multiple batches at different offsets and select them based
    3095  * on some criteria. At the moment this batch always starts at the beginning of
    3096  * the page and at this point we don't have multiple wa_ctx batch buffers.
    3097  *
    3098  * The number of WAs applied is not known at the beginning; we use this field
    3099  * to return the number of DWORDS written.
    3100  *
    3101  * Note that this batch does not contain MI_BATCH_BUFFER_END,
    3102  * so it adds NOOPs as padding to make it cacheline aligned.
    3103  * MI_BATCH_BUFFER_END will be added to the perctx batch, and both of them
    3104  * together make a complete batch buffer.
   3105  */
   3106 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
   3107 {
   3108 	/* WaDisableCtxRestoreArbitration:bdw,chv */
   3109 	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
   3110 
   3111 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
   3112 	if (IS_BROADWELL(engine->i915))
   3113 		batch = gen8_emit_flush_coherentl3_wa(engine, batch);
   3114 
   3115 	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
   3116 	/* Actual scratch location is at 128 bytes offset */
   3117 	batch = gen8_emit_pipe_control(batch,
   3118 				       PIPE_CONTROL_FLUSH_L3 |
   3119 				       PIPE_CONTROL_STORE_DATA_INDEX |
   3120 				       PIPE_CONTROL_CS_STALL |
   3121 				       PIPE_CONTROL_QW_WRITE,
   3122 				       LRC_PPHWSP_SCRATCH_ADDR);
   3123 
   3124 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   3125 
   3126 	/* Pad to end of cacheline */
   3127 	while ((unsigned long)batch % CACHELINE_BYTES)
   3128 		*batch++ = MI_NOOP;
   3129 
   3130 	/*
   3131 	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
   3132 	 * execution depends on the length specified in terms of cache lines
   3133 	 * in the register CTX_RCS_INDIRECT_CTX
   3134 	 */
   3135 
   3136 	return batch;
   3137 }
   3138 
   3139 struct lri {
   3140 	i915_reg_t reg;
   3141 	u32 value;
   3142 };
   3143 
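         /*
          * Emit @count (reg, value) pairs from @lri as a single
          * MI_LOAD_REGISTER_IMM, followed by a trailing MI_NOOP (presumably to
          * keep the emitted length, 1 + 2*count dwords, qword aligned).
          */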
   3144 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
   3145 {
   3146 	GEM_BUG_ON(!count || count > 63);
   3147 
   3148 	*batch++ = MI_LOAD_REGISTER_IMM(count);
   3149 	do {
   3150 		*batch++ = i915_mmio_reg_offset(lri->reg);
   3151 		*batch++ = lri->value;
   3152 	} while (lri++, --count);
   3153 	*batch++ = MI_NOOP;
   3154 
   3155 	return batch;
   3156 }
   3157 
   3158 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
   3159 {
   3160 	static const struct lri lri[] = {
   3161 		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
   3162 		{
   3163 			COMMON_SLICE_CHICKEN2,
   3164 			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
   3165 				       0),
   3166 		},
   3167 
   3168 		/* BSpec: 11391 */
   3169 		{
   3170 			FF_SLICE_CHICKEN,
   3171 			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
   3172 				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
   3173 		},
   3174 
   3175 		/* BSpec: 11299 */
   3176 		{
   3177 			_3D_CHICKEN3,
   3178 			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
   3179 				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
   3180 		}
   3181 	};
   3182 
   3183 	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
   3184 
   3185 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
   3186 	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
   3187 
   3188 	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
   3189 	batch = gen8_emit_pipe_control(batch,
   3190 				       PIPE_CONTROL_FLUSH_L3 |
   3191 				       PIPE_CONTROL_STORE_DATA_INDEX |
   3192 				       PIPE_CONTROL_CS_STALL |
   3193 				       PIPE_CONTROL_QW_WRITE,
   3194 				       LRC_PPHWSP_SCRATCH_ADDR);
   3195 
   3196 	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
   3197 
   3198 	/* WaMediaPoolStateCmdInWABB:bxt,glk */
   3199 	if (HAS_POOLED_EU(engine->i915)) {
   3200 		/*
    3201 		 * The EU pool configuration is set up along with the golden
    3202 		 * context during context initialization. This value depends on
    3203 		 * the device type (2x6 or 3x6) and needs to be updated based
    3204 		 * on which subslice is disabled, especially for 2x6
    3205 		 * devices. However, it is safe to load the default 3x6
    3206 		 * configuration instead of masking off the corresponding
    3207 		 * bits, because the HW ignores the bits of a disabled
    3208 		 * subslice and drops down to the appropriate config. Please
    3209 		 * see render_state_setup() in i915_gem_render_state.c for
    3210 		 * the possible configurations; to avoid duplication they are
    3211 		 * not shown here again.
   3212 		 */
   3213 		*batch++ = GEN9_MEDIA_POOL_STATE;
   3214 		*batch++ = GEN9_MEDIA_POOL_ENABLE;
   3215 		*batch++ = 0x00777000;
   3216 		*batch++ = 0;
   3217 		*batch++ = 0;
   3218 		*batch++ = 0;
   3219 	}
   3220 
   3221 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   3222 
   3223 	/* Pad to end of cacheline */
   3224 	while ((unsigned long)batch % CACHELINE_BYTES)
   3225 		*batch++ = MI_NOOP;
   3226 
   3227 	return batch;
   3228 }
   3229 
   3230 static u32 *
   3231 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
   3232 {
   3233 	int i;
   3234 
   3235 	/*
   3236 	 * WaPipeControlBefore3DStateSamplePattern: cnl
   3237 	 *
   3238 	 * Ensure the engine is idle prior to programming a
   3239 	 * 3DSTATE_SAMPLE_PATTERN during a context restore.
   3240 	 */
   3241 	batch = gen8_emit_pipe_control(batch,
   3242 				       PIPE_CONTROL_CS_STALL,
   3243 				       0);
   3244 	/*
   3245 	 * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
   3246 	 * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
   3247 	 * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
   3248 	 * confusing. Since gen8_emit_pipe_control() already advances the
   3249 	 * batch by 6 dwords, we advance the other 10 here, completing a
   3250 	 * cacheline. It's not clear if the workaround requires this padding
   3251 	 * before other commands, or if it's just the regular padding we would
   3252 	 * already have for the workaround bb, so leave it here for now.
   3253 	 */
   3254 	for (i = 0; i < 10; i++)
   3255 		*batch++ = MI_NOOP;
   3256 
   3257 	/* Pad to end of cacheline */
   3258 	while ((unsigned long)batch % CACHELINE_BYTES)
   3259 		*batch++ = MI_NOOP;
   3260 
   3261 	return batch;
   3262 }
   3263 
   3264 #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
   3265 
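         /*
          * Allocate a single page (CTX_WA_BB_OBJ_SIZE) for the per-engine
          * workaround batch buffers, bind it into the global GTT
          * (PIN_GLOBAL | PIN_HIGH) and stash the vma in engine->wa_ctx.vma.
          */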
   3266 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
   3267 {
   3268 	struct drm_i915_gem_object *obj;
   3269 	struct i915_vma *vma;
   3270 	int err;
   3271 
   3272 	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
   3273 	if (IS_ERR(obj))
   3274 		return PTR_ERR(obj);
   3275 
   3276 	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
   3277 	if (IS_ERR(vma)) {
   3278 		err = PTR_ERR(vma);
   3279 		goto err;
   3280 	}
   3281 
   3282 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
   3283 	if (err)
   3284 		goto err;
   3285 
   3286 	engine->wa_ctx.vma = vma;
   3287 	return 0;
   3288 
   3289 err:
   3290 	i915_gem_object_put(obj);
   3291 	return err;
   3292 }
   3293 
   3294 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
   3295 {
   3296 	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
   3297 }
   3298 
   3299 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
   3300 
   3301 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
   3302 {
   3303 	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
   3304 	struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
   3305 					    &wa_ctx->per_ctx };
   3306 	wa_bb_func_t wa_bb_fn[2];
   3307 	struct page *page;
   3308 	void *batch, *batch_ptr;
   3309 	unsigned int i;
   3310 	int ret;
   3311 
   3312 	if (engine->class != RENDER_CLASS)
   3313 		return 0;
   3314 
   3315 	switch (INTEL_GEN(engine->i915)) {
   3316 	case 12:
   3317 	case 11:
   3318 		return 0;
   3319 	case 10:
   3320 		wa_bb_fn[0] = gen10_init_indirectctx_bb;
   3321 		wa_bb_fn[1] = NULL;
   3322 		break;
   3323 	case 9:
   3324 		wa_bb_fn[0] = gen9_init_indirectctx_bb;
   3325 		wa_bb_fn[1] = NULL;
   3326 		break;
   3327 	case 8:
   3328 		wa_bb_fn[0] = gen8_init_indirectctx_bb;
   3329 		wa_bb_fn[1] = NULL;
   3330 		break;
   3331 	default:
   3332 		MISSING_CASE(INTEL_GEN(engine->i915));
   3333 		return 0;
   3334 	}
   3335 
   3336 	ret = lrc_setup_wa_ctx(engine);
   3337 	if (ret) {
   3338 		DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
   3339 		return ret;
   3340 	}
   3341 
   3342 	page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
   3343 	batch = batch_ptr = kmap_atomic(page);
   3344 
   3345 	/*
   3346 	 * Emit the two workaround batch buffers, recording the offset from the
   3347 	 * start of the workaround batch buffer object for each and their
   3348 	 * respective sizes.
   3349 	 */
   3350 	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
   3351 		wa_bb[i]->offset = batch_ptr - batch;
   3352 		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
   3353 						  CACHELINE_BYTES))) {
   3354 			ret = -EINVAL;
   3355 			break;
   3356 		}
   3357 		if (wa_bb_fn[i])
   3358 			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
   3359 		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
   3360 	}
   3361 
   3362 	BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
   3363 
   3364 	kunmap_atomic(batch);
   3365 	if (ret)
   3366 		lrc_destroy_wa_ctx(engine);
   3367 
   3368 	return ret;
   3369 }
   3370 
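         /*
          * Switch the engine to execlists submission: set the HWSTAM write
          * mask, select execlists mode via RING_MODE (on gen11+ by disabling
          * legacy ring-buffer submission), clear STOP_RING, point RING_HWS_PGA
          * at the engine's status page and reset the context tag allocator.
          */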
   3371 static void enable_execlists(struct intel_engine_cs *engine)
   3372 {
   3373 	u32 mode;
   3374 
   3375 	assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
   3376 
   3377 	intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
   3378 
   3379 	if (INTEL_GEN(engine->i915) >= 11)
   3380 		mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
   3381 	else
   3382 		mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
   3383 	ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
   3384 
   3385 	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
   3386 
   3387 	ENGINE_WRITE_FW(engine,
   3388 			RING_HWS_PGA,
   3389 			i915_ggtt_offset(engine->status_page.vma));
   3390 	ENGINE_POSTING_READ(engine, RING_HWS_PGA);
   3391 
   3392 	engine->context_tag = 0;
   3393 }
   3394 
   3395 static bool unexpected_starting_state(struct intel_engine_cs *engine)
   3396 {
   3397 	bool unexpected = false;
   3398 
   3399 	if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
   3400 		DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
   3401 		unexpected = true;
   3402 	}
   3403 
   3404 	return unexpected;
   3405 }
   3406 
   3407 static int execlists_resume(struct intel_engine_cs *engine)
   3408 {
   3409 	intel_engine_apply_workarounds(engine);
   3410 	intel_engine_apply_whitelist(engine);
   3411 
   3412 	intel_mocs_init_engine(engine);
   3413 
   3414 	intel_engine_reset_breadcrumbs(engine);
   3415 
   3416 	if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
   3417 		struct drm_printer p = drm_debug_printer(__func__);
   3418 
   3419 		intel_engine_dump(engine, &p, NULL);
   3420 	}
   3421 
   3422 	enable_execlists(engine);
   3423 
   3424 	return 0;
   3425 }
   3426 
   3427 static void execlists_reset_prepare(struct intel_engine_cs *engine)
   3428 {
   3429 	struct intel_engine_execlists * const execlists = &engine->execlists;
   3430 	unsigned long flags;
   3431 
   3432 	ENGINE_TRACE(engine, "depth<-%d\n",
   3433 		     atomic_read(&execlists->tasklet.count));
   3434 
   3435 	/*
   3436 	 * Prevent request submission to the hardware until we have
   3437 	 * completed the reset in i915_gem_reset_finish(). If a request
   3438 	 * is completed by one engine, it may then queue a request
   3439 	 * to a second via its execlists->tasklet *just* as we are
   3440 	 * calling engine->resume() and also writing the ELSP.
   3441 	 * Turning off the execlists->tasklet until the reset is over
   3442 	 * prevents the race.
   3443 	 */
   3444 	__tasklet_disable_sync_once(&execlists->tasklet);
   3445 	GEM_BUG_ON(!reset_in_progress(execlists));
   3446 
   3447 	/* And flush any current direct submission. */
   3448 	spin_lock_irqsave(&engine->active.lock, flags);
   3449 	spin_unlock_irqrestore(&engine->active.lock, flags);
   3450 
   3451 	/*
    3452 	 * We stop the engines, otherwise we might get a failed reset and a
    3453 	 * dead gpu (on elk). Also, a gpu as modern as kbl can suffer
    3454 	 * from a system hang if a batchbuffer is progressing when
    3455 	 * the reset is issued, regardless of the READY_TO_RESET ack.
   3456 	 * Thus assume it is best to stop engines on all gens
   3457 	 * where we have a gpu reset.
   3458 	 *
   3459 	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
   3460 	 *
   3461 	 * FIXME: Wa for more modern gens needs to be validated
   3462 	 */
   3463 	intel_engine_stop_cs(engine);
   3464 }
   3465 
   3466 static void reset_csb_pointers(struct intel_engine_cs *engine)
   3467 {
   3468 	struct intel_engine_execlists * const execlists = &engine->execlists;
   3469 	const unsigned int reset_value = execlists->csb_size - 1;
   3470 
   3471 	ring_set_paused(engine, 0);
   3472 
   3473 	/*
   3474 	 * After a reset, the HW starts writing into CSB entry [0]. We
   3475 	 * therefore have to set our HEAD pointer back one entry so that
   3476 	 * the *first* entry we check is entry 0. To complicate this further,
   3477 	 * as we don't wait for the first interrupt after reset, we have to
   3478 	 * fake the HW write to point back to the last entry so that our
   3479 	 * inline comparison of our cached head position against the last HW
   3480 	 * write works even before the first interrupt.
   3481 	 */
   3482 	execlists->csb_head = reset_value;
   3483 	WRITE_ONCE(*execlists->csb_write, reset_value);
   3484 	wmb(); /* Make sure this is visible to HW (paranoia?) */
   3485 
   3486 	/*
   3487 	 * Sometimes Icelake forgets to reset its pointers on a GPU reset.
   3488 	 * Bludgeon them with a mmio update to be sure.
   3489 	 */
   3490 	ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
   3491 		     reset_value << 8 | reset_value);
   3492 	ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
   3493 
   3494 	invalidate_csb_entries(&execlists->csb_status[0],
   3495 			       &execlists->csb_status[reset_value]);
   3496 }
   3497 
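         /*
          * Clear STOP_RING in the context image's copy of RING_MI_MODE (a
          * masked register: the upper halfword selects which bits the write
          * touches), so a restored context does not come back with the ring
          * stopped after a reset.
          */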
   3498 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
   3499 {
   3500 	int x;
   3501 
   3502 	x = lrc_ring_mi_mode(engine);
   3503 	if (x != -1) {
   3504 		regs[x + 1] &= ~STOP_RING;
   3505 		regs[x + 1] |= STOP_RING << 16;
   3506 	}
   3507 }
   3508 
   3509 static void __execlists_reset_reg_state(const struct intel_context *ce,
   3510 					const struct intel_engine_cs *engine)
   3511 {
   3512 	u32 *regs = ce->lrc_reg_state;
   3513 
   3514 	__reset_stop_ring(regs, engine);
   3515 }
   3516 
   3517 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
   3518 {
   3519 	struct intel_engine_execlists * const execlists = &engine->execlists;
   3520 	struct intel_context *ce;
   3521 	struct i915_request *rq;
   3522 	u32 head;
   3523 
   3524 	mb(); /* paranoia: read the CSB pointers from after the reset */
   3525 	clflush(execlists->csb_write);
   3526 	mb();
   3527 
   3528 	process_csb(engine); /* drain preemption events */
   3529 
   3530 	/* Following the reset, we need to reload the CSB read/write pointers */
   3531 	reset_csb_pointers(engine);
   3532 
   3533 	/*
    3534 	 * Save the currently executing context: even if we completed
   3535 	 * its request, it was still running at the time of the
   3536 	 * reset and will have been clobbered.
   3537 	 */
   3538 	rq = execlists_active(execlists);
   3539 	if (!rq)
   3540 		goto unwind;
   3541 
   3542 	/* We still have requests in-flight; the engine should be active */
   3543 	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
   3544 
   3545 	ce = rq->context;
   3546 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
   3547 
   3548 	if (i915_request_completed(rq)) {
   3549 		/* Idle context; tidy up the ring so we can restart afresh */
   3550 		head = intel_ring_wrap(ce->ring, rq->tail);
   3551 		goto out_replay;
   3552 	}
   3553 
   3554 	/* Context has requests still in-flight; it should not be idle! */
   3555 	GEM_BUG_ON(i915_active_is_idle(&ce->active));
   3556 	rq = active_request(ce->timeline, rq);
   3557 	head = intel_ring_wrap(ce->ring, rq->head);
   3558 	GEM_BUG_ON(head == ce->ring->tail);
   3559 
   3560 	/*
   3561 	 * If this request hasn't started yet, e.g. it is waiting on a
   3562 	 * semaphore, we need to avoid skipping the request or else we
   3563 	 * break the signaling chain. However, if the context is corrupt
   3564 	 * the request will not restart and we will be stuck with a wedged
   3565 	 * device. It is quite often the case that if we issue a reset
    3566 	 * while the GPU is loading the context image, the context
   3567 	 * image becomes corrupt.
   3568 	 *
   3569 	 * Otherwise, if we have not started yet, the request should replay
   3570 	 * perfectly and we do not need to flag the result as being erroneous.
   3571 	 */
   3572 	if (!i915_request_started(rq))
   3573 		goto out_replay;
   3574 
   3575 	/*
   3576 	 * If the request was innocent, we leave the request in the ELSP
   3577 	 * and will try to replay it on restarting. The context image may
   3578 	 * have been corrupted by the reset, in which case we may have
   3579 	 * to service a new GPU hang, but more likely we can continue on
   3580 	 * without impact.
   3581 	 *
   3582 	 * If the request was guilty, we presume the context is corrupt
   3583 	 * and have to at least restore the RING register in the context
   3584 	 * image back to the expected values to skip over the guilty request.
   3585 	 */
   3586 	__i915_request_reset(rq, stalled);
   3587 	if (!stalled)
   3588 		goto out_replay;
   3589 
   3590 	/*
   3591 	 * We want a simple context + ring to execute the breadcrumb update.
   3592 	 * We cannot rely on the context being intact across the GPU hang,
   3593 	 * so clear it and rebuild just what we need for the breadcrumb.
   3594 	 * All pending requests for this context will be zapped, and any
   3595 	 * future request will be after userspace has had the opportunity
   3596 	 * to recreate its own state.
   3597 	 */
   3598 	GEM_BUG_ON(!intel_context_is_pinned(ce));
   3599 	restore_default_state(ce, engine);
   3600 
   3601 out_replay:
   3602 	ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
   3603 		     head, ce->ring->tail);
   3604 	__execlists_reset_reg_state(ce, engine);
   3605 	__execlists_update_reg_state(ce, engine, head);
   3606 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
   3607 
   3608 unwind:
   3609 	/* Push back any incomplete requests for replay after the reset. */
   3610 	cancel_port_requests(execlists);
   3611 	__unwind_incomplete_requests(engine);
   3612 }
   3613 
   3614 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
   3615 {
   3616 	unsigned long flags;
   3617 
   3618 	ENGINE_TRACE(engine, "\n");
   3619 
   3620 	spin_lock_irqsave(&engine->active.lock, flags);
   3621 
   3622 	__execlists_reset(engine, stalled);
   3623 
   3624 	spin_unlock_irqrestore(&engine->active.lock, flags);
   3625 }
   3626 
   3627 static void nop_submission_tasklet(unsigned long data)
   3628 {
   3629 	/* The driver is wedged; don't process any more events. */
   3630 }
   3631 
   3632 static void execlists_reset_cancel(struct intel_engine_cs *engine)
   3633 {
   3634 	struct intel_engine_execlists * const execlists = &engine->execlists;
   3635 	struct i915_request *rq, *rn;
   3636 	struct rb_node *rb;
   3637 	unsigned long flags;
   3638 
   3639 	ENGINE_TRACE(engine, "\n");
   3640 
   3641 	/*
   3642 	 * Before we call engine->cancel_requests(), we should have exclusive
   3643 	 * access to the submission state. This is arranged for us by the
   3644 	 * caller disabling the interrupt generation, the tasklet and other
   3645 	 * threads that may then access the same state, giving us a free hand
   3646 	 * to reset state. However, we still need to let lockdep be aware that
   3647 	 * we know this state may be accessed in hardirq context, so we
   3648 	 * disable the irq around this manipulation and we want to keep
   3649 	 * the spinlock focused on its duties and not accidentally conflate
   3650 	 * coverage to the submission's irq state. (Similarly, although we
   3651 	 * shouldn't need to disable irq around the manipulation of the
   3652 	 * submission's irq state, we also wish to remind ourselves that
   3653 	 * it is irq state.)
   3654 	 */
   3655 	spin_lock_irqsave(&engine->active.lock, flags);
   3656 
   3657 	__execlists_reset(engine, true);
   3658 
   3659 	/* Mark all executing requests as skipped. */
   3660 	list_for_each_entry(rq, &engine->active.requests, sched.link)
   3661 		mark_eio(rq);
   3662 
   3663 	/* Flush the queued requests to the timeline list (for retiring). */
   3664 	while ((rb = rb_first_cached(&execlists->queue))) {
   3665 		struct i915_priolist *p = to_priolist(rb);
   3666 		int i;
   3667 
   3668 		priolist_for_each_request_consume(rq, rn, p, i) {
   3669 			mark_eio(rq);
   3670 			__i915_request_submit(rq);
   3671 		}
   3672 
   3673 		rb_erase_cached(&p->node, &execlists->queue);
   3674 		i915_priolist_free(p);
   3675 	}
   3676 
   3677 	/* On-hold requests will be flushed to timeline upon their release */
   3678 	list_for_each_entry(rq, &engine->active.hold, sched.link)
   3679 		mark_eio(rq);
   3680 
   3681 	/* Cancel all attached virtual engines */
   3682 	while ((rb = rb_first_cached(&execlists->virtual))) {
   3683 		struct virtual_engine *ve =
   3684 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
   3685 
   3686 		rb_erase_cached(rb, &execlists->virtual);
   3687 		RB_CLEAR_NODE(rb);
   3688 
   3689 		spin_lock(&ve->base.active.lock);
   3690 		rq = fetch_and_zero(&ve->request);
   3691 		if (rq) {
   3692 			mark_eio(rq);
   3693 
   3694 			rq->engine = engine;
   3695 			__i915_request_submit(rq);
   3696 			i915_request_put(rq);
   3697 
   3698 			ve->base.execlists.queue_priority_hint = INT_MIN;
   3699 		}
   3700 		spin_unlock(&ve->base.active.lock);
   3701 	}
   3702 
   3703 	/* Remaining _unready_ requests will be nop'ed when submitted */
   3704 
   3705 	execlists->queue_priority_hint = INT_MIN;
   3706 	execlists->queue = RB_ROOT_CACHED;
   3707 
   3708 	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
   3709 	execlists->tasklet.func = nop_submission_tasklet;
   3710 
   3711 	spin_unlock_irqrestore(&engine->active.lock, flags);
   3712 }
   3713 
   3714 static void execlists_reset_finish(struct intel_engine_cs *engine)
   3715 {
   3716 	struct intel_engine_execlists * const execlists = &engine->execlists;
   3717 
   3718 	/*
   3719 	 * After a GPU reset, we may have requests to replay. Do so now while
   3720 	 * we still have the forcewake to be sure that the GPU is not allowed
   3721 	 * to sleep before we restart and reload a context.
   3722 	 */
   3723 	GEM_BUG_ON(!reset_in_progress(execlists));
   3724 	if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
   3725 		execlists->tasklet.func(execlists->tasklet.data);
   3726 
   3727 	if (__tasklet_enable(&execlists->tasklet))
   3728 		/* And kick in case we missed a new request submission. */
   3729 		tasklet_hi_schedule(&execlists->tasklet);
   3730 	ENGINE_TRACE(engine, "depth->%d\n",
   3731 		     atomic_read(&execlists->tasklet.count));
   3732 }
   3733 
   3734 static int gen8_emit_bb_start_noarb(struct i915_request *rq,
   3735 				    u64 offset, u32 len,
   3736 				    const unsigned int flags)
   3737 {
   3738 	u32 *cs;
   3739 
   3740 	cs = intel_ring_begin(rq, 4);
   3741 	if (IS_ERR(cs))
   3742 		return PTR_ERR(cs);
   3743 
   3744 	/*
   3745 	 * WaDisableCtxRestoreArbitration:bdw,chv
   3746 	 *
   3747 	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
    3748 	 * particular on all the gens that do not need the w/a at all!): if we
    3749 	 * took care to make sure that on every switch into this context
    3750 	 * (both ordinary and for preemption) arbitration was enabled,
    3751 	 * we would be fine.  However, for gen8 there is another w/a that
   3752 	 * requires us to not preempt inside GPGPU execution, so we keep
   3753 	 * arbitration disabled for gen8 batches. Arbitration will be
   3754 	 * re-enabled before we close the request
   3755 	 * (engine->emit_fini_breadcrumb).
   3756 	 */
   3757 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
   3758 
   3759 	/* FIXME(BDW+): Address space and security selectors. */
   3760 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
   3761 		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
   3762 	*cs++ = lower_32_bits(offset);
   3763 	*cs++ = upper_32_bits(offset);
   3764 
   3765 	intel_ring_advance(rq, cs);
   3766 
   3767 	return 0;
   3768 }
   3769 
   3770 static int gen8_emit_bb_start(struct i915_request *rq,
   3771 			      u64 offset, u32 len,
   3772 			      const unsigned int flags)
   3773 {
   3774 	u32 *cs;
   3775 
   3776 	cs = intel_ring_begin(rq, 6);
   3777 	if (IS_ERR(cs))
   3778 		return PTR_ERR(cs);
   3779 
   3780 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   3781 
   3782 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
   3783 		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
   3784 	*cs++ = lower_32_bits(offset);
   3785 	*cs++ = upper_32_bits(offset);
   3786 
   3787 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
   3788 	*cs++ = MI_NOOP;
   3789 
   3790 	intel_ring_advance(rq, cs);
   3791 
   3792 	return 0;
   3793 }
   3794 
   3795 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
   3796 {
   3797 	ENGINE_WRITE(engine, RING_IMR,
   3798 		     ~(engine->irq_enable_mask | engine->irq_keep_mask));
   3799 	ENGINE_POSTING_READ(engine, RING_IMR);
   3800 }
   3801 
   3802 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
   3803 {
   3804 	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
   3805 }
   3806 
   3807 static int gen8_emit_flush(struct i915_request *request, u32 mode)
   3808 {
   3809 	u32 cmd, *cs;
   3810 
   3811 	cs = intel_ring_begin(request, 4);
   3812 	if (IS_ERR(cs))
   3813 		return PTR_ERR(cs);
   3814 
   3815 	cmd = MI_FLUSH_DW + 1;
   3816 
   3817 	/* We always require a command barrier so that subsequent
   3818 	 * commands, such as breadcrumb interrupts, are strictly ordered
   3819 	 * wrt the contents of the write cache being flushed to memory
   3820 	 * (and thus being coherent from the CPU).
   3821 	 */
   3822 	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
   3823 
   3824 	if (mode & EMIT_INVALIDATE) {
   3825 		cmd |= MI_INVALIDATE_TLB;
   3826 		if (request->engine->class == VIDEO_DECODE_CLASS)
   3827 			cmd |= MI_INVALIDATE_BSD;
   3828 	}
   3829 
   3830 	*cs++ = cmd;
   3831 	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
   3832 	*cs++ = 0; /* upper addr */
   3833 	*cs++ = 0; /* value */
   3834 	intel_ring_advance(request, cs);
   3835 
   3836 	return 0;
   3837 }
   3838 
   3839 static int gen8_emit_flush_render(struct i915_request *request,
   3840 				  u32 mode)
   3841 {
   3842 	bool vf_flush_wa = false, dc_flush_wa = false;
   3843 	u32 *cs, flags = 0;
   3844 	int len;
   3845 
   3846 	flags |= PIPE_CONTROL_CS_STALL;
   3847 
   3848 	if (mode & EMIT_FLUSH) {
   3849 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
   3850 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
   3851 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
   3852 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
   3853 	}
   3854 
   3855 	if (mode & EMIT_INVALIDATE) {
   3856 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
   3857 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
   3858 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
   3859 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
   3860 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
   3861 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
   3862 		flags |= PIPE_CONTROL_QW_WRITE;
   3863 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
   3864 
   3865 		/*
   3866 		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
   3867 		 * pipe control.
   3868 		 */
   3869 		if (IS_GEN(request->i915, 9))
   3870 			vf_flush_wa = true;
   3871 
   3872 		/* WaForGAMHang:kbl */
   3873 		if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
   3874 			dc_flush_wa = true;
   3875 	}
   3876 
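         	/*
         	 * Each PIPE_CONTROL emitted by gen8_emit_pipe_control() is 6
         	 * dwords: the gen9 VF w/a prepends a null PIPE_CONTROL (+6), and
         	 * the kbl DC flush w/a brackets the main flush with one
         	 * PIPE_CONTROL before and one after (hence +12).
         	 */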
   3877 	len = 6;
   3878 
   3879 	if (vf_flush_wa)
   3880 		len += 6;
   3881 
   3882 	if (dc_flush_wa)
   3883 		len += 12;
   3884 
   3885 	cs = intel_ring_begin(request, len);
   3886 	if (IS_ERR(cs))
   3887 		return PTR_ERR(cs);
   3888 
   3889 	if (vf_flush_wa)
   3890 		cs = gen8_emit_pipe_control(cs, 0, 0);
   3891 
   3892 	if (dc_flush_wa)
   3893 		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
   3894 					    0);
   3895 
   3896 	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
   3897 
   3898 	if (dc_flush_wa)
   3899 		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
   3900 
   3901 	intel_ring_advance(request, cs);
   3902 
   3903 	return 0;
   3904 }
   3905 
   3906 static int gen11_emit_flush_render(struct i915_request *request,
   3907 				   u32 mode)
   3908 {
   3909 	if (mode & EMIT_FLUSH) {
   3910 		u32 *cs;
   3911 		u32 flags = 0;
   3912 
   3913 		flags |= PIPE_CONTROL_CS_STALL;
   3914 
   3915 		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
   3916 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
   3917 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
   3918 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
   3919 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
   3920 		flags |= PIPE_CONTROL_QW_WRITE;
   3921 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
   3922 
   3923 		cs = intel_ring_begin(request, 6);
   3924 		if (IS_ERR(cs))
   3925 			return PTR_ERR(cs);
   3926 
   3927 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
   3928 		intel_ring_advance(request, cs);
   3929 	}
   3930 
   3931 	if (mode & EMIT_INVALIDATE) {
   3932 		u32 *cs;
   3933 		u32 flags = 0;
   3934 
   3935 		flags |= PIPE_CONTROL_CS_STALL;
   3936 
   3937 		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
   3938 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
   3939 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
   3940 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
   3941 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
   3942 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
   3943 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
   3944 		flags |= PIPE_CONTROL_QW_WRITE;
   3945 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
   3946 
   3947 		cs = intel_ring_begin(request, 6);
   3948 		if (IS_ERR(cs))
   3949 			return PTR_ERR(cs);
   3950 
   3951 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
   3952 		intel_ring_advance(request, cs);
   3953 	}
   3954 
   3955 	return 0;
   3956 }
   3957 
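         /*
          * Build an MI_ARB_CHECK dword that toggles the gen12 command-streamer
          * pre-parser: bit 8 appears to act as the modify/enable flag for the
          * pre-fetch disable field, while bit 0 carries the requested state.
          */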
   3958 static u32 preparser_disable(bool state)
   3959 {
   3960 	return MI_ARB_CHECK | 1 << 8 | state;
   3961 }
   3962 
   3963 static int gen12_emit_flush_render(struct i915_request *request,
   3964 				   u32 mode)
   3965 {
   3966 	if (mode & EMIT_FLUSH) {
   3967 		u32 flags = 0;
   3968 		u32 *cs;
   3969 
   3970 		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
   3971 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
   3972 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
   3973 		/* Wa_1409600907:tgl */
   3974 		flags |= PIPE_CONTROL_DEPTH_STALL;
   3975 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
   3976 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
   3977 		flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
   3978 
   3979 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
   3980 		flags |= PIPE_CONTROL_QW_WRITE;
   3981 
   3982 		flags |= PIPE_CONTROL_CS_STALL;
   3983 
   3984 		cs = intel_ring_begin(request, 6);
   3985 		if (IS_ERR(cs))
   3986 			return PTR_ERR(cs);
   3987 
   3988 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
   3989 		intel_ring_advance(request, cs);
   3990 	}
   3991 
   3992 	if (mode & EMIT_INVALIDATE) {
   3993 		u32 flags = 0;
   3994 		u32 *cs;
   3995 
   3996 		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
   3997 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
   3998 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
   3999 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
   4000 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
   4001 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
   4002 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
   4003 		flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
   4004 
   4005 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
   4006 		flags |= PIPE_CONTROL_QW_WRITE;
   4007 
   4008 		flags |= PIPE_CONTROL_CS_STALL;
   4009 
   4010 		cs = intel_ring_begin(request, 8);
   4011 		if (IS_ERR(cs))
   4012 			return PTR_ERR(cs);
   4013 
   4014 		/*
   4015 		 * Prevent the pre-parser from skipping past the TLB
   4016 		 * invalidate and loading a stale page for the batch
   4017 		 * buffer / request payload.
   4018 		 */
   4019 		*cs++ = preparser_disable(true);
   4020 
   4021 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
   4022 
   4023 		*cs++ = preparser_disable(false);
   4024 		intel_ring_advance(request, cs);
   4025 
   4026 		/*
   4027 		 * Wa_1604544889:tgl
   4028 		 */
   4029 		if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
   4030 			flags = 0;
   4031 			flags |= PIPE_CONTROL_CS_STALL;
   4032 			flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
   4033 
   4034 			flags |= PIPE_CONTROL_STORE_DATA_INDEX;
   4035 			flags |= PIPE_CONTROL_QW_WRITE;
   4036 
   4037 			cs = intel_ring_begin(request, 6);
   4038 			if (IS_ERR(cs))
   4039 				return PTR_ERR(cs);
   4040 
   4041 			cs = gen8_emit_pipe_control(cs, flags,
   4042 						    LRC_PPHWSP_SCRATCH_ADDR);
   4043 			intel_ring_advance(request, cs);
   4044 		}
   4045 	}
   4046 
   4047 	return 0;
   4048 }
   4049 
   4050 /*
   4051  * Reserve space for 2 NOOPs at the end of each request to be
   4052  * used as a workaround for not being allowed to do lite
   4053  * restore with HEAD==TAIL (WaIdleLiteRestore).
   4054  */
   4055 static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
   4056 {
   4057 	/* Ensure there's always at least one preemption point per-request. */
   4058 	*cs++ = MI_ARB_CHECK;
   4059 	*cs++ = MI_NOOP;
   4060 	request->wa_tail = intel_ring_offset(request, cs);
   4061 
   4062 	return cs;
   4063 }
   4064 
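         /*
          * Spin with MI_SEMAPHORE_WAIT (polling, wait-for-equal) on the
          * preemption semaphore in the HWSP until it reads zero; the driver
          * raises this semaphore (see ring_set_paused()) when it wants
          * completed-but-still-inflight requests to stall at the end of the
          * ring while the ELSP is being rearranged.
          */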
   4065 static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
   4066 {
   4067 	*cs++ = MI_SEMAPHORE_WAIT |
   4068 		MI_SEMAPHORE_GLOBAL_GTT |
   4069 		MI_SEMAPHORE_POLL |
   4070 		MI_SEMAPHORE_SAD_EQ_SDD;
   4071 	*cs++ = 0;
   4072 	*cs++ = intel_hws_preempt_address(request->engine);
   4073 	*cs++ = 0;
   4074 
   4075 	return cs;
   4076 }
   4077 
   4078 static __always_inline u32*
   4079 gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
   4080 				 u32 *cs)
   4081 {
   4082 	*cs++ = MI_USER_INTERRUPT;
   4083 
   4084 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   4085 	if (intel_engine_has_semaphores(request->engine))
   4086 		cs = emit_preempt_busywait(request, cs);
   4087 
   4088 	request->tail = intel_ring_offset(request, cs);
   4089 	assert_ring_tail_valid(request->ring, request->tail);
   4090 
   4091 	return gen8_emit_wa_tail(request, cs);
   4092 }
   4093 
   4094 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
   4095 {
   4096 	cs = gen8_emit_ggtt_write(cs,
   4097 				  request->fence.seqno,
   4098 				  i915_request_active_timeline(request)->hwsp_offset,
   4099 				  0);
   4100 
   4101 	return gen8_emit_fini_breadcrumb_footer(request, cs);
   4102 }
   4103 
   4104 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
   4105 {
   4106 	cs = gen8_emit_pipe_control(cs,
   4107 				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
   4108 				    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
   4109 				    PIPE_CONTROL_DC_FLUSH_ENABLE,
   4110 				    0);
   4111 
   4112 	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
   4113 	cs = gen8_emit_ggtt_write_rcs(cs,
   4114 				      request->fence.seqno,
   4115 				      i915_request_active_timeline(request)->hwsp_offset,
   4116 				      PIPE_CONTROL_FLUSH_ENABLE |
   4117 				      PIPE_CONTROL_CS_STALL);
   4118 
   4119 	return gen8_emit_fini_breadcrumb_footer(request, cs);
   4120 }
   4121 
   4122 static u32 *
   4123 gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
   4124 {
   4125 	cs = gen8_emit_ggtt_write_rcs(cs,
   4126 				      request->fence.seqno,
   4127 				      i915_request_active_timeline(request)->hwsp_offset,
   4128 				      PIPE_CONTROL_CS_STALL |
   4129 				      PIPE_CONTROL_TILE_CACHE_FLUSH |
   4130 				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
   4131 				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
   4132 				      PIPE_CONTROL_DC_FLUSH_ENABLE |
   4133 				      PIPE_CONTROL_FLUSH_ENABLE);
   4134 
   4135 	return gen8_emit_fini_breadcrumb_footer(request, cs);
   4136 }
   4137 
   4138 /*
   4139  * Note that the CS instruction pre-parser will not stall on the breadcrumb
   4140  * flush and will continue pre-fetching the instructions after it before the
   4141  * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
    4142  * BB_START/END instructions, so, even though we might pre-fetch the preamble
   4143  * of the next request before the memory has been flushed, we're guaranteed that
   4144  * we won't access the batch itself too early.
   4145  * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
   4146  * so, if the current request is modifying an instruction in the next request on
   4147  * the same intel_context, we might pre-fetch and then execute the pre-update
   4148  * instruction. To avoid this, the users of self-modifying code should either
   4149  * disable the parser around the code emitting the memory writes, via a new flag
   4150  * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
   4151  * the in-kernel use-cases we've opted to use a separate context, see
   4152  * reloc_gpu() as an example.
   4153  * All the above applies only to the instructions themselves. Non-inline data
   4154  * used by the instructions is not pre-fetched.
   4155  */
   4156 
   4157 static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
   4158 {
   4159 	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
   4160 		MI_SEMAPHORE_GLOBAL_GTT |
   4161 		MI_SEMAPHORE_POLL |
   4162 		MI_SEMAPHORE_SAD_EQ_SDD;
   4163 	*cs++ = 0;
   4164 	*cs++ = intel_hws_preempt_address(request->engine);
   4165 	*cs++ = 0;
   4166 	*cs++ = 0;
   4167 	*cs++ = MI_NOOP;
   4168 
   4169 	return cs;
   4170 }
   4171 
   4172 static __always_inline u32*
   4173 gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
   4174 {
   4175 	*cs++ = MI_USER_INTERRUPT;
   4176 
   4177 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   4178 	if (intel_engine_has_semaphores(request->engine))
   4179 		cs = gen12_emit_preempt_busywait(request, cs);
   4180 
   4181 	request->tail = intel_ring_offset(request, cs);
   4182 	assert_ring_tail_valid(request->ring, request->tail);
   4183 
   4184 	return gen8_emit_wa_tail(request, cs);
   4185 }
   4186 
   4187 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
   4188 {
   4189 	cs = gen8_emit_ggtt_write(cs,
   4190 				  request->fence.seqno,
   4191 				  i915_request_active_timeline(request)->hwsp_offset,
   4192 				  0);
   4193 
   4194 	return gen12_emit_fini_breadcrumb_footer(request, cs);
   4195 }
   4196 
   4197 static u32 *
   4198 gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
   4199 {
   4200 	cs = gen8_emit_ggtt_write_rcs(cs,
   4201 				      request->fence.seqno,
   4202 				      i915_request_active_timeline(request)->hwsp_offset,
   4203 				      PIPE_CONTROL_CS_STALL |
   4204 				      PIPE_CONTROL_TILE_CACHE_FLUSH |
   4205 				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
   4206 				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
   4207 				      /* Wa_1409600907:tgl */
   4208 				      PIPE_CONTROL_DEPTH_STALL |
   4209 				      PIPE_CONTROL_DC_FLUSH_ENABLE |
   4210 				      PIPE_CONTROL_FLUSH_ENABLE |
   4211 				      PIPE_CONTROL_HDC_PIPELINE_FLUSH);
   4212 
   4213 	return gen12_emit_fini_breadcrumb_footer(request, cs);
   4214 }
   4215 
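         /* Engine is being parked (idled): stop the timeslice and preemption timers. */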
   4216 static void execlists_park(struct intel_engine_cs *engine)
   4217 {
   4218 	cancel_timer(&engine->execlists.timer);
   4219 	cancel_timer(&engine->execlists.preempt);
   4220 }
   4221 
   4222 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
   4223 {
   4224 	engine->submit_request = execlists_submit_request;
   4225 	engine->schedule = i915_schedule;
   4226 	engine->execlists.tasklet.func = execlists_submission_tasklet;
   4227 
   4228 	engine->reset.prepare = execlists_reset_prepare;
   4229 	engine->reset.rewind = execlists_reset_rewind;
   4230 	engine->reset.cancel = execlists_reset_cancel;
   4231 	engine->reset.finish = execlists_reset_finish;
   4232 
   4233 	engine->park = execlists_park;
   4234 	engine->unpark = NULL;
   4235 
   4236 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
   4237 	if (!intel_vgpu_active(engine->i915)) {
   4238 		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
   4239 		if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
   4240 			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
   4241 	}
   4242 
   4243 	if (INTEL_GEN(engine->i915) >= 12)
   4244 		engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
   4245 
   4246 	if (intel_engine_has_preemption(engine))
   4247 		engine->emit_bb_start = gen8_emit_bb_start;
   4248 	else
   4249 		engine->emit_bb_start = gen8_emit_bb_start_noarb;
   4250 }
   4251 
   4252 static void execlists_shutdown(struct intel_engine_cs *engine)
   4253 {
   4254 	/* Synchronise with residual timers and any softirq they raise */
   4255 	del_timer_sync(&engine->execlists.timer);
   4256 	del_timer_sync(&engine->execlists.preempt);
   4257 	tasklet_kill(&engine->execlists.tasklet);
   4258 }
   4259 
   4260 static void execlists_release(struct intel_engine_cs *engine)
   4261 {
   4262 	execlists_shutdown(engine);
   4263 
   4264 	intel_engine_cleanup_common(engine);
   4265 	lrc_destroy_wa_ctx(engine);
   4266 }
   4267 
   4268 static void
   4269 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
   4270 {
    4271 	/* Default vfuncs which can be overridden by each engine. */
   4272 
   4273 	engine->resume = execlists_resume;
   4274 
   4275 	engine->cops = &execlists_context_ops;
   4276 	engine->request_alloc = execlists_request_alloc;
   4277 
   4278 	engine->emit_flush = gen8_emit_flush;
   4279 	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
   4280 	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
   4281 	if (INTEL_GEN(engine->i915) >= 12)
   4282 		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
   4283 
   4284 	engine->set_default_submission = intel_execlists_set_default_submission;
   4285 
   4286 	if (INTEL_GEN(engine->i915) < 11) {
   4287 		engine->irq_enable = gen8_logical_ring_enable_irq;
   4288 		engine->irq_disable = gen8_logical_ring_disable_irq;
   4289 	} else {
   4290 		/*
   4291 		 * TODO: On Gen11 interrupt masks need to be clear
    4292 		 * to allow C6 entry. Keep interrupts enabled all the time
   4293 		 * and take the hit of generating extra interrupts
   4294 		 * until a more refined solution exists.
   4295 		 */
   4296 	}
   4297 }
   4298 
   4299 static inline void
   4300 logical_ring_default_irqs(struct intel_engine_cs *engine)
   4301 {
   4302 	unsigned int shift = 0;
   4303 
   4304 	if (INTEL_GEN(engine->i915) < 11) {
   4305 		const u8 irq_shifts[] = {
   4306 			[RCS0]  = GEN8_RCS_IRQ_SHIFT,
   4307 			[BCS0]  = GEN8_BCS_IRQ_SHIFT,
   4308 			[VCS0]  = GEN8_VCS0_IRQ_SHIFT,
   4309 			[VCS1]  = GEN8_VCS1_IRQ_SHIFT,
   4310 			[VECS0] = GEN8_VECS_IRQ_SHIFT,
   4311 		};
   4312 
   4313 		shift = irq_shifts[engine->id];
   4314 	}
   4315 
   4316 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
   4317 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
   4318 }
   4319 
   4320 static void rcs_submission_override(struct intel_engine_cs *engine)
   4321 {
   4322 	switch (INTEL_GEN(engine->i915)) {
   4323 	case 12:
   4324 		engine->emit_flush = gen12_emit_flush_render;
   4325 		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
   4326 		break;
   4327 	case 11:
   4328 		engine->emit_flush = gen11_emit_flush_render;
   4329 		engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
   4330 		break;
   4331 	default:
   4332 		engine->emit_flush = gen8_emit_flush_render;
   4333 		engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
   4334 		break;
   4335 	}
   4336 }
   4337 
   4338 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
   4339 {
   4340 	struct intel_engine_execlists * const execlists = &engine->execlists;
   4341 	struct drm_i915_private *i915 = engine->i915;
   4342 	struct intel_uncore *uncore = engine->uncore;
   4343 	u32 base = engine->mmio_base;
   4344 
   4345 	i915_sched_init(&engine->execlists);
   4346 
   4347 	tasklet_init(&engine->execlists.tasklet,
   4348 		     execlists_submission_tasklet, (unsigned long)engine);
   4349 	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
   4350 	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
   4351 
   4352 	logical_ring_default_vfuncs(engine);
   4353 	logical_ring_default_irqs(engine);
   4354 
   4355 	if (engine->class == RENDER_CLASS)
   4356 		rcs_submission_override(engine);
   4357 
   4358 	if (intel_init_workaround_bb(engine))
   4359 		/*
   4360 		 * We continue even if we fail to initialize WA batch
    4361 		 * We continue even if we fail to initialize the WA batch,
    4362 		 * because we only expect rare glitches; nothing critical
    4363 		 * enough to prevent us from using the GPU.
   4364 		DRM_ERROR("WA batch buffer initialization failed\n");
   4365 
   4366 	if (HAS_LOGICAL_RING_ELSQ(i915)) {
   4367 #ifdef __NetBSD__
   4368 		execlists->submit_reg = i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
   4369 		execlists->ctrl_reg = i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
   4370 		execlists->bsh = uncore->regs_bsh;
   4371 		execlists->bst = uncore->regs_bst;
   4372 #else
   4373 		execlists->submit_reg = uncore->regs +
   4374 			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
   4375 		execlists->ctrl_reg = uncore->regs +
   4376 			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
   4377 #endif
   4378 	} else {
   4379 #ifdef __NetBSD__
   4380 		execlists->submit_reg = i915_mmio_reg_offset(RING_ELSP(base));
   4381 		execlists->bsh = uncore->regs_bsh;
   4382 		execlists->bst = uncore->regs_bst;
   4383 #else
   4384 		execlists->submit_reg = uncore->regs +
   4385 			i915_mmio_reg_offset(RING_ELSP(base));
   4386 #endif
   4387 	}
   4388 
   4389 	execlists->csb_status =
   4390 		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
   4391 
   4392 	execlists->csb_write =
   4393 		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
   4394 
   4395 	if (INTEL_GEN(i915) < 11)
   4396 		execlists->csb_size = GEN8_CSB_ENTRIES;
   4397 	else
   4398 		execlists->csb_size = GEN11_CSB_ENTRIES;
   4399 
   4400 	reset_csb_pointers(engine);
   4401 
   4402 	/* Finally, take ownership and responsibility for cleanup! */
   4403 	engine->release = execlists_release;
   4404 
   4405 	return 0;
   4406 }
   4407 
   4408 static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
   4409 {
   4410 	u32 indirect_ctx_offset;
   4411 
   4412 	switch (INTEL_GEN(engine->i915)) {
   4413 	default:
   4414 		MISSING_CASE(INTEL_GEN(engine->i915));
   4415 		/* fall through */
   4416 	case 12:
   4417 		indirect_ctx_offset =
   4418 			GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
   4419 		break;
   4420 	case 11:
   4421 		indirect_ctx_offset =
   4422 			GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
   4423 		break;
   4424 	case 10:
   4425 		indirect_ctx_offset =
   4426 			GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
   4427 		break;
   4428 	case 9:
   4429 		indirect_ctx_offset =
   4430 			GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
   4431 		break;
   4432 	case 8:
   4433 		indirect_ctx_offset =
   4434 			GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
   4435 		break;
   4436 	}
   4437 
   4438 	return indirect_ctx_offset;
   4439 }
   4440 
   4441 
   4442 static void init_common_reg_state(u32 * const regs,
   4443 				  const struct intel_engine_cs *engine,
   4444 				  const struct intel_ring *ring,
   4445 				  bool inhibit)
   4446 {
   4447 	u32 ctl;
   4448 
   4449 	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
   4450 	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
   4451 	if (inhibit)
   4452 		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
   4453 	if (INTEL_GEN(engine->i915) < 11)
   4454 		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
   4455 					   CTX_CTRL_RS_CTX_ENABLE);
   4456 	regs[CTX_CONTEXT_CONTROL] = ctl;
   4457 
   4458 	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
   4459 }
   4460 
   4461 static void init_wa_bb_reg_state(u32 * const regs,
   4462 				 const struct intel_engine_cs *engine,
   4463 				 u32 pos_bb_per_ctx)
   4464 {
   4465 	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
   4466 
   4467 	if (wa_ctx->per_ctx.size) {
   4468 		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
   4469 
   4470 		regs[pos_bb_per_ctx] =
   4471 			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
   4472 	}
   4473 
   4474 	if (wa_ctx->indirect_ctx.size) {
   4475 		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
   4476 
   4477 		regs[pos_bb_per_ctx + 2] =
   4478 			(ggtt_offset + wa_ctx->indirect_ctx.offset) |
   4479 			(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
   4480 
   4481 		regs[pos_bb_per_ctx + 4] =
   4482 			intel_lr_indirect_ctx_offset(engine) << 6;
   4483 	}
   4484 }
   4485 
   4486 static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
   4487 {
   4488 	if (i915_vm_is_4lvl(&ppgtt->vm)) {
   4489 		/* 64b PPGTT (48bit canonical)
   4490 		 * PDP0_DESCRIPTOR contains the base address to PML4 and
   4491 		 * other PDP Descriptors are ignored.
   4492 		 */
   4493 		ASSIGN_CTX_PML4(ppgtt, regs);
   4494 	} else {
   4495 		ASSIGN_CTX_PDP(ppgtt, regs, 3);
   4496 		ASSIGN_CTX_PDP(ppgtt, regs, 2);
   4497 		ASSIGN_CTX_PDP(ppgtt, regs, 1);
   4498 		ASSIGN_CTX_PDP(ppgtt, regs, 0);
   4499 	}
   4500 }
   4501 
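         /*
          * A context bound to the GGTT has no full PPGTT of its own; use the
          * aliasing PPGTT hanging off the GGTT instead.
          */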
   4502 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
   4503 {
   4504 	if (i915_is_ggtt(vm))
   4505 		return i915_vm_to_ggtt(vm)->alias;
   4506 	else
   4507 		return i915_vm_to_ppgtt(vm);
   4508 }
   4509 
   4510 static void execlists_init_reg_state(u32 *regs,
   4511 				     const struct intel_context *ce,
   4512 				     const struct intel_engine_cs *engine,
   4513 				     const struct intel_ring *ring,
   4514 				     bool inhibit)
   4515 {
   4516 	/*
   4517 	 * A context is actually a big batch buffer with several
   4518 	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
   4519 	 * values we are setting here are only for the first context restore:
   4520 	 * on a subsequent save, the GPU will recreate this batchbuffer with new
   4521 	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
   4522 	 * we are not initializing here).
   4523 	 *
   4524 	 * Must keep consistent with virtual_update_register_offsets().
   4525 	 */
   4526 	set_offsets(regs, reg_offsets(engine), engine, inhibit);
   4527 
   4528 	init_common_reg_state(regs, engine, ring, inhibit);
   4529 	init_ppgtt_reg_state(regs, vm_alias(ce->vm));
   4530 
   4531 	init_wa_bb_reg_state(regs, engine,
   4532 			     INTEL_GEN(engine->i915) >= 12 ?
   4533 			     GEN12_CTX_BB_PER_CTX_PTR :
   4534 			     CTX_BB_PER_CTX_PTR);
   4535 
   4536 	__reset_stop_ring(regs, engine);
   4537 }
   4538 
   4539 static int
   4540 populate_lr_context(struct intel_context *ce,
   4541 		    struct drm_i915_gem_object *ctx_obj,
   4542 		    struct intel_engine_cs *engine,
   4543 		    struct intel_ring *ring)
   4544 {
   4545 	bool inhibit = true;
   4546 	void *vaddr;
   4547 	int ret;
   4548 
   4549 	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
   4550 	if (IS_ERR(vaddr)) {
   4551 		ret = PTR_ERR(vaddr);
   4552 		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
   4553 		return ret;
   4554 	}
   4555 
   4556 	set_redzone(vaddr, engine);
   4557 
   4558 	if (engine->default_state) {
   4559 		void *defaults;
   4560 
   4561 		defaults = i915_gem_object_pin_map(engine->default_state,
   4562 						   I915_MAP_WB);
   4563 		if (IS_ERR(defaults)) {
   4564 			ret = PTR_ERR(defaults);
   4565 			goto err_unpin_ctx;
   4566 		}
   4567 
   4568 		memcpy(vaddr, defaults, engine->context_size);
   4569 		i915_gem_object_unpin_map(engine->default_state);
   4570 		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
   4571 		inhibit = false;
   4572 	}
   4573 
   4574 	/* The second page of the context object contains some fields which must
   4575 	 * be set up prior to the first execution. */
   4576 	execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
   4577 				 ce, engine, ring, inhibit);
   4578 
   4579 	ret = 0;
   4580 err_unpin_ctx:
   4581 	__i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
   4582 	i915_gem_object_unpin_map(ctx_obj);
   4583 	return ret;
   4584 }
   4585 
   4586 static int __execlists_context_alloc(struct intel_context *ce,
   4587 				     struct intel_engine_cs *engine)
   4588 {
   4589 	struct drm_i915_gem_object *ctx_obj;
   4590 	struct intel_ring *ring;
   4591 	struct i915_vma *vma;
   4592 	u32 context_size;
   4593 	int ret;
   4594 
   4595 	GEM_BUG_ON(ce->state);
   4596 	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
   4597 
   4598 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
   4599 		context_size += I915_GTT_PAGE_SIZE; /* for redzone */
   4600 
   4601 	ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
   4602 	if (IS_ERR(ctx_obj))
   4603 		return PTR_ERR(ctx_obj);
   4604 
   4605 	vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
   4606 	if (IS_ERR(vma)) {
   4607 		ret = PTR_ERR(vma);
   4608 		goto error_deref_obj;
   4609 	}
   4610 
   4611 	if (!ce->timeline) {
   4612 		struct intel_timeline *tl;
   4613 
   4614 		tl = intel_timeline_create(engine->gt, NULL);
   4615 		if (IS_ERR(tl)) {
   4616 			ret = PTR_ERR(tl);
   4617 			goto error_deref_obj;
   4618 		}
   4619 
   4620 		ce->timeline = tl;
   4621 	}
   4622 
   4623 	ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
   4624 	if (IS_ERR(ring)) {
   4625 		ret = PTR_ERR(ring);
   4626 		goto error_deref_obj;
   4627 	}
   4628 
   4629 	ret = populate_lr_context(ce, ctx_obj, engine, ring);
   4630 	if (ret) {
   4631 		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
   4632 		goto error_ring_free;
   4633 	}
   4634 
   4635 	ce->ring = ring;
   4636 	ce->state = vma;
   4637 
   4638 	return 0;
   4639 
   4640 error_ring_free:
   4641 	intel_ring_put(ring);
   4642 error_deref_obj:
   4643 	i915_gem_object_put(ctx_obj);
   4644 	return ret;
   4645 }
   4646 
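         /*
          * The virtual engine parks its pending request(s) on the requests
          * list of its default priolist; this helper returns that list head.
          */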
   4647 static struct list_head *virtual_queue(struct virtual_engine *ve)
   4648 {
   4649 	return &ve->base.execlists.default_priolist.requests[0];
   4650 }
   4651 
   4652 static void virtual_context_destroy(struct kref *kref)
   4653 {
   4654 	struct virtual_engine *ve =
   4655 		container_of(kref, typeof(*ve), context.ref);
   4656 	unsigned int n;
   4657 
   4658 	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
   4659 	GEM_BUG_ON(ve->request);
   4660 	GEM_BUG_ON(ve->context.inflight);
   4661 
   4662 	for (n = 0; n < ve->num_siblings; n++) {
   4663 		struct intel_engine_cs *sibling = ve->siblings[n];
   4664 		struct rb_node *node = &ve->nodes[sibling->id].rb;
   4665 		unsigned long flags;
   4666 
   4667 		if (RB_EMPTY_NODE(node))
   4668 			continue;
   4669 
   4670 		spin_lock_irqsave(&sibling->active.lock, flags);
   4671 
   4672 		/* Detachment is lazily performed in the execlists tasklet */
   4673 		if (!RB_EMPTY_NODE(node))
   4674 			rb_erase_cached(node, &sibling->execlists.virtual);
   4675 
   4676 		spin_unlock_irqrestore(&sibling->active.lock, flags);
   4677 	}
   4678 	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
   4679 
   4680 	if (ve->context.state)
   4681 		__execlists_context_fini(&ve->context);
   4682 	intel_context_fini(&ve->context);
   4683 
   4684 	kfree(ve->bonds);
   4685 	kfree(ve);
   4686 }
   4687 
   4688 static void virtual_engine_initial_hint(struct virtual_engine *ve)
   4689 {
   4690 	int swp;
   4691 
   4692 	/*
   4693 	 * Pick a random sibling on starting to help spread the load around.
   4694 	 *
   4695 	 * New contexts are typically created with exactly the same order
   4696 	 * of siblings, and often started in batches. Due to the way we iterate
   4697 	 * the array of sibling when submitting requests, sibling[0] is
    4698 	 * the array of siblings when submitting requests, sibling[0] is
   4699 	 * randomised across the system, we also help spread the load by the
   4700 	 * first engine we inspect being different each time.
   4701 	 *
   4702 	 * NB This does not force us to execute on this engine, it will just
   4703 	 * typically be the first we inspect for submission.
   4704 	 */
   4705 	swp = prandom_u32_max(ve->num_siblings);
   4706 	if (!swp)
   4707 		return;
   4708 
   4709 	swap(ve->siblings[swp], ve->siblings[0]);
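         	/*
         	 * The register offsets baked into the context image refer to a
         	 * particular engine; if the new siblings[0] cannot use relative
         	 * MMIO addressing, rewrite them to match it.
         	 */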
   4710 	if (!intel_engine_has_relative_mmio(ve->siblings[0]))
   4711 		virtual_update_register_offsets(ve->context.lrc_reg_state,
   4712 						ve->siblings[0]);
   4713 }
   4714 
   4715 static int virtual_context_alloc(struct intel_context *ce)
   4716 {
   4717 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
   4718 
   4719 	return __execlists_context_alloc(ce, ve->siblings[0]);
   4720 }
   4721 
   4722 static int virtual_context_pin(struct intel_context *ce)
   4723 {
   4724 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
   4725 	int err;
   4726 
   4727 	/* Note: we must use a real engine class for setting up reg state */
   4728 	err = __execlists_context_pin(ce, ve->siblings[0]);
   4729 	if (err)
   4730 		return err;
   4731 
   4732 	virtual_engine_initial_hint(ve);
   4733 	return 0;
   4734 }
   4735 
   4736 static void virtual_context_enter(struct intel_context *ce)
   4737 {
   4738 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
   4739 	unsigned int n;
   4740 
   4741 	for (n = 0; n < ve->num_siblings; n++)
   4742 		intel_engine_pm_get(ve->siblings[n]);
   4743 
   4744 	intel_timeline_enter(ce->timeline);
   4745 }
   4746 
   4747 static void virtual_context_exit(struct intel_context *ce)
   4748 {
   4749 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
   4750 	unsigned int n;
   4751 
   4752 	intel_timeline_exit(ce->timeline);
   4753 
   4754 	for (n = 0; n < ve->num_siblings; n++)
   4755 		intel_engine_pm_put(ve->siblings[n]);
   4756 }
   4757 
   4758 static const struct intel_context_ops virtual_context_ops = {
   4759 	.alloc = virtual_context_alloc,
   4760 
   4761 	.pin = virtual_context_pin,
   4762 	.unpin = execlists_context_unpin,
   4763 
   4764 	.enter = virtual_context_enter,
   4765 	.exit = virtual_context_exit,
   4766 
   4767 	.destroy = virtual_context_destroy,
   4768 };
   4769 
   4770 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
   4771 {
   4772 	struct i915_request *rq;
   4773 	intel_engine_mask_t mask;
   4774 
   4775 	rq = READ_ONCE(ve->request);
   4776 	if (!rq)
   4777 		return 0;
   4778 
   4779 	/* The rq is ready for submission; rq->execution_mask is now stable. */
   4780 	mask = rq->execution_mask;
   4781 	if (unlikely(!mask)) {
    4782 		/* Invalid selection; flag the error and fall back to siblings[0] */
   4783 		i915_request_skip(rq, -ENODEV);
   4784 		mask = ve->siblings[0]->mask;
   4785 	}
   4786 
   4787 	ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
   4788 		     rq->fence.context, rq->fence.seqno,
   4789 		     mask, ve->base.execlists.queue_priority_hint);
   4790 
   4791 	return mask;
   4792 }
   4793 
   4794 static void virtual_submission_tasklet(unsigned long data)
   4795 {
   4796 	struct virtual_engine * const ve = (struct virtual_engine *)data;
   4797 	const int prio = ve->base.execlists.queue_priority_hint;
   4798 	intel_engine_mask_t mask;
   4799 	unsigned int n;
   4800 
   4801 	rcu_read_lock();
   4802 	mask = virtual_submission_mask(ve);
   4803 	rcu_read_unlock();
   4804 	if (unlikely(!mask))
   4805 		return;
   4806 
   4807 	local_irq_disable();
   4808 	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
   4809 		struct intel_engine_cs *sibling = ve->siblings[n];
   4810 		struct ve_node * const node = &ve->nodes[sibling->id];
   4811 		struct rb_node **parent, *rb;
   4812 		bool first;
   4813 
   4814 		if (unlikely(!(mask & sibling->mask))) {
   4815 			if (!RB_EMPTY_NODE(&node->rb)) {
   4816 				spin_lock(&sibling->active.lock);
   4817 				rb_erase_cached(&node->rb,
   4818 						&sibling->execlists.virtual);
   4819 				RB_CLEAR_NODE(&node->rb);
   4820 				spin_unlock(&sibling->active.lock);
   4821 			}
   4822 			continue;
   4823 		}
   4824 
   4825 		spin_lock(&sibling->active.lock);
   4826 
   4827 		if (!RB_EMPTY_NODE(&node->rb)) {
   4828 			/*
   4829 			 * Cheat and avoid rebalancing the tree if we can
   4830 			 * reuse this node in situ.
   4831 			 */
   4832 			first = rb_first_cached(&sibling->execlists.virtual) ==
   4833 				&node->rb;
   4834 			if (prio == node->prio || (prio > node->prio && first))
   4835 				goto submit_engine;
   4836 
   4837 			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
   4838 		}
   4839 
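         		/*
         		 * Insert our node into this sibling's tree of pending
         		 * virtual requests, ordered with higher priority to the
         		 * left; "first" records whether we become the new
         		 * leftmost, i.e. the next candidate for dequeue.
         		 */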
   4840 		rb = NULL;
   4841 		first = true;
   4842 		parent = &sibling->execlists.virtual.rb_root.rb_node;
   4843 		while (*parent) {
   4844 			struct ve_node *other;
   4845 
   4846 			rb = *parent;
   4847 			other = rb_entry(rb, typeof(*other), rb);
   4848 			if (prio > other->prio) {
   4849 				parent = &rb->rb_left;
   4850 			} else {
   4851 				parent = &rb->rb_right;
   4852 				first = false;
   4853 			}
   4854 		}
   4855 
   4856 		rb_link_node(&node->rb, rb, parent);
   4857 		rb_insert_color_cached(&node->rb,
   4858 				       &sibling->execlists.virtual,
   4859 				       first);
   4860 
   4861 submit_engine:
   4862 		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
   4863 		node->prio = prio;
   4864 		if (first && prio > sibling->execlists.queue_priority_hint) {
   4865 			sibling->execlists.queue_priority_hint = prio;
   4866 			tasklet_hi_schedule(&sibling->execlists.tasklet);
   4867 		}
   4868 
   4869 		spin_unlock(&sibling->active.lock);
   4870 	}
   4871 	local_irq_enable();
   4872 }
   4873 
   4874 static void virtual_submit_request(struct i915_request *rq)
   4875 {
   4876 	struct virtual_engine *ve = to_virtual_engine(rq->engine);
   4877 	struct i915_request *old;
   4878 	unsigned long flags;
   4879 
   4880 	ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
   4881 		     rq->fence.context,
   4882 		     rq->fence.seqno);
   4883 
   4884 	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
   4885 
   4886 	spin_lock_irqsave(&ve->base.active.lock, flags);
   4887 
   4888 	old = ve->request;
   4889 	if (old) { /* background completion event from preempt-to-busy */
   4890 		GEM_BUG_ON(!i915_request_completed(old));
   4891 		__i915_request_submit(old);
   4892 		i915_request_put(old);
   4893 	}
   4894 
   4895 	if (i915_request_completed(rq)) {
   4896 		__i915_request_submit(rq);
   4897 
   4898 		ve->base.execlists.queue_priority_hint = INT_MIN;
   4899 		ve->request = NULL;
   4900 	} else {
   4901 		ve->base.execlists.queue_priority_hint = rq_prio(rq);
   4902 		ve->request = i915_request_get(rq);
   4903 
   4904 		GEM_BUG_ON(!list_empty(virtual_queue(ve)));
   4905 		list_move_tail(&rq->sched.link, virtual_queue(ve));
   4906 
   4907 		tasklet_schedule(&ve->base.execlists.tasklet);
   4908 	}
   4909 
   4910 	spin_unlock_irqrestore(&ve->base.active.lock, flags);
   4911 }
   4912 
   4913 static struct ve_bond *
   4914 virtual_find_bond(struct virtual_engine *ve,
   4915 		  const struct intel_engine_cs *master)
   4916 {
   4917 	int i;
   4918 
   4919 	for (i = 0; i < ve->num_bonds; i++) {
   4920 		if (ve->bonds[i].master == master)
   4921 			return &ve->bonds[i];
   4922 	}
   4923 
   4924 	return NULL;
   4925 }
   4926 
   4927 static void
   4928 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
   4929 {
   4930 	struct virtual_engine *ve = to_virtual_engine(rq->engine);
   4931 	intel_engine_mask_t allowed, exec;
   4932 	struct ve_bond *bond;
   4933 
   4934 	allowed = ~to_request(signal)->engine->mask;
   4935 
   4936 	bond = virtual_find_bond(ve, to_request(signal)->engine);
   4937 	if (bond)
   4938 		allowed &= bond->sibling_mask;
   4939 
   4940 	/* Restrict the bonded request to run on only the available engines */
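         	/*
         	 * try_cmpxchg() reloads exec on failure, so the loop below
         	 * performs an atomic AND even if execution_mask is updated
         	 * concurrently.
         	 */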
   4941 	exec = READ_ONCE(rq->execution_mask);
   4942 	while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
   4943 		;
   4944 
   4945 	/* Prevent the master from being re-run on the bonded engines */
   4946 	to_request(signal)->execution_mask &= ~allowed;
   4947 }
   4948 
   4949 struct intel_context *
   4950 intel_execlists_create_virtual(struct intel_engine_cs **siblings,
   4951 			       unsigned int count)
   4952 {
   4953 	struct virtual_engine *ve;
   4954 	unsigned int n;
   4955 	int err;
   4956 
   4957 	if (count == 0)
   4958 		return ERR_PTR(-EINVAL);
   4959 
   4960 	if (count == 1)
   4961 		return intel_context_create(siblings[0]);
   4962 
   4963 	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
   4964 	if (!ve)
   4965 		return ERR_PTR(-ENOMEM);
   4966 
   4967 	ve->base.i915 = siblings[0]->i915;
   4968 	ve->base.gt = siblings[0]->gt;
   4969 	ve->base.uncore = siblings[0]->uncore;
   4970 	ve->base.id = -1;
   4971 
   4972 	ve->base.class = OTHER_CLASS;
   4973 	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
   4974 	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
   4975 	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
   4976 
   4977 	/*
   4978 	 * The decision on whether to submit a request using semaphores
   4979 	 * depends on the saturated state of the engine. We only compute
    4980 	 * this during HW submission of the request, and we need this
   4981 	 * state to be globally applied to all requests being submitted
   4982 	 * to this engine. Virtual engines encompass more than one physical
    4983 	 * engine, so we cannot accurately tell in advance whether any of
    4984 	 * those engines is already saturated and hence whether we can afford
    4985 	 * the priority penalty of using a semaphore -- if we are the only
   4986 	 * context using semaphores after all other clients have stopped, we
   4987 	 * will be starved on the saturated system. Such a global switch for
   4988 	 * semaphores is less than ideal, but alas is the current compromise.
   4989 	 */
   4990 	ve->base.saturated = ALL_ENGINES;
   4991 
   4992 	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
   4993 
   4994 	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
   4995 	intel_engine_init_breadcrumbs(&ve->base);
   4996 	intel_engine_init_execlists(&ve->base);
   4997 
   4998 	ve->base.cops = &virtual_context_ops;
   4999 	ve->base.request_alloc = execlists_request_alloc;
   5000 
   5001 	ve->base.schedule = i915_schedule;
   5002 	ve->base.submit_request = virtual_submit_request;
   5003 	ve->base.bond_execute = virtual_bond_execute;
   5004 
   5005 	INIT_LIST_HEAD(virtual_queue(ve));
   5006 	ve->base.execlists.queue_priority_hint = INT_MIN;
   5007 	tasklet_init(&ve->base.execlists.tasklet,
   5008 		     virtual_submission_tasklet,
   5009 		     (unsigned long)ve);
   5010 
   5011 	intel_context_init(&ve->context, &ve->base);
   5012 
   5013 	for (n = 0; n < count; n++) {
   5014 		struct intel_engine_cs *sibling = siblings[n];
   5015 
   5016 		GEM_BUG_ON(!is_power_of_2(sibling->mask));
   5017 		if (sibling->mask & ve->base.mask) {
   5018 			DRM_DEBUG("duplicate %s entry in load balancer\n",
   5019 				  sibling->name);
   5020 			err = -EINVAL;
   5021 			goto err_put;
   5022 		}
   5023 
   5024 		/*
   5025 		 * The virtual engine implementation is tightly coupled to
    5026 		 * the execlists backend -- we push requests directly
   5027 		 * into a tree inside each physical engine. We could support
   5028 		 * layering if we handle cloning of the requests and
   5029 		 * submitting a copy into each backend.
   5030 		 */
   5031 		if (sibling->execlists.tasklet.func !=
   5032 		    execlists_submission_tasklet) {
   5033 			err = -ENODEV;
   5034 			goto err_put;
   5035 		}
   5036 
   5037 		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
   5038 		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
   5039 
   5040 		ve->siblings[ve->num_siblings++] = sibling;
   5041 		ve->base.mask |= sibling->mask;
   5042 
   5043 		/*
   5044 		 * All physical engines must be compatible for their emission
   5045 		 * functions (as we build the instructions during request
   5046 		 * construction and do not alter them before submission
   5047 		 * on the physical engine). We use the engine class as a guide
   5048 		 * here, although that could be refined.
   5049 		 */
   5050 		if (ve->base.class != OTHER_CLASS) {
   5051 			if (ve->base.class != sibling->class) {
   5052 				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
   5053 					  sibling->class, ve->base.class);
   5054 				err = -EINVAL;
   5055 				goto err_put;
   5056 			}
   5057 			continue;
   5058 		}
   5059 
   5060 		ve->base.class = sibling->class;
   5061 		ve->base.uabi_class = sibling->uabi_class;
   5062 		snprintf(ve->base.name, sizeof(ve->base.name),
   5063 			 "v%dx%d", ve->base.class, count);
   5064 		ve->base.context_size = sibling->context_size;
   5065 
   5066 		ve->base.emit_bb_start = sibling->emit_bb_start;
   5067 		ve->base.emit_flush = sibling->emit_flush;
   5068 		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
   5069 		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
   5070 		ve->base.emit_fini_breadcrumb_dw =
   5071 			sibling->emit_fini_breadcrumb_dw;
   5072 
   5073 		ve->base.flags = sibling->flags;
   5074 	}
   5075 
   5076 	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
   5077 
   5078 	return &ve->context;
   5079 
   5080 err_put:
   5081 	intel_context_put(&ve->context);
   5082 	return ERR_PTR(err);
   5083 }
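
         /*
          * Illustrative sketch only (not taken from this file): a caller holding
          * two sibling engines of the same class, here the placeholder pointers
          * vcs0 and vcs1, could build a load-balancing context along these lines:
          *
          *	struct intel_engine_cs *siblings[] = { vcs0, vcs1 };
          *	struct intel_context *ce;
          *
          *	ce = intel_execlists_create_virtual(siblings, ARRAY_SIZE(siblings));
          *	if (IS_ERR(ce))
          *		return PTR_ERR(ce);
          *
          * With count == 1 the call degenerates to intel_context_create() on the
          * lone sibling, as handled above.
          */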
   5084 
   5085 struct intel_context *
   5086 intel_execlists_clone_virtual(struct intel_engine_cs *src)
   5087 {
   5088 	struct virtual_engine *se = to_virtual_engine(src);
   5089 	struct intel_context *dst;
   5090 
   5091 	dst = intel_execlists_create_virtual(se->siblings,
   5092 					     se->num_siblings);
   5093 	if (IS_ERR(dst))
   5094 		return dst;
   5095 
   5096 	if (se->num_bonds) {
   5097 		struct virtual_engine *de = to_virtual_engine(dst->engine);
   5098 
   5099 		de->bonds = kmemdup(se->bonds,
   5100 				    sizeof(*se->bonds) * se->num_bonds,
   5101 				    GFP_KERNEL);
   5102 		if (!de->bonds) {
   5103 			intel_context_put(dst);
   5104 			return ERR_PTR(-ENOMEM);
   5105 		}
   5106 
   5107 		de->num_bonds = se->num_bonds;
   5108 	}
   5109 
   5110 	return dst;
   5111 }
   5112 
   5113 int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
   5114 				     const struct intel_engine_cs *master,
   5115 				     const struct intel_engine_cs *sibling)
   5116 {
   5117 	struct virtual_engine *ve = to_virtual_engine(engine);
   5118 	struct ve_bond *bond;
   5119 	int n;
   5120 
   5121 	/* Sanity check the sibling is part of the virtual engine */
   5122 	for (n = 0; n < ve->num_siblings; n++)
   5123 		if (sibling == ve->siblings[n])
   5124 			break;
   5125 	if (n == ve->num_siblings)
   5126 		return -EINVAL;
   5127 
   5128 	bond = virtual_find_bond(ve, master);
   5129 	if (bond) {
   5130 		bond->sibling_mask |= sibling->mask;
   5131 		return 0;
   5132 	}
   5133 
   5134 	bond = krealloc(ve->bonds,
   5135 			sizeof(*bond) * (ve->num_bonds + 1),
   5136 			GFP_KERNEL);
   5137 	if (!bond)
   5138 		return -ENOMEM;
   5139 
   5140 	bond[ve->num_bonds].master = master;
   5141 	bond[ve->num_bonds].sibling_mask = sibling->mask;
   5142 
   5143 	ve->bonds = bond;
   5144 	ve->num_bonds++;
   5145 
   5146 	return 0;
   5147 }
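
         /*
          * Illustrative sketch only (not taken from this file): given the virtual
          * context ce returned by intel_execlists_create_virtual() and the
          * placeholder engines master0 and vcs1, a caller could record that a
          * request bonded to master0 may run on vcs1 with:
          *
          *	err = intel_virtual_engine_attach_bond(ce->engine, master0, vcs1);
          *	if (err)
          *		return err;
          *
          * virtual_bond_execute() then narrows the bonded request's
          * execution_mask to the siblings attached for that master, excluding
          * the master's own engine.
          */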
   5148 
   5149 struct intel_engine_cs *
   5150 intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
   5151 				 unsigned int sibling)
   5152 {
   5153 	struct virtual_engine *ve = to_virtual_engine(engine);
   5154 
   5155 	if (sibling >= ve->num_siblings)
   5156 		return NULL;
   5157 
   5158 	return ve->siblings[sibling];
   5159 }
   5160 
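         /*
          * Dump this engine's requests in three groups, all under the engine's
          * active lock: requests already submitted to HW ("E"), requests waiting
          * in the priority queue ("Q"), and requests parked on virtual engines
          * that have this engine as a sibling ("V"). Each group prints at most
          * max entries, eliding the middle of longer lists.
          */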
   5161 void intel_execlists_show_requests(struct intel_engine_cs *engine,
   5162 				   struct drm_printer *m,
   5163 				   void (*show_request)(struct drm_printer *m,
   5164 							struct i915_request *rq,
   5165 							const char *prefix),
   5166 				   unsigned int max)
   5167 {
   5168 	const struct intel_engine_execlists *execlists = &engine->execlists;
   5169 	struct i915_request *rq, *last;
   5170 	unsigned long flags;
   5171 	unsigned int count;
   5172 	struct rb_node *rb;
   5173 
   5174 	spin_lock_irqsave(&engine->active.lock, flags);
   5175 
   5176 	last = NULL;
   5177 	count = 0;
   5178 	list_for_each_entry(rq, &engine->active.requests, sched.link) {
   5179 		if (count++ < max - 1)
   5180 			show_request(m, rq, "\t\tE ");
   5181 		else
   5182 			last = rq;
   5183 	}
   5184 	if (last) {
   5185 		if (count > max) {
   5186 			drm_printf(m,
   5187 				   "\t\t...skipping %d executing requests...\n",
   5188 				   count - max);
   5189 		}
   5190 		show_request(m, last, "\t\tE ");
   5191 	}
   5192 
   5193 	last = NULL;
   5194 	count = 0;
   5195 	if (execlists->queue_priority_hint != INT_MIN)
   5196 		drm_printf(m, "\t\tQueue priority hint: %d\n",
   5197 			   execlists->queue_priority_hint);
   5198 	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
   5199 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
   5200 		int i;
   5201 
   5202 		priolist_for_each_request(rq, p, i) {
   5203 			if (count++ < max - 1)
   5204 				show_request(m, rq, "\t\tQ ");
   5205 			else
   5206 				last = rq;
   5207 		}
   5208 	}
   5209 	if (last) {
   5210 		if (count > max) {
   5211 			drm_printf(m,
   5212 				   "\t\t...skipping %d queued requests...\n",
   5213 				   count - max);
   5214 		}
   5215 		show_request(m, last, "\t\tQ ");
   5216 	}
   5217 
   5218 	last = NULL;
   5219 	count = 0;
   5220 	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
   5221 		struct virtual_engine *ve =
   5222 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
   5223 		struct i915_request *rq = READ_ONCE(ve->request);
   5224 
   5225 		if (rq) {
   5226 			if (count++ < max - 1)
   5227 				show_request(m, rq, "\t\tV ");
   5228 			else
   5229 				last = rq;
   5230 		}
   5231 	}
   5232 	if (last) {
   5233 		if (count > max) {
   5234 			drm_printf(m,
   5235 				   "\t\t...skipping %d virtual requests...\n",
   5236 				   count - max);
   5237 		}
   5238 		show_request(m, last, "\t\tV ");
   5239 	}
   5240 
   5241 	spin_unlock_irqrestore(&engine->active.lock, flags);
   5242 }
   5243 
   5244 void intel_lr_context_reset(struct intel_engine_cs *engine,
   5245 			    struct intel_context *ce,
   5246 			    u32 head,
   5247 			    bool scrub)
   5248 {
   5249 	GEM_BUG_ON(!intel_context_is_pinned(ce));
   5250 
   5251 	/*
   5252 	 * We want a simple context + ring to execute the breadcrumb update.
   5253 	 * We cannot rely on the context being intact across the GPU hang,
   5254 	 * so clear it and rebuild just what we need for the breadcrumb.
   5255 	 * All pending requests for this context will be zapped, and any
   5256 	 * future request will be after userspace has had the opportunity
   5257 	 * to recreate its own state.
   5258 	 */
   5259 	if (scrub)
   5260 		restore_default_state(ce, engine);
   5261 
   5262 	/* Rerun the request; its payload has been neutered (if guilty). */
   5263 	__execlists_update_reg_state(ce, engine, head);
   5264 }
   5265 
   5266 bool
   5267 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
   5268 {
   5269 	return engine->set_default_submission ==
   5270 	       intel_execlists_set_default_submission;
   5271 }
   5272 
   5273 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
   5274 #include "selftest_lrc.c"
   5275 #endif
   5276