      1 /*	$NetBSD: intel_lrc.c,v 1.4 2021/12/19 11:46:47 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright © 2014 Intel Corporation
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the next
     14  * paragraph) shall be included in all copies or substantial portions of the
     15  * Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     23  * IN THE SOFTWARE.
     24  *
     25  * Authors:
     26  *    Ben Widawsky <ben@bwidawsk.net>
     27  *    Michel Thierry <michel.thierry@intel.com>
     28  *    Thomas Daniel <thomas.daniel@intel.com>
     29  *    Oscar Mateo <oscar.mateo@intel.com>
     30  *
     31  */
     32 
     33 /**
     34  * DOC: Logical Rings, Logical Ring Contexts and Execlists
     35  *
     36  * Motivation:
     37  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
     38  * These expanded contexts enable a number of new abilities, especially
     39  * "Execlists" (also implemented in this file).
     40  *
     41  * One of the main differences with the legacy HW contexts is that logical
     42  * ring contexts incorporate many more things to the context's state, like
     43  * PDPs or ringbuffer control registers:
     44  *
     45  * The reason why PDPs are included in the context is straightforward: as
     46  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
     47  * contained there means you don't need to do a ppgtt->switch_mm yourself;
     48  * instead, the GPU will do it for you on the context switch.
     49  *
     50  * But what about the ringbuffer control registers (head, tail, etc.)?
     51  * Shouldn't we just need a set of those per engine command streamer? This is
     52  * where the name "Logical Rings" starts to make sense: by virtualizing the
     53  * rings, the engine cs shifts to a new "ring buffer" with every context
     54  * switch. When you want to submit a workload to the GPU you: A) choose your
     55  * context, B) find its appropriate virtualized ring, C) write commands to it
     56  * and then, finally, D) tell the GPU to switch to that context.
     57  *
     58  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
     59  * to a context is via a context execution list, ergo "Execlists".
     60  *
     61  * LRC implementation:
     62  * Regarding the creation of contexts, we have:
     63  *
     64  * - One global default context.
     65  * - One local default context for each opened fd.
     66  * - One local extra context for each context create ioctl call.
     67  *
     68  * Now that ringbuffers belong per-context (and not per-engine, like before)
     69  * and that contexts are uniquely tied to a given engine (and not reusable,
     70  * like before) we need:
     71  *
     72  * - One ringbuffer per-engine inside each context.
     73  * - One backing object per-engine inside each context.
     74  *
     75  * The global default context starts its life with these new objects fully
     76  * allocated and populated. The local default context for each opened fd is
     77  * more complex, because we don't know at creation time which engine is going
     78  * to use them. To handle this, we have implemented a deferred creation of LR
     79  * contexts:
     80  *
     81  * The local context starts its life as a hollow or blank holder, that only
     82  * gets populated for a given engine once we receive an execbuffer. If later
     83  * on we receive another execbuffer ioctl for the same context but a different
     84  * engine, we allocate/populate a new ringbuffer and context backing object and
     85  * so on.
     86  *
     87  * Finally, regarding local contexts created using the ioctl call: as they are
     88  * only allowed with the render ring, we can allocate & populate them right
     89  * away (no need to defer anything, at least for now).
     90  *
     91  * Execlists implementation:
     92  * Execlists are the new method by which, on gen8+ hardware, workloads are
     93  * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
     94  * This method works as follows:
     95  *
     96  * When a request is committed, its commands (the BB start and any leading or
     97  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
     98  * for the appropriate context. The tail pointer in the hardware context is not
     99  * updated at this time, but instead, kept by the driver in the ringbuffer
    100  * structure. A structure representing this request is added to a request queue
    101  * for the appropriate engine: this structure contains a copy of the context's
    102  * tail after the request was written to the ring buffer and a pointer to the
    103  * context itself.
    104  *
    105  * If the engine's request queue was empty before the request was added, the
    106  * queue is processed immediately. Otherwise the queue will be processed during
    107  * a context switch interrupt. In any case, elements on the queue will get sent
    108  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
    109  * globally unique 20-bit submission ID.
    110  *
    111  * When execution of a request completes, the GPU updates the context status
    112  * buffer with a context complete event and generates a context switch interrupt.
    113  * During the interrupt handling, the driver examines the events in the buffer:
    114  * for each context complete event, if the announced ID matches that on the head
    115  * of the request queue, then that request is retired and removed from the queue.
    116  *
    117  * After processing, if any requests were retired and the queue is not empty
    118  * then a new execution list can be submitted. The two requests at the front of
    119  * the queue are next to be submitted but since a context may not occur twice in
    120  * an execution list, if subsequent requests have the same ID as the first then
    121  * the two requests must be combined. This is done simply by discarding requests
    122  * at the head of the queue until either only one request is left (in which case
    123  * we use a NULL second context) or the first two requests have unique IDs.
    124  *
    125  * By always executing the first two requests in the queue the driver ensures
    126  * that the GPU is kept as busy as possible. In the case where a single context
    127  * completes but a second context is still executing, the request for this second
    128  * context will be at the head of the queue when we remove the first one. This
    129  * request will then be resubmitted along with a new request for a different context,
    130  * which will cause the hardware to continue executing the second request and queue
    131  * the new request (the GPU detects the condition of a context getting preempted
    132  * with the same context and optimizes the context switch flow by not doing
    133  * preemption, but just sampling the new tail pointer).
    134  *
    135  */
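/*
 * A minimal sketch of the coalescing rule described above, using names
 * defined later in this file; pop()/peek() are hypothetical helpers and
 * the real logic lives in the dequeue/submission routines below:
 *
 *	port[0] = pop(queue);
 *	while ((rq = peek(queue)) && can_merge_ctx(port[0]->context, rq->context))
 *		port[0] = pop(queue);		-- same context: coalesce
 *	port[1] = rq;				-- may be NULL
 *	execlists_submit_ports(engine);		-- write both ports to the ELSP
 */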
    136 #include <sys/cdefs.h>
    137 __KERNEL_RCSID(0, "$NetBSD: intel_lrc.c,v 1.4 2021/12/19 11:46:47 riastradh Exp $");
    138 
    139 #include <linux/interrupt.h>
    140 
    141 #include "i915_drv.h"
    142 #include "i915_perf.h"
    143 #include "i915_trace.h"
    144 #include "i915_vgpu.h"
    145 #include "intel_context.h"
    146 #include "intel_engine_pm.h"
    147 #include "intel_gt.h"
    148 #include "intel_gt_pm.h"
    149 #include "intel_gt_requests.h"
    150 #include "intel_lrc_reg.h"
    151 #include "intel_mocs.h"
    152 #include "intel_reset.h"
    153 #include "intel_ring.h"
    154 #include "intel_workarounds.h"
    155 
    156 #define RING_EXECLIST_QFULL		(1 << 0x2)
    157 #define RING_EXECLIST1_VALID		(1 << 0x3)
    158 #define RING_EXECLIST0_VALID		(1 << 0x4)
    159 #define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE)
    160 #define RING_EXECLIST1_ACTIVE		(1 << 0x11)
    161 #define RING_EXECLIST0_ACTIVE		(1 << 0x12)
    162 
    163 #define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
    164 #define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
    165 #define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
    166 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
    167 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
    168 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
    169 
    170 #define GEN8_CTX_STATUS_COMPLETED_MASK \
    171 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
    172 
    173 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
    174 
    175 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */
    176 #define GEN12_CTX_SWITCH_DETAIL(csb_dw)	((csb_dw) & 0xF) /* upper csb dword */
    177 #define GEN12_CSB_SW_CTX_ID_MASK		GENMASK(25, 15)
    178 #define GEN12_IDLE_CTX_ID		0x7FF
    179 #define GEN12_CSB_CTX_VALID(csb_dw) \
    180 	(FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
    181 
    182 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
    183 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
    184 #define WA_TAIL_DWORDS 2
    185 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
    186 
    187 struct virtual_engine {
    188 	struct intel_engine_cs base;
    189 	struct intel_context context;
    190 
    191 	/*
    192 	 * We allow only a single request through the virtual engine at a time
    193 	 * (each request in the timeline waits for the completion fence of
    194 	 * the previous before being submitted). By restricting ourselves to
    195 	 * only submitting a single request, each request is placed on to a
    196 	 * physical engine to maximise load spreading (by virtue of the late greedy
    197 	 * scheduling -- each real engine takes the next available request
    198 	 * upon idling).
    199 	 */
    200 	struct i915_request *request;
    201 
    202 	/*
    203 	 * We keep an rbtree of available virtual engines inside each physical
    204 	 * engine, sorted by priority. Here we preallocate the nodes we need
    205 	 * for the virtual engine, indexed by physical_engine->id.
    206 	 */
    207 	struct ve_node {
    208 		struct rb_node rb;
    209 		int prio;
    210 	} nodes[I915_NUM_ENGINES];
    211 
    212 	/*
    213 	 * Keep track of bonded pairs -- restrictions upon our selection
    214 	 * of physical engines any particular request may be submitted to.
    215 	 * If we receive a submit-fence from a master engine, we will only
    216 	 * use one of sibling_mask physical engines.
    217 	 */
    218 	struct ve_bond {
    219 		const struct intel_engine_cs *master;
    220 		intel_engine_mask_t sibling_mask;
    221 	} *bonds;
    222 	unsigned int num_bonds;
    223 
    224 	/* And finally, which physical engines this virtual engine maps onto. */
    225 	unsigned int num_siblings;
    226 	struct intel_engine_cs *siblings[0];
    227 };
    228 
    229 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
    230 {
    231 	GEM_BUG_ON(!intel_engine_is_virtual(engine));
    232 	return container_of(engine, struct virtual_engine, base);
    233 }
    234 
    235 static int __execlists_context_alloc(struct intel_context *ce,
    236 				     struct intel_engine_cs *engine);
    237 
    238 static void execlists_init_reg_state(u32 *reg_state,
    239 				     const struct intel_context *ce,
    240 				     const struct intel_engine_cs *engine,
    241 				     const struct intel_ring *ring,
    242 				     bool close);
    243 static void
    244 __execlists_update_reg_state(const struct intel_context *ce,
    245 			     const struct intel_engine_cs *engine,
    246 			     u32 head);
    247 
    248 static void mark_eio(struct i915_request *rq)
    249 {
    250 	if (i915_request_completed(rq))
    251 		return;
    252 
    253 	GEM_BUG_ON(i915_request_signaled(rq));
    254 
    255 	dma_fence_set_error(&rq->fence, -EIO);
    256 	i915_request_mark_complete(rq);
    257 }
    258 
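/*
 * Scan backwards along the timeline from @rq and return the oldest
 * request that has not yet completed; reset_active() uses its head to
 * rewind the ring so we do not replay a partially executed batch.
 */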
    259 static struct i915_request *
    260 active_request(const struct intel_timeline * const tl, struct i915_request *rq)
    261 {
    262 	struct i915_request *active = rq;
    263 
    264 	rcu_read_lock();
    265 	list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
    266 		if (i915_request_completed(rq))
    267 			break;
    268 
    269 		active = rq;
    270 	}
    271 	rcu_read_unlock();
    272 
    273 	return active;
    274 }
    275 
    276 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
    277 {
    278 	return (i915_ggtt_offset(engine->status_page.vma) +
    279 		I915_GEM_HWS_PREEMPT_ADDR);
    280 }
    281 
    282 static inline void
    283 ring_set_paused(const struct intel_engine_cs *engine, int state)
    284 {
    285 	/*
    286 	 * We inspect HWS_PREEMPT with a semaphore inside
    287 	 * engine->emit_fini_breadcrumb. If the dword is true,
    288 	 * the ring is paused as the semaphore will busywait
    289 	 * until the dword is false.
    290 	 */
    291 	engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
    292 	if (state)
    293 		wmb();
    294 }
    295 
    296 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
    297 {
    298 	return rb_entry(rb, struct i915_priolist, node);
    299 }
    300 
    301 static inline int rq_prio(const struct i915_request *rq)
    302 {
    303 	return rq->sched.attr.priority;
    304 }
    305 
    306 static int effective_prio(const struct i915_request *rq)
    307 {
    308 	int prio = rq_prio(rq);
    309 
    310 	/*
    311 	 * If this request is special and must not be interrupted at any
    312 	 * cost, so be it. Note we are only checking the most recent request
    313 	 * in the context and so may be masking an earlier vip request. It
    314 	 * is hoped that under the conditions where nopreempt is used, this
    315 	 * will not matter (i.e. all requests to that context will be
    316 	 * nopreempt for as long as desired).
    317 	 */
    318 	if (i915_request_has_nopreempt(rq))
    319 		prio = I915_PRIORITY_UNPREEMPTABLE;
    320 
    321 	/*
    322 	 * On unwinding the active request, we give it a priority bump
    323 	 * if it has completed waiting on any semaphore. If we know that
    324 	 * the request has already started, we can prevent an unwanted
    325 	 * preempt-to-idle cycle by taking that into account now.
    326 	 */
    327 	if (__i915_request_has_started(rq))
    328 		prio |= I915_PRIORITY_NOSEMAPHORE;
    329 
    330 	/* Restrict mere WAIT boosts from triggering preemption */
    331 	BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
    332 	return prio | __NO_PREEMPTION;
    333 }
    334 
    335 static int queue_prio(const struct intel_engine_execlists *execlists)
    336 {
    337 	struct i915_priolist *p;
    338 	struct rb_node *rb;
    339 
    340 	rb = rb_first_cached(&execlists->queue);
    341 	if (!rb)
    342 		return INT_MIN;
    343 
    344 	/*
    345 	 * As the priolist[] entries are inverted, with the highest priority in [0],
    346 	 * we have to flip the index value to become priority.
    347 	 */
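	/*
	 * For illustration only (assuming I915_USER_PRIORITY_SHIFT == 2): a
	 * priolist at user level 0 whose lowest used sub-level bit is bit 1
	 * (so ffs() == 2) yields (0 + 1) * 4 - 2 == 2.
	 */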
    348 	p = to_priolist(rb);
    349 	return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
    350 }
    351 
    352 static inline bool need_preempt(const struct intel_engine_cs *engine,
    353 				const struct i915_request *rq,
    354 				struct rb_node *rb)
    355 {
    356 	int last_prio;
    357 
    358 	if (!intel_engine_has_semaphores(engine))
    359 		return false;
    360 
    361 	/*
    362 	 * Check if the current priority hint merits a preemption attempt.
    363 	 *
    364 	 * We record the highest value priority we saw during rescheduling
    365 	 * prior to this dequeue, therefore we know that if it is strictly
    366 	 * less than the current tail of ELSP[0], we do not need to force
    367 	 * a preempt-to-idle cycle.
    368 	 *
    369 	 * However, the priority hint is a mere hint that we may need to
    370 	 * preempt. If that hint is stale or we may be trying to preempt
    371 	 * ourselves, ignore the request.
    372 	 *
    373 	 * More naturally we would write
    374 	 *      prio >= max(0, last);
    375 	 * except that we wish to prevent triggering preemption at the same
    376 	 * priority level: the task that is running should remain running
    377 	 * to preserve FIFO ordering of dependencies.
    378 	 */
    379 	last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
    380 	if (engine->execlists.queue_priority_hint <= last_prio)
    381 		return false;
    382 
    383 	/*
    384 	 * Check against the first request in ELSP[1], it will, thanks to the
    385 	 * power of PI, be the highest priority of that context.
    386 	 */
    387 	if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
    388 	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
    389 		return true;
    390 
    391 	if (rb) {
    392 		struct virtual_engine *ve =
    393 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
    394 		bool preempt = false;
    395 
    396 		if (engine == ve->siblings[0]) { /* only preempt one sibling */
    397 			struct i915_request *next;
    398 
    399 			rcu_read_lock();
    400 			next = READ_ONCE(ve->request);
    401 			if (next)
    402 				preempt = rq_prio(next) > last_prio;
    403 			rcu_read_unlock();
    404 		}
    405 
    406 		if (preempt)
    407 			return preempt;
    408 	}
    409 
    410 	/*
    411 	 * If the inflight context did not trigger the preemption, then maybe
    412 	 * it was the set of queued requests? Pick the highest priority in
    413 	 * the queue (the first active priolist) and see if it deserves to be
    414 	 * running instead of ELSP[0].
    415 	 *
    416 	 * The highest priority request in the queue cannot be either
    417 	 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
    418 	 * context, its priority would not exceed ELSP[0] aka last_prio.
    419 	 */
    420 	return queue_prio(&engine->execlists) > last_prio;
    421 }
    422 
    423 __maybe_unused static inline bool
    424 assert_priority_queue(const struct i915_request *prev,
    425 		      const struct i915_request *next)
    426 {
    427 	/*
    428 	 * Without preemption, the prev may refer to the still active element
    429 	 * which we refuse to let go.
    430 	 *
    431 	 * Even with preemption, there are times when we think it is better not
    432 	 * to preempt and leave an ostensibly lower priority request in flight.
    433 	 */
    434 	if (i915_request_is_active(prev))
    435 		return true;
    436 
    437 	return rq_prio(prev) >= rq_prio(next);
    438 }
    439 
    440 /*
    441  * The context descriptor encodes various attributes of a context,
    442  * including its GTT address and some flags. Because it's fairly
    443  * expensive to calculate, we'll just do it once and cache the result,
    444  * which remains valid until the context is unpinned.
    445  *
    446  * This is what a descriptor looks like, from LSB to MSB::
    447  *
    448  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
    449  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
    450  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
    451  *      bits 53-54:    mbz, reserved for use by hardware
    452  *      bits 55-63:    group ID, currently unused and set to 0
    453  *
    454  * Starting from Gen11, the upper dword of the descriptor has a new format:
    455  *
    456  *      bits 32-36:    reserved
    457  *      bits 37-47:    SW context ID
    458  *      bits 48-53:    engine instance
    459  *      bit 54:        mbz, reserved for use by hardware
    460  *      bits 55-60:    SW counter
    461  *      bits 61-63:    engine class
    462  *
    463  * engine info, SW context ID and SW counter need to form a unique number
    464  * (Context ID) per lrc.
    465  */
    466 static u64
    467 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
    468 {
    469 	u64 desc;
    470 
    471 	desc = INTEL_LEGACY_32B_CONTEXT;
    472 	if (i915_vm_is_4lvl(ce->vm))
    473 		desc = INTEL_LEGACY_64B_CONTEXT;
    474 	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
    475 
    476 	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
    477 	if (IS_GEN(engine->i915, 8))
    478 		desc |= GEN8_CTX_L3LLC_COHERENT;
    479 
    480 	desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
    481 	/*
    482 	 * The following 32 bits are copied into the OA reports (dword 2).
    483 	 * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
    484 	 * anything below.
    485 	 */
    486 	if (INTEL_GEN(engine->i915) >= 11) {
    487 		desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
    488 								/* bits 48-53 */
    489 
    490 		desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
    491 								/* bits 61-63 */
    492 	}
    493 
    494 	return desc;
    495 }
    496 
    497 static inline unsigned int dword_in_page(void *addr)
    498 {
    499 	return offset_in_page(addr) / sizeof(u32);
    500 }
    501 
    502 static void set_offsets(u32 *regs,
    503 			const u8 *data,
    504 			const struct intel_engine_cs *engine,
    505 			bool clear)
    506 #define NOP(x) (BIT(7) | (x))
    507 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
    508 #define POSTED BIT(0)
    509 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
    510 #define REG16(x) \
    511 	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
    512 	(((x) >> 2) & 0x7f)
    513 #define END(x) 0, (x)
    514 {
    515 	const u32 base = engine->mmio_base;
    516 
    517 	while (*data) {
    518 		u8 count, flags;
    519 
    520 		if (*data & BIT(7)) { /* skip */
    521 			count = *data++ & ~BIT(7);
    522 			if (clear)
    523 				memset32(regs, MI_NOOP, count);
    524 			regs += count;
    525 			continue;
    526 		}
    527 
    528 		count = *data & 0x3f;
    529 		flags = *data >> 6;
    530 		data++;
    531 
    532 		*regs = MI_LOAD_REGISTER_IMM(count);
    533 		if (flags & POSTED)
    534 			*regs |= MI_LRI_FORCE_POSTED;
    535 		if (INTEL_GEN(engine->i915) >= 11)
    536 			*regs |= MI_LRI_CS_MMIO;
    537 		regs++;
    538 
    539 		GEM_BUG_ON(!count);
    540 		do {
    541 			u32 offset = 0;
    542 			u8 v;
    543 
    544 			do {
    545 				v = *data++;
    546 				offset <<= 7;
    547 				offset |= v & ~BIT(7);
    548 			} while (v & BIT(7));
    549 
    550 			regs[0] = base + (offset << 2);
    551 			if (clear)
    552 				regs[1] = 0;
    553 			regs += 2;
    554 		} while (--count);
    555 	}
    556 
    557 	if (clear) {
    558 		u8 count = *++data;
    559 
    560 		/* Clear past the tail for HW access */
    561 		GEM_BUG_ON(dword_in_page(regs) > count);
    562 		memset32(regs, MI_NOOP, count - dword_in_page(regs));
    563 
    564 		/* Close the batch; used mainly by live_lrc_layout() */
    565 		*regs = MI_BATCH_BUFFER_END;
    566 		if (INTEL_GEN(engine->i915) >= 10)
    567 			*regs |= BIT(0);
    568 	}
    569 }
    570 
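/*
 * The offset tables below are a small bytecode consumed by set_offsets()
 * above: a byte with bit 7 set (NOP(x)) skips x dwords of register state,
 * otherwise the low 6 bits give an MI_LOAD_REGISTER_IMM count and the top
 * two bits its flags (POSTED selects MI_LRI_FORCE_POSTED). Each register
 * is encoded as its mmio offset divided by 4, in one byte (REG) or in a
 * two-byte continuation sequence (REG16) for offsets that do not fit in
 * 7 bits. END(x) terminates the table, with x giving the dword index up
 * to which the rest of the image is cleared to MI_NOOPs when building a
 * blank state.
 */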
    571 static const u8 gen8_xcs_offsets[] = {
    572 	NOP(1),
    573 	LRI(11, 0),
    574 	REG16(0x244),
    575 	REG(0x034),
    576 	REG(0x030),
    577 	REG(0x038),
    578 	REG(0x03c),
    579 	REG(0x168),
    580 	REG(0x140),
    581 	REG(0x110),
    582 	REG(0x11c),
    583 	REG(0x114),
    584 	REG(0x118),
    585 
    586 	NOP(9),
    587 	LRI(9, 0),
    588 	REG16(0x3a8),
    589 	REG16(0x28c),
    590 	REG16(0x288),
    591 	REG16(0x284),
    592 	REG16(0x280),
    593 	REG16(0x27c),
    594 	REG16(0x278),
    595 	REG16(0x274),
    596 	REG16(0x270),
    597 
    598 	NOP(13),
    599 	LRI(2, 0),
    600 	REG16(0x200),
    601 	REG(0x028),
    602 
    603 	END(80)
    604 };
    605 
    606 static const u8 gen9_xcs_offsets[] = {
    607 	NOP(1),
    608 	LRI(14, POSTED),
    609 	REG16(0x244),
    610 	REG(0x034),
    611 	REG(0x030),
    612 	REG(0x038),
    613 	REG(0x03c),
    614 	REG(0x168),
    615 	REG(0x140),
    616 	REG(0x110),
    617 	REG(0x11c),
    618 	REG(0x114),
    619 	REG(0x118),
    620 	REG(0x1c0),
    621 	REG(0x1c4),
    622 	REG(0x1c8),
    623 
    624 	NOP(3),
    625 	LRI(9, POSTED),
    626 	REG16(0x3a8),
    627 	REG16(0x28c),
    628 	REG16(0x288),
    629 	REG16(0x284),
    630 	REG16(0x280),
    631 	REG16(0x27c),
    632 	REG16(0x278),
    633 	REG16(0x274),
    634 	REG16(0x270),
    635 
    636 	NOP(13),
    637 	LRI(1, POSTED),
    638 	REG16(0x200),
    639 
    640 	NOP(13),
    641 	LRI(44, POSTED),
    642 	REG(0x028),
    643 	REG(0x09c),
    644 	REG(0x0c0),
    645 	REG(0x178),
    646 	REG(0x17c),
    647 	REG16(0x358),
    648 	REG(0x170),
    649 	REG(0x150),
    650 	REG(0x154),
    651 	REG(0x158),
    652 	REG16(0x41c),
    653 	REG16(0x600),
    654 	REG16(0x604),
    655 	REG16(0x608),
    656 	REG16(0x60c),
    657 	REG16(0x610),
    658 	REG16(0x614),
    659 	REG16(0x618),
    660 	REG16(0x61c),
    661 	REG16(0x620),
    662 	REG16(0x624),
    663 	REG16(0x628),
    664 	REG16(0x62c),
    665 	REG16(0x630),
    666 	REG16(0x634),
    667 	REG16(0x638),
    668 	REG16(0x63c),
    669 	REG16(0x640),
    670 	REG16(0x644),
    671 	REG16(0x648),
    672 	REG16(0x64c),
    673 	REG16(0x650),
    674 	REG16(0x654),
    675 	REG16(0x658),
    676 	REG16(0x65c),
    677 	REG16(0x660),
    678 	REG16(0x664),
    679 	REG16(0x668),
    680 	REG16(0x66c),
    681 	REG16(0x670),
    682 	REG16(0x674),
    683 	REG16(0x678),
    684 	REG16(0x67c),
    685 	REG(0x068),
    686 
    687 	END(176)
    688 };
    689 
    690 static const u8 gen12_xcs_offsets[] = {
    691 	NOP(1),
    692 	LRI(13, POSTED),
    693 	REG16(0x244),
    694 	REG(0x034),
    695 	REG(0x030),
    696 	REG(0x038),
    697 	REG(0x03c),
    698 	REG(0x168),
    699 	REG(0x140),
    700 	REG(0x110),
    701 	REG(0x1c0),
    702 	REG(0x1c4),
    703 	REG(0x1c8),
    704 	REG(0x180),
    705 	REG16(0x2b4),
    706 
    707 	NOP(5),
    708 	LRI(9, POSTED),
    709 	REG16(0x3a8),
    710 	REG16(0x28c),
    711 	REG16(0x288),
    712 	REG16(0x284),
    713 	REG16(0x280),
    714 	REG16(0x27c),
    715 	REG16(0x278),
    716 	REG16(0x274),
    717 	REG16(0x270),
    718 
    719 	END(80)
    720 };
    721 
    722 static const u8 gen8_rcs_offsets[] = {
    723 	NOP(1),
    724 	LRI(14, POSTED),
    725 	REG16(0x244),
    726 	REG(0x034),
    727 	REG(0x030),
    728 	REG(0x038),
    729 	REG(0x03c),
    730 	REG(0x168),
    731 	REG(0x140),
    732 	REG(0x110),
    733 	REG(0x11c),
    734 	REG(0x114),
    735 	REG(0x118),
    736 	REG(0x1c0),
    737 	REG(0x1c4),
    738 	REG(0x1c8),
    739 
    740 	NOP(3),
    741 	LRI(9, POSTED),
    742 	REG16(0x3a8),
    743 	REG16(0x28c),
    744 	REG16(0x288),
    745 	REG16(0x284),
    746 	REG16(0x280),
    747 	REG16(0x27c),
    748 	REG16(0x278),
    749 	REG16(0x274),
    750 	REG16(0x270),
    751 
    752 	NOP(13),
    753 	LRI(1, 0),
    754 	REG(0x0c8),
    755 
    756 	END(80)
    757 };
    758 
    759 static const u8 gen9_rcs_offsets[] = {
    760 	NOP(1),
    761 	LRI(14, POSTED),
    762 	REG16(0x244),
    763 	REG(0x34),
    764 	REG(0x30),
    765 	REG(0x38),
    766 	REG(0x3c),
    767 	REG(0x168),
    768 	REG(0x140),
    769 	REG(0x110),
    770 	REG(0x11c),
    771 	REG(0x114),
    772 	REG(0x118),
    773 	REG(0x1c0),
    774 	REG(0x1c4),
    775 	REG(0x1c8),
    776 
    777 	NOP(3),
    778 	LRI(9, POSTED),
    779 	REG16(0x3a8),
    780 	REG16(0x28c),
    781 	REG16(0x288),
    782 	REG16(0x284),
    783 	REG16(0x280),
    784 	REG16(0x27c),
    785 	REG16(0x278),
    786 	REG16(0x274),
    787 	REG16(0x270),
    788 
    789 	NOP(13),
    790 	LRI(1, 0),
    791 	REG(0xc8),
    792 
    793 	NOP(13),
    794 	LRI(44, POSTED),
    795 	REG(0x28),
    796 	REG(0x9c),
    797 	REG(0xc0),
    798 	REG(0x178),
    799 	REG(0x17c),
    800 	REG16(0x358),
    801 	REG(0x170),
    802 	REG(0x150),
    803 	REG(0x154),
    804 	REG(0x158),
    805 	REG16(0x41c),
    806 	REG16(0x600),
    807 	REG16(0x604),
    808 	REG16(0x608),
    809 	REG16(0x60c),
    810 	REG16(0x610),
    811 	REG16(0x614),
    812 	REG16(0x618),
    813 	REG16(0x61c),
    814 	REG16(0x620),
    815 	REG16(0x624),
    816 	REG16(0x628),
    817 	REG16(0x62c),
    818 	REG16(0x630),
    819 	REG16(0x634),
    820 	REG16(0x638),
    821 	REG16(0x63c),
    822 	REG16(0x640),
    823 	REG16(0x644),
    824 	REG16(0x648),
    825 	REG16(0x64c),
    826 	REG16(0x650),
    827 	REG16(0x654),
    828 	REG16(0x658),
    829 	REG16(0x65c),
    830 	REG16(0x660),
    831 	REG16(0x664),
    832 	REG16(0x668),
    833 	REG16(0x66c),
    834 	REG16(0x670),
    835 	REG16(0x674),
    836 	REG16(0x678),
    837 	REG16(0x67c),
    838 	REG(0x68),
    839 
    840 	END(176)
    841 };
    842 
    843 static const u8 gen11_rcs_offsets[] = {
    844 	NOP(1),
    845 	LRI(15, POSTED),
    846 	REG16(0x244),
    847 	REG(0x034),
    848 	REG(0x030),
    849 	REG(0x038),
    850 	REG(0x03c),
    851 	REG(0x168),
    852 	REG(0x140),
    853 	REG(0x110),
    854 	REG(0x11c),
    855 	REG(0x114),
    856 	REG(0x118),
    857 	REG(0x1c0),
    858 	REG(0x1c4),
    859 	REG(0x1c8),
    860 	REG(0x180),
    861 
    862 	NOP(1),
    863 	LRI(9, POSTED),
    864 	REG16(0x3a8),
    865 	REG16(0x28c),
    866 	REG16(0x288),
    867 	REG16(0x284),
    868 	REG16(0x280),
    869 	REG16(0x27c),
    870 	REG16(0x278),
    871 	REG16(0x274),
    872 	REG16(0x270),
    873 
    874 	LRI(1, POSTED),
    875 	REG(0x1b0),
    876 
    877 	NOP(10),
    878 	LRI(1, 0),
    879 	REG(0x0c8),
    880 
    881 	END(80)
    882 };
    883 
    884 static const u8 gen12_rcs_offsets[] = {
    885 	NOP(1),
    886 	LRI(13, POSTED),
    887 	REG16(0x244),
    888 	REG(0x034),
    889 	REG(0x030),
    890 	REG(0x038),
    891 	REG(0x03c),
    892 	REG(0x168),
    893 	REG(0x140),
    894 	REG(0x110),
    895 	REG(0x1c0),
    896 	REG(0x1c4),
    897 	REG(0x1c8),
    898 	REG(0x180),
    899 	REG16(0x2b4),
    900 
    901 	NOP(5),
    902 	LRI(9, POSTED),
    903 	REG16(0x3a8),
    904 	REG16(0x28c),
    905 	REG16(0x288),
    906 	REG16(0x284),
    907 	REG16(0x280),
    908 	REG16(0x27c),
    909 	REG16(0x278),
    910 	REG16(0x274),
    911 	REG16(0x270),
    912 
    913 	LRI(3, POSTED),
    914 	REG(0x1b0),
    915 	REG16(0x5a8),
    916 	REG16(0x5ac),
    917 
    918 	NOP(6),
    919 	LRI(1, 0),
    920 	REG(0x0c8),
    921 
    922 	END(80)
    923 };
    924 
    925 #undef END
    926 #undef REG16
    927 #undef REG
    928 #undef LRI
    929 #undef NOP
    930 
    931 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
    932 {
    933 	/*
    934 	 * The gen12+ lists only have the registers we program in the basic
    935 	 * default state. We rely on the context image using relative
    936 	 * addressing to automatically fix up the register state between the
    937 	 * physical engines for the virtual engine.
    938 	 */
    939 	GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
    940 		   !intel_engine_has_relative_mmio(engine));
    941 
    942 	if (engine->class == RENDER_CLASS) {
    943 		if (INTEL_GEN(engine->i915) >= 12)
    944 			return gen12_rcs_offsets;
    945 		else if (INTEL_GEN(engine->i915) >= 11)
    946 			return gen11_rcs_offsets;
    947 		else if (INTEL_GEN(engine->i915) >= 9)
    948 			return gen9_rcs_offsets;
    949 		else
    950 			return gen8_rcs_offsets;
    951 	} else {
    952 		if (INTEL_GEN(engine->i915) >= 12)
    953 			return gen12_xcs_offsets;
    954 		else if (INTEL_GEN(engine->i915) >= 9)
    955 			return gen9_xcs_offsets;
    956 		else
    957 			return gen8_xcs_offsets;
    958 	}
    959 }
    960 
    961 static struct i915_request *
    962 __unwind_incomplete_requests(struct intel_engine_cs *engine)
    963 {
    964 	struct i915_request *rq, *rn, *active = NULL;
    965 	struct list_head *uninitialized_var(pl);
    966 	int prio = I915_PRIORITY_INVALID;
    967 
    968 	lockdep_assert_held(&engine->active.lock);
    969 
    970 	list_for_each_entry_safe_reverse(rq, rn,
    971 					 &engine->active.requests,
    972 					 sched.link) {
    973 		if (i915_request_completed(rq))
    974 			continue; /* XXX */
    975 
    976 		__i915_request_unsubmit(rq);
    977 
    978 		/*
    979 		 * Push the request back into the queue for later resubmission.
    980 		 * If this request is not native to this physical engine (i.e.
    981 		 * it came from a virtual source), push it back onto the virtual
    982 		 * engine so that it can be moved across onto another physical
    983 		 * engine as load dictates.
    984 		 */
    985 		if (likely(rq->execution_mask == engine->mask)) {
    986 			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
    987 			if (rq_prio(rq) != prio) {
    988 				prio = rq_prio(rq);
    989 				pl = i915_sched_lookup_priolist(engine, prio);
    990 			}
    991 			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
    992 
    993 			list_move(&rq->sched.link, pl);
    994 			set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
    995 
    996 			active = rq;
    997 		} else {
    998 			struct intel_engine_cs *owner = rq->context->engine;
    999 
   1000 			/*
   1001 			 * Decouple the virtual breadcrumb before moving it
   1002 			 * back to the virtual engine -- we don't want the
   1003 			 * request to complete in the background and try
   1004 			 * and cancel the breadcrumb on the virtual engine
   1005 			 * (instead of the old engine where it is linked)!
   1006 			 */
   1007 			if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
   1008 				     &rq->fence.flags)) {
   1009 				spin_lock_nested(&rq->lock,
   1010 						 SINGLE_DEPTH_NESTING);
   1011 				i915_request_cancel_breadcrumb(rq);
   1012 				spin_unlock(&rq->lock);
   1013 			}
   1014 			rq->engine = owner;
   1015 			owner->submit_request(rq);
   1016 			active = NULL;
   1017 		}
   1018 	}
   1019 
   1020 	return active;
   1021 }
   1022 
   1023 struct i915_request *
   1024 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
   1025 {
   1026 	struct intel_engine_cs *engine =
   1027 		container_of(execlists, typeof(*engine), execlists);
   1028 
   1029 	return __unwind_incomplete_requests(engine);
   1030 }
   1031 
   1032 static inline void
   1033 execlists_context_status_change(struct i915_request *rq, unsigned long status)
   1034 {
   1035 	/*
   1036 	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
   1037 	 * the compiler should eliminate this function as dead code.
   1038 	 */
   1039 	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
   1040 		return;
   1041 
   1042 	atomic_notifier_call_chain(&rq->engine->context_status_notifier,
   1043 				   status, rq);
   1044 }
   1045 
   1046 static void intel_engine_context_in(struct intel_engine_cs *engine)
   1047 {
   1048 	unsigned long flags;
   1049 
   1050 	if (READ_ONCE(engine->stats.enabled) == 0)
   1051 		return;
   1052 
   1053 	write_seqlock_irqsave(&engine->stats.lock, flags);
   1054 
   1055 	if (engine->stats.enabled > 0) {
   1056 		if (engine->stats.active++ == 0)
   1057 			engine->stats.start = ktime_get();
   1058 		GEM_BUG_ON(engine->stats.active == 0);
   1059 	}
   1060 
   1061 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
   1062 }
   1063 
   1064 static void intel_engine_context_out(struct intel_engine_cs *engine)
   1065 {
   1066 	unsigned long flags;
   1067 
   1068 	if (READ_ONCE(engine->stats.enabled) == 0)
   1069 		return;
   1070 
   1071 	write_seqlock_irqsave(&engine->stats.lock, flags);
   1072 
   1073 	if (engine->stats.enabled > 0) {
   1074 		ktime_t last;
   1075 
   1076 		if (engine->stats.active && --engine->stats.active == 0) {
   1077 			/*
   1078 			 * Decrement the active context count and, in case the GPU
   1079 			 * is now idle, add the elapsed time to the running total.
   1080 			 */
   1081 			last = ktime_sub(ktime_get(), engine->stats.start);
   1082 
   1083 			engine->stats.total = ktime_add(engine->stats.total,
   1084 							last);
   1085 		} else if (engine->stats.active == 0) {
   1086 			/*
   1087 			 * After turning on engine stats, context out might be
   1088 			 * the first event, in which case we account from the
   1089 			 * time stats gathering was turned on.
   1090 			 */
   1091 			last = ktime_sub(ktime_get(), engine->stats.enabled_at);
   1092 
   1093 			engine->stats.total = ktime_add(engine->stats.total,
   1094 							last);
   1095 		}
   1096 	}
   1097 
   1098 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
   1099 }
   1100 
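/*
 * Index of the RING_MI_MODE (reg, value) pair within the saved register
 * state, i.e. regs[x] holds the register offset and regs[x + 1] the value
 * that execlists_check_context() inspects for a stray STOP_RING; returns
 * -1 if the layout for this engine does not include it.
 */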
   1101 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
   1102 {
   1103 	if (INTEL_GEN(engine->i915) >= 12)
   1104 		return 0x60;
   1105 	else if (INTEL_GEN(engine->i915) >= 9)
   1106 		return 0x54;
   1107 	else if (engine->class == RENDER_CLASS)
   1108 		return 0x58;
   1109 	else
   1110 		return -1;
   1111 }
   1112 
   1113 static void
   1114 execlists_check_context(const struct intel_context *ce,
   1115 			const struct intel_engine_cs *engine)
   1116 {
   1117 	const struct intel_ring *ring = ce->ring;
   1118 	u32 *regs = ce->lrc_reg_state;
   1119 	bool valid = true;
   1120 	int x;
   1121 
   1122 	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
   1123 		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
   1124 		       engine->name,
   1125 		       regs[CTX_RING_START],
   1126 		       i915_ggtt_offset(ring->vma));
   1127 		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
   1128 		valid = false;
   1129 	}
   1130 
   1131 	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
   1132 	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
   1133 		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
   1134 		       engine->name,
   1135 		       regs[CTX_RING_CTL],
   1136 		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
   1137 		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
   1138 		valid = false;
   1139 	}
   1140 
   1141 	x = lrc_ring_mi_mode(engine);
   1142 	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
   1143 		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
   1144 		       engine->name, regs[x + 1]);
   1145 		regs[x + 1] &= ~STOP_RING;
   1146 		regs[x + 1] |= STOP_RING << 16;
   1147 		valid = false;
   1148 	}
   1149 
   1150 	WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
   1151 }
   1152 
   1153 static void restore_default_state(struct intel_context *ce,
   1154 				  struct intel_engine_cs *engine)
   1155 {
   1156 	u32 *regs = ce->lrc_reg_state;
   1157 
   1158 	if (engine->pinned_default_state)
   1159 		memcpy(regs, /* skip restoring the vanilla PPHWSP */
   1160 		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
   1161 		       engine->context_size - PAGE_SIZE);
   1162 
   1163 	execlists_init_reg_state(regs, ce, engine, ce->ring, false);
   1164 }
   1165 
   1166 static void reset_active(struct i915_request *rq,
   1167 			 struct intel_engine_cs *engine)
   1168 {
   1169 	struct intel_context * const ce = rq->context;
   1170 	u32 head;
   1171 
   1172 	/*
   1173 	 * The executing context has been cancelled. We want to prevent
   1174 	 * further execution along this context and propagate the error on
   1175 	 * to anything depending on its results.
   1176 	 *
   1177 	 * In __i915_request_submit(), we apply the -EIO and remove the
   1178 	 * requests' payloads for any banned requests. But first, we must
   1179 	 * rewind the context back to the start of the incomplete request so
   1180 	 * that we do not jump back into the middle of the batch.
   1181 	 *
   1182 	 * We preserve the breadcrumbs and semaphores of the incomplete
   1183 	 * requests so that inter-timeline dependencies (i.e other timelines)
   1184 	 * remain correctly ordered. And we defer to __i915_request_submit()
   1185 	 * so that all asynchronous waits are correctly handled.
   1186 	 */
   1187 	ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
   1188 		     rq->fence.context, rq->fence.seqno);
   1189 
   1190 	/* On resubmission of the active request, payload will be scrubbed */
   1191 	if (i915_request_completed(rq))
   1192 		head = rq->tail;
   1193 	else
   1194 		head = active_request(ce->timeline, rq)->head;
   1195 	head = intel_ring_wrap(ce->ring, head);
   1196 
   1197 	/* Scrub the context image to prevent replaying the previous batch */
   1198 	restore_default_state(ce, engine);
   1199 	__execlists_update_reg_state(ce, engine, head);
   1200 
   1201 	/* We've switched away, so this should be a no-op, but intent matters */
   1202 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
   1203 }
   1204 
   1205 static inline struct intel_engine_cs *
   1206 __execlists_schedule_in(struct i915_request *rq)
   1207 {
   1208 	struct intel_engine_cs * const engine = rq->engine;
   1209 	struct intel_context * const ce = rq->context;
   1210 
   1211 	intel_context_get(ce);
   1212 
   1213 	if (unlikely(intel_context_is_banned(ce)))
   1214 		reset_active(rq, engine);
   1215 
   1216 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
   1217 		execlists_check_context(ce, engine);
   1218 
   1219 	if (ce->tag) {
   1220 		/* Use a fixed tag for OA and friends */
   1221 		ce->lrc_desc |= (u64)ce->tag << 32;
   1222 	} else {
   1223 		/* We don't need a strict matching tag, just different values */
   1224 		ce->lrc_desc &= ~GENMASK_ULL(47, 37);
   1225 		ce->lrc_desc |=
   1226 			(u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
   1227 			GEN11_SW_CTX_ID_SHIFT;
   1228 		BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
   1229 	}
   1230 
   1231 	__intel_gt_pm_get(engine->gt);
   1232 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
   1233 	intel_engine_context_in(engine);
   1234 
   1235 	return engine;
   1236 }
   1237 
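/*
 * ce->inflight packs the engine on which the context is currently active
 * together with a small submission count kept in the low pointer bits
 * (see ptr_unmask_bits/ptr_inc/ptr_dec): only the first schedule_in and
 * the matching final schedule_out do the real work.
 */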
   1238 static inline struct i915_request *
   1239 execlists_schedule_in(struct i915_request *rq, int idx)
   1240 {
   1241 	struct intel_context * const ce = rq->context;
   1242 	struct intel_engine_cs *old;
   1243 
   1244 	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
   1245 	trace_i915_request_in(rq, idx);
   1246 
   1247 	old = READ_ONCE(ce->inflight);
   1248 	do {
   1249 		if (!old) {
   1250 			WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
   1251 			break;
   1252 		}
   1253 	} while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
   1254 
   1255 	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
   1256 	return i915_request_get(rq);
   1257 }
   1258 
   1259 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
   1260 {
   1261 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
   1262 	struct i915_request *next = READ_ONCE(ve->request);
   1263 
   1264 	if (next && next->execution_mask & ~rq->execution_mask)
   1265 		tasklet_schedule(&ve->base.execlists.tasklet);
   1266 }
   1267 
   1268 static inline void
   1269 __execlists_schedule_out(struct i915_request *rq,
   1270 			 struct intel_engine_cs * const engine)
   1271 {
   1272 	struct intel_context * const ce = rq->context;
   1273 
   1274 	/*
   1275 	 * NB process_csb() is not under the engine->active.lock and hence
   1276 	 * schedule_out can race with schedule_in meaning that we should
   1277 	 * refrain from doing non-trivial work here.
   1278 	 */
   1279 
   1280 	/*
   1281 	 * If we have just completed this context, the engine may now be
   1282 	 * idle and we want to re-enter powersaving.
   1283 	 */
   1284 	if (list_is_last(&rq->link, &ce->timeline->requests) &&
   1285 	    i915_request_completed(rq))
   1286 		intel_engine_add_retire(engine, ce->timeline);
   1287 
   1288 	intel_engine_context_out(engine);
   1289 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
   1290 	intel_gt_pm_put_async(engine->gt);
   1291 
   1292 	/*
   1293 	 * If this is part of a virtual engine, its next request may
   1294 	 * have been blocked waiting for access to the active context.
   1295 	 * We have to kick all the siblings again in case we need to
   1296 	 * switch (e.g. the next request is not runnable on this
   1297 	 * engine). Hopefully, we will already have submitted the next
   1298 	 * request before the tasklet runs and do not need to rebuild
   1299 	 * each virtual tree and kick everyone again.
   1300 	 */
   1301 	if (ce->engine != engine)
   1302 		kick_siblings(rq, ce);
   1303 
   1304 	intel_context_put(ce);
   1305 }
   1306 
   1307 static inline void
   1308 execlists_schedule_out(struct i915_request *rq)
   1309 {
   1310 	struct intel_context * const ce = rq->context;
   1311 	struct intel_engine_cs *cur, *old;
   1312 
   1313 	trace_i915_request_out(rq);
   1314 
   1315 	old = READ_ONCE(ce->inflight);
   1316 	do
   1317 		cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
   1318 	while (!try_cmpxchg(&ce->inflight, &old, cur));
   1319 	if (!cur)
   1320 		__execlists_schedule_out(rq, old);
   1321 
   1322 	i915_request_put(rq);
   1323 }
   1324 
   1325 static u64 execlists_update_context(struct i915_request *rq)
   1326 {
   1327 	struct intel_context *ce = rq->context;
   1328 	u64 desc = ce->lrc_desc;
   1329 	u32 tail, prev;
   1330 
   1331 	/*
   1332 	 * WaIdleLiteRestore:bdw,skl
   1333 	 *
   1334 	 * We should never submit the context with the same RING_TAIL twice
   1335 	 * just in case we submit an empty ring, which confuses the HW.
   1336 	 *
   1337 	 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
   1338 	 * the normal request to be able to always advance the RING_TAIL on
   1339 	 * subsequent resubmissions (for lite restore). Should that fail us,
   1340 	 * and we try and submit the same tail again, force the context
   1341 	 * reload.
   1342 	 *
   1343 	 * If we need to return to a preempted context, we need to skip the
   1344 	 * lite-restore and force it to reload the RING_TAIL. Otherwise, the
   1345 	 * HW has a tendency to ignore us rewinding the TAIL to the end of
   1346 	 * an earlier request.
   1347 	 */
   1348 	tail = intel_ring_set_tail(rq->ring, rq->tail);
   1349 	prev = ce->lrc_reg_state[CTX_RING_TAIL];
   1350 	if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
   1351 		desc |= CTX_DESC_FORCE_RESTORE;
   1352 	ce->lrc_reg_state[CTX_RING_TAIL] = tail;
   1353 	rq->tail = rq->wa_tail;
   1354 
   1355 	/*
   1356 	 * Make sure the context image is complete before we submit it to HW.
   1357 	 *
   1358 	 * Ostensibly, writes (including the WCB) should be flushed prior to
   1359 	 * an uncached write such as our mmio register access, but the empirical
   1360 	 * evidence (esp. on Braswell) suggests that the WC write into memory
   1361 	 * may not be visible to the HW prior to the completion of the UC
   1362 	 * register write and that we may begin execution from the context
   1363 	 * before its image is complete leading to invalid PD chasing.
   1364 	 */
   1365 	wmb();
   1366 
   1367 	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
   1368 	return desc;
   1369 }
   1370 
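/*
 * When the hardware provides a submission queue (execlists->ctrl_reg is
 * set), each descriptor is written to its per-port slot and the queue is
 * later loaded explicitly via EL_CTRL_LOAD in execlists_submit_ports();
 * otherwise both descriptors are written directly to the single ELSP
 * register, upper dword first.
 */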
   1371 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
   1372 {
   1373 #ifdef __NetBSD__
   1374 	if (execlists->ctrl_reg) {
   1375 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg + port * 2, lower_32_bits(desc));
   1376 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg + port * 2 + 1, upper_32_bits(desc));
   1377 	} else {
   1378 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg, upper_32_bits(desc));
   1379 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg, lower_32_bits(desc));
   1380 	}
   1381 #else
   1382 	if (execlists->ctrl_reg) {
   1383 		writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
   1384 		writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
   1385 	} else {
   1386 		writel(upper_32_bits(desc), execlists->submit_reg);
   1387 		writel(lower_32_bits(desc), execlists->submit_reg);
   1388 	}
   1389 #endif
   1390 }
   1391 
   1392 static __maybe_unused void
   1393 trace_ports(const struct intel_engine_execlists *execlists,
   1394 	    const char *msg,
   1395 	    struct i915_request * const *ports)
   1396 {
   1397 	const struct intel_engine_cs *engine =
   1398 		container_of(execlists, typeof(*engine), execlists);
   1399 
   1400 	if (!ports[0])
   1401 		return;
   1402 
   1403 	ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
   1404 		     ports[0]->fence.context,
   1405 		     ports[0]->fence.seqno,
   1406 		     i915_request_completed(ports[0]) ? "!" :
   1407 		     i915_request_started(ports[0]) ? "*" :
   1408 		     "",
   1409 		     ports[1] ? ports[1]->fence.context : 0,
   1410 		     ports[1] ? ports[1]->fence.seqno : 0);
   1411 }
   1412 
   1413 static __maybe_unused bool
   1414 assert_pending_valid(const struct intel_engine_execlists *execlists,
   1415 		     const char *msg)
   1416 {
   1417 	struct i915_request * const *port, *rq;
   1418 	struct intel_context *ce = NULL;
   1419 
   1420 	trace_ports(execlists, msg, execlists->pending);
   1421 
   1422 	if (!execlists->pending[0]) {
   1423 		GEM_TRACE_ERR("Nothing pending for promotion!\n");
   1424 		return false;
   1425 	}
   1426 
   1427 	if (execlists->pending[execlists_num_ports(execlists)]) {
   1428 		GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
   1429 			      execlists_num_ports(execlists));
   1430 		return false;
   1431 	}
   1432 
   1433 	for (port = execlists->pending; (rq = *port); port++) {
   1434 		unsigned long flags;
   1435 		bool ok = true;
   1436 
   1437 		GEM_BUG_ON(!kref_read(&rq->fence.refcount));
   1438 		GEM_BUG_ON(!i915_request_is_active(rq));
   1439 
   1440 		if (ce == rq->context) {
   1441 			GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
   1442 				      ce->timeline->fence_context,
   1443 				      port - execlists->pending);
   1444 			return false;
   1445 		}
   1446 		ce = rq->context;
   1447 
   1448 		/* Hold tightly onto the lock to prevent concurrent retires! */
   1449 		if (!spin_trylock_irqsave(&rq->lock, flags))
   1450 			continue;
   1451 
   1452 		if (i915_request_completed(rq))
   1453 			goto unlock;
   1454 
   1455 		if (i915_active_is_idle(&ce->active) &&
   1456 		    !intel_context_is_barrier(ce)) {
   1457 			GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
   1458 				      ce->timeline->fence_context,
   1459 				      port - execlists->pending);
   1460 			ok = false;
   1461 			goto unlock;
   1462 		}
   1463 
   1464 		if (!i915_vma_is_pinned(ce->state)) {
   1465 			GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
   1466 				      ce->timeline->fence_context,
   1467 				      port - execlists->pending);
   1468 			ok = false;
   1469 			goto unlock;
   1470 		}
   1471 
   1472 		if (!i915_vma_is_pinned(ce->ring->vma)) {
   1473 			GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
   1474 				      ce->timeline->fence_context,
   1475 				      port - execlists->pending);
   1476 			ok = false;
   1477 			goto unlock;
   1478 		}
   1479 
   1480 unlock:
   1481 		spin_unlock_irqrestore(&rq->lock, flags);
   1482 		if (!ok)
   1483 			return false;
   1484 	}
   1485 
   1486 	return ce;
   1487 }
   1488 
   1489 static void execlists_submit_ports(struct intel_engine_cs *engine)
   1490 {
   1491 	struct intel_engine_execlists *execlists = &engine->execlists;
   1492 	unsigned int n;
   1493 
   1494 	GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
   1495 
   1496 	/*
   1497 	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
   1498 	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
   1499 	 * not be relinquished until the device is idle (see
   1500 	 * i915_gem_idle_work_handler()). As a precaution, we make sure
   1501 	 * that all ELSP are drained i.e. we have processed the CSB,
   1502 	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
   1503 	 */
   1504 	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
   1505 
   1506 	/*
   1507 	 * ELSQ note: the submit queue is not cleared after being submitted
   1508 	 * to the HW so we need to make sure we always clean it up. This is
   1509 	 * currently ensured by the fact that we always write the same number
   1510 	 * of elsq entries; keep this in mind before changing the loop below.
   1511 	 */
   1512 	for (n = execlists_num_ports(execlists); n--; ) {
   1513 		struct i915_request *rq = execlists->pending[n];
   1514 
   1515 		write_desc(execlists,
   1516 			   rq ? execlists_update_context(rq) : 0,
   1517 			   n);
   1518 	}
   1519 
   1520 	/* we need to manually load the submit queue */
   1521 	if (execlists->ctrl_reg)
   1522 		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
   1523 }
   1524 
   1525 static bool ctx_single_port_submission(const struct intel_context *ce)
   1526 {
   1527 	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
   1528 		intel_context_force_single_submission(ce));
   1529 }
   1530 
   1531 static bool can_merge_ctx(const struct intel_context *prev,
   1532 			  const struct intel_context *next)
   1533 {
   1534 	if (prev != next)
   1535 		return false;
   1536 
   1537 	if (ctx_single_port_submission(prev))
   1538 		return false;
   1539 
   1540 	return true;
   1541 }
   1542 
   1543 static bool can_merge_rq(const struct i915_request *prev,
   1544 			 const struct i915_request *next)
   1545 {
   1546 	GEM_BUG_ON(prev == next);
   1547 	GEM_BUG_ON(!assert_priority_queue(prev, next));
   1548 
   1549 	/*
   1550 	 * We do not submit known completed requests. Therefore if the next
   1551 	 * request is already completed, we can pretend to merge it in
   1552 	 * with the previous context (and we will skip updating the ELSP
   1553 	 * and tracking). Thus hopefully keeping the ELSP full with active
   1554 	 * contexts, despite the best efforts of preempt-to-busy to confuse
   1555 	 * us.
   1556 	 */
   1557 	if (i915_request_completed(next))
   1558 		return true;
   1559 
   1560 	if (unlikely((prev->fence.flags ^ next->fence.flags) &
   1561 		     (BIT(I915_FENCE_FLAG_NOPREEMPT) |
   1562 		      BIT(I915_FENCE_FLAG_SENTINEL))))
   1563 		return false;
   1564 
   1565 	if (!can_merge_ctx(prev->context, next->context))
   1566 		return false;
   1567 
   1568 	return true;
   1569 }
   1570 
   1571 static void virtual_update_register_offsets(u32 *regs,
   1572 					    struct intel_engine_cs *engine)
   1573 {
   1574 	set_offsets(regs, reg_offsets(engine), engine, false);
   1575 }
   1576 
   1577 static bool virtual_matches(const struct virtual_engine *ve,
   1578 			    const struct i915_request *rq,
   1579 			    const struct intel_engine_cs *engine)
   1580 {
   1581 	const struct intel_engine_cs *inflight;
   1582 
   1583 	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
   1584 		return false;
   1585 
   1586 	/*
   1587 	 * We track when the HW has completed saving the context image
   1588 	 * (i.e. when we have seen the final CS event switching out of
   1589 	 * the context) and must not overwrite the context image before
   1590 	 * then. This restricts us to only using the active engine
   1591 	 * while the previous virtualized request is inflight (so
   1592 	 * we reuse the register offsets). This is a very small
   1593 	 * hysteresis on the greedy selection algorithm.
   1594 	 */
   1595 	inflight = intel_context_inflight(&ve->context);
   1596 	if (inflight && inflight != engine)
   1597 		return false;
   1598 
   1599 	return true;
   1600 }
   1601 
   1602 static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
   1603 				     struct intel_engine_cs *engine)
   1604 {
   1605 	struct intel_engine_cs *old = ve->siblings[0];
   1606 
   1607 	/* All unattached (rq->engine == old) must already be completed */
   1608 
   1609 	spin_lock(&old->breadcrumbs.irq_lock);
   1610 	if (!list_empty(&ve->context.signal_link)) {
   1611 		list_move_tail(&ve->context.signal_link,
   1612 			       &engine->breadcrumbs.signalers);
   1613 		intel_engine_signal_breadcrumbs(engine);
   1614 	}
   1615 	spin_unlock(&old->breadcrumbs.irq_lock);
   1616 }
   1617 
   1618 static struct i915_request *
   1619 last_active(const struct intel_engine_execlists *execlists)
   1620 {
   1621 	struct i915_request * const *last = READ_ONCE(execlists->active);
   1622 
   1623 	while (*last && i915_request_completed(*last))
   1624 		last++;
   1625 
   1626 	return *last;
   1627 }
   1628 
   1629 #define for_each_waiter(p__, rq__) \
   1630 	list_for_each_entry_lockless(p__, \
   1631 				     &(rq__)->sched.waiters_list, \
   1632 				     wait_link)
   1633 
   1634 static void defer_request(struct i915_request *rq, struct list_head * const pl)
   1635 {
   1636 	LIST_HEAD(list);
   1637 
   1638 	/*
   1639 	 * We want to move the interrupted request to the back of
   1640 	 * the round-robin list (i.e. its priority level), but
   1641 	 * in doing so, we must then move all requests that were in
   1642 	 * flight and were waiting for the interrupted request to
   1643 	 * be run after it again.
   1644 	 */
   1645 	do {
   1646 		struct i915_dependency *p;
   1647 
   1648 		GEM_BUG_ON(i915_request_is_active(rq));
   1649 		list_move_tail(&rq->sched.link, pl);
   1650 
   1651 		for_each_waiter(p, rq) {
   1652 			struct i915_request *w =
   1653 				container_of(p->waiter, typeof(*w), sched);
   1654 
   1655 			/* Leave semaphores spinning on the other engines */
   1656 			if (w->engine != rq->engine)
   1657 				continue;
   1658 
   1659 			/* No waiter should start before its signaler */
   1660 			GEM_BUG_ON(i915_request_started(w) &&
   1661 				   !i915_request_completed(rq));
   1662 
   1663 			GEM_BUG_ON(i915_request_is_active(w));
   1664 			if (!i915_request_is_ready(w))
   1665 				continue;
   1666 
   1667 			if (rq_prio(w) < rq_prio(rq))
   1668 				continue;
   1669 
   1670 			GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
   1671 			list_move_tail(&w->sched.link, &list);
   1672 		}
   1673 
   1674 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
   1675 	} while (rq);
   1676 }
   1677 
   1678 static void defer_active(struct intel_engine_cs *engine)
   1679 {
   1680 	struct i915_request *rq;
   1681 
   1682 	rq = __unwind_incomplete_requests(engine);
   1683 	if (!rq)
   1684 		return;
   1685 
   1686 	defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
   1687 }
   1688 
   1689 static bool
   1690 need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
   1691 {
   1692 	int hint;
   1693 
   1694 	if (!intel_engine_has_timeslices(engine))
   1695 		return false;
   1696 
   1697 	if (list_is_last(&rq->sched.link, &engine->active.requests))
   1698 		return false;
   1699 
   1700 	hint = max(rq_prio(list_next_entry(rq, sched.link)),
   1701 		   engine->execlists.queue_priority_hint);
   1702 
   1703 	return hint >= effective_prio(rq);
   1704 }
   1705 
   1706 static int
   1707 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
   1708 {
   1709 	if (list_is_last(&rq->sched.link, &engine->active.requests))
   1710 		return INT_MIN;
   1711 
   1712 	return rq_prio(list_next_entry(rq, sched.link));
   1713 }
   1714 
   1715 static inline unsigned long
   1716 timeslice(const struct intel_engine_cs *engine)
   1717 {
   1718 	return READ_ONCE(engine->props.timeslice_duration_ms);
   1719 }
   1720 
   1721 static unsigned long
   1722 active_timeslice(const struct intel_engine_cs *engine)
   1723 {
   1724 	const struct i915_request *rq = *engine->execlists.active;
   1725 
   1726 	if (!rq || i915_request_completed(rq))
   1727 		return 0;
   1728 
   1729 	if (engine->execlists.switch_priority_hint < effective_prio(rq))
   1730 		return 0;
   1731 
   1732 	return timeslice(engine);
   1733 }
   1734 
   1735 static void set_timeslice(struct intel_engine_cs *engine)
   1736 {
   1737 	if (!intel_engine_has_timeslices(engine))
   1738 		return;
   1739 
   1740 	set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
   1741 }
   1742 
   1743 static void record_preemption(struct intel_engine_execlists *execlists)
   1744 {
   1745 	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
   1746 }
   1747 
   1748 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
   1749 {
   1750 	struct i915_request *rq;
   1751 
   1752 	rq = last_active(&engine->execlists);
   1753 	if (!rq)
   1754 		return 0;
   1755 
   1756 	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
   1757 	if (unlikely(intel_context_is_banned(rq->context)))
   1758 		return 1;
   1759 
   1760 	return READ_ONCE(engine->props.preempt_timeout_ms);
   1761 }
   1762 
   1763 static void set_preempt_timeout(struct intel_engine_cs *engine)
   1764 {
   1765 	if (!intel_engine_has_preempt_reset(engine))
   1766 		return;
   1767 
   1768 	set_timer_ms(&engine->execlists.preempt,
   1769 		     active_preempt_timeout(engine));
   1770 }
   1771 
   1772 static inline void clear_ports(struct i915_request **ports, int count)
   1773 {
   1774 	memset_p((void **)ports, NULL, count);
   1775 }
   1776 
   1777 static void execlists_dequeue(struct intel_engine_cs *engine)
   1778 {
   1779 	struct intel_engine_execlists * const execlists = &engine->execlists;
   1780 	struct i915_request **port = execlists->pending;
   1781 	struct i915_request ** const last_port = port + execlists->port_mask;
   1782 	struct i915_request *last;
   1783 	struct rb_node *rb;
   1784 	bool submit = false;
   1785 
   1786 	/*
   1787 	 * Hardware submission is through 2 ports. Conceptually each port
   1788 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
   1789 	 * static for a context, and unique to each, so we only execute
   1790 	 * requests belonging to a single context from each ring. RING_HEAD
   1791 	 * is maintained by the CS in the context image; it marks the place
   1792 	 * where it got up to last time, and through RING_TAIL we tell the CS
   1793 	 * where we want to execute up to this time.
   1794 	 *
   1795 	 * In this list the requests are in order of execution. Consecutive
   1796 	 * requests from the same context are adjacent in the ringbuffer. We
   1797 	 * can combine these requests into a single RING_TAIL update:
   1798 	 *
   1799 	 *              RING_HEAD...req1...req2
   1800 	 *                                    ^- RING_TAIL
   1801 	 * since to execute req2 the CS must first execute req1.
   1802 	 *
   1803 	 * Our goal then is to point each port to the end of a consecutive
   1804 	 * sequence of requests as the optimal (fewest wakeups
   1805 	 * and context switches) submission.
   1806 	 */
   1807 
   1808 	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
   1809 		struct virtual_engine *ve =
   1810 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
   1811 		struct i915_request *rq = READ_ONCE(ve->request);
   1812 
   1813 		if (!rq) { /* lazily cleanup after another engine handled rq */
   1814 			rb_erase_cached(rb, &execlists->virtual);
   1815 			RB_CLEAR_NODE(rb);
   1816 			rb = rb_first_cached(&execlists->virtual);
   1817 			continue;
   1818 		}
   1819 
   1820 		if (!virtual_matches(ve, rq, engine)) {
   1821 			rb = rb_next(rb);
   1822 			continue;
   1823 		}
   1824 
   1825 		break;
   1826 	}
   1827 
   1828 	/*
   1829 	 * If the queue is higher priority than the last
   1830 	 * request in the currently active context, submit afresh.
   1831 	 * We will resubmit again afterwards in case we need to split
   1832 	 * the active context to interject the preemption request,
   1833 	 * i.e. we will retrigger preemption following the ack in case
   1834 	 * of trouble.
   1835 	 */
   1836 	last = last_active(execlists);
   1837 	if (last) {
   1838 		if (need_preempt(engine, last, rb)) {
   1839 			ENGINE_TRACE(engine,
   1840 				     "preempting last=%llx:%lld, prio=%d, hint=%d\n",
   1841 				     last->fence.context,
   1842 				     last->fence.seqno,
   1843 				     last->sched.attr.priority,
   1844 				     execlists->queue_priority_hint);
   1845 			record_preemption(execlists);
   1846 
   1847 			/*
   1848 			 * Don't let the RING_HEAD advance past the breadcrumb
   1849 			 * as we unwind (and until we resubmit) so that we do
   1850 			 * not accidentally tell it to go backwards.
   1851 			 */
   1852 			ring_set_paused(engine, 1);
   1853 
   1854 			/*
   1855 			 * Note that we have not stopped the GPU at this point,
   1856 			 * so we are unwinding the incomplete requests as they
   1857 			 * remain inflight and so by the time we do complete
   1858 			 * the preemption, some of the unwound requests may
   1859 			 * complete!
   1860 			 */
   1861 			__unwind_incomplete_requests(engine);
   1862 
   1863 			last = NULL;
   1864 		} else if (need_timeslice(engine, last) &&
   1865 			   timer_expired(&engine->execlists.timer)) {
   1866 			ENGINE_TRACE(engine,
   1867 				     "expired last=%llx:%lld, prio=%d, hint=%d\n",
   1868 				     last->fence.context,
   1869 				     last->fence.seqno,
   1870 				     last->sched.attr.priority,
   1871 				     execlists->queue_priority_hint);
   1872 
   1873 			ring_set_paused(engine, 1);
   1874 			defer_active(engine);
   1875 
   1876 			/*
   1877 			 * Unlike for preemption, if we rewind and continue
   1878 			 * executing the same context as previously active,
   1879 			 * the order of execution will remain the same and
   1880 			 * the tail will only advance. We do not need to
   1881 			 * force a full context restore, as a lite-restore
   1882 			 * is sufficient to resample the monotonic TAIL.
   1883 			 *
   1884 			 * If we switch to any other context, similarly we
   1885 			 * will not rewind TAIL of current context, and
   1886 			 * normal save/restore will preserve state and allow
   1887 			 * us to later continue executing the same request.
   1888 			 */
   1889 			last = NULL;
   1890 		} else {
   1891 			/*
   1892 			 * Otherwise if we already have a request pending
   1893 			 * for execution after the current one, we can
   1894 			 * just wait until the next CS event before
   1895 			 * queuing more. In either case we will force a
   1896 			 * lite-restore preemption event, but if we wait
   1897 			 * we hopefully coalesce several updates into a single
   1898 			 * submission.
   1899 			 */
   1900 			if (!list_is_last(&last->sched.link,
   1901 					  &engine->active.requests)) {
   1902 				/*
   1903 				 * Even if ELSP[1] is occupied and not worthy
   1904 				 * of timeslices, our queue might be.
   1905 				 */
   1906 				if (!execlists->timer.expires &&
   1907 				    need_timeslice(engine, last))
   1908 					set_timer_ms(&execlists->timer,
   1909 						     timeslice(engine));
   1910 
   1911 				return;
   1912 			}
   1913 		}
   1914 	}
   1915 
   1916 	while (rb) { /* XXX virtual is always taking precedence */
   1917 		struct virtual_engine *ve =
   1918 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
   1919 		struct i915_request *rq;
   1920 
   1921 		spin_lock(&ve->base.active.lock);
   1922 
   1923 		rq = ve->request;
   1924 		if (unlikely(!rq)) { /* lost the race to a sibling */
   1925 			spin_unlock(&ve->base.active.lock);
   1926 			rb_erase_cached(rb, &execlists->virtual);
   1927 			RB_CLEAR_NODE(rb);
   1928 			rb = rb_first_cached(&execlists->virtual);
   1929 			continue;
   1930 		}
   1931 
   1932 		GEM_BUG_ON(rq != ve->request);
   1933 		GEM_BUG_ON(rq->engine != &ve->base);
   1934 		GEM_BUG_ON(rq->context != &ve->context);
   1935 
   1936 		if (rq_prio(rq) >= queue_prio(execlists)) {
   1937 			if (!virtual_matches(ve, rq, engine)) {
   1938 				spin_unlock(&ve->base.active.lock);
   1939 				rb = rb_next(rb);
   1940 				continue;
   1941 			}
   1942 
   1943 			if (last && !can_merge_rq(last, rq)) {
   1944 				spin_unlock(&ve->base.active.lock);
   1945 				return; /* leave this for another */
   1946 			}
   1947 
   1948 			ENGINE_TRACE(engine,
   1949 				     "virtual rq=%llx:%lld%s, new engine? %s\n",
   1950 				     rq->fence.context,
   1951 				     rq->fence.seqno,
   1952 				     i915_request_completed(rq) ? "!" :
   1953 				     i915_request_started(rq) ? "*" :
   1954 				     "",
   1955 				     yesno(engine != ve->siblings[0]));
   1956 
   1957 			ve->request = NULL;
   1958 			ve->base.execlists.queue_priority_hint = INT_MIN;
   1959 			rb_erase_cached(rb, &execlists->virtual);
   1960 			RB_CLEAR_NODE(rb);
   1961 
   1962 			GEM_BUG_ON(!(rq->execution_mask & engine->mask));
   1963 			rq->engine = engine;
   1964 
   1965 			if (engine != ve->siblings[0]) {
   1966 				u32 *regs = ve->context.lrc_reg_state;
   1967 				unsigned int n;
   1968 
   1969 				GEM_BUG_ON(READ_ONCE(ve->context.inflight));
   1970 
   1971 				if (!intel_engine_has_relative_mmio(engine))
   1972 					virtual_update_register_offsets(regs,
   1973 									engine);
   1974 
   1975 				if (!list_empty(&ve->context.signals))
   1976 					virtual_xfer_breadcrumbs(ve, engine);
   1977 
   1978 				/*
   1979 				 * Move the bound engine to the top of the list
   1980 				 * for future execution. We then kick this
   1981 				 * tasklet first before checking others, so that
   1982 				 * we preferentially reuse this set of bound
   1983 				 * registers.
   1984 				 */
   1985 				for (n = 1; n < ve->num_siblings; n++) {
   1986 					if (ve->siblings[n] == engine) {
   1987 						swap(ve->siblings[n],
   1988 						     ve->siblings[0]);
   1989 						break;
   1990 					}
   1991 				}
   1992 
   1993 				GEM_BUG_ON(ve->siblings[0] != engine);
   1994 			}
   1995 
   1996 			if (__i915_request_submit(rq)) {
   1997 				submit = true;
   1998 				last = rq;
   1999 			}
   2000 			i915_request_put(rq);
   2001 
   2002 			/*
   2003 			 * Hmm, we have a bunch of virtual engine requests,
   2004 			 * but the first one was already completed (thanks
   2005 			 * preempt-to-busy!). Keep looking at the veng queue
   2006 			 * until we have no more relevant requests (i.e.
   2007 			 * the normal submit queue has higher priority).
   2008 			 */
   2009 			if (!submit) {
   2010 				spin_unlock(&ve->base.active.lock);
   2011 				rb = rb_first_cached(&execlists->virtual);
   2012 				continue;
   2013 			}
   2014 		}
   2015 
   2016 		spin_unlock(&ve->base.active.lock);
   2017 		break;
   2018 	}
   2019 
   2020 	while ((rb = rb_first_cached(&execlists->queue))) {
   2021 		struct i915_priolist *p = to_priolist(rb);
   2022 		struct i915_request *rq, *rn;
   2023 		int i;
   2024 
   2025 		priolist_for_each_request_consume(rq, rn, p, i) {
   2026 			bool merge = true;
   2027 
   2028 			/*
   2029 			 * Can we combine this request with the current port?
   2030 			 * It has to be the same context/ringbuffer and not
   2031 			 * have any exceptions (e.g. GVT saying never to
   2032 			 * combine contexts).
   2033 			 *
   2034 			 * If we can combine the requests, we can execute both
   2035 			 * by updating the RING_TAIL to point to the end of the
   2036 			 * second request, and so we never need to tell the
   2037 			 * hardware about the first.
   2038 			 */
   2039 			if (last && !can_merge_rq(last, rq)) {
   2040 				/*
   2041 				 * If we are on the second port and cannot
   2042 				 * combine this request with the last, then we
   2043 				 * are done.
   2044 				 */
   2045 				if (port == last_port)
   2046 					goto done;
   2047 
   2048 				/*
   2049 				 * We must not populate both ELSP[] with the
   2050 				 * same LRCA, i.e. we must submit 2 different
   2051 				 * contexts if we submit 2 ELSP.
   2052 				 */
   2053 				if (last->context == rq->context)
   2054 					goto done;
   2055 
   2056 				if (i915_request_has_sentinel(last))
   2057 					goto done;
   2058 
   2059 				/*
   2060 				 * If GVT overrides us we only ever submit
   2061 				 * port[0], leaving port[1] empty. Note that we
   2062 				 * also have to be careful that we don't queue
   2063 				 * the same context (even though a different
   2064 				 * request) to the second port.
   2065 				 */
   2066 				if (ctx_single_port_submission(last->context) ||
   2067 				    ctx_single_port_submission(rq->context))
   2068 					goto done;
   2069 
   2070 				merge = false;
   2071 			}
   2072 
   2073 			if (__i915_request_submit(rq)) {
   2074 				if (!merge) {
   2075 					*port = execlists_schedule_in(last, port - execlists->pending);
   2076 					port++;
   2077 					last = NULL;
   2078 				}
   2079 
   2080 				GEM_BUG_ON(last &&
   2081 					   !can_merge_ctx(last->context,
   2082 							  rq->context));
   2083 
   2084 				submit = true;
   2085 				last = rq;
   2086 			}
   2087 		}
   2088 
   2089 		rb_erase_cached(&p->node, &execlists->queue);
   2090 		i915_priolist_free(p);
   2091 	}
   2092 
   2093 done:
   2094 	/*
   2095 	 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
   2096 	 *
   2097 	 * We choose the priority hint such that if we add a request of greater
   2098 	 * priority than this, we kick the submission tasklet to decide on
   2099 	 * the right order of submitting the requests to hardware. We must
   2100 	 * also be prepared to reorder requests as they are in-flight on the
   2101 	 * HW. We derive the priority hint then as the first "hole" in
   2102 	 * the HW submission ports and if there are no available slots,
   2103 	 * the priority of the lowest executing request, i.e. last.
   2104 	 *
   2105 	 * When we do receive a higher priority request ready to run from the
   2106 	 * user, see queue_request(), the priority hint is bumped to that
   2107 	 * request triggering preemption on the next dequeue (or subsequent
   2108 	 * interrupt for secondary ports).
   2109 	 */
   2110 	execlists->queue_priority_hint = queue_prio(execlists);
   2111 
   2112 	if (submit) {
   2113 		*port = execlists_schedule_in(last, port - execlists->pending);
   2114 		execlists->switch_priority_hint =
   2115 			switch_prio(engine, *execlists->pending);
   2116 
   2117 		/*
   2118 		 * Skip if we ended up with exactly the same set of requests,
   2119 		 * e.g. trying to timeslice a pair of ordered contexts
   2120 		 */
   2121 		if (!memcmp(execlists->active, execlists->pending,
   2122 			    (port - execlists->pending + 1) * sizeof(*port))) {
   2123 			do
   2124 				execlists_schedule_out(fetch_and_zero(port));
   2125 			while (port-- != execlists->pending);
   2126 
   2127 			goto skip_submit;
   2128 		}
   2129 		clear_ports(port + 1, last_port - port);
   2130 
   2131 		execlists_submit_ports(engine);
   2132 		set_preempt_timeout(engine);
   2133 	} else {
   2134 skip_submit:
   2135 		ring_set_paused(engine, 0);
   2136 	}
   2137 }
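
/*
 * Editor's note: an illustrative sketch, not part of the driver. It restates
 * the port-merging rule described in the comment at the top of
 * execlists_dequeue(): consecutive requests share a single ELSP port (one
 * RING_TAIL update) whenever they belong to the same context and that
 * context is not forced into single-port submission. The helper name is
 * hypothetical and unused by the driver.
 */
static inline bool
example_needs_new_port(const struct i915_request *last,
		       const struct i915_request *rq)
{
	/* A NULL 'last' means the port is empty and rq simply takes it */
	return last && !can_merge_ctx(last->context, rq->context);
}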
   2138 
   2139 static void
   2140 cancel_port_requests(struct intel_engine_execlists * const execlists)
   2141 {
   2142 	struct i915_request * const *port;
   2143 
   2144 	for (port = execlists->pending; *port; port++)
   2145 		execlists_schedule_out(*port);
   2146 	clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
   2147 
   2148 	/* Mark the end of active before we overwrite *active */
   2149 	for (port = xchg(&execlists->active, execlists->pending); *port; port++)
   2150 		execlists_schedule_out(*port);
   2151 	clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
   2152 
   2153 	WRITE_ONCE(execlists->active, execlists->inflight);
   2154 }
   2155 
   2156 static inline void
   2157 invalidate_csb_entries(const u32 *first, const u32 *last)
   2158 {
   2159 	clflush((void *)first);
   2160 	clflush((void *)last);
   2161 }
   2162 
   2163 static inline bool
   2164 reset_in_progress(const struct intel_engine_execlists *execlists)
   2165 {
   2166 	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
   2167 }
   2168 
   2169 /*
   2170  * Starting with Gen12, the status has a new format:
   2171  *
   2172  *     bit  0:     switched to new queue
   2173  *     bit  1:     reserved
   2174  *     bit  2:     semaphore wait mode (poll or signal), only valid when
   2175  *                 switch detail is set to "wait on semaphore"
   2176  *     bits 3-5:   engine class
   2177  *     bits 6-11:  engine instance
   2178  *     bits 12-14: reserved
   2179  *     bits 15-25: sw context id of the lrc the GT switched to
   2180  *     bits 26-31: sw counter of the lrc the GT switched to
   2181  *     bits 32-35: context switch detail
   2182  *                  - 0: ctx complete
   2183  *                  - 1: wait on sync flip
   2184  *                  - 2: wait on vblank
   2185  *                  - 3: wait on scanline
   2186  *                  - 4: wait on semaphore
   2187  *                  - 5: context preempted (not on SEMAPHORE_WAIT or
   2188  *                       WAIT_FOR_EVENT)
   2189  *     bit  36:    reserved
   2190  *     bits 37-43: wait detail (for switch detail 1 to 4)
   2191  *     bits 44-46: reserved
   2192  *     bits 47-57: sw context id of the lrc the GT switched away from
   2193  *     bits 58-63: sw counter of the lrc the GT switched away from
   2194  */
   2195 static inline bool
   2196 gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
   2197 {
   2198 	u32 lower_dw = csb[0];
   2199 	u32 upper_dw = csb[1];
   2200 	bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
   2201 	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
   2202 	bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
   2203 
   2204 	/*
   2205 	 * The context switch detail is not guaranteed to be 5 when a preemption
   2206 	 * occurs, so we can't just check for that. The check below works for
   2207 	 * all the cases we care about, including preemptions of WAIT
   2208 	 * instructions and lite-restore. Preempt-to-idle via the CTRL register
   2209 	 * would require some extra handling, but we don't support that.
   2210 	 */
   2211 	if (!ctx_away_valid || new_queue) {
   2212 		GEM_BUG_ON(!ctx_to_valid);
   2213 		return true;
   2214 	}
   2215 
   2216 	/*
   2217 	 * switch detail = 5 is covered by the case above and we do not expect a
   2218 	 * context switch on an unsuccessful wait instruction since we always
   2219 	 * use polling mode.
   2220 	 */
   2221 	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
   2222 	return false;
   2223 }
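
/*
 * Editor's note: an illustrative sketch, not part of the driver. It decodes
 * two fields of the upper CSB dword (bits 32-63) from the Gen12 layout
 * documented above gen12_csb_parse(). The helper names and masks are derived
 * purely from that comment and are hypothetical; the driver itself relies on
 * the GEN12_CSB_CTX_VALID() and GEN12_CTX_SWITCH_DETAIL() macros.
 */
static inline u32 example_csb_switch_detail(u32 upper_dw)
{
	return upper_dw & 0xf;		 /* bits 32-35: context switch detail */
}

static inline u32 example_csb_away_ctx_id(u32 upper_dw)
{
	return (upper_dw >> 15) & 0x7ff; /* bits 47-57: sw ctx id switched away from */
}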
   2224 
   2225 static inline bool
   2226 gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
   2227 {
   2228 	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
   2229 }
   2230 
   2231 static void process_csb(struct intel_engine_cs *engine)
   2232 {
   2233 	struct intel_engine_execlists * const execlists = &engine->execlists;
   2234 	const u32 * const buf = execlists->csb_status;
   2235 	const u8 num_entries = execlists->csb_size;
   2236 	u8 head, tail;
   2237 
   2238 	/*
   2239 	 * As we modify our execlists state tracking we require exclusive
   2240 	 * access. Either we are inside the tasklet, or the tasklet is disabled
   2241 	 * and we assume that is only inside the reset paths and so serialised.
   2242 	 */
   2243 	GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
   2244 		   !reset_in_progress(execlists));
   2245 	GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
   2246 
   2247 	/*
   2248 	 * Note that csb_write, csb_status may be either in HWSP or mmio.
   2249 	 * When reading from the csb_write mmio register, we have to be
   2250 	 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
   2251 	 * the low 4 bits. As it happens we know the next 4 bits are always
   2252 	 * zero and so we can simply mask off the low u8 of the register
   2253 	 * and treat it identically to reading from the HWSP (without having
   2254 	 * to use explicit shifting and masking, and probably bifurcating
   2255 	 * the code to handle the legacy mmio read).
   2256 	 */
   2257 	head = execlists->csb_head;
   2258 	tail = READ_ONCE(*execlists->csb_write);
   2259 	ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
   2260 	if (unlikely(head == tail))
   2261 		return;
   2262 
   2263 	/*
   2264 	 * Hopefully paired with a wmb() in HW!
   2265 	 *
   2266 	 * We must complete the read of the write pointer before any reads
   2267 	 * from the CSB, so that we do not see stale values. Without an rmb
   2268 	 * (lfence) the HW may speculatively perform the CSB[] reads *before*
   2269 	 * we perform the READ_ONCE(*csb_write).
   2270 	 */
   2271 	rmb();
   2272 
   2273 	do {
   2274 		bool promote;
   2275 
   2276 		if (++head == num_entries)
   2277 			head = 0;
   2278 
   2279 		/*
   2280 		 * We are flying near dragons again.
   2281 		 *
   2282 		 * We hold a reference to the request in execlist_port[]
   2283 		 * but no more than that. We are operating in softirq
   2284 		 * context and so cannot hold any mutex or sleep. That
   2285 		 * prevents us stopping the requests we are processing
   2286 		 * in port[] from being retired simultaneously (the
   2287 		 * breadcrumb will be complete before we see the
   2288 		 * context-switch). As we only hold the reference to the
   2289 		 * request, any pointer chasing underneath the request
   2290 		 * is subject to a potential use-after-free. Thus we
   2291 		 * store all of the bookkeeping within port[] as
   2292 		 * required, and avoid using unguarded pointers beneath
   2293 		 * request itself. The same applies to the atomic
   2294 		 * status notifier.
   2295 		 */
   2296 
   2297 		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
   2298 			     head, buf[2 * head + 0], buf[2 * head + 1]);
   2299 
   2300 		if (INTEL_GEN(engine->i915) >= 12)
   2301 			promote = gen12_csb_parse(execlists, buf + 2 * head);
   2302 		else
   2303 			promote = gen8_csb_parse(execlists, buf + 2 * head);
   2304 		if (promote) {
   2305 			struct i915_request * const *old = execlists->active;
   2306 
   2307 			/* Point active to the new ELSP; prevent overwriting */
   2308 			WRITE_ONCE(execlists->active, execlists->pending);
   2309 
   2310 			if (!inject_preempt_hang(execlists))
   2311 				ring_set_paused(engine, 0);
   2312 
   2313 			/* cancel old inflight, prepare for switch */
   2314 			trace_ports(execlists, "preempted", old);
   2315 			while (*old)
   2316 				execlists_schedule_out(*old++);
   2317 
   2318 			/* switch pending to inflight */
   2319 			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
   2320 			WRITE_ONCE(execlists->active,
   2321 				   memcpy(execlists->inflight,
   2322 					  execlists->pending,
   2323 					  execlists_num_ports(execlists) *
   2324 					  sizeof(*execlists->pending)));
   2325 
   2326 			WRITE_ONCE(execlists->pending[0], NULL);
   2327 		} else {
   2328 			GEM_BUG_ON(!*execlists->active);
   2329 
   2330 			/* port0 completed, advanced to port1 */
   2331 			trace_ports(execlists, "completed", execlists->active);
   2332 
   2333 			/*
   2334 			 * We rely on the hardware being strongly
   2335 			 * ordered, that the breadcrumb write is
   2336 			 * coherent (visible from the CPU) before the
   2337 			 * user interrupt and CSB is processed.
   2338 			 */
   2339 			GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
   2340 				   !reset_in_progress(execlists));
   2341 			execlists_schedule_out(*execlists->active++);
   2342 
   2343 			GEM_BUG_ON(execlists->active - execlists->inflight >
   2344 				   execlists_num_ports(execlists));
   2345 		}
   2346 	} while (head != tail);
   2347 
   2348 	execlists->csb_head = head;
   2349 	set_timeslice(engine);
   2350 
   2351 	/*
   2352 	 * Gen11 has proven to fail with respect to the global observation
   2353 	 * point between entry and tail update, failing on the ordering and
   2354 	 * thus we see an old entry in the context status buffer.
   2355 	 *
   2356 	 * Forcibly evict the entries ahead of the next GPU CSB update, to
   2357 	 * increase the odds that we get fresh entries even with non-working
   2358 	 * hardware. The cost of doing so comes out mostly in the wash, as
   2359 	 * the hardware, working or not, will need to do the invalidation
   2360 	 * beforehand anyway.
   2361 	 */
   2362 	invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
   2363 }
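
/*
 * Editor's note: an illustrative sketch, not part of the driver. It spells
 * out the observation in the csb_write comment inside process_csb() above:
 * when the write pointer is read from the mmio register, only the
 * GEN8_CSB_WRITE_PTR field (the low 4 bits) is meaningful, but because the
 * next 4 bits always read as zero, truncating to the low byte yields the
 * same value as a tail stored in the HWSP. The helper name is hypothetical.
 */
static inline u8 example_csb_tail(u32 raw)
{
	return raw & 0xff; /* identical to reading the u8 tail from the HWSP */
}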
   2364 
   2365 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
   2366 {
   2367 	lockdep_assert_held(&engine->active.lock);
   2368 	if (!engine->execlists.pending[0]) {
   2369 		rcu_read_lock(); /* protect peeking at execlists->active */
   2370 		execlists_dequeue(engine);
   2371 		rcu_read_unlock();
   2372 	}
   2373 }
   2374 
   2375 static void __execlists_hold(struct i915_request *rq)
   2376 {
   2377 	LIST_HEAD(list);
   2378 
   2379 	do {
   2380 		struct i915_dependency *p;
   2381 
   2382 		if (i915_request_is_active(rq))
   2383 			__i915_request_unsubmit(rq);
   2384 
   2385 		RQ_TRACE(rq, "on hold\n");
   2386 		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
   2387 		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
   2388 		i915_request_set_hold(rq);
   2389 
   2390 		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
   2391 			struct i915_request *w =
   2392 				container_of(p->waiter, typeof(*w), sched);
   2393 
   2394 			/* Leave semaphores spinning on the other engines */
   2395 			if (w->engine != rq->engine)
   2396 				continue;
   2397 
   2398 			if (!i915_request_is_ready(w))
   2399 				continue;
   2400 
   2401 			if (i915_request_completed(w))
   2402 				continue;
   2403 
   2404 			if (i915_request_on_hold(w))
   2405 				continue;
   2406 
   2407 			list_move_tail(&w->sched.link, &list);
   2408 		}
   2409 
   2410 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
   2411 	} while (rq);
   2412 }
   2413 
   2414 static bool execlists_hold(struct intel_engine_cs *engine,
   2415 			   struct i915_request *rq)
   2416 {
   2417 	spin_lock_irq(&engine->active.lock);
   2418 
   2419 	if (i915_request_completed(rq)) { /* too late! */
   2420 		rq = NULL;
   2421 		goto unlock;
   2422 	}
   2423 
   2424 	if (rq->engine != engine) { /* preempted virtual engine */
   2425 		struct virtual_engine *ve = to_virtual_engine(rq->engine);
   2426 
   2427 		/*
   2428 		 * intel_context_inflight() is only protected by virtue
   2429 		 * of process_csb() being called only by the tasklet (or
   2430 		 * directly from inside reset while the tasklet is suspended).
   2431 		 * Assert that neither of those are allowed to run while we
   2432 		 * poke at the request queues.
   2433 		 */
   2434 		GEM_BUG_ON(!reset_in_progress(&engine->execlists));
   2435 
   2436 		/*
   2437 		 * An unsubmitted request along a virtual engine will
   2438 		 * remain on the active (this) engine until we are able
   2439 		 * to process the context switch away (and so mark the
   2440 		 * context as no longer in flight). That cannot have happened
   2441 		 * yet, otherwise we would not be hanging!
   2442 		 */
   2443 		spin_lock(&ve->base.active.lock);
   2444 		GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
   2445 		GEM_BUG_ON(ve->request != rq);
   2446 		ve->request = NULL;
   2447 		spin_unlock(&ve->base.active.lock);
   2448 		i915_request_put(rq);
   2449 
   2450 		rq->engine = engine;
   2451 	}
   2452 
   2453 	/*
   2454 	 * Transfer this request onto the hold queue to prevent it
   2455 	 * being resubmitted to HW (and potentially completed) before we have
   2456 	 * released it. Since we may have already submitted following
   2457 	 * requests, we need to remove those as well.
   2458 	 */
   2459 	GEM_BUG_ON(i915_request_on_hold(rq));
   2460 	GEM_BUG_ON(rq->engine != engine);
   2461 	__execlists_hold(rq);
   2462 
   2463 unlock:
   2464 	spin_unlock_irq(&engine->active.lock);
   2465 	return rq;
   2466 }
   2467 
   2468 static bool hold_request(const struct i915_request *rq)
   2469 {
   2470 	struct i915_dependency *p;
   2471 
   2472 	/*
   2473 	 * If one of our ancestors is on hold, we must also be on hold,
   2474 	 * otherwise we will bypass it and execute before it.
   2475 	 */
   2476 	list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
   2477 		const struct i915_request *s =
   2478 			container_of(p->signaler, typeof(*s), sched);
   2479 
   2480 		if (s->engine != rq->engine)
   2481 			continue;
   2482 
   2483 		if (i915_request_on_hold(s))
   2484 			return true;
   2485 	}
   2486 
   2487 	return false;
   2488 }
   2489 
   2490 static void __execlists_unhold(struct i915_request *rq)
   2491 {
   2492 	LIST_HEAD(list);
   2493 
   2494 	do {
   2495 		struct i915_dependency *p;
   2496 
   2497 		GEM_BUG_ON(!i915_request_on_hold(rq));
   2498 		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
   2499 
   2500 		i915_request_clear_hold(rq);
   2501 		list_move_tail(&rq->sched.link,
   2502 			       i915_sched_lookup_priolist(rq->engine,
   2503 							  rq_prio(rq)));
   2504 		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
   2505 		RQ_TRACE(rq, "hold release\n");
   2506 
   2507 		/* Also release any children on this engine that are ready */
   2508 		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
   2509 			struct i915_request *w =
   2510 				container_of(p->waiter, typeof(*w), sched);
   2511 
   2512 			if (w->engine != rq->engine)
   2513 				continue;
   2514 
   2515 			if (!i915_request_on_hold(w))
   2516 				continue;
   2517 
   2518 			/* Check that no other parents are also on hold */
   2519 			if (hold_request(w))
   2520 				continue;
   2521 
   2522 			list_move_tail(&w->sched.link, &list);
   2523 		}
   2524 
   2525 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
   2526 	} while (rq);
   2527 }
   2528 
   2529 static void execlists_unhold(struct intel_engine_cs *engine,
   2530 			     struct i915_request *rq)
   2531 {
   2532 	spin_lock_irq(&engine->active.lock);
   2533 
   2534 	/*
   2535 	 * Move this request back to the priority queue, and all of its
   2536 	 * children and grandchildren that were suspended along with it.
   2537 	 */
   2538 	__execlists_unhold(rq);
   2539 
   2540 	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
   2541 		engine->execlists.queue_priority_hint = rq_prio(rq);
   2542 		tasklet_hi_schedule(&engine->execlists.tasklet);
   2543 	}
   2544 
   2545 	spin_unlock_irq(&engine->active.lock);
   2546 }
   2547 
   2548 struct execlists_capture {
   2549 	struct work_struct work;
   2550 	struct i915_request *rq;
   2551 	struct i915_gpu_coredump *error;
   2552 };
   2553 
   2554 static void execlists_capture_work(struct work_struct *work)
   2555 {
   2556 	struct execlists_capture *cap = container_of(work, typeof(*cap), work);
   2557 	const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
   2558 	struct intel_engine_cs *engine = cap->rq->engine;
   2559 	struct intel_gt_coredump *gt = cap->error->gt;
   2560 	struct intel_engine_capture_vma *vma;
   2561 
   2562 	/* Compress all the objects attached to the request, slow! */
   2563 	vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
   2564 	if (vma) {
   2565 		struct i915_vma_compress *compress =
   2566 			i915_vma_capture_prepare(gt);
   2567 
   2568 		intel_engine_coredump_add_vma(gt->engine, vma, compress);
   2569 		i915_vma_capture_finish(gt, compress);
   2570 	}
   2571 
   2572 	gt->simulated = gt->engine->simulated;
   2573 	cap->error->simulated = gt->simulated;
   2574 
   2575 	/* Publish the error state, and announce it to the world */
   2576 	i915_error_state_store(cap->error);
   2577 	i915_gpu_coredump_put(cap->error);
   2578 
   2579 	/* Return this request and all that depend upon it for signaling */
   2580 	execlists_unhold(engine, cap->rq);
   2581 	i915_request_put(cap->rq);
   2582 
   2583 	kfree(cap);
   2584 }
   2585 
   2586 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
   2587 {
   2588 	const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
   2589 	struct execlists_capture *cap;
   2590 
   2591 	cap = kmalloc(sizeof(*cap), gfp);
   2592 	if (!cap)
   2593 		return NULL;
   2594 
   2595 	cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
   2596 	if (!cap->error)
   2597 		goto err_cap;
   2598 
   2599 	cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
   2600 	if (!cap->error->gt)
   2601 		goto err_gpu;
   2602 
   2603 	cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
   2604 	if (!cap->error->gt->engine)
   2605 		goto err_gt;
   2606 
   2607 	return cap;
   2608 
   2609 err_gt:
   2610 	kfree(cap->error->gt);
   2611 err_gpu:
   2612 	kfree(cap->error);
   2613 err_cap:
   2614 	kfree(cap);
   2615 	return NULL;
   2616 }
   2617 
   2618 static bool execlists_capture(struct intel_engine_cs *engine)
   2619 {
   2620 	struct execlists_capture *cap;
   2621 
   2622 	if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
   2623 		return true;
   2624 
   2625 	/*
   2626 	 * We need to _quickly_ capture the engine state before we reset.
   2627 	 * We are inside an atomic section (softirq) here and we are delaying
   2628 	 * the forced preemption event.
   2629 	 */
   2630 	cap = capture_regs(engine);
   2631 	if (!cap)
   2632 		return true;
   2633 
   2634 	cap->rq = execlists_active(&engine->execlists);
   2635 	GEM_BUG_ON(!cap->rq);
   2636 
   2637 	rcu_read_lock();
   2638 	cap->rq = active_request(cap->rq->context->timeline, cap->rq);
   2639 	cap->rq = i915_request_get_rcu(cap->rq);
   2640 	rcu_read_unlock();
   2641 	if (!cap->rq)
   2642 		goto err_free;
   2643 
   2644 	/*
   2645 	 * Remove the request from the execlists queue, and take ownership
   2646 	 * of the request. We pass it to our worker who will _slowly_ compress
   2647 	 * all the pages the _user_ requested for debugging their batch, after
   2648 	 * which we return it to the queue for signaling.
   2649 	 *
   2650 	 * By removing them from the execlists queue, we also remove the
   2651 	 * requests from being processed by __unwind_incomplete_requests()
   2652 	 * during the intel_engine_reset(), and so they will *not* be replayed
   2653 	 * afterwards.
   2654 	 *
   2655 	 * Note that because we have not yet reset the engine at this point,
   2656 	 * it is possible that the request we have identified as guilty
   2657 	 * did in fact complete, and we will then hit an arbitration
   2658 	 * point allowing the outstanding preemption to succeed. The likelihood
   2659 	 * of that is very low (as capturing of the engine registers should be
   2660 	 * fast enough to run inside an irq-off atomic section!), so we will
   2661 	 * simply hold that request accountable for being non-preemptible
   2662 	 * long enough to force the reset.
   2663 	 */
   2664 	if (!execlists_hold(engine, cap->rq))
   2665 		goto err_rq;
   2666 
   2667 	INIT_WORK(&cap->work, execlists_capture_work);
   2668 	schedule_work(&cap->work);
   2669 	return true;
   2670 
   2671 err_rq:
   2672 	i915_request_put(cap->rq);
   2673 err_free:
   2674 	i915_gpu_coredump_put(cap->error);
   2675 	kfree(cap);
   2676 	return false;
   2677 }
   2678 
   2679 static noinline void preempt_reset(struct intel_engine_cs *engine)
   2680 {
   2681 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
   2682 	unsigned long *lock = &engine->gt->reset.flags;
   2683 
   2684 	if (i915_modparams.reset < 3)
   2685 		return;
   2686 
   2687 	if (test_and_set_bit(bit, lock))
   2688 		return;
   2689 
   2690 	/* Mark this tasklet as disabled to avoid waiting for it to complete */
   2691 	tasklet_disable_nosync(&engine->execlists.tasklet);
   2692 
   2693 	ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
   2694 		     READ_ONCE(engine->props.preempt_timeout_ms),
   2695 		     jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
   2696 
   2697 	ring_set_paused(engine, 1); /* Freeze the current request in place */
   2698 	if (execlists_capture(engine))
   2699 		intel_engine_reset(engine, "preemption time out");
   2700 	else
   2701 		ring_set_paused(engine, 0);
   2702 
   2703 	tasklet_enable(&engine->execlists.tasklet);
   2704 	clear_and_wake_up_bit(bit, lock);
   2705 }
   2706 
   2707 static bool preempt_timeout(const struct intel_engine_cs *const engine)
   2708 {
   2709 	const struct timer_list *t = &engine->execlists.preempt;
   2710 
   2711 	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
   2712 		return false;
   2713 
   2714 	if (!timer_expired(t))
   2715 		return false;
   2716 
   2717 	return READ_ONCE(engine->execlists.pending[0]);
   2718 }
   2719 
   2720 /*
   2721  * Check the unread Context Status Buffers and manage the submission of new
   2722  * contexts to the ELSP accordingly.
   2723  */
   2724 static void execlists_submission_tasklet(unsigned long data)
   2725 {
   2726 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
   2727 	bool timeout = preempt_timeout(engine);
   2728 
   2729 	process_csb(engine);
   2730 	if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
   2731 		unsigned long flags;
   2732 
   2733 		spin_lock_irqsave(&engine->active.lock, flags);
   2734 		__execlists_submission_tasklet(engine);
   2735 		spin_unlock_irqrestore(&engine->active.lock, flags);
   2736 
   2737 		/* Recheck after serialising with direct-submission */
   2738 		if (timeout && preempt_timeout(engine))
   2739 			preempt_reset(engine);
   2740 	}
   2741 }
   2742 
   2743 static void __execlists_kick(struct intel_engine_execlists *execlists)
   2744 {
   2745 	/* Kick the tasklet for some interrupt coalescing and reset handling */
   2746 	tasklet_hi_schedule(&execlists->tasklet);
   2747 }
   2748 
   2749 #define execlists_kick(t, member) \
   2750 	__execlists_kick(container_of(t, struct intel_engine_execlists, member))
   2751 
   2752 static void execlists_timeslice(struct timer_list *timer)
   2753 {
   2754 	execlists_kick(timer, timer);
   2755 }
   2756 
   2757 static void execlists_preempt(struct timer_list *timer)
   2758 {
   2759 	execlists_kick(timer, preempt);
   2760 }
   2761 
   2762 static void queue_request(struct intel_engine_cs *engine,
   2763 			  struct i915_request *rq)
   2764 {
   2765 	GEM_BUG_ON(!list_empty(&rq->sched.link));
   2766 	list_add_tail(&rq->sched.link,
   2767 		      i915_sched_lookup_priolist(engine, rq_prio(rq)));
   2768 	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
   2769 }
   2770 
   2771 static void __submit_queue_imm(struct intel_engine_cs *engine)
   2772 {
   2773 	struct intel_engine_execlists * const execlists = &engine->execlists;
   2774 
   2775 	if (reset_in_progress(execlists))
   2776 		return; /* defer until we restart the engine following reset */
   2777 
   2778 	if (execlists->tasklet.func == execlists_submission_tasklet)
   2779 		__execlists_submission_tasklet(engine);
   2780 	else
   2781 		tasklet_hi_schedule(&execlists->tasklet);
   2782 }
   2783 
   2784 static void submit_queue(struct intel_engine_cs *engine,
   2785 			 const struct i915_request *rq)
   2786 {
   2787 	struct intel_engine_execlists *execlists = &engine->execlists;
   2788 
   2789 	if (rq_prio(rq) <= execlists->queue_priority_hint)
   2790 		return;
   2791 
   2792 	execlists->queue_priority_hint = rq_prio(rq);
   2793 	__submit_queue_imm(engine);
   2794 }
   2795 
   2796 static bool ancestor_on_hold(const struct intel_engine_cs *engine,
   2797 			     const struct i915_request *rq)
   2798 {
   2799 	GEM_BUG_ON(i915_request_on_hold(rq));
   2800 	return !list_empty(&engine->active.hold) && hold_request(rq);
   2801 }
   2802 
   2803 static void execlists_submit_request(struct i915_request *request)
   2804 {
   2805 	struct intel_engine_cs *engine = request->engine;
   2806 	unsigned long flags;
   2807 
   2808 	/* Will be called from irq-context when using foreign fences. */
   2809 	spin_lock_irqsave(&engine->active.lock, flags);
   2810 
   2811 	if (unlikely(ancestor_on_hold(engine, request))) {
   2812 		list_add_tail(&request->sched.link, &engine->active.hold);
   2813 		i915_request_set_hold(request);
   2814 	} else {
   2815 		queue_request(engine, request);
   2816 
   2817 		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
   2818 		GEM_BUG_ON(list_empty(&request->sched.link));
   2819 
   2820 		submit_queue(engine, request);
   2821 	}
   2822 
   2823 	spin_unlock_irqrestore(&engine->active.lock, flags);
   2824 }
   2825 
   2826 static void __execlists_context_fini(struct intel_context *ce)
   2827 {
   2828 	intel_ring_put(ce->ring);
   2829 	i915_vma_put(ce->state);
   2830 }
   2831 
   2832 static void execlists_context_destroy(struct kref *kref)
   2833 {
   2834 	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
   2835 
   2836 	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
   2837 	GEM_BUG_ON(intel_context_is_pinned(ce));
   2838 
   2839 	if (ce->state)
   2840 		__execlists_context_fini(ce);
   2841 
   2842 	intel_context_fini(ce);
   2843 	intel_context_free(ce);
   2844 }
   2845 
   2846 static void
   2847 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
   2848 {
   2849 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
   2850 		return;
   2851 
   2852 	vaddr += engine->context_size;
   2853 
   2854 	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
   2855 }
   2856 
   2857 static void
   2858 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
   2859 {
   2860 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
   2861 		return;
   2862 
   2863 	vaddr += engine->context_size;
   2864 
   2865 	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
   2866 		dev_err_once(engine->i915->drm.dev,
   2867 			     "%s context redzone overwritten!\n",
   2868 			     engine->name);
   2869 }
   2870 
   2871 static void execlists_context_unpin(struct intel_context *ce)
   2872 {
   2873 	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
   2874 		      ce->engine);
   2875 
   2876 	i915_gem_object_unpin_map(ce->state->obj);
   2877 }
   2878 
   2879 static void
   2880 __execlists_update_reg_state(const struct intel_context *ce,
   2881 			     const struct intel_engine_cs *engine,
   2882 			     u32 head)
   2883 {
   2884 	struct intel_ring *ring = ce->ring;
   2885 	u32 *regs = ce->lrc_reg_state;
   2886 
   2887 	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
   2888 	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
   2889 
   2890 	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
   2891 	regs[CTX_RING_HEAD] = head;
   2892 	regs[CTX_RING_TAIL] = ring->tail;
   2893 
   2894 	/* RPCS */
   2895 	if (engine->class == RENDER_CLASS) {
   2896 		regs[CTX_R_PWR_CLK_STATE] =
   2897 			intel_sseu_make_rpcs(engine->i915, &ce->sseu);
   2898 
   2899 		i915_oa_init_reg_state(ce, engine);
   2900 	}
   2901 }
   2902 
   2903 static int
   2904 __execlists_context_pin(struct intel_context *ce,
   2905 			struct intel_engine_cs *engine)
   2906 {
   2907 	void *vaddr;
   2908 
   2909 	GEM_BUG_ON(!ce->state);
   2910 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
   2911 
   2912 	vaddr = i915_gem_object_pin_map(ce->state->obj,
   2913 					i915_coherent_map_type(engine->i915) |
   2914 					I915_MAP_OVERRIDE);
   2915 	if (IS_ERR(vaddr))
   2916 		return PTR_ERR(vaddr);
   2917 
   2918 	ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
   2919 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
   2920 	__execlists_update_reg_state(ce, engine, ce->ring->tail);
   2921 
   2922 	return 0;
   2923 }
   2924 
   2925 static int execlists_context_pin(struct intel_context *ce)
   2926 {
   2927 	return __execlists_context_pin(ce, ce->engine);
   2928 }
   2929 
   2930 static int execlists_context_alloc(struct intel_context *ce)
   2931 {
   2932 	return __execlists_context_alloc(ce, ce->engine);
   2933 }
   2934 
   2935 static void execlists_context_reset(struct intel_context *ce)
   2936 {
   2937 	CE_TRACE(ce, "reset\n");
   2938 	GEM_BUG_ON(!intel_context_is_pinned(ce));
   2939 
   2940 	/*
   2941 	 * Because we emit WA_TAIL_DWORDS there may be a disparity
   2942 	 * between our bookkeeping in ce->ring->head and ce->ring->tail and
   2943 	 * that stored in context. As we only write new commands from
   2944 	 * ce->ring->tail onwards, everything before that is junk. If the GPU
   2945 	 * starts reading from its RING_HEAD from the context, it may try to
   2946 	 * execute that junk and die.
   2947 	 *
   2948 	 * The contexts that are still pinned on resume belong to the
   2949 	 * kernel, and are local to each engine. All other contexts will
   2950 	 * have their head/tail sanitized upon pinning before use, so they
   2951 	 * will never see garbage.
   2952 	 *
   2953 	 * So to avoid that we reset the context images upon resume. For
   2954 	 * simplicity, we just zero everything out.
   2955 	 */
   2956 	intel_ring_reset(ce->ring, ce->ring->emit);
   2957 
   2958 	/* Scrub away the garbage */
   2959 	execlists_init_reg_state(ce->lrc_reg_state,
   2960 				 ce, ce->engine, ce->ring, true);
   2961 	__execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
   2962 
   2963 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
   2964 }
   2965 
   2966 static const struct intel_context_ops execlists_context_ops = {
   2967 	.alloc = execlists_context_alloc,
   2968 
   2969 	.pin = execlists_context_pin,
   2970 	.unpin = execlists_context_unpin,
   2971 
   2972 	.enter = intel_context_enter_engine,
   2973 	.exit = intel_context_exit_engine,
   2974 
   2975 	.reset = execlists_context_reset,
   2976 	.destroy = execlists_context_destroy,
   2977 };
   2978 
   2979 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
   2980 {
   2981 	u32 *cs;
   2982 
   2983 	GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
   2984 
   2985 	cs = intel_ring_begin(rq, 6);
   2986 	if (IS_ERR(cs))
   2987 		return PTR_ERR(cs);
   2988 
   2989 	/*
   2990 	 * Check if we have been preempted before we even get started.
   2991 	 *
   2992 	 * After this point i915_request_started() reports true, even if
   2993 	 * we get preempted and so are no longer running.
   2994 	 */
   2995 	*cs++ = MI_ARB_CHECK;
   2996 	*cs++ = MI_NOOP;
   2997 
   2998 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
   2999 	*cs++ = i915_request_timeline(rq)->hwsp_offset;
   3000 	*cs++ = 0;
   3001 	*cs++ = rq->fence.seqno - 1;
   3002 
   3003 	intel_ring_advance(rq, cs);
   3004 
   3005 	/* Record the updated position of the request's payload */
   3006 	rq->infix = intel_ring_offset(rq, cs);
   3007 
   3008 	return 0;
   3009 }
   3010 
   3011 static int execlists_request_alloc(struct i915_request *request)
   3012 {
   3013 	int ret;
   3014 
   3015 	GEM_BUG_ON(!intel_context_is_pinned(request->context));
   3016 
   3017 	/*
   3018 	 * Flush enough space to reduce the likelihood of waiting after
   3019 	 * we start building the request - in which case we will just
   3020 	 * have to repeat work.
   3021 	 */
   3022 	request->reserved_space += EXECLISTS_REQUEST_SIZE;
   3023 
   3024 	/*
   3025 	 * Note that after this point, we have committed to using
   3026 	 * this request as it is being used to both track the
   3027 	 * state of engine initialisation and liveness of the
   3028 	 * golden renderstate above. Think twice before you try
   3029 	 * to cancel/unwind this request now.
   3030 	 */
   3031 
   3032 	/* Unconditionally invalidate GPU caches and TLBs. */
   3033 	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
   3034 	if (ret)
   3035 		return ret;
   3036 
   3037 	request->reserved_space -= EXECLISTS_REQUEST_SIZE;
   3038 	return 0;
   3039 }
   3040 
   3041 /*
   3042  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
   3043  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
   3044  * but there is a slight complication as this is applied in WA batch where the
   3045  * values are only initialized once so we cannot take register value at the
   3046  * beginning and reuse it further; hence we save its value to memory, upload a
   3047  * constant value with bit21 set and then we restore it back with the saved value.
   3048  * To simplify the WA, a constant value is formed by using the default value
   3049  * of this register. This shouldn't be a problem because we are only modifying
   3050  * it for a short period and this batch in non-premptible. We can ofcourse
   3051  * it for a short period and this batch is non-preemptible. We can of course
   3052  * at that time and set our bit of interest but it makes the WA complicated.
   3053  *
   3054  * This WA is also required for Gen9 so extracting as a function avoids
   3055  * code duplication.
   3056  */
   3057 static u32 *
   3058 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
   3059 {
   3060 	/* NB no one else is allowed to scribble over scratch + 256! */
   3061 	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
   3062 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
   3063 	*batch++ = intel_gt_scratch_offset(engine->gt,
   3064 					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
   3065 	*batch++ = 0;
   3066 
   3067 	*batch++ = MI_LOAD_REGISTER_IMM(1);
   3068 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
   3069 	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
   3070 
   3071 	batch = gen8_emit_pipe_control(batch,
   3072 				       PIPE_CONTROL_CS_STALL |
   3073 				       PIPE_CONTROL_DC_FLUSH_ENABLE,
   3074 				       0);
   3075 
   3076 	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
   3077 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
   3078 	*batch++ = intel_gt_scratch_offset(engine->gt,
   3079 					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
   3080 	*batch++ = 0;
   3081 
   3082 	return batch;
   3083 }
   3084 
   3085 /*
   3086  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
   3087  * initialized at the beginning and shared across all contexts but this field
   3088  * helps us to have multiple batches at different offsets and select them based
   3089  * on a criterion. At the moment this batch always starts at the beginning of the page
   3090  * and at this point we don't have multiple wa_ctx batch buffers.
   3091  *
   3092  * The number of WAs applied is not known at the beginning; we use this field
   3093  * to return the number of DWORDS written.
   3094  *
   3095  * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
   3096  * so it adds NOOPs as padding to make it cacheline aligned.
   3097  * MI_BATCH_BUFFER_END will be added to the perctx batch and both of them together
   3098  * make a complete batch buffer.
   3099  */
   3100 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
   3101 {
   3102 	/* WaDisableCtxRestoreArbitration:bdw,chv */
   3103 	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
   3104 
   3105 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
   3106 	if (IS_BROADWELL(engine->i915))
   3107 		batch = gen8_emit_flush_coherentl3_wa(engine, batch);
   3108 
   3109 	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
   3110 	/* Actual scratch location is at 128 bytes offset */
   3111 	batch = gen8_emit_pipe_control(batch,
   3112 				       PIPE_CONTROL_FLUSH_L3 |
   3113 				       PIPE_CONTROL_STORE_DATA_INDEX |
   3114 				       PIPE_CONTROL_CS_STALL |
   3115 				       PIPE_CONTROL_QW_WRITE,
   3116 				       LRC_PPHWSP_SCRATCH_ADDR);
   3117 
   3118 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   3119 
   3120 	/* Pad to end of cacheline */
   3121 	while ((unsigned long)batch % CACHELINE_BYTES)
   3122 		*batch++ = MI_NOOP;
   3123 
   3124 	/*
   3125 	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
   3126 	 * execution depends on the length specified in terms of cache lines
   3127 	 * in the register CTX_RCS_INDIRECT_CTX
   3128 	 */
   3129 
   3130 	return batch;
   3131 }
   3132 
   3133 struct lri {
   3134 	i915_reg_t reg;
   3135 	u32 value;
   3136 };
   3137 
   3138 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
   3139 {
   3140 	GEM_BUG_ON(!count || count > 63);
   3141 
   3142 	*batch++ = MI_LOAD_REGISTER_IMM(count);
   3143 	do {
   3144 		*batch++ = i915_mmio_reg_offset(lri->reg);
   3145 		*batch++ = lri->value;
   3146 	} while (lri++, --count);
   3147 	*batch++ = MI_NOOP;
   3148 
   3149 	return batch;
   3150 }
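/*
 * A minimal usage sketch for emit_lri() (the register and bit names below
 * are placeholders, not an additional workaround emitted by the driver):
 *
 *	static const struct lri wa_lri[] = {
 *		{ SOME_CHICKEN_REG,  __MASKED_FIELD(SOME_BIT, 0) },
 *		{ OTHER_CHICKEN_REG, __MASKED_FIELD(OTHER_BIT, OTHER_BIT) },
 *	};
 *
 *	batch = emit_lri(batch, wa_lri, ARRAY_SIZE(wa_lri));
 *
 * expands into a single MI_LOAD_REGISTER_IMM(2) packet followed by the
 * (reg, value) pairs and a trailing MI_NOOP, exactly as used by
 * gen9_init_indirectctx_bb() below.
 */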
   3151 
   3152 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
   3153 {
   3154 	static const struct lri lri[] = {
   3155 		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
   3156 		{
   3157 			COMMON_SLICE_CHICKEN2,
   3158 			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
   3159 				       0),
   3160 		},
   3161 
   3162 		/* BSpec: 11391 */
   3163 		{
   3164 			FF_SLICE_CHICKEN,
   3165 			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
   3166 				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
   3167 		},
   3168 
   3169 		/* BSpec: 11299 */
   3170 		{
   3171 			_3D_CHICKEN3,
   3172 			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
   3173 				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
   3174 		}
   3175 	};
   3176 
   3177 	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
   3178 
   3179 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
   3180 	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
   3181 
   3182 	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
   3183 	batch = gen8_emit_pipe_control(batch,
   3184 				       PIPE_CONTROL_FLUSH_L3 |
   3185 				       PIPE_CONTROL_STORE_DATA_INDEX |
   3186 				       PIPE_CONTROL_CS_STALL |
   3187 				       PIPE_CONTROL_QW_WRITE,
   3188 				       LRC_PPHWSP_SCRATCH_ADDR);
   3189 
   3190 	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
   3191 
   3192 	/* WaMediaPoolStateCmdInWABB:bxt,glk */
   3193 	if (HAS_POOLED_EU(engine->i915)) {
   3194 		/*
    3195 		 * EU pool configuration is set up along with the golden
    3196 		 * context during context initialization. The value depends on
    3197 		 * the device type (2x6 or 3x6) and needs to be updated based
    3198 		 * on which subslice is disabled, especially for 2x6
    3199 		 * devices. However, it is safe to load the default 3x6
    3200 		 * configuration instead of masking off the corresponding
    3201 		 * bits, because the HW ignores the bits of a disabled
    3202 		 * subslice and drops down to the appropriate config. Please
    3203 		 * see render_state_setup() in i915_gem_render_state.c for the
    3204 		 * possible configurations; to avoid duplication they are
    3205 		 * not repeated here.
   3206 		 */
   3207 		*batch++ = GEN9_MEDIA_POOL_STATE;
   3208 		*batch++ = GEN9_MEDIA_POOL_ENABLE;
   3209 		*batch++ = 0x00777000;
   3210 		*batch++ = 0;
   3211 		*batch++ = 0;
   3212 		*batch++ = 0;
   3213 	}
   3214 
   3215 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   3216 
   3217 	/* Pad to end of cacheline */
   3218 	while ((unsigned long)batch % CACHELINE_BYTES)
   3219 		*batch++ = MI_NOOP;
   3220 
   3221 	return batch;
   3222 }
   3223 
   3224 static u32 *
   3225 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
   3226 {
   3227 	int i;
   3228 
   3229 	/*
   3230 	 * WaPipeControlBefore3DStateSamplePattern: cnl
   3231 	 *
   3232 	 * Ensure the engine is idle prior to programming a
   3233 	 * 3DSTATE_SAMPLE_PATTERN during a context restore.
   3234 	 */
   3235 	batch = gen8_emit_pipe_control(batch,
   3236 				       PIPE_CONTROL_CS_STALL,
   3237 				       0);
   3238 	/*
   3239 	 * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
   3240 	 * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
   3241 	 * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
   3242 	 * confusing. Since gen8_emit_pipe_control() already advances the
   3243 	 * batch by 6 dwords, we advance the other 10 here, completing a
   3244 	 * cacheline. It's not clear if the workaround requires this padding
   3245 	 * before other commands, or if it's just the regular padding we would
   3246 	 * already have for the workaround bb, so leave it here for now.
   3247 	 */
   3248 	for (i = 0; i < 10; i++)
   3249 		*batch++ = MI_NOOP;
   3250 
   3251 	/* Pad to end of cacheline */
   3252 	while ((unsigned long)batch % CACHELINE_BYTES)
   3253 		*batch++ = MI_NOOP;
   3254 
   3255 	return batch;
   3256 }
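/*
 * Sanity check on the dword arithmetic above: a cacheline is
 * CACHELINE_BYTES / sizeof(u32) = 16 dwords, so the 6-dword PIPE_CONTROL
 * plus the 10 MI_NOOPs exactly fill one cacheline before the generic
 * padding loop runs.
 */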
   3257 
   3258 #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
   3259 
   3260 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
   3261 {
   3262 	struct drm_i915_gem_object *obj;
   3263 	struct i915_vma *vma;
   3264 	int err;
   3265 
   3266 	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
   3267 	if (IS_ERR(obj))
   3268 		return PTR_ERR(obj);
   3269 
   3270 	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
   3271 	if (IS_ERR(vma)) {
   3272 		err = PTR_ERR(vma);
   3273 		goto err;
   3274 	}
   3275 
   3276 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
   3277 	if (err)
   3278 		goto err;
   3279 
   3280 	engine->wa_ctx.vma = vma;
   3281 	return 0;
   3282 
   3283 err:
   3284 	i915_gem_object_put(obj);
   3285 	return err;
   3286 }
   3287 
   3288 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
   3289 {
   3290 	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
   3291 }
   3292 
   3293 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
   3294 
   3295 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
   3296 {
   3297 	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
   3298 	struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
   3299 					    &wa_ctx->per_ctx };
   3300 	wa_bb_func_t wa_bb_fn[2];
   3301 	struct page *page;
   3302 	void *batch, *batch_ptr;
   3303 	unsigned int i;
   3304 	int ret;
   3305 
   3306 	if (engine->class != RENDER_CLASS)
   3307 		return 0;
   3308 
   3309 	switch (INTEL_GEN(engine->i915)) {
   3310 	case 12:
   3311 	case 11:
   3312 		return 0;
   3313 	case 10:
   3314 		wa_bb_fn[0] = gen10_init_indirectctx_bb;
   3315 		wa_bb_fn[1] = NULL;
   3316 		break;
   3317 	case 9:
   3318 		wa_bb_fn[0] = gen9_init_indirectctx_bb;
   3319 		wa_bb_fn[1] = NULL;
   3320 		break;
   3321 	case 8:
   3322 		wa_bb_fn[0] = gen8_init_indirectctx_bb;
   3323 		wa_bb_fn[1] = NULL;
   3324 		break;
   3325 	default:
   3326 		MISSING_CASE(INTEL_GEN(engine->i915));
   3327 		return 0;
   3328 	}
   3329 
   3330 	ret = lrc_setup_wa_ctx(engine);
   3331 	if (ret) {
   3332 		DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
   3333 		return ret;
   3334 	}
   3335 
   3336 	page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
   3337 	batch = batch_ptr = kmap_atomic(page);
   3338 
   3339 	/*
   3340 	 * Emit the two workaround batch buffers, recording the offset from the
   3341 	 * start of the workaround batch buffer object for each and their
   3342 	 * respective sizes.
   3343 	 */
   3344 	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
   3345 		wa_bb[i]->offset = batch_ptr - batch;
   3346 		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
   3347 						  CACHELINE_BYTES))) {
   3348 			ret = -EINVAL;
   3349 			break;
   3350 		}
   3351 		if (wa_bb_fn[i])
   3352 			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
   3353 		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
   3354 	}
   3355 
   3356 	BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
   3357 
   3358 	kunmap_atomic(batch);
   3359 	if (ret)
   3360 		lrc_destroy_wa_ctx(engine);
   3361 
   3362 	return ret;
   3363 }
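/*
 * The offsets and sizes recorded in wa_ctx above are consumed later when
 * the context image is built; see init_wa_bb_reg_state(), which roughly
 * programs (a sketch, with the base index being GEN12_CTX_BB_PER_CTX_PTR
 * on gen12+):
 *
 *	regs[CTX_BB_PER_CTX_PTR]     = (ggtt_offset + per_ctx.offset) | 0x01;
 *	regs[CTX_BB_PER_CTX_PTR + 2] = (ggtt_offset + indirect_ctx.offset) |
 *				       (indirect_ctx.size / CACHELINE_BYTES);
 *	regs[CTX_BB_PER_CTX_PTR + 4] = intel_lr_indirect_ctx_offset(engine) << 6;
 */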
   3364 
   3365 static void enable_execlists(struct intel_engine_cs *engine)
   3366 {
   3367 	u32 mode;
   3368 
   3369 	assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
   3370 
   3371 	intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
   3372 
   3373 	if (INTEL_GEN(engine->i915) >= 11)
   3374 		mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
   3375 	else
   3376 		mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
   3377 	ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
   3378 
   3379 	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
   3380 
   3381 	ENGINE_WRITE_FW(engine,
   3382 			RING_HWS_PGA,
   3383 			i915_ggtt_offset(engine->status_page.vma));
   3384 	ENGINE_POSTING_READ(engine, RING_HWS_PGA);
   3385 
   3386 	engine->context_tag = 0;
   3387 }
   3388 
   3389 static bool unexpected_starting_state(struct intel_engine_cs *engine)
   3390 {
   3391 	bool unexpected = false;
   3392 
   3393 	if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
   3394 		DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
   3395 		unexpected = true;
   3396 	}
   3397 
   3398 	return unexpected;
   3399 }
   3400 
   3401 static int execlists_resume(struct intel_engine_cs *engine)
   3402 {
   3403 	intel_engine_apply_workarounds(engine);
   3404 	intel_engine_apply_whitelist(engine);
   3405 
   3406 	intel_mocs_init_engine(engine);
   3407 
   3408 	intel_engine_reset_breadcrumbs(engine);
   3409 
   3410 	if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
   3411 		struct drm_printer p = drm_debug_printer(__func__);
   3412 
   3413 		intel_engine_dump(engine, &p, NULL);
   3414 	}
   3415 
   3416 	enable_execlists(engine);
   3417 
   3418 	return 0;
   3419 }
   3420 
   3421 static void execlists_reset_prepare(struct intel_engine_cs *engine)
   3422 {
   3423 	struct intel_engine_execlists * const execlists = &engine->execlists;
   3424 	unsigned long flags;
   3425 
   3426 	ENGINE_TRACE(engine, "depth<-%d\n",
   3427 		     atomic_read(&execlists->tasklet.count));
   3428 
   3429 	/*
   3430 	 * Prevent request submission to the hardware until we have
   3431 	 * completed the reset in i915_gem_reset_finish(). If a request
   3432 	 * is completed by one engine, it may then queue a request
   3433 	 * to a second via its execlists->tasklet *just* as we are
   3434 	 * calling engine->resume() and also writing the ELSP.
   3435 	 * Turning off the execlists->tasklet until the reset is over
   3436 	 * prevents the race.
   3437 	 */
   3438 	__tasklet_disable_sync_once(&execlists->tasklet);
   3439 	GEM_BUG_ON(!reset_in_progress(execlists));
   3440 
   3441 	/* And flush any current direct submission. */
   3442 	spin_lock_irqsave(&engine->active.lock, flags);
   3443 	spin_unlock_irqrestore(&engine->active.lock, flags);
   3444 
   3445 	/*
    3446 	 * We stop the engines, otherwise we might get a failed reset and a
    3447 	 * dead gpu (on elk). Even a gpu as modern as kbl can suffer
    3448 	 * from a system hang if a batchbuffer is progressing when
    3449 	 * the reset is issued, regardless of the READY_TO_RESET ack.
    3450 	 * Thus we assume it is best to stop the engines on all gens
    3451 	 * where we have a gpu reset.
   3452 	 *
   3453 	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
   3454 	 *
   3455 	 * FIXME: Wa for more modern gens needs to be validated
   3456 	 */
   3457 	intel_engine_stop_cs(engine);
   3458 }
   3459 
   3460 static void reset_csb_pointers(struct intel_engine_cs *engine)
   3461 {
   3462 	struct intel_engine_execlists * const execlists = &engine->execlists;
   3463 	const unsigned int reset_value = execlists->csb_size - 1;
   3464 
   3465 	ring_set_paused(engine, 0);
   3466 
   3467 	/*
   3468 	 * After a reset, the HW starts writing into CSB entry [0]. We
   3469 	 * therefore have to set our HEAD pointer back one entry so that
   3470 	 * the *first* entry we check is entry 0. To complicate this further,
   3471 	 * as we don't wait for the first interrupt after reset, we have to
   3472 	 * fake the HW write to point back to the last entry so that our
   3473 	 * inline comparison of our cached head position against the last HW
   3474 	 * write works even before the first interrupt.
   3475 	 */
   3476 	execlists->csb_head = reset_value;
   3477 	WRITE_ONCE(*execlists->csb_write, reset_value);
   3478 	wmb(); /* Make sure this is visible to HW (paranoia?) */
   3479 
   3480 	/*
   3481 	 * Sometimes Icelake forgets to reset its pointers on a GPU reset.
   3482 	 * Bludgeon them with a mmio update to be sure.
   3483 	 */
   3484 	ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
   3485 		     reset_value << 8 | reset_value);
   3486 	ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
   3487 
   3488 	invalidate_csb_entries(&execlists->csb_status[0],
   3489 			       &execlists->csb_status[reset_value]);
   3490 }
   3491 
   3492 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
   3493 {
   3494 	int x;
   3495 
   3496 	x = lrc_ring_mi_mode(engine);
   3497 	if (x != -1) {
   3498 		regs[x + 1] &= ~STOP_RING;
   3499 		regs[x + 1] |= STOP_RING << 16;
   3500 	}
   3501 }
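/*
 * RING_MI_MODE is a masked register: the upper 16 bits of a write select
 * which of the lower 16 bits are updated. Clearing STOP_RING in the low
 * half while setting STOP_RING << 16 in the mask half therefore has the
 * effect of _MASKED_BIT_DISABLE(STOP_RING) on restore, without touching
 * whatever other masked bits the context image already carries.
 */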
   3502 
   3503 static void __execlists_reset_reg_state(const struct intel_context *ce,
   3504 					const struct intel_engine_cs *engine)
   3505 {
   3506 	u32 *regs = ce->lrc_reg_state;
   3507 
   3508 	__reset_stop_ring(regs, engine);
   3509 }
   3510 
   3511 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
   3512 {
   3513 	struct intel_engine_execlists * const execlists = &engine->execlists;
   3514 	struct intel_context *ce;
   3515 	struct i915_request *rq;
   3516 	u32 head;
   3517 
   3518 	mb(); /* paranoia: read the CSB pointers from after the reset */
   3519 	clflush(execlists->csb_write);
   3520 	mb();
   3521 
   3522 	process_csb(engine); /* drain preemption events */
   3523 
   3524 	/* Following the reset, we need to reload the CSB read/write pointers */
   3525 	reset_csb_pointers(engine);
   3526 
   3527 	/*
   3528 	 * Save the currently executing context, even if we completed
   3529 	 * its request, it was still running at the time of the
   3530 	 * reset and will have been clobbered.
   3531 	 */
   3532 	rq = execlists_active(execlists);
   3533 	if (!rq)
   3534 		goto unwind;
   3535 
   3536 	/* We still have requests in-flight; the engine should be active */
   3537 	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
   3538 
   3539 	ce = rq->context;
   3540 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
   3541 
   3542 	if (i915_request_completed(rq)) {
   3543 		/* Idle context; tidy up the ring so we can restart afresh */
   3544 		head = intel_ring_wrap(ce->ring, rq->tail);
   3545 		goto out_replay;
   3546 	}
   3547 
   3548 	/* Context has requests still in-flight; it should not be idle! */
   3549 	GEM_BUG_ON(i915_active_is_idle(&ce->active));
   3550 	rq = active_request(ce->timeline, rq);
   3551 	head = intel_ring_wrap(ce->ring, rq->head);
   3552 	GEM_BUG_ON(head == ce->ring->tail);
   3553 
   3554 	/*
   3555 	 * If this request hasn't started yet, e.g. it is waiting on a
   3556 	 * semaphore, we need to avoid skipping the request or else we
   3557 	 * break the signaling chain. However, if the context is corrupt
   3558 	 * the request will not restart and we will be stuck with a wedged
   3559 	 * device. It is quite often the case that if we issue a reset
    3560  * while the GPU is loading the context image, the context
   3561 	 * image becomes corrupt.
   3562 	 *
   3563 	 * Otherwise, if we have not started yet, the request should replay
   3564 	 * perfectly and we do not need to flag the result as being erroneous.
   3565 	 */
   3566 	if (!i915_request_started(rq))
   3567 		goto out_replay;
   3568 
   3569 	/*
   3570 	 * If the request was innocent, we leave the request in the ELSP
   3571 	 * and will try to replay it on restarting. The context image may
   3572 	 * have been corrupted by the reset, in which case we may have
   3573 	 * to service a new GPU hang, but more likely we can continue on
   3574 	 * without impact.
   3575 	 *
   3576 	 * If the request was guilty, we presume the context is corrupt
   3577 	 * and have to at least restore the RING register in the context
   3578 	 * image back to the expected values to skip over the guilty request.
   3579 	 */
   3580 	__i915_request_reset(rq, stalled);
   3581 	if (!stalled)
   3582 		goto out_replay;
   3583 
   3584 	/*
   3585 	 * We want a simple context + ring to execute the breadcrumb update.
   3586 	 * We cannot rely on the context being intact across the GPU hang,
   3587 	 * so clear it and rebuild just what we need for the breadcrumb.
   3588 	 * All pending requests for this context will be zapped, and any
   3589 	 * future request will be after userspace has had the opportunity
   3590 	 * to recreate its own state.
   3591 	 */
   3592 	GEM_BUG_ON(!intel_context_is_pinned(ce));
   3593 	restore_default_state(ce, engine);
   3594 
   3595 out_replay:
   3596 	ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
   3597 		     head, ce->ring->tail);
   3598 	__execlists_reset_reg_state(ce, engine);
   3599 	__execlists_update_reg_state(ce, engine, head);
   3600 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
   3601 
   3602 unwind:
   3603 	/* Push back any incomplete requests for replay after the reset. */
   3604 	cancel_port_requests(execlists);
   3605 	__unwind_incomplete_requests(engine);
   3606 }
   3607 
   3608 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
   3609 {
   3610 	unsigned long flags;
   3611 
   3612 	ENGINE_TRACE(engine, "\n");
   3613 
   3614 	spin_lock_irqsave(&engine->active.lock, flags);
   3615 
   3616 	__execlists_reset(engine, stalled);
   3617 
   3618 	spin_unlock_irqrestore(&engine->active.lock, flags);
   3619 }
   3620 
   3621 static void nop_submission_tasklet(unsigned long data)
   3622 {
   3623 	/* The driver is wedged; don't process any more events. */
   3624 }
   3625 
   3626 static void execlists_reset_cancel(struct intel_engine_cs *engine)
   3627 {
   3628 	struct intel_engine_execlists * const execlists = &engine->execlists;
   3629 	struct i915_request *rq, *rn;
   3630 	struct rb_node *rb;
   3631 	unsigned long flags;
   3632 
   3633 	ENGINE_TRACE(engine, "\n");
   3634 
   3635 	/*
   3636 	 * Before we call engine->cancel_requests(), we should have exclusive
   3637 	 * access to the submission state. This is arranged for us by the
   3638 	 * caller disabling the interrupt generation, the tasklet and other
   3639 	 * threads that may then access the same state, giving us a free hand
   3640 	 * to reset state. However, we still need to let lockdep be aware that
   3641 	 * we know this state may be accessed in hardirq context, so we
   3642 	 * disable the irq around this manipulation and we want to keep
   3643 	 * the spinlock focused on its duties and not accidentally conflate
   3644 	 * coverage to the submission's irq state. (Similarly, although we
   3645 	 * shouldn't need to disable irq around the manipulation of the
   3646 	 * submission's irq state, we also wish to remind ourselves that
   3647 	 * it is irq state.)
   3648 	 */
   3649 	spin_lock_irqsave(&engine->active.lock, flags);
   3650 
   3651 	__execlists_reset(engine, true);
   3652 
   3653 	/* Mark all executing requests as skipped. */
   3654 	list_for_each_entry(rq, &engine->active.requests, sched.link)
   3655 		mark_eio(rq);
   3656 
   3657 	/* Flush the queued requests to the timeline list (for retiring). */
   3658 	while ((rb = rb_first_cached(&execlists->queue))) {
   3659 		struct i915_priolist *p = to_priolist(rb);
   3660 		int i;
   3661 
   3662 		priolist_for_each_request_consume(rq, rn, p, i) {
   3663 			mark_eio(rq);
   3664 			__i915_request_submit(rq);
   3665 		}
   3666 
   3667 		rb_erase_cached(&p->node, &execlists->queue);
   3668 		i915_priolist_free(p);
   3669 	}
   3670 
   3671 	/* On-hold requests will be flushed to timeline upon their release */
   3672 	list_for_each_entry(rq, &engine->active.hold, sched.link)
   3673 		mark_eio(rq);
   3674 
   3675 	/* Cancel all attached virtual engines */
   3676 	while ((rb = rb_first_cached(&execlists->virtual))) {
   3677 		struct virtual_engine *ve =
   3678 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
   3679 
   3680 		rb_erase_cached(rb, &execlists->virtual);
   3681 		RB_CLEAR_NODE(rb);
   3682 
   3683 		spin_lock(&ve->base.active.lock);
   3684 		rq = fetch_and_zero(&ve->request);
   3685 		if (rq) {
   3686 			mark_eio(rq);
   3687 
   3688 			rq->engine = engine;
   3689 			__i915_request_submit(rq);
   3690 			i915_request_put(rq);
   3691 
   3692 			ve->base.execlists.queue_priority_hint = INT_MIN;
   3693 		}
   3694 		spin_unlock(&ve->base.active.lock);
   3695 	}
   3696 
   3697 	/* Remaining _unready_ requests will be nop'ed when submitted */
   3698 
   3699 	execlists->queue_priority_hint = INT_MIN;
   3700 	execlists->queue = RB_ROOT_CACHED;
   3701 
   3702 	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
   3703 	execlists->tasklet.func = nop_submission_tasklet;
   3704 
   3705 	spin_unlock_irqrestore(&engine->active.lock, flags);
   3706 }
   3707 
   3708 static void execlists_reset_finish(struct intel_engine_cs *engine)
   3709 {
   3710 	struct intel_engine_execlists * const execlists = &engine->execlists;
   3711 
   3712 	/*
   3713 	 * After a GPU reset, we may have requests to replay. Do so now while
   3714 	 * we still have the forcewake to be sure that the GPU is not allowed
   3715 	 * to sleep before we restart and reload a context.
   3716 	 */
   3717 	GEM_BUG_ON(!reset_in_progress(execlists));
   3718 	if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
   3719 		execlists->tasklet.func(execlists->tasklet.data);
   3720 
   3721 	if (__tasklet_enable(&execlists->tasklet))
   3722 		/* And kick in case we missed a new request submission. */
   3723 		tasklet_hi_schedule(&execlists->tasklet);
   3724 	ENGINE_TRACE(engine, "depth->%d\n",
   3725 		     atomic_read(&execlists->tasklet.count));
   3726 }
   3727 
   3728 static int gen8_emit_bb_start_noarb(struct i915_request *rq,
   3729 				    u64 offset, u32 len,
   3730 				    const unsigned int flags)
   3731 {
   3732 	u32 *cs;
   3733 
   3734 	cs = intel_ring_begin(rq, 4);
   3735 	if (IS_ERR(cs))
   3736 		return PTR_ERR(cs);
   3737 
   3738 	/*
   3739 	 * WaDisableCtxRestoreArbitration:bdw,chv
   3740 	 *
    3741 	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
    3742 	 * particular on all the gens that do not need the w/a at all!): if we
    3743 	 * took care to make sure that on every switch into this context
    3744 	 * (both ordinary and for preemption) arbitration was enabled,
    3745 	 * we would be fine.  However, for gen8 there is another w/a that
   3746 	 * requires us to not preempt inside GPGPU execution, so we keep
   3747 	 * arbitration disabled for gen8 batches. Arbitration will be
   3748 	 * re-enabled before we close the request
   3749 	 * (engine->emit_fini_breadcrumb).
   3750 	 */
   3751 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
   3752 
   3753 	/* FIXME(BDW+): Address space and security selectors. */
   3754 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
   3755 		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
   3756 	*cs++ = lower_32_bits(offset);
   3757 	*cs++ = upper_32_bits(offset);
   3758 
   3759 	intel_ring_advance(rq, cs);
   3760 
   3761 	return 0;
   3762 }
   3763 
   3764 static int gen8_emit_bb_start(struct i915_request *rq,
   3765 			      u64 offset, u32 len,
   3766 			      const unsigned int flags)
   3767 {
   3768 	u32 *cs;
   3769 
   3770 	cs = intel_ring_begin(rq, 6);
   3771 	if (IS_ERR(cs))
   3772 		return PTR_ERR(cs);
   3773 
   3774 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   3775 
   3776 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
   3777 		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
   3778 	*cs++ = lower_32_bits(offset);
   3779 	*cs++ = upper_32_bits(offset);
   3780 
   3781 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
   3782 	*cs++ = MI_NOOP;
   3783 
   3784 	intel_ring_advance(rq, cs);
   3785 
   3786 	return 0;
   3787 }
   3788 
   3789 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
   3790 {
   3791 	ENGINE_WRITE(engine, RING_IMR,
   3792 		     ~(engine->irq_enable_mask | engine->irq_keep_mask));
   3793 	ENGINE_POSTING_READ(engine, RING_IMR);
   3794 }
   3795 
   3796 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
   3797 {
   3798 	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
   3799 }
   3800 
   3801 static int gen8_emit_flush(struct i915_request *request, u32 mode)
   3802 {
   3803 	u32 cmd, *cs;
   3804 
   3805 	cs = intel_ring_begin(request, 4);
   3806 	if (IS_ERR(cs))
   3807 		return PTR_ERR(cs);
   3808 
   3809 	cmd = MI_FLUSH_DW + 1;
   3810 
   3811 	/* We always require a command barrier so that subsequent
   3812 	 * commands, such as breadcrumb interrupts, are strictly ordered
   3813 	 * wrt the contents of the write cache being flushed to memory
   3814 	 * (and thus being coherent from the CPU).
   3815 	 */
   3816 	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
   3817 
   3818 	if (mode & EMIT_INVALIDATE) {
   3819 		cmd |= MI_INVALIDATE_TLB;
   3820 		if (request->engine->class == VIDEO_DECODE_CLASS)
   3821 			cmd |= MI_INVALIDATE_BSD;
   3822 	}
   3823 
   3824 	*cs++ = cmd;
   3825 	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
   3826 	*cs++ = 0; /* upper addr */
   3827 	*cs++ = 0; /* value */
   3828 	intel_ring_advance(request, cs);
   3829 
   3830 	return 0;
   3831 }
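/*
 * Note on the length field: the +1 added to MI_FLUSH_DW bumps the command
 * length by one dword to cover the 64-bit (qword) post-sync address used
 * here (address low + address high), matching the four dwords written
 * above.
 */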
   3832 
   3833 static int gen8_emit_flush_render(struct i915_request *request,
   3834 				  u32 mode)
   3835 {
   3836 	bool vf_flush_wa = false, dc_flush_wa = false;
   3837 	u32 *cs, flags = 0;
   3838 	int len;
   3839 
   3840 	flags |= PIPE_CONTROL_CS_STALL;
   3841 
   3842 	if (mode & EMIT_FLUSH) {
   3843 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
   3844 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
   3845 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
   3846 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
   3847 	}
   3848 
   3849 	if (mode & EMIT_INVALIDATE) {
   3850 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
   3851 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
   3852 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
   3853 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
   3854 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
   3855 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
   3856 		flags |= PIPE_CONTROL_QW_WRITE;
   3857 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
   3858 
   3859 		/*
   3860 		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
   3861 		 * pipe control.
   3862 		 */
   3863 		if (IS_GEN(request->i915, 9))
   3864 			vf_flush_wa = true;
   3865 
   3866 		/* WaForGAMHang:kbl */
   3867 		if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
   3868 			dc_flush_wa = true;
   3869 	}
   3870 
   3871 	len = 6;
   3872 
   3873 	if (vf_flush_wa)
   3874 		len += 6;
   3875 
   3876 	if (dc_flush_wa)
   3877 		len += 12;
   3878 
   3879 	cs = intel_ring_begin(request, len);
   3880 	if (IS_ERR(cs))
   3881 		return PTR_ERR(cs);
   3882 
   3883 	if (vf_flush_wa)
   3884 		cs = gen8_emit_pipe_control(cs, 0, 0);
   3885 
   3886 	if (dc_flush_wa)
   3887 		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
   3888 					    0);
   3889 
   3890 	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
   3891 
   3892 	if (dc_flush_wa)
   3893 		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
   3894 
   3895 	intel_ring_advance(request, cs);
   3896 
   3897 	return 0;
   3898 }
   3899 
   3900 static int gen11_emit_flush_render(struct i915_request *request,
   3901 				   u32 mode)
   3902 {
   3903 	if (mode & EMIT_FLUSH) {
   3904 		u32 *cs;
   3905 		u32 flags = 0;
   3906 
   3907 		flags |= PIPE_CONTROL_CS_STALL;
   3908 
   3909 		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
   3910 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
   3911 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
   3912 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
   3913 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
   3914 		flags |= PIPE_CONTROL_QW_WRITE;
   3915 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
   3916 
   3917 		cs = intel_ring_begin(request, 6);
   3918 		if (IS_ERR(cs))
   3919 			return PTR_ERR(cs);
   3920 
   3921 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
   3922 		intel_ring_advance(request, cs);
   3923 	}
   3924 
   3925 	if (mode & EMIT_INVALIDATE) {
   3926 		u32 *cs;
   3927 		u32 flags = 0;
   3928 
   3929 		flags |= PIPE_CONTROL_CS_STALL;
   3930 
   3931 		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
   3932 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
   3933 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
   3934 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
   3935 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
   3936 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
   3937 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
   3938 		flags |= PIPE_CONTROL_QW_WRITE;
   3939 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
   3940 
   3941 		cs = intel_ring_begin(request, 6);
   3942 		if (IS_ERR(cs))
   3943 			return PTR_ERR(cs);
   3944 
   3945 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
   3946 		intel_ring_advance(request, cs);
   3947 	}
   3948 
   3949 	return 0;
   3950 }
   3951 
   3952 static u32 preparser_disable(bool state)
   3953 {
   3954 	return MI_ARB_CHECK | 1 << 8 | state;
   3955 }
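/*
 * On gen12 MI_ARB_CHECK doubles as the pre-parser control: bit 8 acts as
 * the mask enabling the update and bit 0 carries the new disable state,
 * so preparser_disable(true)/preparser_disable(false) bracket the
 * commands that must not be pre-fetched, as in gen12_emit_flush_render()
 * below.
 */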
   3956 
   3957 static int gen12_emit_flush_render(struct i915_request *request,
   3958 				   u32 mode)
   3959 {
   3960 	if (mode & EMIT_FLUSH) {
   3961 		u32 flags = 0;
   3962 		u32 *cs;
   3963 
   3964 		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
   3965 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
   3966 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
   3967 		/* Wa_1409600907:tgl */
   3968 		flags |= PIPE_CONTROL_DEPTH_STALL;
   3969 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
   3970 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
   3971 		flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
   3972 
   3973 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
   3974 		flags |= PIPE_CONTROL_QW_WRITE;
   3975 
   3976 		flags |= PIPE_CONTROL_CS_STALL;
   3977 
   3978 		cs = intel_ring_begin(request, 6);
   3979 		if (IS_ERR(cs))
   3980 			return PTR_ERR(cs);
   3981 
   3982 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
   3983 		intel_ring_advance(request, cs);
   3984 	}
   3985 
   3986 	if (mode & EMIT_INVALIDATE) {
   3987 		u32 flags = 0;
   3988 		u32 *cs;
   3989 
   3990 		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
   3991 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
   3992 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
   3993 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
   3994 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
   3995 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
   3996 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
   3997 		flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
   3998 
   3999 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
   4000 		flags |= PIPE_CONTROL_QW_WRITE;
   4001 
   4002 		flags |= PIPE_CONTROL_CS_STALL;
   4003 
   4004 		cs = intel_ring_begin(request, 8);
   4005 		if (IS_ERR(cs))
   4006 			return PTR_ERR(cs);
   4007 
   4008 		/*
   4009 		 * Prevent the pre-parser from skipping past the TLB
   4010 		 * invalidate and loading a stale page for the batch
   4011 		 * buffer / request payload.
   4012 		 */
   4013 		*cs++ = preparser_disable(true);
   4014 
   4015 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
   4016 
   4017 		*cs++ = preparser_disable(false);
   4018 		intel_ring_advance(request, cs);
   4019 
   4020 		/*
   4021 		 * Wa_1604544889:tgl
   4022 		 */
   4023 		if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
   4024 			flags = 0;
   4025 			flags |= PIPE_CONTROL_CS_STALL;
   4026 			flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
   4027 
   4028 			flags |= PIPE_CONTROL_STORE_DATA_INDEX;
   4029 			flags |= PIPE_CONTROL_QW_WRITE;
   4030 
   4031 			cs = intel_ring_begin(request, 6);
   4032 			if (IS_ERR(cs))
   4033 				return PTR_ERR(cs);
   4034 
   4035 			cs = gen8_emit_pipe_control(cs, flags,
   4036 						    LRC_PPHWSP_SCRATCH_ADDR);
   4037 			intel_ring_advance(request, cs);
   4038 		}
   4039 	}
   4040 
   4041 	return 0;
   4042 }
   4043 
   4044 /*
   4045  * Reserve space for 2 NOOPs at the end of each request to be
   4046  * used as a workaround for not being allowed to do lite
   4047  * restore with HEAD==TAIL (WaIdleLiteRestore).
   4048  */
   4049 static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
   4050 {
   4051 	/* Ensure there's always at least one preemption point per-request. */
   4052 	*cs++ = MI_ARB_CHECK;
   4053 	*cs++ = MI_NOOP;
   4054 	request->wa_tail = intel_ring_offset(request, cs);
   4055 
   4056 	return cs;
   4057 }
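/*
 * The reserved dwords give the submission code room to advance RING_TAIL
 * past the previous tail when the same context is resubmitted, so the
 * hardware never sees HEAD == TAIL on an ELSP lite restore (the
 * WaIdleLiteRestore condition noted above).
 */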
   4058 
   4059 static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
   4060 {
   4061 	*cs++ = MI_SEMAPHORE_WAIT |
   4062 		MI_SEMAPHORE_GLOBAL_GTT |
   4063 		MI_SEMAPHORE_POLL |
   4064 		MI_SEMAPHORE_SAD_EQ_SDD;
   4065 	*cs++ = 0;
   4066 	*cs++ = intel_hws_preempt_address(request->engine);
   4067 	*cs++ = 0;
   4068 
   4069 	return cs;
   4070 }
   4071 
   4072 static __always_inline u32*
   4073 gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
   4074 				 u32 *cs)
   4075 {
   4076 	*cs++ = MI_USER_INTERRUPT;
   4077 
   4078 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   4079 	if (intel_engine_has_semaphores(request->engine))
   4080 		cs = emit_preempt_busywait(request, cs);
   4081 
   4082 	request->tail = intel_ring_offset(request, cs);
   4083 	assert_ring_tail_valid(request->ring, request->tail);
   4084 
   4085 	return gen8_emit_wa_tail(request, cs);
   4086 }
   4087 
   4088 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
   4089 {
   4090 	cs = gen8_emit_ggtt_write(cs,
   4091 				  request->fence.seqno,
   4092 				  i915_request_active_timeline(request)->hwsp_offset,
   4093 				  0);
   4094 
   4095 	return gen8_emit_fini_breadcrumb_footer(request, cs);
   4096 }
   4097 
   4098 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
   4099 {
   4100 	cs = gen8_emit_pipe_control(cs,
   4101 				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
   4102 				    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
   4103 				    PIPE_CONTROL_DC_FLUSH_ENABLE,
   4104 				    0);
   4105 
   4106 	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
   4107 	cs = gen8_emit_ggtt_write_rcs(cs,
   4108 				      request->fence.seqno,
   4109 				      i915_request_active_timeline(request)->hwsp_offset,
   4110 				      PIPE_CONTROL_FLUSH_ENABLE |
   4111 				      PIPE_CONTROL_CS_STALL);
   4112 
   4113 	return gen8_emit_fini_breadcrumb_footer(request, cs);
   4114 }
   4115 
   4116 static u32 *
   4117 gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
   4118 {
   4119 	cs = gen8_emit_ggtt_write_rcs(cs,
   4120 				      request->fence.seqno,
   4121 				      i915_request_active_timeline(request)->hwsp_offset,
   4122 				      PIPE_CONTROL_CS_STALL |
   4123 				      PIPE_CONTROL_TILE_CACHE_FLUSH |
   4124 				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
   4125 				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
   4126 				      PIPE_CONTROL_DC_FLUSH_ENABLE |
   4127 				      PIPE_CONTROL_FLUSH_ENABLE);
   4128 
   4129 	return gen8_emit_fini_breadcrumb_footer(request, cs);
   4130 }
   4131 
   4132 /*
   4133  * Note that the CS instruction pre-parser will not stall on the breadcrumb
   4134  * flush and will continue pre-fetching the instructions after it before the
   4135  * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
   4136  * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
   4137  * of the next request before the memory has been flushed, we're guaranteed that
   4138  * we won't access the batch itself too early.
   4139  * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
   4140  * so, if the current request is modifying an instruction in the next request on
   4141  * the same intel_context, we might pre-fetch and then execute the pre-update
   4142  * instruction. To avoid this, the users of self-modifying code should either
   4143  * disable the parser around the code emitting the memory writes, via a new flag
   4144  * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
   4145  * the in-kernel use-cases we've opted to use a separate context, see
   4146  * reloc_gpu() as an example.
   4147  * All the above applies only to the instructions themselves. Non-inline data
   4148  * used by the instructions is not pre-fetched.
   4149  */
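/*
 * As a rough sketch of the first option above (illustrative only, not a
 * path the driver takes as-is), the self-modifying writes would be
 * bracketed by the MI_ARB_CHECK based pre-parser toggle, cf.
 * preparser_disable():
 *
 *	*cs++ = preparser_disable(true);
 *	... MI_STORE_DWORD_IMM writes that patch the following request ...
 *	*cs++ = preparser_disable(false);
 */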
   4150 
   4151 static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
   4152 {
   4153 	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
   4154 		MI_SEMAPHORE_GLOBAL_GTT |
   4155 		MI_SEMAPHORE_POLL |
   4156 		MI_SEMAPHORE_SAD_EQ_SDD;
   4157 	*cs++ = 0;
   4158 	*cs++ = intel_hws_preempt_address(request->engine);
   4159 	*cs++ = 0;
   4160 	*cs++ = 0;
   4161 	*cs++ = MI_NOOP;
   4162 
   4163 	return cs;
   4164 }
   4165 
   4166 static __always_inline u32*
   4167 gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
   4168 {
   4169 	*cs++ = MI_USER_INTERRUPT;
   4170 
   4171 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   4172 	if (intel_engine_has_semaphores(request->engine))
   4173 		cs = gen12_emit_preempt_busywait(request, cs);
   4174 
   4175 	request->tail = intel_ring_offset(request, cs);
   4176 	assert_ring_tail_valid(request->ring, request->tail);
   4177 
   4178 	return gen8_emit_wa_tail(request, cs);
   4179 }
   4180 
   4181 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
   4182 {
   4183 	cs = gen8_emit_ggtt_write(cs,
   4184 				  request->fence.seqno,
   4185 				  i915_request_active_timeline(request)->hwsp_offset,
   4186 				  0);
   4187 
   4188 	return gen12_emit_fini_breadcrumb_footer(request, cs);
   4189 }
   4190 
   4191 static u32 *
   4192 gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
   4193 {
   4194 	cs = gen8_emit_ggtt_write_rcs(cs,
   4195 				      request->fence.seqno,
   4196 				      i915_request_active_timeline(request)->hwsp_offset,
   4197 				      PIPE_CONTROL_CS_STALL |
   4198 				      PIPE_CONTROL_TILE_CACHE_FLUSH |
   4199 				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
   4200 				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
   4201 				      /* Wa_1409600907:tgl */
   4202 				      PIPE_CONTROL_DEPTH_STALL |
   4203 				      PIPE_CONTROL_DC_FLUSH_ENABLE |
   4204 				      PIPE_CONTROL_FLUSH_ENABLE |
   4205 				      PIPE_CONTROL_HDC_PIPELINE_FLUSH);
   4206 
   4207 	return gen12_emit_fini_breadcrumb_footer(request, cs);
   4208 }
   4209 
   4210 static void execlists_park(struct intel_engine_cs *engine)
   4211 {
   4212 	cancel_timer(&engine->execlists.timer);
   4213 	cancel_timer(&engine->execlists.preempt);
   4214 }
   4215 
   4216 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
   4217 {
   4218 	engine->submit_request = execlists_submit_request;
   4219 	engine->schedule = i915_schedule;
   4220 	engine->execlists.tasklet.func = execlists_submission_tasklet;
   4221 
   4222 	engine->reset.prepare = execlists_reset_prepare;
   4223 	engine->reset.rewind = execlists_reset_rewind;
   4224 	engine->reset.cancel = execlists_reset_cancel;
   4225 	engine->reset.finish = execlists_reset_finish;
   4226 
   4227 	engine->park = execlists_park;
   4228 	engine->unpark = NULL;
   4229 
   4230 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
   4231 	if (!intel_vgpu_active(engine->i915)) {
   4232 		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
   4233 		if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
   4234 			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
   4235 	}
   4236 
   4237 	if (INTEL_GEN(engine->i915) >= 12)
   4238 		engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
   4239 
   4240 	if (intel_engine_has_preemption(engine))
   4241 		engine->emit_bb_start = gen8_emit_bb_start;
   4242 	else
   4243 		engine->emit_bb_start = gen8_emit_bb_start_noarb;
   4244 }
   4245 
   4246 static void execlists_shutdown(struct intel_engine_cs *engine)
   4247 {
   4248 	/* Synchronise with residual timers and any softirq they raise */
   4249 	del_timer_sync(&engine->execlists.timer);
   4250 	del_timer_sync(&engine->execlists.preempt);
   4251 	tasklet_kill(&engine->execlists.tasklet);
   4252 }
   4253 
   4254 static void execlists_release(struct intel_engine_cs *engine)
   4255 {
   4256 	execlists_shutdown(engine);
   4257 
   4258 	intel_engine_cleanup_common(engine);
   4259 	lrc_destroy_wa_ctx(engine);
   4260 }
   4261 
   4262 static void
   4263 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
   4264 {
   4265 	/* Default vfuncs which can be overriden by each engine. */
   4266 
   4267 	engine->resume = execlists_resume;
   4268 
   4269 	engine->cops = &execlists_context_ops;
   4270 	engine->request_alloc = execlists_request_alloc;
   4271 
   4272 	engine->emit_flush = gen8_emit_flush;
   4273 	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
   4274 	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
   4275 	if (INTEL_GEN(engine->i915) >= 12)
   4276 		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
   4277 
   4278 	engine->set_default_submission = intel_execlists_set_default_submission;
   4279 
   4280 	if (INTEL_GEN(engine->i915) < 11) {
   4281 		engine->irq_enable = gen8_logical_ring_enable_irq;
   4282 		engine->irq_disable = gen8_logical_ring_disable_irq;
   4283 	} else {
   4284 		/*
   4285 		 * TODO: On Gen11 interrupt masks need to be clear
    4286 		 * to allow C6 entry. Keep interrupts enabled at all times
   4287 		 * and take the hit of generating extra interrupts
   4288 		 * until a more refined solution exists.
   4289 		 */
   4290 	}
   4291 }
   4292 
   4293 static inline void
   4294 logical_ring_default_irqs(struct intel_engine_cs *engine)
   4295 {
   4296 	unsigned int shift = 0;
   4297 
   4298 	if (INTEL_GEN(engine->i915) < 11) {
   4299 		const u8 irq_shifts[] = {
   4300 			[RCS0]  = GEN8_RCS_IRQ_SHIFT,
   4301 			[BCS0]  = GEN8_BCS_IRQ_SHIFT,
   4302 			[VCS0]  = GEN8_VCS0_IRQ_SHIFT,
   4303 			[VCS1]  = GEN8_VCS1_IRQ_SHIFT,
   4304 			[VECS0] = GEN8_VECS_IRQ_SHIFT,
   4305 		};
   4306 
   4307 		shift = irq_shifts[engine->id];
   4308 	}
   4309 
   4310 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
   4311 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
   4312 }
   4313 
   4314 static void rcs_submission_override(struct intel_engine_cs *engine)
   4315 {
   4316 	switch (INTEL_GEN(engine->i915)) {
   4317 	case 12:
   4318 		engine->emit_flush = gen12_emit_flush_render;
   4319 		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
   4320 		break;
   4321 	case 11:
   4322 		engine->emit_flush = gen11_emit_flush_render;
   4323 		engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
   4324 		break;
   4325 	default:
   4326 		engine->emit_flush = gen8_emit_flush_render;
   4327 		engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
   4328 		break;
   4329 	}
   4330 }
   4331 
   4332 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
   4333 {
   4334 	struct intel_engine_execlists * const execlists = &engine->execlists;
   4335 	struct drm_i915_private *i915 = engine->i915;
   4336 	struct intel_uncore *uncore = engine->uncore;
   4337 	u32 base = engine->mmio_base;
   4338 
   4339 	i915_sched_init(&engine->execlists);
   4340 
   4341 	tasklet_init(&engine->execlists.tasklet,
   4342 		     execlists_submission_tasklet, (unsigned long)engine);
   4343 	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
   4344 	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
   4345 
   4346 	logical_ring_default_vfuncs(engine);
   4347 	logical_ring_default_irqs(engine);
   4348 
   4349 	if (engine->class == RENDER_CLASS)
   4350 		rcs_submission_override(engine);
   4351 
   4352 	if (intel_init_workaround_bb(engine))
   4353 		/*
    4354 		 * We continue even if we fail to initialize the WA batch,
    4355 		 * because we only expect rare glitches and nothing critical
    4356 		 * enough to prevent us from using the GPU.
   4357 		 */
   4358 		DRM_ERROR("WA batch buffer initialization failed\n");
   4359 
   4360 	if (HAS_LOGICAL_RING_ELSQ(i915)) {
   4361 #ifdef __NetBSD__
   4362 		execlists->submit_reg = i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
   4363 		execlists->ctrl_reg = i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
   4364 		execlists->bsh = uncore->regs_bsh;
   4365 		execlists->bst = uncore->regs_bst;
   4366 #else
   4367 		execlists->submit_reg = uncore->regs +
   4368 			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
   4369 		execlists->ctrl_reg = uncore->regs +
   4370 			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
   4371 #endif
   4372 	} else {
   4373 #ifdef __NetBSD__
   4374 		execlists->submit_reg = i915_mmio_reg_offset(RING_ELSP(base));
   4375 		execlists->bsh = uncore->regs_bsh;
   4376 		execlists->bst = uncore->regs_bst;
   4377 #else
   4378 		execlists->submit_reg = uncore->regs +
   4379 			i915_mmio_reg_offset(RING_ELSP(base));
   4380 #endif
   4381 	}
   4382 
   4383 	execlists->csb_status =
   4384 		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
   4385 
   4386 	execlists->csb_write =
   4387 		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
   4388 
   4389 	if (INTEL_GEN(i915) < 11)
   4390 		execlists->csb_size = GEN8_CSB_ENTRIES;
   4391 	else
   4392 		execlists->csb_size = GEN11_CSB_ENTRIES;
   4393 
   4394 	reset_csb_pointers(engine);
   4395 
   4396 	/* Finally, take ownership and responsibility for cleanup! */
   4397 	engine->release = execlists_release;
   4398 
   4399 	return 0;
   4400 }
   4401 
   4402 static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
   4403 {
   4404 	u32 indirect_ctx_offset;
   4405 
   4406 	switch (INTEL_GEN(engine->i915)) {
   4407 	default:
   4408 		MISSING_CASE(INTEL_GEN(engine->i915));
   4409 		/* fall through */
   4410 	case 12:
   4411 		indirect_ctx_offset =
   4412 			GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
   4413 		break;
   4414 	case 11:
   4415 		indirect_ctx_offset =
   4416 			GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
   4417 		break;
   4418 	case 10:
   4419 		indirect_ctx_offset =
   4420 			GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
   4421 		break;
   4422 	case 9:
   4423 		indirect_ctx_offset =
   4424 			GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
   4425 		break;
   4426 	case 8:
   4427 		indirect_ctx_offset =
   4428 			GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
   4429 		break;
   4430 	}
   4431 
   4432 	return indirect_ctx_offset;
   4433 }
   4434 
   4435 
   4436 static void init_common_reg_state(u32 * const regs,
   4437 				  const struct intel_engine_cs *engine,
   4438 				  const struct intel_ring *ring,
   4439 				  bool inhibit)
   4440 {
   4441 	u32 ctl;
   4442 
   4443 	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
   4444 	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
   4445 	if (inhibit)
   4446 		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
   4447 	if (INTEL_GEN(engine->i915) < 11)
   4448 		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
   4449 					   CTX_CTRL_RS_CTX_ENABLE);
   4450 	regs[CTX_CONTEXT_CONTROL] = ctl;
   4451 
   4452 	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
   4453 }
   4454 
   4455 static void init_wa_bb_reg_state(u32 * const regs,
   4456 				 const struct intel_engine_cs *engine,
   4457 				 u32 pos_bb_per_ctx)
   4458 {
   4459 	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
   4460 
   4461 	if (wa_ctx->per_ctx.size) {
   4462 		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
   4463 
   4464 		regs[pos_bb_per_ctx] =
   4465 			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
   4466 	}
   4467 
   4468 	if (wa_ctx->indirect_ctx.size) {
   4469 		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
   4470 
   4471 		regs[pos_bb_per_ctx + 2] =
   4472 			(ggtt_offset + wa_ctx->indirect_ctx.offset) |
   4473 			(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
   4474 
   4475 		regs[pos_bb_per_ctx + 4] =
   4476 			intel_lr_indirect_ctx_offset(engine) << 6;
   4477 	}
   4478 }
   4479 
   4480 static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
   4481 {
   4482 	if (i915_vm_is_4lvl(&ppgtt->vm)) {
   4483 		/* 64b PPGTT (48bit canonical)
   4484 		 * PDP0_DESCRIPTOR contains the base address to PML4 and
   4485 		 * other PDP Descriptors are ignored.
   4486 		 */
   4487 		ASSIGN_CTX_PML4(ppgtt, regs);
   4488 	} else {
   4489 		ASSIGN_CTX_PDP(ppgtt, regs, 3);
   4490 		ASSIGN_CTX_PDP(ppgtt, regs, 2);
   4491 		ASSIGN_CTX_PDP(ppgtt, regs, 1);
   4492 		ASSIGN_CTX_PDP(ppgtt, regs, 0);
   4493 	}
   4494 }
   4495 
   4496 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
   4497 {
   4498 	if (i915_is_ggtt(vm))
   4499 		return i915_vm_to_ggtt(vm)->alias;
   4500 	else
   4501 		return i915_vm_to_ppgtt(vm);
   4502 }
   4503 
   4504 static void execlists_init_reg_state(u32 *regs,
   4505 				     const struct intel_context *ce,
   4506 				     const struct intel_engine_cs *engine,
   4507 				     const struct intel_ring *ring,
   4508 				     bool inhibit)
   4509 {
   4510 	/*
   4511 	 * A context is actually a big batch buffer with several
   4512 	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
   4513 	 * values we are setting here are only for the first context restore:
   4514 	 * on a subsequent save, the GPU will recreate this batchbuffer with new
   4515 	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
   4516 	 * we are not initializing here).
   4517 	 *
   4518 	 * Must keep consistent with virtual_update_register_offsets().
   4519 	 */
   4520 	set_offsets(regs, reg_offsets(engine), engine, inhibit);
   4521 
   4522 	init_common_reg_state(regs, engine, ring, inhibit);
   4523 	init_ppgtt_reg_state(regs, vm_alias(ce->vm));
   4524 
   4525 	init_wa_bb_reg_state(regs, engine,
   4526 			     INTEL_GEN(engine->i915) >= 12 ?
   4527 			     GEN12_CTX_BB_PER_CTX_PTR :
   4528 			     CTX_BB_PER_CTX_PTR);
   4529 
   4530 	__reset_stop_ring(regs, engine);
   4531 }
   4532 
   4533 static int
   4534 populate_lr_context(struct intel_context *ce,
   4535 		    struct drm_i915_gem_object *ctx_obj,
   4536 		    struct intel_engine_cs *engine,
   4537 		    struct intel_ring *ring)
   4538 {
   4539 	bool inhibit = true;
   4540 	void *vaddr;
   4541 	int ret;
   4542 
   4543 	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
   4544 	if (IS_ERR(vaddr)) {
   4545 		ret = PTR_ERR(vaddr);
   4546 		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
   4547 		return ret;
   4548 	}
   4549 
   4550 	set_redzone(vaddr, engine);
   4551 
   4552 	if (engine->default_state) {
   4553 		void *defaults;
   4554 
   4555 		defaults = i915_gem_object_pin_map(engine->default_state,
   4556 						   I915_MAP_WB);
   4557 		if (IS_ERR(defaults)) {
   4558 			ret = PTR_ERR(defaults);
   4559 			goto err_unpin_ctx;
   4560 		}
   4561 
   4562 		memcpy(vaddr, defaults, engine->context_size);
   4563 		i915_gem_object_unpin_map(engine->default_state);
   4564 		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
   4565 		inhibit = false;
   4566 	}
   4567 
   4568 	/* The second page of the context object contains some fields which must
   4569 	 * be set up prior to the first execution. */
   4570 	execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
   4571 				 ce, engine, ring, inhibit);
   4572 
   4573 	ret = 0;
   4574 err_unpin_ctx:
   4575 	__i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
   4576 	i915_gem_object_unpin_map(ctx_obj);
   4577 	return ret;
   4578 }
   4579 
   4580 static int __execlists_context_alloc(struct intel_context *ce,
   4581 				     struct intel_engine_cs *engine)
   4582 {
   4583 	struct drm_i915_gem_object *ctx_obj;
   4584 	struct intel_ring *ring;
   4585 	struct i915_vma *vma;
   4586 	u32 context_size;
   4587 	int ret;
   4588 
   4589 	GEM_BUG_ON(ce->state);
   4590 	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
   4591 
   4592 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
   4593 		context_size += I915_GTT_PAGE_SIZE; /* for redzone */
   4594 
   4595 	ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
   4596 	if (IS_ERR(ctx_obj))
   4597 		return PTR_ERR(ctx_obj);
   4598 
   4599 	vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
   4600 	if (IS_ERR(vma)) {
   4601 		ret = PTR_ERR(vma);
   4602 		goto error_deref_obj;
   4603 	}
   4604 
   4605 	if (!ce->timeline) {
   4606 		struct intel_timeline *tl;
   4607 
   4608 		tl = intel_timeline_create(engine->gt, NULL);
   4609 		if (IS_ERR(tl)) {
   4610 			ret = PTR_ERR(tl);
   4611 			goto error_deref_obj;
   4612 		}
   4613 
   4614 		ce->timeline = tl;
   4615 	}
   4616 
   4617 	ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
   4618 	if (IS_ERR(ring)) {
   4619 		ret = PTR_ERR(ring);
   4620 		goto error_deref_obj;
   4621 	}
   4622 
   4623 	ret = populate_lr_context(ce, ctx_obj, engine, ring);
   4624 	if (ret) {
   4625 		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
   4626 		goto error_ring_free;
   4627 	}
   4628 
   4629 	ce->ring = ring;
   4630 	ce->state = vma;
   4631 
   4632 	return 0;
   4633 
   4634 error_ring_free:
   4635 	intel_ring_put(ring);
   4636 error_deref_obj:
   4637 	i915_gem_object_put(ctx_obj);
   4638 	return ret;
   4639 }
   4640 
   4641 static struct list_head *virtual_queue(struct virtual_engine *ve)
   4642 {
   4643 	return &ve->base.execlists.default_priolist.requests[0];
   4644 }
   4645 
   4646 static void virtual_context_destroy(struct kref *kref)
   4647 {
   4648 	struct virtual_engine *ve =
   4649 		container_of(kref, typeof(*ve), context.ref);
   4650 	unsigned int n;
   4651 
   4652 	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
   4653 	GEM_BUG_ON(ve->request);
   4654 	GEM_BUG_ON(ve->context.inflight);
   4655 
   4656 	for (n = 0; n < ve->num_siblings; n++) {
   4657 		struct intel_engine_cs *sibling = ve->siblings[n];
   4658 		struct rb_node *node = &ve->nodes[sibling->id].rb;
   4659 		unsigned long flags;
   4660 
   4661 		if (RB_EMPTY_NODE(node))
   4662 			continue;
   4663 
   4664 		spin_lock_irqsave(&sibling->active.lock, flags);
   4665 
   4666 		/* Detachment is lazily performed in the execlists tasklet */
   4667 		if (!RB_EMPTY_NODE(node))
   4668 			rb_erase_cached(node, &sibling->execlists.virtual);
   4669 
   4670 		spin_unlock_irqrestore(&sibling->active.lock, flags);
   4671 	}
   4672 	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
   4673 
   4674 	if (ve->context.state)
   4675 		__execlists_context_fini(&ve->context);
   4676 	intel_context_fini(&ve->context);
   4677 
   4678 	kfree(ve->bonds);
   4679 	kfree(ve);
   4680 }
   4681 
   4682 static void virtual_engine_initial_hint(struct virtual_engine *ve)
   4683 {
   4684 	int swp;
   4685 
   4686 	/*
   4687 	 * Pick a random sibling on starting to help spread the load around.
   4688 	 *
   4689 	 * New contexts are typically created with exactly the same order
   4690 	 * of siblings, and often started in batches. Due to the way we iterate
    4691 	 * the array of siblings when submitting requests, sibling[0] is
   4692 	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
   4693 	 * randomised across the system, we also help spread the load by the
   4694 	 * first engine we inspect being different each time.
   4695 	 *
   4696 	 * NB This does not force us to execute on this engine, it will just
   4697 	 * typically be the first we inspect for submission.
   4698 	 */
   4699 	swp = prandom_u32_max(ve->num_siblings);
   4700 	if (!swp)
   4701 		return;
   4702 
   4703 	swap(ve->siblings[swp], ve->siblings[0]);
   4704 	if (!intel_engine_has_relative_mmio(ve->siblings[0]))
   4705 		virtual_update_register_offsets(ve->context.lrc_reg_state,
   4706 						ve->siblings[0]);
   4707 }
   4708 
   4709 static int virtual_context_alloc(struct intel_context *ce)
   4710 {
   4711 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
   4712 
   4713 	return __execlists_context_alloc(ce, ve->siblings[0]);
   4714 }
   4715 
   4716 static int virtual_context_pin(struct intel_context *ce)
   4717 {
   4718 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
   4719 	int err;
   4720 
   4721 	/* Note: we must use a real engine class for setting up reg state */
   4722 	err = __execlists_context_pin(ce, ve->siblings[0]);
   4723 	if (err)
   4724 		return err;
   4725 
   4726 	virtual_engine_initial_hint(ve);
   4727 	return 0;
   4728 }
   4729 
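         /*
          * While the virtual context is active we cannot know in advance
          * which sibling will execute it, so enter/exit take and release an
          * engine-pm wakeref on every sibling rather than just one.
          */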
   4730 static void virtual_context_enter(struct intel_context *ce)
   4731 {
   4732 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
   4733 	unsigned int n;
   4734 
   4735 	for (n = 0; n < ve->num_siblings; n++)
   4736 		intel_engine_pm_get(ve->siblings[n]);
   4737 
   4738 	intel_timeline_enter(ce->timeline);
   4739 }
   4740 
   4741 static void virtual_context_exit(struct intel_context *ce)
   4742 {
   4743 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
   4744 	unsigned int n;
   4745 
   4746 	intel_timeline_exit(ce->timeline);
   4747 
   4748 	for (n = 0; n < ve->num_siblings; n++)
   4749 		intel_engine_pm_put(ve->siblings[n]);
   4750 }
   4751 
   4752 static const struct intel_context_ops virtual_context_ops = {
   4753 	.alloc = virtual_context_alloc,
   4754 
   4755 	.pin = virtual_context_pin,
   4756 	.unpin = execlists_context_unpin,
   4757 
   4758 	.enter = virtual_context_enter,
   4759 	.exit = virtual_context_exit,
   4760 
   4761 	.destroy = virtual_context_destroy,
   4762 };
   4763 
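         /*
          * Compute the set of physical engines on which the pending virtual
          * request may run. Returns 0 if no request is waiting; an empty
          * execution_mask is treated as an error, the request is skipped and
          * handed to an arbitrary sibling so that it can still be retired.
          */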
   4764 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
   4765 {
   4766 	struct i915_request *rq;
   4767 	intel_engine_mask_t mask;
   4768 
   4769 	rq = READ_ONCE(ve->request);
   4770 	if (!rq)
   4771 		return 0;
   4772 
   4773 	/* The rq is ready for submission; rq->execution_mask is now stable. */
   4774 	mask = rq->execution_mask;
   4775 	if (unlikely(!mask)) {
    4776 		/* Invalid selection: fail the request and pick an arbitrary sibling */
   4777 		i915_request_skip(rq, -ENODEV);
   4778 		mask = ve->siblings[0]->mask;
   4779 	}
   4780 
   4781 	ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
   4782 		     rq->fence.context, rq->fence.seqno,
   4783 		     mask, ve->base.execlists.queue_priority_hint);
   4784 
   4785 	return mask;
   4786 }
   4787 
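         /*
          * Propagate the single pending virtual request to the physical
          * engines: for each sibling permitted by the execution mask,
          * (re)insert this virtual engine's node into the sibling's
          * execlists.virtual rbtree keyed by priority (and remove it from
          * siblings that are no longer eligible), kicking the sibling's
          * tasklet when the node lands first in the tree with a priority
          * above the sibling's current queue_priority_hint.
          */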
   4788 static void virtual_submission_tasklet(unsigned long data)
   4789 {
   4790 	struct virtual_engine * const ve = (struct virtual_engine *)data;
   4791 	const int prio = ve->base.execlists.queue_priority_hint;
   4792 	intel_engine_mask_t mask;
   4793 	unsigned int n;
   4794 
   4795 	rcu_read_lock();
   4796 	mask = virtual_submission_mask(ve);
   4797 	rcu_read_unlock();
   4798 	if (unlikely(!mask))
   4799 		return;
   4800 
   4801 	local_irq_disable();
   4802 	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
   4803 		struct intel_engine_cs *sibling = ve->siblings[n];
   4804 		struct ve_node * const node = &ve->nodes[sibling->id];
   4805 		struct rb_node **parent, *rb;
   4806 		bool first;
   4807 
   4808 		if (unlikely(!(mask & sibling->mask))) {
   4809 			if (!RB_EMPTY_NODE(&node->rb)) {
   4810 				spin_lock(&sibling->active.lock);
   4811 				rb_erase_cached(&node->rb,
   4812 						&sibling->execlists.virtual);
   4813 				RB_CLEAR_NODE(&node->rb);
   4814 				spin_unlock(&sibling->active.lock);
   4815 			}
   4816 			continue;
   4817 		}
   4818 
   4819 		spin_lock(&sibling->active.lock);
   4820 
   4821 		if (!RB_EMPTY_NODE(&node->rb)) {
   4822 			/*
   4823 			 * Cheat and avoid rebalancing the tree if we can
   4824 			 * reuse this node in situ.
   4825 			 */
   4826 			first = rb_first_cached(&sibling->execlists.virtual) ==
   4827 				&node->rb;
   4828 			if (prio == node->prio || (prio > node->prio && first))
   4829 				goto submit_engine;
   4830 
   4831 			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
   4832 		}
   4833 
   4834 		rb = NULL;
   4835 		first = true;
   4836 		parent = &sibling->execlists.virtual.rb_root.rb_node;
   4837 		while (*parent) {
   4838 			struct ve_node *other;
   4839 
   4840 			rb = *parent;
   4841 			other = rb_entry(rb, typeof(*other), rb);
   4842 			if (prio > other->prio) {
   4843 				parent = &rb->rb_left;
   4844 			} else {
   4845 				parent = &rb->rb_right;
   4846 				first = false;
   4847 			}
   4848 		}
   4849 
   4850 		rb_link_node(&node->rb, rb, parent);
   4851 		rb_insert_color_cached(&node->rb,
   4852 				       &sibling->execlists.virtual,
   4853 				       first);
   4854 
   4855 submit_engine:
   4856 		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
   4857 		node->prio = prio;
   4858 		if (first && prio > sibling->execlists.queue_priority_hint) {
   4859 			sibling->execlists.queue_priority_hint = prio;
   4860 			tasklet_hi_schedule(&sibling->execlists.tasklet);
   4861 		}
   4862 
   4863 		spin_unlock(&sibling->active.lock);
   4864 	}
   4865 	local_irq_enable();
   4866 }
   4867 
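         /*
          * Submission entry point for the virtual engine: stash the ready
          * request as ve->request and let virtual_submission_tasklet offer
          * it to the siblings. A request that has already completed (e.g.
          * flushed back by preempt-to-busy) is submitted directly instead
          * of being queued.
          */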
   4868 static void virtual_submit_request(struct i915_request *rq)
   4869 {
   4870 	struct virtual_engine *ve = to_virtual_engine(rq->engine);
   4871 	struct i915_request *old;
   4872 	unsigned long flags;
   4873 
   4874 	ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
   4875 		     rq->fence.context,
   4876 		     rq->fence.seqno);
   4877 
   4878 	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
   4879 
   4880 	spin_lock_irqsave(&ve->base.active.lock, flags);
   4881 
   4882 	old = ve->request;
   4883 	if (old) { /* background completion event from preempt-to-busy */
   4884 		GEM_BUG_ON(!i915_request_completed(old));
   4885 		__i915_request_submit(old);
   4886 		i915_request_put(old);
   4887 	}
   4888 
   4889 	if (i915_request_completed(rq)) {
   4890 		__i915_request_submit(rq);
   4891 
   4892 		ve->base.execlists.queue_priority_hint = INT_MIN;
   4893 		ve->request = NULL;
   4894 	} else {
   4895 		ve->base.execlists.queue_priority_hint = rq_prio(rq);
   4896 		ve->request = i915_request_get(rq);
   4897 
   4898 		GEM_BUG_ON(!list_empty(virtual_queue(ve)));
   4899 		list_move_tail(&rq->sched.link, virtual_queue(ve));
   4900 
   4901 		tasklet_schedule(&ve->base.execlists.tasklet);
   4902 	}
   4903 
   4904 	spin_unlock_irqrestore(&ve->base.active.lock, flags);
   4905 }
   4906 
   4907 static struct ve_bond *
   4908 virtual_find_bond(struct virtual_engine *ve,
   4909 		  const struct intel_engine_cs *master)
   4910 {
   4911 	int i;
   4912 
   4913 	for (i = 0; i < ve->num_bonds; i++) {
   4914 		if (ve->bonds[i].master == master)
   4915 			return &ve->bonds[i];
   4916 	}
   4917 
   4918 	return NULL;
   4919 }
   4920 
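         /*
          * Bonding hook (ve->base.bond_execute): given the master request's
          * fence, narrow the bonded request's execution_mask to the siblings
          * bonded to the engine the master ran on (excluding that engine
          * itself), and clear those engines from the master's own mask so it
          * is not re-run on the engines reserved for its bonded partner.
          */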
   4921 static void
   4922 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
   4923 {
   4924 	struct virtual_engine *ve = to_virtual_engine(rq->engine);
   4925 	intel_engine_mask_t allowed, exec;
   4926 	struct ve_bond *bond;
   4927 
   4928 	allowed = ~to_request(signal)->engine->mask;
   4929 
   4930 	bond = virtual_find_bond(ve, to_request(signal)->engine);
   4931 	if (bond)
   4932 		allowed &= bond->sibling_mask;
   4933 
    4934 	/* Restrict the bonded request to run only on the available engines */
   4935 	exec = READ_ONCE(rq->execution_mask);
   4936 	while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
   4937 		;
   4938 
   4939 	/* Prevent the master from being re-run on the bonded engines */
   4940 	to_request(signal)->execution_mask &= ~allowed;
   4941 }
   4942 
   4943 struct intel_context *
   4944 intel_execlists_create_virtual(struct intel_engine_cs **siblings,
   4945 			       unsigned int count)
   4946 {
   4947 	struct virtual_engine *ve;
   4948 	unsigned int n;
   4949 	int err;
   4950 
   4951 	if (count == 0)
   4952 		return ERR_PTR(-EINVAL);
   4953 
   4954 	if (count == 1)
   4955 		return intel_context_create(siblings[0]);
   4956 
   4957 	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
   4958 	if (!ve)
   4959 		return ERR_PTR(-ENOMEM);
   4960 
   4961 	ve->base.i915 = siblings[0]->i915;
   4962 	ve->base.gt = siblings[0]->gt;
   4963 	ve->base.uncore = siblings[0]->uncore;
   4964 	ve->base.id = -1;
   4965 
   4966 	ve->base.class = OTHER_CLASS;
   4967 	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
   4968 	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
   4969 	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
   4970 
   4971 	/*
   4972 	 * The decision on whether to submit a request using semaphores
   4973 	 * depends on the saturated state of the engine. We only compute
    4974 	 * this during HW submission of the request, and we need this
   4975 	 * state to be globally applied to all requests being submitted
   4976 	 * to this engine. Virtual engines encompass more than one physical
   4977 	 * engine and so we cannot accurately tell in advance if one of those
   4978 	 * engines is already saturated and so cannot afford to use a semaphore
   4979 	 * and be pessimized in priority for doing so -- if we are the only
   4980 	 * context using semaphores after all other clients have stopped, we
   4981 	 * will be starved on the saturated system. Such a global switch for
   4982 	 * semaphores is less than ideal, but alas is the current compromise.
   4983 	 */
   4984 	ve->base.saturated = ALL_ENGINES;
   4985 
   4986 	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
   4987 
   4988 	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
   4989 	intel_engine_init_breadcrumbs(&ve->base);
   4990 	intel_engine_init_execlists(&ve->base);
   4991 
   4992 	ve->base.cops = &virtual_context_ops;
   4993 	ve->base.request_alloc = execlists_request_alloc;
   4994 
   4995 	ve->base.schedule = i915_schedule;
   4996 	ve->base.submit_request = virtual_submit_request;
   4997 	ve->base.bond_execute = virtual_bond_execute;
   4998 
   4999 	INIT_LIST_HEAD(virtual_queue(ve));
   5000 	ve->base.execlists.queue_priority_hint = INT_MIN;
   5001 	tasklet_init(&ve->base.execlists.tasklet,
   5002 		     virtual_submission_tasklet,
   5003 		     (unsigned long)ve);
   5004 
   5005 	intel_context_init(&ve->context, &ve->base);
   5006 
   5007 	for (n = 0; n < count; n++) {
   5008 		struct intel_engine_cs *sibling = siblings[n];
   5009 
   5010 		GEM_BUG_ON(!is_power_of_2(sibling->mask));
   5011 		if (sibling->mask & ve->base.mask) {
   5012 			DRM_DEBUG("duplicate %s entry in load balancer\n",
   5013 				  sibling->name);
   5014 			err = -EINVAL;
   5015 			goto err_put;
   5016 		}
   5017 
   5018 		/*
   5019 		 * The virtual engine implementation is tightly coupled to
    5020 	 * the execlists backend -- we push requests directly
   5021 		 * into a tree inside each physical engine. We could support
   5022 		 * layering if we handle cloning of the requests and
   5023 		 * submitting a copy into each backend.
   5024 		 */
   5025 		if (sibling->execlists.tasklet.func !=
   5026 		    execlists_submission_tasklet) {
   5027 			err = -ENODEV;
   5028 			goto err_put;
   5029 		}
   5030 
   5031 		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
   5032 		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
   5033 
   5034 		ve->siblings[ve->num_siblings++] = sibling;
   5035 		ve->base.mask |= sibling->mask;
   5036 
   5037 		/*
   5038 		 * All physical engines must be compatible for their emission
   5039 		 * functions (as we build the instructions during request
   5040 		 * construction and do not alter them before submission
   5041 		 * on the physical engine). We use the engine class as a guide
   5042 		 * here, although that could be refined.
   5043 		 */
   5044 		if (ve->base.class != OTHER_CLASS) {
   5045 			if (ve->base.class != sibling->class) {
   5046 				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
   5047 					  sibling->class, ve->base.class);
   5048 				err = -EINVAL;
   5049 				goto err_put;
   5050 			}
   5051 			continue;
   5052 		}
   5053 
   5054 		ve->base.class = sibling->class;
   5055 		ve->base.uabi_class = sibling->uabi_class;
   5056 		snprintf(ve->base.name, sizeof(ve->base.name),
   5057 			 "v%dx%d", ve->base.class, count);
   5058 		ve->base.context_size = sibling->context_size;
   5059 
   5060 		ve->base.emit_bb_start = sibling->emit_bb_start;
   5061 		ve->base.emit_flush = sibling->emit_flush;
   5062 		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
   5063 		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
   5064 		ve->base.emit_fini_breadcrumb_dw =
   5065 			sibling->emit_fini_breadcrumb_dw;
   5066 
   5067 		ve->base.flags = sibling->flags;
   5068 	}
   5069 
   5070 	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
   5071 
   5072 	return &ve->context;
   5073 
   5074 err_put:
   5075 	intel_context_put(&ve->context);
   5076 	return ERR_PTR(err);
   5077 }
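
         /*
          * Example usage (editorial sketch, not a caller in this file; vcs0
          * and vcs1 are hypothetical engine pointers):
          *
          *	struct intel_engine_cs *siblings[] = { vcs0, vcs1 };
          *	struct intel_context *ce;
          *
          *	ce = intel_execlists_create_virtual(siblings, ARRAY_SIZE(siblings));
          *	if (IS_ERR(ce))
          *		return PTR_ERR(ce);
          *
          * A single sibling simply yields a regular context on that engine,
          * and count == 0 is rejected with -EINVAL, as above.
          */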
   5078 
   5079 struct intel_context *
   5080 intel_execlists_clone_virtual(struct intel_engine_cs *src)
   5081 {
   5082 	struct virtual_engine *se = to_virtual_engine(src);
   5083 	struct intel_context *dst;
   5084 
   5085 	dst = intel_execlists_create_virtual(se->siblings,
   5086 					     se->num_siblings);
   5087 	if (IS_ERR(dst))
   5088 		return dst;
   5089 
   5090 	if (se->num_bonds) {
   5091 		struct virtual_engine *de = to_virtual_engine(dst->engine);
   5092 
   5093 		de->bonds = kmemdup(se->bonds,
   5094 				    sizeof(*se->bonds) * se->num_bonds,
   5095 				    GFP_KERNEL);
   5096 		if (!de->bonds) {
   5097 			intel_context_put(dst);
   5098 			return ERR_PTR(-ENOMEM);
   5099 		}
   5100 
   5101 		de->num_bonds = se->num_bonds;
   5102 	}
   5103 
   5104 	return dst;
   5105 }
   5106 
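         /*
          * Record that @sibling may service bonded submission for requests
          * whose master runs on @master. Repeated calls for the same master
          * accumulate bits in the existing bond's sibling_mask; a new master
          * grows the bond array by one entry.
          */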
   5107 int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
   5108 				     const struct intel_engine_cs *master,
   5109 				     const struct intel_engine_cs *sibling)
   5110 {
   5111 	struct virtual_engine *ve = to_virtual_engine(engine);
   5112 	struct ve_bond *bond;
   5113 	int n;
   5114 
   5115 	/* Sanity check the sibling is part of the virtual engine */
   5116 	for (n = 0; n < ve->num_siblings; n++)
   5117 		if (sibling == ve->siblings[n])
   5118 			break;
   5119 	if (n == ve->num_siblings)
   5120 		return -EINVAL;
   5121 
   5122 	bond = virtual_find_bond(ve, master);
   5123 	if (bond) {
   5124 		bond->sibling_mask |= sibling->mask;
   5125 		return 0;
   5126 	}
   5127 
   5128 	bond = krealloc(ve->bonds,
   5129 			sizeof(*bond) * (ve->num_bonds + 1),
   5130 			GFP_KERNEL);
   5131 	if (!bond)
   5132 		return -ENOMEM;
   5133 
   5134 	bond[ve->num_bonds].master = master;
   5135 	bond[ve->num_bonds].sibling_mask = sibling->mask;
   5136 
   5137 	ve->bonds = bond;
   5138 	ve->num_bonds++;
   5139 
   5140 	return 0;
   5141 }
   5142 
   5143 struct intel_engine_cs *
   5144 intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
   5145 				 unsigned int sibling)
   5146 {
   5147 	struct virtual_engine *ve = to_virtual_engine(engine);
   5148 
   5149 	if (sibling >= ve->num_siblings)
   5150 		return NULL;
   5151 
   5152 	return ve->siblings[sibling];
   5153 }
   5154 
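         /*
          * Debug pretty-printer: dump up to @max requests from each of the
          * engine's lists -- executing ("E"), queued ("Q") and pending
          * virtual ("V") -- eliding the middle of any list longer than @max
          * while always showing its final entry.
          */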
   5155 void intel_execlists_show_requests(struct intel_engine_cs *engine,
   5156 				   struct drm_printer *m,
   5157 				   void (*show_request)(struct drm_printer *m,
   5158 							struct i915_request *rq,
   5159 							const char *prefix),
   5160 				   unsigned int max)
   5161 {
   5162 	const struct intel_engine_execlists *execlists = &engine->execlists;
   5163 	struct i915_request *rq, *last;
   5164 	unsigned long flags;
   5165 	unsigned int count;
   5166 	struct rb_node *rb;
   5167 
   5168 	spin_lock_irqsave(&engine->active.lock, flags);
   5169 
   5170 	last = NULL;
   5171 	count = 0;
   5172 	list_for_each_entry(rq, &engine->active.requests, sched.link) {
   5173 		if (count++ < max - 1)
   5174 			show_request(m, rq, "\t\tE ");
   5175 		else
   5176 			last = rq;
   5177 	}
   5178 	if (last) {
   5179 		if (count > max) {
   5180 			drm_printf(m,
   5181 				   "\t\t...skipping %d executing requests...\n",
   5182 				   count - max);
   5183 		}
   5184 		show_request(m, last, "\t\tE ");
   5185 	}
   5186 
   5187 	last = NULL;
   5188 	count = 0;
   5189 	if (execlists->queue_priority_hint != INT_MIN)
   5190 		drm_printf(m, "\t\tQueue priority hint: %d\n",
   5191 			   execlists->queue_priority_hint);
   5192 	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
   5193 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
   5194 		int i;
   5195 
   5196 		priolist_for_each_request(rq, p, i) {
   5197 			if (count++ < max - 1)
   5198 				show_request(m, rq, "\t\tQ ");
   5199 			else
   5200 				last = rq;
   5201 		}
   5202 	}
   5203 	if (last) {
   5204 		if (count > max) {
   5205 			drm_printf(m,
   5206 				   "\t\t...skipping %d queued requests...\n",
   5207 				   count - max);
   5208 		}
   5209 		show_request(m, last, "\t\tQ ");
   5210 	}
   5211 
   5212 	last = NULL;
   5213 	count = 0;
   5214 	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
   5215 		struct virtual_engine *ve =
   5216 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
   5217 		struct i915_request *rq = READ_ONCE(ve->request);
   5218 
   5219 		if (rq) {
   5220 			if (count++ < max - 1)
   5221 				show_request(m, rq, "\t\tV ");
   5222 			else
   5223 				last = rq;
   5224 		}
   5225 	}
   5226 	if (last) {
   5227 		if (count > max) {
   5228 			drm_printf(m,
   5229 				   "\t\t...skipping %d virtual requests...\n",
   5230 				   count - max);
   5231 		}
   5232 		show_request(m, last, "\t\tV ");
   5233 	}
   5234 
   5235 	spin_unlock_irqrestore(&engine->active.lock, flags);
   5236 }
   5237 
   5238 void intel_lr_context_reset(struct intel_engine_cs *engine,
   5239 			    struct intel_context *ce,
   5240 			    u32 head,
   5241 			    bool scrub)
   5242 {
   5243 	GEM_BUG_ON(!intel_context_is_pinned(ce));
   5244 
   5245 	/*
   5246 	 * We want a simple context + ring to execute the breadcrumb update.
   5247 	 * We cannot rely on the context being intact across the GPU hang,
   5248 	 * so clear it and rebuild just what we need for the breadcrumb.
   5249 	 * All pending requests for this context will be zapped, and any
    5250 	 * future request will be submitted after userspace has had the opportunity
   5251 	 * to recreate its own state.
   5252 	 */
   5253 	if (scrub)
   5254 		restore_default_state(ce, engine);
   5255 
   5256 	/* Rerun the request; its payload has been neutered (if guilty). */
   5257 	__execlists_update_reg_state(ce, engine, head);
   5258 }
   5259 
   5260 bool
   5261 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
   5262 {
   5263 	return engine->set_default_submission ==
   5264 	       intel_execlists_set_default_submission;
   5265 }
   5266 
   5267 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
   5268 #include "selftest_lrc.c"
   5269 #endif
   5270