/*	$NetBSD: intel_engine_pm.c,v 1.5 2021/12/19 12:37:28 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: intel_engine_pm.c,v 1.5 2021/12/19 12:37:28 riastradh Exp $");

#include "i915_drv.h"

#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_pool.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "intel_ring.h"

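/*
 * __engine_unpark() runs on the first intel_engine_pm_get() for an idle
 * engine (wakeref 0 -> 1).  It takes a GT-level PM reference, pins the
 * default (golden) context state so resets from atomic context do not have
 * to pin it themselves, discards stale kernel context state left over from
 * idling (poisoning the image first under CONFIG_DRM_I915_DEBUG_GEM), and
 * finally calls the backend unpark hook and restarts the heartbeat.
 */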
static int __engine_unpark(struct intel_wakeref *wf)
{
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);
	struct intel_context *ce;
	void *map;

	ENGINE_TRACE(engine, "\n");

	intel_gt_pm_get(engine->gt);

	/* Pin the default state for fast resets from atomic context. */
	map = NULL;
	if (engine->default_state)
		map = i915_gem_object_pin_map(engine->default_state,
					      I915_MAP_WB);
	if (!IS_ERR_OR_NULL(map))
		engine->pinned_default_state = map;

	/* Discard stale context state from across idling */
	ce = engine->kernel_context;
	if (ce) {
		GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));

		/* First poison the image to verify we never fully trust it */
		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
			struct drm_i915_gem_object *obj = ce->state->obj;
			int type = i915_coherent_map_type(engine->i915);

			map = i915_gem_object_pin_map(obj, type);
			if (!IS_ERR(map)) {
				memset(map, CONTEXT_REDZONE, obj->base.size);
				i915_gem_object_flush_map(obj);
				i915_gem_object_unpin_map(obj);
			}
		}

		ce->ops->reset(ce);
	}

	if (engine->unpark)
		engine->unpark(engine);

	intel_engine_unpark_heartbeat(engine);
	return 0;
}

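/*
 * When parking we construct the final request on the kernel context without
 * holding its timeline->mutex (we may already be underneath it).  The
 * helpers below exist so that, with CONFIG_LOCKDEP enabled, lockdep still
 * sees the timeline mutex as acquired around that request construction;
 * without lockdep they compile away to nothing.
 */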
#if IS_ENABLED(CONFIG_LOCKDEP)

static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
{
	unsigned long flags;

	local_irq_save(flags);
	mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);

	return flags;
}

static inline void __timeline_mark_unlock(struct intel_context *ce,
					  unsigned long flags)
{
	mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
	local_irq_restore(flags);
}

#else

static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
{
	return 0;
}

static inline void __timeline_mark_unlock(struct intel_context *ce,
					  unsigned long flags)
{
}

#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */

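/*
 * duration() is the dma-fence callback attached to the final parking
 * request: when that request signals, feed the time between emission and
 * completion into the engine's latency EWMA.
 */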
static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct i915_request *rq = to_request(fence);

	ewma__engine_latency_add(&rq->engine->latency,
				 ktime_us_delta(rq->fence.timestamp,
						rq->duration.emitted));
}

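/*
 * Queue the final request to HW and drop the engine wakeref, all under
 * timelines->lock, so that neither retirement nor a new submitter can race
 * us into underflowing engine->wakeref.count or timeline->active_count.
 */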
static void
__queue_and_release_pm(struct i915_request *rq,
		       struct intel_timeline *tl,
		       struct intel_engine_cs *engine)
{
	struct intel_gt_timelines *timelines = &engine->gt->timelines;

	ENGINE_TRACE(engine, "\n");

	/*
	 * We have to serialise all potential retirement paths with our
	 * submission, as we don't want to underflow either the
	 * engine->wakeref.counter or our timeline->active_count.
	 *
	 * Equally, we cannot allow a new submission to start until
	 * after we finish queueing, nor could we allow that submitter
	 * to retire us before we are ready!
	 */
	spin_lock(&timelines->lock);

	/* Let intel_gt_retire_requests() retire us (acquired under lock) */
	if (!atomic_fetch_inc(&tl->active_count))
		list_add_tail(&tl->link, &timelines->active_list);

	/* Hand the request over to HW and so to engine_retire() */
	__i915_request_queue(rq, NULL);

	/* Let new submissions commence (and maybe retire this timeline) */
	__intel_wakeref_defer_park(&engine->wakeref);

	spin_unlock(&timelines->lock);
}

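/*
 * switch_to_kernel_context() decides whether the engine may park right away.
 * It returns true if the GPU is wedged or the engine is already idle in the
 * kernel context; otherwise it emits a barrier request on the kernel context
 * to flush the current state and returns false, deferring the park until
 * that request has been retired.
 */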
static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
	struct intel_context *ce = engine->kernel_context;
	struct i915_request *rq;
	unsigned long flags;
	bool result = true;

	/* GPU is pointing to the void, as good as in the kernel context. */
	if (intel_gt_is_wedged(engine->gt))
		return true;

	GEM_BUG_ON(!intel_context_is_barrier(ce));

	/* Already inside the kernel context, safe to power down. */
	if (engine->wakeref_serial == engine->serial)
		return true;

	/*
	 * Note, we do this without taking the timeline->mutex. We cannot
	 * as we may be called while retiring the kernel context and so
	 * already underneath the timeline->mutex. Instead we rely on the
	 * exclusive property of the __engine_park that prevents anyone
	 * else from creating a request on this engine. This also requires
	 * that the ring is empty and we avoid any waits while constructing
	 * the context, as they assume protection by the timeline->mutex.
	 * This should hold true as we can only park the engine after
	 * retiring the last request, thus all rings should be empty and
	 * all timelines idle.
	 *
	 * For unlocking, there are 2 other parties and the GPU who have a
	 * stake here.
	 *
	 * A new gpu user will be waiting on the engine-pm to start their
	 * engine_unpark. New waiters are predicated on engine->wakeref.count
	 * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
	 * engine->wakeref.
	 *
	 * The other party is intel_gt_retire_requests(), which is walking the
	 * list of active timelines looking for completions. Meanwhile as soon
	 * as we call __i915_request_queue(), the GPU may complete our request.
	 * Ergo, if we put ourselves on the timelines.active_list
	 * (see intel_timeline_enter()) before we increment the
	 * engine->wakeref.count, we may see the request completion and retire
	 * it causing an underflow of the engine->wakeref.
	 */
	flags = __timeline_mark_lock(ce);
	GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);

	rq = __i915_request_create(ce, GFP_NOWAIT);
	if (IS_ERR(rq))
		/* Context switch failed, hope for the best! Maybe reset? */
		goto out_unlock;

	/* Check again on the next retirement. */
	engine->wakeref_serial = engine->serial + 1;
	i915_request_add_active_barriers(rq);

	/* Install ourselves as a preemption barrier */
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
		/*
		 * Use an interrupt for precise measurement of duration,
		 * otherwise we rely on someone else retiring all the requests
		 * which may delay the signaling (i.e. we will likely wait
		 * until the background request retirement running every
		 * second or two).
		 */
		dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration);
		rq->duration.emitted = ktime_get();
	}

	/* Expose ourselves to the world */
	__queue_and_release_pm(rq, ce->timeline, engine);

	result = false;
out_unlock:
	__timeline_mark_unlock(ce, flags);
	return result;
}

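/*
 * call_idle_barriers() flushes any idle barrier tasks still queued on the
 * engine by invoking each callback with ERR_PTR(-EAGAIN), e.g. to clean up
 * barriers left behind after the GT has been wedged.
 */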
static void call_idle_barriers(struct intel_engine_cs *engine)
{
	struct llist_node *node, *next;

	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
		struct i915_active_fence *fence =
		    container_of(node, struct i915_active_fence, llist);

		fence->cb.func(ERR_PTR(-EAGAIN), &fence->cb);
	}
}

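/*
 * __engine_park() runs on the final intel_engine_pm_put() (wakeref 1 -> 0).
 * If the engine is not yet idle in the kernel context it returns -EBUSY to
 * defer parking; otherwise it quiesces the heartbeat, breadcrumbs and buffer
 * pool, calls the backend park hook, unpins the default state and drops the
 * GT PM reference taken in __engine_unpark().
 */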
static int __engine_park(struct intel_wakeref *wf)
{
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);

	engine->saturated = 0;

	/*
	 * If one and only one request is completed between pm events,
	 * we know that we are inside the kernel context and it is
	 * safe to power down. (We are paranoid in case runtime
	 * suspend causes corruption to the active context image, and
	 * want to avoid that impacting userspace.)
	 */
	if (!switch_to_kernel_context(engine))
		return -EBUSY;

	ENGINE_TRACE(engine, "\n");

	call_idle_barriers(engine); /* cleanup after wedging */

	intel_engine_park_heartbeat(engine);
	intel_engine_disarm_breadcrumbs(engine);
	intel_engine_pool_park(&engine->pool);

	/* Must be reset upon idling, or we may miss the busy wakeup. */
	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);

	if (engine->park)
		engine->park(engine);

	if (engine->pinned_default_state) {
		i915_gem_object_unpin_map(engine->default_state);
		engine->pinned_default_state = NULL;
	}

	engine->execlists.no_priolist = false;

	/* While gt calls i915_vma_parked(), we have to break the lock cycle */
	intel_gt_pm_put_async(engine->gt);
	return 0;
}

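/*
 * The engine wakeref drives the park/unpark transitions above:
 * intel_engine_init__pm() binds these ops to the engine's wakeref (backed by
 * the uncore runtime-pm) and initialises the heartbeat machinery.
 */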
static const struct intel_wakeref_ops wf_ops = {
	.get = __engine_unpark,
	.put = __engine_park,
};

void intel_engine_init__pm(struct intel_engine_cs *engine)
{
	struct intel_runtime_pm *rpm = engine->uncore->rpm;

	intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
	intel_engine_init_heartbeat(engine);
}

void
intel_engine_fini__pm(struct intel_engine_cs *engine)
{

	intel_wakeref_fini(&engine->wakeref);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_engine_pm.c"
#endif