Home | History | Annotate | Line # | Download | only in gt
      1 /*	$NetBSD: intel_engine_heartbeat.c,v 1.3 2021/12/19 11:38:37 riastradh Exp $	*/
      2 
      3 /*
      4  * SPDX-License-Identifier: MIT
      5  *
 * Copyright © 2019 Intel Corporation
      7  */
      8 
      9 #include <sys/cdefs.h>
     10 __KERNEL_RCSID(0, "$NetBSD: intel_engine_heartbeat.c,v 1.3 2021/12/19 11:38:37 riastradh Exp $");
     11 
     12 #include "i915_request.h"
     13 
     14 #include "intel_context.h"
     15 #include "intel_engine_heartbeat.h"
     16 #include "intel_engine_pm.h"
     17 #include "intel_engine.h"
     18 #include "intel_gt.h"
     19 #include "intel_reset.h"
     20 
     21 /*
     22  * While the engine is active, we send a periodic pulse along the engine
     23  * to check on its health and to flush any idle-barriers. If that request
     24  * is stuck, and we fail to preempt it, we declare the engine hung and
     25  * issue a reset -- in the hope that restores progress.
     26  */
     27 
     28 static bool next_heartbeat(struct intel_engine_cs *engine)
     29 {
     30 	long delay;
     31 
     32 	delay = READ_ONCE(engine->props.heartbeat_interval_ms);
     33 	if (!delay)
     34 		return false;
     35 
     36 	delay = msecs_to_jiffies_timeout(delay);
     37 	if (delay >= HZ)
     38 		delay = round_jiffies_up_relative(delay);
     39 	schedule_delayed_work(&engine->heartbeat.work, delay);
     40 
     41 	return true;
     42 }
     43 
     44 static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
     45 {
     46 	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
     47 	i915_request_add_active_barriers(rq);
     48 }
     49 
     50 static void show_heartbeat(const struct i915_request *rq,
     51 			   struct intel_engine_cs *engine)
     52 {
     53 	struct drm_printer p = drm_debug_printer("heartbeat");
     54 
     55 	intel_engine_dump(engine, &p,
     56 			  "%s heartbeat {prio:%d} not ticking\n",
     57 			  engine->name,
     58 			  rq->sched.attr.priority);
     59 }
     60 
/*
 * Delayed-work handler driving the engine heartbeat.
 *
 * On each run it either follows up on the previous heartbeat request
 * (engine->heartbeat.systole) if that is still outstanding, or submits a
 * fresh pulse on the engine's kernel context. Unless the engine is no
 * longer awake, the worker re-arms itself through next_heartbeat() on the
 * way out.
 */
static void heartbeat(struct work_struct *wrk)
{
	/* New pulses start at the lowest user priority. */
	struct i915_sched_attr attr = {
		.priority = I915_USER_PRIORITY(I915_PRIORITY_MIN),
	};
	struct intel_engine_cs *engine =
		container_of(wrk, typeof(*engine), heartbeat.work.work);
	struct intel_context *ce = engine->kernel_context;
	struct i915_request *rq;

	/* The previous pulse completed: drop our reference and forget it. */
	rq = engine->heartbeat.systole;
	if (rq && i915_request_completed(rq)) {
		i915_request_put(rq);
		engine->heartbeat.systole = NULL;
	}

	/* Engine is not awake: return without re-arming the worker. */
	if (!intel_engine_pm_get_if_awake(engine))
		return;

	if (intel_gt_is_wedged(engine->gt))
		goto out;

	/*
	 * The previous pulse is still outstanding; rq still points at it
	 * because systole is only cleared above when rq has completed.
	 */
	if (engine->heartbeat.systole) {
		if (engine->schedule &&
		    rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
			/*
			 * Gradually raise the priority of the heartbeat to
			 * give high priority work [which presumably desires
			 * low latency and no jitter] the chance to naturally
			 * complete before being preempted.
			 */
			attr.priority = I915_PRIORITY_MASK;
			if (rq->sched.attr.priority >= attr.priority)
				attr.priority |= I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT);
			if (rq->sched.attr.priority >= attr.priority)
				attr.priority = I915_PRIORITY_BARRIER;

			/*
			 * Call the scheduler with soft interrupts blocked:
			 * splsoftserial() on NetBSD, local_bh_disable()
			 * elsewhere.
			 */
#ifdef __NetBSD__
			int s = splsoftserial();
#else
			local_bh_disable();
#endif
			engine->schedule(rq, &attr);
#ifdef __NetBSD__
			splx(s);
#else
			local_bh_enable();
#endif
		} else {
			/*
			 * No schedule callback, or the pulse is already at
			 * barrier priority and still has not completed:
			 * report the engine to the GT error handler.
			 */
			if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
				show_heartbeat(rq, engine);

			intel_gt_handle_error(engine->gt, engine->mask,
					      I915_ERROR_CAPTURE,
					      "stopped heartbeat on %s",
					      engine->name);
		}
		goto out;
	}

	/*
	 * idle_pulse() stores serial + 1 in wakeref_serial; a match here
	 * presumably means nothing was submitted since the last pulse, so
	 * no new pulse is sent -- TODO confirm against engine->serial users.
	 */
	if (engine->wakeref_serial == engine->serial)
		goto out;

	mutex_lock(&ce->timeline->mutex);

	intel_context_enter(ce);
	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
	intel_context_exit(ce);
	/* Creation failed: skip this beat; the worker is still re-armed. */
	if (IS_ERR(rq))
		goto unlock;

	idle_pulse(engine, rq);
	/* Only keep a reference to the pulse when hangcheck is enabled. */
	if (i915_modparams.enable_hangcheck)
		engine->heartbeat.systole = i915_request_get(rq);

	__i915_request_commit(rq);
	__i915_request_queue(rq, &attr);

unlock:
	mutex_unlock(&ce->timeline->mutex);
out:
	/* Heartbeat interval is zero: nothing was queued, drop the pulse. */
	if (!next_heartbeat(engine))
		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
	intel_engine_pm_put(engine);
}
    146 
    147 void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
    148 {
    149 	if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
    150 		return;
    151 
    152 	next_heartbeat(engine);
    153 }
    154 
    155 void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
    156 {
    157 	if (cancel_delayed_work(&engine->heartbeat.work))
    158 		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
    159 }
    160 
/*
 * Initialise the engine's heartbeat delayed work with heartbeat() as its
 * handler. Nothing is scheduled here.
 */
void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
{
	INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat);
}
    165 
    166 int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
    167 			       unsigned long delay)
    168 {
    169 	int err;
    170 
    171 	/* Send one last pulse before to cleanup persistent hogs */
    172 	if (!delay && IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) {
    173 		err = intel_engine_pulse(engine);
    174 		if (err)
    175 			return err;
    176 	}
    177 
    178 	WRITE_ONCE(engine->props.heartbeat_interval_ms, delay);
    179 
    180 	if (intel_engine_pm_get_if_awake(engine)) {
    181 		if (delay)
    182 			intel_engine_unpark_heartbeat(engine);
    183 		else
    184 			intel_engine_park_heartbeat(engine);
    185 		intel_engine_pm_put(engine);
    186 	}
    187 
    188 	return 0;
    189 }
    190 
    191 int intel_engine_pulse(struct intel_engine_cs *engine)
    192 {
    193 	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
    194 	struct intel_context *ce = engine->kernel_context;
    195 	struct i915_request *rq;
    196 	int err = 0;
    197 
    198 	if (!intel_engine_has_preemption(engine))
    199 		return -ENODEV;
    200 
    201 	if (!intel_engine_pm_get_if_awake(engine))
    202 		return 0;
    203 
    204 	if (mutex_lock_interruptible(&ce->timeline->mutex))
    205 		goto out_rpm;
    206 
    207 	intel_context_enter(ce);
    208 	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
    209 	intel_context_exit(ce);
    210 	if (IS_ERR(rq)) {
    211 		err = PTR_ERR(rq);
    212 		goto out_unlock;
    213 	}
    214 
    215 	__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
    216 	idle_pulse(engine, rq);
    217 
    218 	__i915_request_commit(rq);
    219 	__i915_request_queue(rq, &attr);
    220 
    221 out_unlock:
    222 	mutex_unlock(&ce->timeline->mutex);
    223 out_rpm:
    224 	intel_engine_pm_put(engine);
    225 	return err;
    226 }
    227 
    228 int intel_engine_flush_barriers(struct intel_engine_cs *engine)
    229 {
    230 	struct i915_request *rq;
    231 	int err = 0;
    232 
    233 	if (llist_empty(&engine->barrier_tasks))
    234 		return 0;
    235 
    236 	if (!intel_engine_pm_get_if_awake(engine))
    237 		return 0;
    238 
    239 	rq = i915_request_create(engine->kernel_context);
    240 	if (IS_ERR(rq)) {
    241 		err = PTR_ERR(rq);
    242 		goto out_rpm;
    243 	}
    244 
    245 	idle_pulse(engine, rq);
    246 	i915_request_add(rq);
    247 
    248 out_rpm:
    249 	intel_engine_pm_put(engine);
    250 	return err;
    251 }
    252 
    253 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
    254 #include "selftest_engine_heartbeat.c"
    255 #endif
    256