1 /* $NetBSD: selftest_engine_heartbeat.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $ */ 2 3 /* 4 * SPDX-License-Identifier: MIT 5 * 6 * Copyright 2018 Intel Corporation 7 */ 8 9 #include <sys/cdefs.h> 10 __KERNEL_RCSID(0, "$NetBSD: selftest_engine_heartbeat.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $"); 11 12 #include <linux/sort.h> 13 14 #include "i915_drv.h" 15 16 #include "intel_gt_requests.h" 17 #include "i915_selftest.h" 18 19 static int timeline_sync(struct intel_timeline *tl) 20 { 21 struct dma_fence *fence; 22 long timeout; 23 24 fence = i915_active_fence_get(&tl->last_request); 25 if (!fence) 26 return 0; 27 28 timeout = dma_fence_wait_timeout(fence, true, HZ / 2); 29 dma_fence_put(fence); 30 if (timeout < 0) 31 return timeout; 32 33 return 0; 34 } 35 36 static int engine_sync_barrier(struct intel_engine_cs *engine) 37 { 38 return timeline_sync(engine->kernel_context->timeline); 39 } 40 41 struct pulse { 42 struct i915_active active; 43 struct kref kref; 44 }; 45 46 static int pulse_active(struct i915_active *active) 47 { 48 kref_get(&container_of(active, struct pulse, active)->kref); 49 return 0; 50 } 51 52 static void pulse_free(struct kref *kref) 53 { 54 kfree(container_of(kref, struct pulse, kref)); 55 } 56 57 static void pulse_put(struct pulse *p) 58 { 59 kref_put(&p->kref, pulse_free); 60 } 61 62 static void pulse_retire(struct i915_active *active) 63 { 64 pulse_put(container_of(active, struct pulse, active)); 65 } 66 67 static struct pulse *pulse_create(void) 68 { 69 struct pulse *p; 70 71 p = kmalloc(sizeof(*p), GFP_KERNEL); 72 if (!p) 73 return p; 74 75 kref_init(&p->kref); 76 i915_active_init(&p->active, pulse_active, pulse_retire); 77 78 return p; 79 } 80 81 static void pulse_unlock_wait(struct pulse *p) 82 { 83 i915_active_unlock_wait(&p->active); 84 } 85 86 static int __live_idle_pulse(struct intel_engine_cs *engine, 87 int (*fn)(struct intel_engine_cs *cs)) 88 { 89 struct pulse *p; 90 int err; 91 92 GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); 93 94 p = pulse_create(); 95 if (!p) 96 return -ENOMEM; 97 98 err = i915_active_acquire(&p->active); 99 if (err) 100 goto out; 101 102 err = i915_active_acquire_preallocate_barrier(&p->active, engine); 103 if (err) { 104 i915_active_release(&p->active); 105 goto out; 106 } 107 108 i915_active_acquire_barrier(&p->active); 109 i915_active_release(&p->active); 110 111 GEM_BUG_ON(i915_active_is_idle(&p->active)); 112 GEM_BUG_ON(llist_empty(&engine->barrier_tasks)); 113 114 err = fn(engine); 115 if (err) 116 goto out; 117 118 GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); 119 120 if (engine_sync_barrier(engine)) { 121 struct drm_printer m = drm_err_printer("pulse"); 122 123 pr_err("%s: no heartbeat pulse?\n", engine->name); 124 intel_engine_dump(engine, &m, "%s", engine->name); 125 126 err = -ETIME; 127 goto out; 128 } 129 130 GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial); 131 132 pulse_unlock_wait(p); /* synchronize with the retirement callback */ 133 134 if (!i915_active_is_idle(&p->active)) { 135 struct drm_printer m = drm_err_printer("pulse"); 136 137 pr_err("%s: heartbeat pulse did not flush idle tasks\n", 138 engine->name); 139 i915_active_print(&p->active, &m); 140 141 err = -EINVAL; 142 goto out; 143 } 144 145 out: 146 pulse_put(p); 147 return err; 148 } 149 150 static int live_idle_flush(void *arg) 151 { 152 struct intel_gt *gt = arg; 153 struct intel_engine_cs *engine; 154 enum intel_engine_id id; 155 int err = 0; 156 157 /* Check that we can flush the idle barriers */ 158 159 for_each_engine(engine, gt, id) { 160 intel_engine_pm_get(engine); 161 err = __live_idle_pulse(engine, intel_engine_flush_barriers); 162 intel_engine_pm_put(engine); 163 if (err) 164 break; 165 } 166 167 return err; 168 } 169 170 static int live_idle_pulse(void *arg) 171 { 172 struct intel_gt *gt = arg; 173 struct intel_engine_cs *engine; 174 enum intel_engine_id id; 175 int err = 0; 176 177 /* Check that heartbeat pulses flush the idle barriers */ 178 179 for_each_engine(engine, gt, id) { 180 intel_engine_pm_get(engine); 181 err = __live_idle_pulse(engine, intel_engine_pulse); 182 intel_engine_pm_put(engine); 183 if (err && err != -ENODEV) 184 break; 185 186 err = 0; 187 } 188 189 return err; 190 } 191 192 static int cmp_u32(const void *_a, const void *_b) 193 { 194 const u32 *a = _a, *b = _b; 195 196 return *a - *b; 197 } 198 199 static int __live_heartbeat_fast(struct intel_engine_cs *engine) 200 { 201 struct intel_context *ce; 202 struct i915_request *rq; 203 ktime_t t0, t1; 204 u32 times[5]; 205 int err; 206 int i; 207 208 ce = intel_context_create(engine); 209 if (IS_ERR(ce)) 210 return PTR_ERR(ce); 211 212 intel_engine_pm_get(engine); 213 214 err = intel_engine_set_heartbeat(engine, 1); 215 if (err) 216 goto err_pm; 217 218 for (i = 0; i < ARRAY_SIZE(times); i++) { 219 /* Manufacture a tick */ 220 do { 221 while (READ_ONCE(engine->heartbeat.systole)) 222 flush_delayed_work(&engine->heartbeat.work); 223 224 engine->serial++; /* quick, pretend we are not idle! */ 225 flush_delayed_work(&engine->heartbeat.work); 226 if (!delayed_work_pending(&engine->heartbeat.work)) { 227 pr_err("%s: heartbeat did not start\n", 228 engine->name); 229 err = -EINVAL; 230 goto err_pm; 231 } 232 233 rcu_read_lock(); 234 rq = READ_ONCE(engine->heartbeat.systole); 235 if (rq) 236 rq = i915_request_get_rcu(rq); 237 rcu_read_unlock(); 238 } while (!rq); 239 240 t0 = ktime_get(); 241 while (rq == READ_ONCE(engine->heartbeat.systole)) 242 yield(); /* work is on the local cpu! */ 243 t1 = ktime_get(); 244 245 i915_request_put(rq); 246 times[i] = ktime_us_delta(t1, t0); 247 } 248 249 sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL); 250 251 pr_info("%s: Heartbeat delay: %uus [%u, %u]\n", 252 engine->name, 253 times[ARRAY_SIZE(times) / 2], 254 times[0], 255 times[ARRAY_SIZE(times) - 1]); 256 257 /* Min work delay is 2 * 2 (worst), +1 for scheduling, +1 for slack */ 258 if (times[ARRAY_SIZE(times) / 2] > jiffies_to_usecs(6)) { 259 pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n", 260 engine->name, 261 times[ARRAY_SIZE(times) / 2], 262 jiffies_to_usecs(6)); 263 err = -EINVAL; 264 } 265 266 intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL); 267 err_pm: 268 intel_engine_pm_put(engine); 269 intel_context_put(ce); 270 return err; 271 } 272 273 static int live_heartbeat_fast(void *arg) 274 { 275 struct intel_gt *gt = arg; 276 struct intel_engine_cs *engine; 277 enum intel_engine_id id; 278 int err = 0; 279 280 /* Check that the heartbeat ticks at the desired rate. */ 281 if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) 282 return 0; 283 284 for_each_engine(engine, gt, id) { 285 err = __live_heartbeat_fast(engine); 286 if (err) 287 break; 288 } 289 290 return err; 291 } 292 293 static int __live_heartbeat_off(struct intel_engine_cs *engine) 294 { 295 int err; 296 297 intel_engine_pm_get(engine); 298 299 engine->serial++; 300 flush_delayed_work(&engine->heartbeat.work); 301 if (!delayed_work_pending(&engine->heartbeat.work)) { 302 pr_err("%s: heartbeat not running\n", 303 engine->name); 304 err = -EINVAL; 305 goto err_pm; 306 } 307 308 err = intel_engine_set_heartbeat(engine, 0); 309 if (err) 310 goto err_pm; 311 312 engine->serial++; 313 flush_delayed_work(&engine->heartbeat.work); 314 if (delayed_work_pending(&engine->heartbeat.work)) { 315 pr_err("%s: heartbeat still running\n", 316 engine->name); 317 err = -EINVAL; 318 goto err_beat; 319 } 320 321 if (READ_ONCE(engine->heartbeat.systole)) { 322 pr_err("%s: heartbeat still allocated\n", 323 engine->name); 324 err = -EINVAL; 325 goto err_beat; 326 } 327 328 err_beat: 329 intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL); 330 err_pm: 331 intel_engine_pm_put(engine); 332 return err; 333 } 334 335 static int live_heartbeat_off(void *arg) 336 { 337 struct intel_gt *gt = arg; 338 struct intel_engine_cs *engine; 339 enum intel_engine_id id; 340 int err = 0; 341 342 /* Check that we can turn off heartbeat and not interrupt VIP */ 343 if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) 344 return 0; 345 346 for_each_engine(engine, gt, id) { 347 if (!intel_engine_has_preemption(engine)) 348 continue; 349 350 err = __live_heartbeat_off(engine); 351 if (err) 352 break; 353 } 354 355 return err; 356 } 357 358 int intel_heartbeat_live_selftests(struct drm_i915_private *i915) 359 { 360 static const struct i915_subtest tests[] = { 361 SUBTEST(live_idle_flush), 362 SUBTEST(live_idle_pulse), 363 SUBTEST(live_heartbeat_fast), 364 SUBTEST(live_heartbeat_off), 365 }; 366 int saved_hangcheck; 367 int err; 368 369 if (intel_gt_is_wedged(&i915->gt)) 370 return 0; 371 372 saved_hangcheck = i915_modparams.enable_hangcheck; 373 i915_modparams.enable_hangcheck = INT_MAX; 374 375 err = intel_gt_live_subtests(tests, &i915->gt); 376 377 i915_modparams.enable_hangcheck = saved_hangcheck; 378 return err; 379 } 380