/*	$NetBSD: execlist.c,v 1.2 2021/12/18 23:45:31 riastradh Exp $	*/

/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhiyuan Lv <zhiyuan.lv@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Ping Gao <ping.a.gao@intel.com>
 *    Tina Zhang <tina.zhang@intel.com>
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: execlist.c,v 1.2 2021/12/18 23:45:31 riastradh Exp $");

#include "i915_drv.h"
#include "gvt.h"

#define _EL_OFFSET_STATUS	0x234
#define _EL_OFFSET_STATUS_BUF	0x370
#define _EL_OFFSET_STATUS_PTR	0x3A0

#define execlist_ring_mmio(gvt, ring_id, offset) \
	(gvt->dev_priv->engine[ring_id]->mmio_base + (offset))

#define valid_context(ctx) ((ctx)->valid)
#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
		((a)->lrca == (b)->lrca))

static int context_switch_events[] = {
	[RCS0]  = RCS_AS_CONTEXT_SWITCH,
	[BCS0]  = BCS_AS_CONTEXT_SWITCH,
	[VCS0]  = VCS_AS_CONTEXT_SWITCH,
	[VCS1]  = VCS2_AS_CONTEXT_SWITCH,
	[VECS0] = VECS_AS_CONTEXT_SWITCH,
};

static int ring_id_to_context_switch_event(unsigned int ring_id)
{
	if (WARN_ON(ring_id >= ARRAY_SIZE(context_switch_events)))
		return -EINVAL;

	return context_switch_events[ring_id];
}

static void switch_virtual_execlist_slot(struct intel_vgpu_execlist *execlist)
{
	gvt_dbg_el("[before] running slot %d/context %x pending slot %d\n",
		   execlist->running_slot ?
		   execlist->running_slot->index : -1,
		   execlist->running_context ?
		   execlist->running_context->context_id : 0,
		   execlist->pending_slot ?
		   execlist->pending_slot->index : -1);

	execlist->running_slot = execlist->pending_slot;
	execlist->pending_slot = NULL;
	execlist->running_context = execlist->running_context ?
		&execlist->running_slot->ctx[0] : NULL;

	gvt_dbg_el("[after] running slot %d/context %x pending slot %d\n",
		   execlist->running_slot ?
		   execlist->running_slot->index : -1,
		   execlist->running_context ?
		   execlist->running_context->context_id : 0,
		   execlist->pending_slot ?
		   execlist->pending_slot->index : -1);
}
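
/*
 * Summary of the virtual execlist model, as reflected in the structures
 * used below: each engine has two submission slots (slot[0]/slot[1]),
 * and each slot carries up to two context descriptors (ctx[0]/ctx[1]).
 * running_slot is the slot the guest believes the engine is executing
 * from; pending_slot holds a queued ELSP write.  The emulate_* helpers
 * below recompute the virtual EXECLIST status register and context
 * status buffer (CSB) so the guest observes the same state transitions
 * a physical engine would report.
 */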

static void emulate_execlist_status(struct intel_vgpu_execlist *execlist)
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
	struct execlist_ctx_descriptor_format *desc = execlist->running_context;
	struct intel_vgpu *vgpu = execlist->vgpu;
	struct execlist_status_format status;
	int ring_id = execlist->ring_id;
	u32 status_reg = execlist_ring_mmio(vgpu->gvt,
			ring_id, _EL_OFFSET_STATUS);

	status.ldw = vgpu_vreg(vgpu, status_reg);
	status.udw = vgpu_vreg(vgpu, status_reg + 4);

	if (running) {
		status.current_execlist_pointer = !!running->index;
		status.execlist_write_pointer = !!!running->index;
		status.execlist_0_active = status.execlist_0_valid =
			!!!(running->index);
		status.execlist_1_active = status.execlist_1_valid =
			!!(running->index);
	} else {
		status.context_id = 0;
		status.execlist_0_active = status.execlist_0_valid = 0;
		status.execlist_1_active = status.execlist_1_valid = 0;
	}

	status.context_id = desc ? desc->context_id : 0;
	status.execlist_queue_full = !!(pending);

	vgpu_vreg(vgpu, status_reg) = status.ldw;
	vgpu_vreg(vgpu, status_reg + 4) = status.udw;

	gvt_dbg_el("vgpu%d: status reg offset %x ldw %x udw %x\n",
		vgpu->id, status_reg, status.ldw, status.udw);
}

static void emulate_csb_update(struct intel_vgpu_execlist *execlist,
			       struct execlist_context_status_format *status,
			       bool trigger_interrupt_later)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	int ring_id = execlist->ring_id;
	struct execlist_context_status_pointer_format ctx_status_ptr;
	u32 write_pointer;
	u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset;
	unsigned long hwsp_gpa;
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;

	ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_PTR);
	ctx_status_buf_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_BUF);

	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);

	write_pointer = ctx_status_ptr.write_ptr;

	/*
	 * 0x7 is the write pointer's reset value (programmed in
	 * init_vgpu_execlist() below), so the first update lands in
	 * entry 0; afterwards the pointer advances through the six CSB
	 * entries, wrapping modulo six.
	 */
	if (write_pointer == 0x7)
		write_pointer = 0;
	else {
		++write_pointer;
		write_pointer %= 0x6;
	}

	offset = ctx_status_buf_reg + write_pointer * 8;

	vgpu_vreg(vgpu, offset) = status->ldw;
	vgpu_vreg(vgpu, offset + 4) = status->udw;

	ctx_status_ptr.write_ptr = write_pointer;
	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;

	/* Update the CSB and CSB write pointer in HWSP */
	hwsp_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
					 vgpu->hws_pga[ring_id]);
	if (hwsp_gpa != INTEL_GVT_INVALID_ADDR) {
		intel_gvt_hypervisor_write_gpa(vgpu,
			hwsp_gpa + I915_HWS_CSB_BUF0_INDEX * 4 +
			write_pointer * 8,
			status, 8);
		intel_gvt_hypervisor_write_gpa(vgpu,
			hwsp_gpa +
			intel_hws_csb_write_index(dev_priv) * 4,
			&write_pointer, 4);
	}

	gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n",
		vgpu->id, write_pointer, offset, status->ldw, status->udw);

	if (trigger_interrupt_later)
		return;

	intel_vgpu_trigger_virtual_event(vgpu,
			ring_id_to_context_switch_event(execlist->ring_id));
}
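
/*
 * Schedule-out emulation.  Two cases, mirroring the hardware CSB
 * events: if ctx0 of the running slot completes while ctx1 is still
 * valid, report context-complete + element-switch; if the last valid
 * element completes, retire the slot and report context-complete with
 * either active-to-idle (no pending slot) or a follow-up idle-to-active
 * event for the pending slot.
 */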
static int emulate_execlist_ctx_schedule_out(
		struct intel_vgpu_execlist *execlist,
		struct execlist_ctx_descriptor_format *ctx)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
	struct execlist_ctx_descriptor_format *ctx0 = &running->ctx[0];
	struct execlist_ctx_descriptor_format *ctx1 = &running->ctx[1];
	struct execlist_context_status_format status;

	memset(&status, 0, sizeof(status));

	gvt_dbg_el("schedule out context id %x\n", ctx->context_id);

	if (WARN_ON(!same_context(ctx, execlist->running_context))) {
		gvt_vgpu_err("schedule out context is not running context, "
			     "ctx id %x running ctx id %x\n",
			     ctx->context_id,
			     execlist->running_context->context_id);
		return -EINVAL;
	}

	/* ctx1 is valid, ctx0/ctx is scheduled-out -> element switch */
	if (valid_context(ctx1) && same_context(ctx0, ctx)) {
		gvt_dbg_el("ctx 1 valid, ctx/ctx 0 is scheduled-out\n");

		execlist->running_context = ctx1;

		emulate_execlist_status(execlist);

		status.context_complete = status.element_switch = 1;
		status.context_id = ctx->context_id;

		emulate_csb_update(execlist, &status, false);
	/*
	 * ctx1 is not valid, ctx == ctx0
	 * ctx1 is valid, ctx1 == ctx
	 *	--> last element is finished
	 * emulate:
	 *	active-to-idle if there is *no* pending execlist
	 *	context-complete if there *is* pending execlist
	 */
	} else if ((!valid_context(ctx1) && same_context(ctx0, ctx))
			|| (valid_context(ctx1) && same_context(ctx1, ctx))) {
		gvt_dbg_el("need to switch virtual execlist slot\n");

		switch_virtual_execlist_slot(execlist);

		emulate_execlist_status(execlist);

		status.context_complete = status.active_to_idle = 1;
		status.context_id = ctx->context_id;

		if (!pending) {
			emulate_csb_update(execlist, &status, false);
		} else {
			emulate_csb_update(execlist, &status, true);

			memset(&status, 0, sizeof(status));

			status.idle_to_active = 1;
			status.context_id = 0;

			emulate_csb_update(execlist, &status, false);
		}
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	return 0;
}

static struct intel_vgpu_execlist_slot *get_next_execlist_slot(
		struct intel_vgpu_execlist *execlist)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	int ring_id = execlist->ring_id;
	u32 status_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS);
	struct execlist_status_format status;

	status.ldw = vgpu_vreg(vgpu, status_reg);
	status.udw = vgpu_vreg(vgpu, status_reg + 4);

	if (status.execlist_queue_full) {
		gvt_vgpu_err("virtual execlist slots are full\n");
		return NULL;
	}

	return &execlist->slot[status.execlist_write_pointer];
}
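
/*
 * Schedule-in emulation.  Three outcomes, matching the status bits set
 * below: idle-to-active when nothing is running, lite-restore +
 * preempted when the new ELSP write resubmits the context currently at
 * the head of the running slot, and a plain pending-slot update
 * (queue-full becomes visible via the status register) otherwise.
 */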
static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
		struct execlist_ctx_descriptor_format ctx[2])
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *slot =
		get_next_execlist_slot(execlist);

	struct execlist_ctx_descriptor_format *ctx0, *ctx1;
	struct execlist_context_status_format status;
	struct intel_vgpu *vgpu = execlist->vgpu;

	gvt_dbg_el("emulate schedule-in\n");

	if (!slot) {
		gvt_vgpu_err("no available execlist slot\n");
		return -EINVAL;
	}

	memset(&status, 0, sizeof(status));
	memset(slot->ctx, 0, sizeof(slot->ctx));

	slot->ctx[0] = ctx[0];
	slot->ctx[1] = ctx[1];

	gvt_dbg_el("alloc slot index %d ctx 0 %x ctx 1 %x\n",
			slot->index, ctx[0].context_id,
			ctx[1].context_id);

	/*
	 * no running execlist, make this write bundle as running execlist
	 * -> idle-to-active
	 */
	if (!running) {
		gvt_dbg_el("no current running execlist\n");

		execlist->running_slot = slot;
		execlist->pending_slot = NULL;
		execlist->running_context = &slot->ctx[0];

		gvt_dbg_el("running slot index %d running context %x\n",
				execlist->running_slot->index,
				execlist->running_context->context_id);

		emulate_execlist_status(execlist);

		status.idle_to_active = 1;
		status.context_id = 0;

		emulate_csb_update(execlist, &status, false);
		return 0;
	}

	ctx0 = &running->ctx[0];
	ctx1 = &running->ctx[1];

	gvt_dbg_el("current running slot index %d ctx 0 %x ctx 1 %x\n",
		running->index, ctx0->context_id, ctx1->context_id);

	/*
	 * already has a running execlist
	 *	a. running ctx1 is valid,
	 *	   ctx0 is finished, and running ctx1 == new execlist ctx[0]
	 *	b. running ctx1 is not valid,
	 *	   ctx0 == new execlist ctx[0]
	 * ----> lite-restore + preempted
	 */
	if ((valid_context(ctx1) && same_context(ctx1, &slot->ctx[0]) &&
		/* condition a */
		(!same_context(ctx0, execlist->running_context))) ||
			(!valid_context(ctx1) &&
			 same_context(ctx0, &slot->ctx[0]))) { /* condition b */
		gvt_dbg_el("need to switch virtual execlist slot\n");

		execlist->pending_slot = slot;
		switch_virtual_execlist_slot(execlist);

		emulate_execlist_status(execlist);

		status.lite_restore = status.preempted = 1;
		status.context_id = ctx[0].context_id;

		emulate_csb_update(execlist, &status, false);
	} else {
		gvt_dbg_el("emulate as pending slot\n");
		/*
		 * otherwise
		 * --> emulate pending execlist exist + but no preemption case
		 */
		execlist->pending_slot = slot;
		emulate_execlist_status(execlist);
	}
	return 0;
}

#define get_desc_from_elsp_dwords(ed, i) \
	((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2]))

static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct execlist_ctx_descriptor_format ctx[2];
	int ring_id = workload->ring_id;
	int ret;

	if (!workload->emulate_schedule_in)
		return 0;

	ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);
	ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);

	ret = emulate_execlist_schedule_in(&s->execlist[ring_id], ctx);
	if (ret) {
		gvt_vgpu_err("fail to emulate execlist schedule in\n");
		return ret;
	}
	return 0;
}
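
/*
 * Completion path for an execlist workload.  same_context() compares
 * both context_id and lrca, so a following workload in the queue that
 * targets the identical context is treated as a lite-restore and no
 * schedule-out event is emulated for it.
 */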
static int complete_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	int ring_id = workload->ring_id;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
	struct intel_vgpu_workload *next_workload;
	struct list_head *next = workload_q_head(vgpu, ring_id)->next;
	bool lite_restore = false;
	int ret = 0;

	gvt_dbg_el("complete workload %p status %d\n", workload,
			workload->status);

	if (workload->status || (vgpu->resetting_eng & BIT(ring_id)))
		goto out;

	if (!list_empty(workload_q_head(vgpu, ring_id))) {
		struct execlist_ctx_descriptor_format *this_desc, *next_desc;

		next_workload = container_of(next,
				struct intel_vgpu_workload, list);
		this_desc = &workload->ctx_desc;
		next_desc = &next_workload->ctx_desc;

		lite_restore = same_context(this_desc, next_desc);
	}

	if (lite_restore) {
		gvt_dbg_el("next context == current - no schedule-out\n");
		goto out;
	}

	ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
out:
	intel_vgpu_unpin_mm(workload->shadow_mm);
	intel_vgpu_destroy_workload(workload);
	return ret;
}

static int submit_context(struct intel_vgpu *vgpu, int ring_id,
		struct execlist_ctx_descriptor_format *desc,
		bool emulate_schedule_in)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_workload *workload = NULL;

	workload = intel_vgpu_create_workload(vgpu, ring_id, desc);
	if (IS_ERR(workload))
		return PTR_ERR(workload);

	workload->prepare = prepare_execlist_workload;
	workload->complete = complete_execlist_workload;
	workload->emulate_schedule_in = emulate_schedule_in;

	if (emulate_schedule_in)
		workload->elsp_dwords = s->execlist[ring_id].elsp_dwords;

	gvt_dbg_el("workload %p emulate schedule_in %d\n", workload,
			emulate_schedule_in);

	intel_vgpu_queue_workload(workload);
	return 0;
}

int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
	struct execlist_ctx_descriptor_format *desc[2];
	int i, ret;

	desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);
	desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);

	if (!desc[0]->valid) {
		gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n");
		goto inv_desc;
	}

	for (i = 0; i < ARRAY_SIZE(desc); i++) {
		if (!desc[i]->valid)
			continue;
		if (!desc[i]->privilege_access) {
			gvt_vgpu_err("unexpected GGTT elsp submission\n");
			goto inv_desc;
		}
	}

	/* submit workload */
	for (i = 0; i < ARRAY_SIZE(desc); i++) {
		if (!desc[i]->valid)
			continue;
		ret = submit_context(vgpu, ring_id, desc[i], i == 0);
		if (ret) {
			gvt_vgpu_err("failed to submit desc %d\n", i);
			return ret;
		}
	}

	return 0;

inv_desc:
	gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n",
		     desc[0]->udw, desc[0]->ldw, desc[1]->udw, desc[1]->ldw);
	return -EINVAL;
}

static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
	struct execlist_context_status_pointer_format ctx_status_ptr;
	u32 ctx_status_ptr_reg;

	memset(execlist, 0, sizeof(*execlist));

	execlist->vgpu = vgpu;
	execlist->ring_id = ring_id;
	execlist->slot[0].index = 0;
	execlist->slot[1].index = 1;

	ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_PTR);
	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
	ctx_status_ptr.read_ptr = 0;
	ctx_status_ptr.write_ptr = 0x7;
	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
}
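
/*
 * The helpers below back the submission-ops interface at the end of
 * this file: reset/init reinitialize per-engine virtual execlist state,
 * while clean frees the per-engine ring scan buffers that the command
 * scanning path allocates.
 */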
static void clean_execlist(struct intel_vgpu *vgpu,
			   intel_engine_mask_t engine_mask)
{
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	struct intel_engine_cs *engine;
	struct intel_vgpu_submission *s = &vgpu->submission;
	intel_engine_mask_t tmp;

	for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) {
		kfree(s->ring_scan_buffer[engine->id]);
		s->ring_scan_buffer[engine->id] = NULL;
		s->ring_scan_buffer_size[engine->id] = 0;
	}
}

static void reset_execlist(struct intel_vgpu *vgpu,
			   intel_engine_mask_t engine_mask)
{
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	struct intel_engine_cs *engine;
	intel_engine_mask_t tmp;

	for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp)
		init_vgpu_execlist(vgpu, engine->id);
}

static int init_execlist(struct intel_vgpu *vgpu,
			 intel_engine_mask_t engine_mask)
{
	reset_execlist(vgpu, engine_mask);
	return 0;
}

const struct intel_vgpu_submission_ops intel_vgpu_execlist_submission_ops = {
	.name = "execlist",
	.init = init_execlist,
	.reset = reset_execlist,
	.clean = clean_execlist,
};
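
/*
 * Usage note (a sketch, not part of this file's logic): these ops are
 * installed per-vGPU by the GVT scheduler via
 * intel_vgpu_select_submission_ops() with the
 * INTEL_VGPU_EXECLIST_SUBMISSION interface, e.g.:
 *
 *	ret = intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES,
 *			INTEL_VGPU_EXECLIST_SUBMISSION);
 *
 * after which s->ops->init/reset/clean dispatch to the functions above.
 */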