/*	$NetBSD: intel_engine_types.h,v 1.7 2021/12/19 11:51:59 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright 2019 Intel Corporation
 */

#ifndef __INTEL_ENGINE_TYPES_H__
#define __INTEL_ENGINE_TYPES_H__

#include <linux/average.h>
#include <linux/completion.h>
#include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/notifier.h>
#include <linux/rbtree.h>
#include <linux/timer.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#include "i915_gem.h"
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
#include "intel_engine_pool_types.h"
#include "intel_sseu.h"
#include "intel_timeline_types.h"
#include "intel_wakeref.h"
#include "intel_workarounds_types.h"

/* Legacy HW Engine ID */

#define RCS0_HW		0
#define VCS0_HW		1
#define BCS0_HW		2
#define VECS0_HW	3
#define VCS1_HW		4
#define VCS2_HW		6
#define VCS3_HW		7
#define VECS1_HW	12

/* Gen11+ HW Engine class + instance */
#define RENDER_CLASS		0
#define VIDEO_DECODE_CLASS	1
#define VIDEO_ENHANCEMENT_CLASS	2
#define COPY_ENGINE_CLASS	3
#define OTHER_CLASS		4
#define MAX_ENGINE_CLASS	4
#define MAX_ENGINE_INSTANCE	3

#define I915_MAX_SLICES		3
#define I915_MAX_SUBSLICES	8

#define I915_CMD_HASH_ORDER	9

struct dma_fence;
struct drm_i915_gem_object;
struct drm_i915_reg_table;
struct i915_gem_context;
struct i915_request;
struct i915_sched_attr;
struct intel_gt;
struct intel_ring;
struct intel_uncore;

typedef u8 intel_engine_mask_t;
#define ALL_ENGINES ((intel_engine_mask_t)~0ul)

struct intel_hw_status_page {
	struct i915_vma *vma;
	u32 *addr;
};

struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};

/*
 * We use a single page to load ctx workarounds, so all of these
 * values are expressed in dwords.
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies the batch starting position; also helpful in case
 *    we want to have multiple batches at different offsets based on
 *    some criteria. It is not a requirement at the moment but provides
 *    an option for future use.
 *  size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};

#define I915_MAX_VCS	4
#define I915_MAX_VECS	2

/*
 * Engine ID definitions.
 * Keep instances of the same type engine together.
 */
enum intel_engine_id {
	RCS0 = 0,
	BCS0,
	VCS0,
	VCS1,
	VCS2,
	VCS3,
#define _VCS(n) (VCS0 + (n))
	VECS0,
	VECS1,
#define _VECS(n) (VECS0 + (n))
	I915_NUM_ENGINES
#define INVALID_ENGINE ((enum intel_engine_id)-1)
};

/* A simple estimator for the round-trip latency of an engine */
DECLARE_EWMA(_engine_latency, 6, 4)

struct st_preempt_hang {
	struct completion completion;
	unsigned int count;
	bool inject_hang;
};

/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state
 * of the driver and the hardware state for execlist mode of submission.
 */
struct intel_engine_execlists {
	/**
	 * @tasklet: softirq tasklet for bottom handler
	 */
	struct tasklet_struct tasklet;

	/**
	 * @timer: kick the current context if its timeslice expires
	 */
	struct timer_list timer;

	/**
	 * @preempt: reset the current context if it fails to give way
	 */
	struct timer_list preempt;

	/**
	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
	 */
	struct i915_priolist default_priolist;

	/**
	 * @no_priolist: priority lists disabled
	 */
	bool no_priolist;

#ifdef __NetBSD__
	bus_space_tag_t bst;
	bus_space_handle_t bsh;
	bus_size_t submit_reg;
	bus_size_t ctrl_reg;
#else
	/**
	 * @submit_reg: gen-specific execlist submission register
	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
	 * the ExecList Submission Queue Contents register array for Gen11+
	 */
	u32 __iomem *submit_reg;

	/**
	 * @ctrl_reg: the enhanced execlists control register, used to load the
	 * submit queue on the HW and to request preemptions to idle
	 */
	u32 __iomem *ctrl_reg;
#endif

#define EXECLIST_MAX_PORTS 2
	/**
	 * @active: the currently known context executing on HW
	 */
	struct i915_request * const *active;
	/**
	 * @inflight: the set of contexts submitted and acknowledged by HW
	 *
	 * The set of inflight contexts is managed by reading CS events
	 * from the HW. On a context-switch event (not preemption), we
	 * know the HW has transitioned from port0 to port1, and we
	 * advance our inflight/active tracking accordingly.
	 */
	struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
	/**
	 * @pending: the next set of contexts submitted to ELSP
	 *
	 * We store the array of contexts that we submit to HW (via ELSP) and
	 * promote them to the inflight array once HW has signaled the
	 * preemption or idle-to-active event.
	 */
	struct i915_request *pending[EXECLIST_MAX_PORTS + 1];

	/**
	 * @port_mask: number of execlist ports - 1
	 */
	unsigned int port_mask;

	/**
	 * @switch_priority_hint: Second context priority.
	 *
	 * We submit multiple contexts to the HW simultaneously and would
	 * like to occasionally switch between them to emulate timeslicing.
	 * To know when timeslicing is suitable, we track the priority of
	 * the context submitted second.
	 */
	int switch_priority_hint;

	/**
	 * @queue_priority_hint: Highest pending priority.
	 *
	 * When we add requests into the queue, or adjust the priority of
	 * executing requests, we compute the maximum priority of those
	 * pending requests. We can then use this value to determine if
	 * we need to preempt the executing requests to service the queue.
	 * However, since we may have recorded the priority of an inflight
	 * request that we wanted to preempt but has since completed, at the
	 * time of dequeuing the priority hint may no longer match the
	 * highest available request priority.
	 */
	int queue_priority_hint;

	/**
	 * @queue: queue of requests, in priority lists
	 */
	struct rb_root_cached queue;
	struct rb_root_cached virtual;

	/**
	 * @csb_write: control register for Context Switch buffer
	 *
	 * Note this register may be either mmio or HWSP shadow.
	 */
	u32 *csb_write;

	/**
	 * @csb_status: status array for Context Switch buffer
	 *
	 * Note these registers may be either mmio or HWSP shadow.
	 */
	u32 *csb_status;

	/**
	 * @csb_size: context status buffer FIFO size
	 */
	u8 csb_size;

	/**
	 * @csb_head: context status buffer head
	 */
	u8 csb_head;

	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};
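
/*
 * Illustrative sketch (not part of this header): @active above points into
 * whichever of @inflight or @pending currently describes the contexts
 * executing on the HW, and both arrays are terminated by a NULL sentinel,
 * so a hypothetical consumer could walk the executing requests as:
 *
 *	struct i915_request * const *port;
 *	struct i915_request *rq;
 *
 *	for (port = execlists->active; (rq = *port); port++)
 *		inspect(rq);
 *
 * where inspect() is a placeholder for whatever the caller does per request.
 */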

#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
	struct drm_i915_private *i915;
	struct intel_gt *gt;
	struct intel_uncore *uncore;
	char name[INTEL_ENGINE_CS_MAX_NAME];

	enum intel_engine_id id;
	enum intel_engine_id legacy_idx;

	unsigned int hw_id;
	unsigned int guc_id;

	intel_engine_mask_t mask;

	u8 class;
	u8 instance;

	u16 uabi_class;
	u16 uabi_instance;

	u32 uabi_capabilities;
	u32 context_size;
	u32 mmio_base;

	unsigned int context_tag;
#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)

	union {
		struct rb_node rbtree;
		struct llist_node llist;
		struct list_head list;
	} uabi_node;

	struct intel_sseu sseu;

	struct {
		spinlock_t lock;
		struct list_head requests;
		struct list_head hold; /* ready requests, but on hold */
	} active;

	struct llist_head barrier_tasks;

	struct intel_context *kernel_context; /* pinned */

	intel_engine_mask_t saturated; /* submitting semaphores too late? */

	struct {
		struct delayed_work work;
		struct i915_request *systole;
	} heartbeat;

	unsigned long serial;

	unsigned long wakeref_serial;
	struct intel_wakeref wakeref;
	struct drm_i915_gem_object *default_state;
	void *pinned_default_state;

	struct {
		struct intel_ring *ring;
		struct intel_timeline *timeline;
	} legacy;

	/*
	 * We track the average duration of the idle pulse on parking the
	 * engine to keep an estimate of how fast the engine is under
	 * ideal conditions.
	 */
	struct ewma__engine_latency latency;
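
	/*
	 * Illustrative sketch (hypothetical caller, not defined here): the
	 * filter declared by DECLARE_EWMA(_engine_latency, 6, 4) earlier in
	 * this header is updated and queried with the generated helpers, e.g.
	 *
	 *	ewma__engine_latency_add(&engine->latency, sample);
	 *	avg = ewma__engine_latency_read(&engine->latency);
	 *
	 * where sample and avg are placeholders.
	 */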

	/* Rather than have every client wait upon all user interrupts,
	 * with the herd waking after every interrupt and each doing the
	 * heavyweight seqno dance, we delegate the task (of being the
	 * bottom-half of the user interrupt) to the first client. After
	 * every interrupt, we wake up one client, who does the heavyweight
	 * coherent seqno read and either goes back to sleep (if incomplete),
	 * or wakes up all the completed clients in parallel, before then
	 * transferring the bottom-half status to the next client in the queue.
	 *
	 * Compared to walking the entire list of waiters in a single dedicated
	 * bottom-half, we reduce the latency of the first waiter by avoiding
	 * a context switch, but incur additional coherent seqno reads when
	 * following the chain of request breadcrumbs. Since it is most likely
	 * that we have a single client waiting on each seqno, reducing
	 * the overhead of waking that client is much preferred.
	 */
	struct intel_breadcrumbs {
		spinlock_t irq_lock;
		struct list_head signalers;

		struct irq_work irq_work; /* for use from inside irq_lock */

		unsigned int irq_enabled;

		bool irq_armed;
	} breadcrumbs;

	struct intel_engine_pmu {
		/**
		 * @enable: Bitmask of enable sample events on this engine.
		 *
		 * Bits correspond to sample event types, for instance
		 * I915_SAMPLE_QUEUED is bit 0 etc.
		 */
		u32 enable;
		/**
		 * @enable_count: Reference count for the enabled samplers.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
		/**
		 * @sample: Counter values for sampling events.
		 *
		 * Our internal timer stores the current counters in this field.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
	} pmu;

	/*
	 * A pool of objects to use as shadow copies of client batch buffers
	 * when the command parser is enabled. Prevents the client from
	 * modifying the batch contents after software parsing.
	 */
	struct intel_engine_pool pool;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_wa_list ctx_wa_list;
	struct i915_wa_list wa_list;
	struct i915_wa_list whitelist;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);

	int (*resume)(struct intel_engine_cs *engine);

	struct {
		void (*prepare)(struct intel_engine_cs *engine);

		void (*rewind)(struct intel_engine_cs *engine, bool stalled);
		void (*cancel)(struct intel_engine_cs *engine);

		void (*finish)(struct intel_engine_cs *engine);
	} reset;

	void (*park)(struct intel_engine_cs *engine);
	void (*unpark)(struct intel_engine_cs *engine);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	const struct intel_context_ops *cops;

	int (*request_alloc)(struct i915_request *rq);

	int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE	BIT(0)
#define EMIT_FLUSH	BIT(1)
#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
	int (*emit_bb_start)(struct i915_request *rq,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
	int (*emit_init_breadcrumb)(struct i915_request *rq);
	u32 *(*emit_fini_breadcrumb)(struct i915_request *rq,
				     u32 *cs);
	unsigned int emit_fini_breadcrumb_dw;
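
	/*
	 * Illustrative sketch: callers combine the EMIT_* flags above when
	 * invoking emit_flush(), e.g. invalidating caches before a batch and
	 * flushing afterwards (or both at once with EMIT_BARRIER):
	 *
	 *	err = engine->emit_flush(rq, EMIT_INVALIDATE);
	 *	...
	 *	err = engine->emit_flush(rq, EMIT_FLUSH);
	 *
	 * where err and rq are placeholders for the caller's state.
	 */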

	/* Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct i915_request *rq);

	/*
	 * Called on signaling of a SUBMIT_FENCE, passing along the signaling
	 * request down to the bonded pairs.
	 */
	void (*bond_execute)(struct i915_request *rq,
			     struct dma_fence *signal);

	/*
	 * Call when the priority on a request has changed and it and its
	 * dependencies may need rescheduling. Note the request itself may
	 * not be ready to run!
	 */
	void (*schedule)(struct i915_request *request,
			 const struct i915_sched_attr *attr);

	void (*release)(struct intel_engine_cs *engine);

	struct intel_engine_execlists execlists;

	/*
	 * Keep track of completed timelines on this engine for early
	 * retirement with the goal of quickly enabling powersaving as
	 * soon as the engine is idle.
	 */
	struct intel_timeline *retire;
	struct work_struct retire_work;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

#define I915_ENGINE_USING_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS   BIT(1)
#define I915_ENGINE_HAS_PREEMPTION   BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
#define I915_ENGINE_IS_VIRTUAL       BIT(5)
#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
	unsigned int flags;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Returns 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);

	struct {
		/**
		 * @lock: Lock protecting the below fields.
		 */
		seqlock_t lock;
		/**
		 * @enabled: Reference count indicating number of listeners.
		 */
		unsigned int enabled;
		/**
		 * @active: Number of contexts currently scheduled in.
		 */
		unsigned int active;
		/**
		 * @enabled_at: Timestamp when busy stats were enabled.
		 */
		ktime_t enabled_at;
		/**
		 * @start: Timestamp of the last idle to active transition.
		 *
		 * Idle is defined as active == 0, active is active > 0.
		 */
		ktime_t start;
		/**
		 * @total: Total time this engine was busy.
		 *
		 * Accumulated time not counting the most recent block in cases
		 * where engine is currently busy (active > 0).
		 */
		ktime_t total;
	} stats;
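
	/*
	 * Illustrative sketch (assumed consumer, not defined here): the busy
	 * time reported for the engine is the accumulated stats.total plus,
	 * while busy (stats.active > 0), the time since the last
	 * idle-to-active transition, sampled under stats.lock:
	 *
	 *	ktime_t busy = engine->stats.total;
	 *
	 *	if (engine->stats.active)
	 *		busy = ktime_add(busy,
	 *				 ktime_sub(ktime_get(),
	 *					   engine->stats.start));
	 */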

	struct {
		unsigned long heartbeat_interval_ms;
		unsigned long preempt_timeout_ms;
		unsigned long stop_timeout_ms;
		unsigned long timeslice_duration_ms;
	} props;
};

static inline bool
intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_USING_CMD_PARSER;
}

static inline bool
intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
}

static inline bool
intel_engine_supports_stats(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
}

static inline bool
intel_engine_has_semaphores(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
}

static inline bool
intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
}

static inline bool
intel_engine_is_virtual(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_IS_VIRTUAL;
}

static inline bool
intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
{
	return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO;
}

#define instdone_has_slice(dev_priv___, sseu___, slice___) \
	((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & BIT(slice___))

#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \
	(IS_GEN(dev_priv__, 7) ? (1 & BIT(subslice__)) : \
	 intel_sseu_has_subslice(sseu__, 0, subslice__))

#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \
	for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \
	     (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \
	     (slice_) += ((subslice_) == 0)) \
		for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \
			    (instdone_has_subslice(dev_priv_, sseu_, slice_, \
						    subslice_)))

#endif /* __INTEL_ENGINE_TYPES_H__ */