1/* 2 * Copyright © 2017 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "genX_boilerplate.h" 25#include "brw_defines.h" 26#include "brw_state.h" 27 28static unsigned 29flags_to_post_sync_op(uint32_t flags) 30{ 31 if (flags & PIPE_CONTROL_WRITE_IMMEDIATE) 32 return WriteImmediateData; 33 34 if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) 35 return WritePSDepthCount; 36 37 if (flags & PIPE_CONTROL_WRITE_TIMESTAMP) 38 return WriteTimestamp; 39 40 return 0; 41} 42 43/** 44 * Do the given flags have a Post Sync or LRI Post Sync operation? 45 */ 46static enum pipe_control_flags 47get_post_sync_flags(enum pipe_control_flags flags) 48{ 49 flags &= PIPE_CONTROL_WRITE_IMMEDIATE | 50 PIPE_CONTROL_WRITE_DEPTH_COUNT | 51 PIPE_CONTROL_WRITE_TIMESTAMP | 52 PIPE_CONTROL_LRI_POST_SYNC_OP; 53 54 /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with 55 * "LRI Post Sync Operation". So more than one bit set would be illegal. 56 */ 57 assert(util_bitcount(flags) <= 1); 58 59 return flags; 60} 61 62#define IS_COMPUTE_PIPELINE(brw) \ 63 (GFX_VER >= 7 && brw->last_pipeline == BRW_COMPUTE_PIPELINE) 64 65/* Closed interval - GFX_VER \in [x, y] */ 66#define IS_GFX_VER_BETWEEN(x, y) (GFX_VER >= x && GFX_VER <= y) 67#define IS_GFX_VERx10_BETWEEN(x, y) \ 68 (GFX_VERx10 >= x && GFX_VERx10 <= y) 69 70/** 71 * Emit a series of PIPE_CONTROL commands, taking into account any 72 * workarounds necessary to actually accomplish the caller's request. 73 * 74 * Unless otherwise noted, spec quotations in this function come from: 75 * 76 * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming 77 * Restrictions for PIPE_CONTROL. 78 * 79 * You should not use this function directly. Use the helpers in 80 * brw_pipe_control.c instead, which may split the pipe control further. 81 */ 82void 83genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags, 84 struct brw_bo *bo, uint32_t offset, uint64_t imm) 85{ 86 UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo; 87 enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags); 88 enum pipe_control_flags non_lri_post_sync_flags = 89 post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP; 90 91 /* Recursive PIPE_CONTROL workarounds -------------------------------- 92 * (http://knowyourmeme.com/memes/xzibit-yo-dawg) 93 * 94 * We do these first because we want to look at the original operation, 95 * rather than any workarounds we set. 96 */ 97 if (GFX_VER == 6 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) { 98 /* Hardware workaround: SNB B-Spec says: 99 * 100 * "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush 101 * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is 102 * required." 103 */ 104 brw_emit_post_sync_nonzero_flush(brw); 105 } 106 107 if (GFX_VER == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) { 108 /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description 109 * lists several workarounds: 110 * 111 * "Project: SKL, KBL, BXT 112 * 113 * If the VF Cache Invalidation Enable is set to a 1 in a 114 * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields 115 * sets to 0, with the VF Cache Invalidation Enable set to 0 116 * needs to be sent prior to the PIPE_CONTROL with VF Cache 117 * Invalidation Enable set to a 1." 118 */ 119 genX(emit_raw_pipe_control)(brw, 0, NULL, 0, 0); 120 } 121 122 if (GFX_VER == 9 && IS_COMPUTE_PIPELINE(brw) && post_sync_flags) { 123 /* Project: SKL / Argument: LRI Post Sync Operation [23] 124 * 125 * "PIPECONTROL command with “Command Streamer Stall Enable” must be 126 * programmed prior to programming a PIPECONTROL command with "LRI 127 * Post Sync Operation" in GPGPU mode of operation (i.e when 128 * PIPELINE_SELECT command is set to GPGPU mode of operation)." 129 * 130 * The same text exists a few rows below for Post Sync Op. 131 */ 132 genX(emit_raw_pipe_control)(brw, PIPE_CONTROL_CS_STALL, NULL, 0, 0); 133 } 134 135 /* "Flush Types" workarounds --------------------------------------------- 136 * We do these now because they may add post-sync operations or CS stalls. 137 */ 138 139 if (IS_GFX_VER_BETWEEN(8, 10) && 140 (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) { 141 /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate 142 * 143 * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or 144 * 'Write PS Depth Count' or 'Write Timestamp'." 145 */ 146 if (!bo) { 147 flags |= PIPE_CONTROL_WRITE_IMMEDIATE; 148 post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE; 149 non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE; 150 bo = brw->workaround_bo; 151 offset = brw->workaround_bo_offset; 152 } 153 } 154 155 if (GFX_VERx10 < 75 && (flags & PIPE_CONTROL_DEPTH_STALL)) { 156 /* Project: PRE-HSW / Argument: Depth Stall 157 * 158 * "The following bits must be clear: 159 * - Render Target Cache Flush Enable ([12] of DW1) 160 * - Depth Cache Flush Enable ([0] of DW1)" 161 */ 162 assert(!(flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH | 163 PIPE_CONTROL_DEPTH_CACHE_FLUSH))); 164 } 165 166 if (GFX_VER >= 6 && (flags & PIPE_CONTROL_DEPTH_STALL)) { 167 /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable): 168 * 169 * "This bit must be DISABLED for operations other than writing 170 * PS_DEPTH_COUNT." 171 * 172 * This seems like nonsense. An Ivybridge workaround requires us to 173 * emit a PIPE_CONTROL with a depth stall and write immediate post-sync 174 * operation. Gfx8+ requires us to emit depth stalls and depth cache 175 * flushes together. So, it's hard to imagine this means anything other 176 * than "we originally intended this to be used for PS_DEPTH_COUNT". 177 * 178 * We ignore the supposed restriction and do nothing. 179 */ 180 } 181 182 if (GFX_VERx10 < 75 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) { 183 /* Project: PRE-HSW / Argument: Depth Cache Flush 184 * 185 * "Depth Stall must be clear ([13] of DW1)." 186 */ 187 assert(!(flags & PIPE_CONTROL_DEPTH_STALL)); 188 } 189 190 if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH | 191 PIPE_CONTROL_STALL_AT_SCOREBOARD)) { 192 /* From the PIPE_CONTROL instruction table, bit 12 and bit 1: 193 * 194 * "This bit must be DISABLED for End-of-pipe (Read) fences, 195 * PS_DEPTH_COUNT or TIMESTAMP queries." 196 * 197 * TODO: Implement end-of-pipe checking. 198 */ 199 assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT | 200 PIPE_CONTROL_WRITE_TIMESTAMP))); 201 } 202 203 if (GFX_VER < 11 && (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD)) { 204 /* From the PIPE_CONTROL instruction table, bit 1: 205 * 206 * "This bit is ignored if Depth Stall Enable is set. 207 * Further, the render cache is not flushed even if Write Cache 208 * Flush Enable bit is set." 209 * 210 * We assert that the caller doesn't do this combination, to try and 211 * prevent mistakes. It shouldn't hurt the GPU, though. 212 * 213 * We skip this check on Gfx11+ as the "Stall and Pixel Scoreboard" 214 * and "Render Target Flush" combo is explicitly required for BTI 215 * update workarounds. 216 */ 217 assert(!(flags & (PIPE_CONTROL_DEPTH_STALL | 218 PIPE_CONTROL_RENDER_TARGET_FLUSH))); 219 } 220 221 /* PIPE_CONTROL page workarounds ------------------------------------- */ 222 223 if (IS_GFX_VER_BETWEEN(7, 8) && 224 (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) { 225 /* From the PIPE_CONTROL page itself: 226 * 227 * "IVB, HSW, BDW 228 * Restriction: Pipe_control with CS-stall bit set must be issued 229 * before a pipe-control command that has the State Cache 230 * Invalidate bit set." 231 */ 232 flags |= PIPE_CONTROL_CS_STALL; 233 } 234 235 if (GFX_VERx10 == 75) { 236 /* From the PIPE_CONTROL page itself: 237 * 238 * "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation: 239 * Prior to programming a PIPECONTROL command with any of the RO 240 * cache invalidation bit set, program a PIPECONTROL flush command 241 * with “CS stall” bit and “HDC Flush” bit set." 242 * 243 * TODO: Actually implement this. What's an HDC Flush? 244 */ 245 } 246 247 if (flags & PIPE_CONTROL_FLUSH_LLC) { 248 /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC): 249 * 250 * "Project: ALL 251 * SW must always program Post-Sync Operation to "Write Immediate 252 * Data" when Flush LLC is set." 253 * 254 * For now, we just require the caller to do it. 255 */ 256 assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE); 257 } 258 259 /* "Post-Sync Operation" workarounds -------------------------------- */ 260 261 /* Project: All / Argument: Global Snapshot Count Reset [19] 262 * 263 * "This bit must not be exercised on any product. 264 * Requires stall bit ([20] of DW1) set." 265 * 266 * We don't use this, so we just assert that it isn't used. The 267 * PIPE_CONTROL instruction page indicates that they intended this 268 * as a debug feature and don't think it is useful in production, 269 * but it may actually be usable, should we ever want to. 270 */ 271 assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0); 272 273 if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR | 274 PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) { 275 /* Project: All / Arguments: 276 * 277 * - Generic Media State Clear [16] 278 * - Indirect State Pointers Disable [16] 279 * 280 * "Requires stall bit ([20] of DW1) set." 281 * 282 * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media 283 * State Clear) says: 284 * 285 * "PIPECONTROL command with “Command Streamer Stall Enable” must be 286 * programmed prior to programming a PIPECONTROL command with "Media 287 * State Clear" set in GPGPU mode of operation" 288 * 289 * This is a subset of the earlier rule, so there's nothing to do. 290 */ 291 flags |= PIPE_CONTROL_CS_STALL; 292 } 293 294 if (flags & PIPE_CONTROL_STORE_DATA_INDEX) { 295 /* Project: All / Argument: Store Data Index 296 * 297 * "Post-Sync Operation ([15:14] of DW1) must be set to something other 298 * than '0'." 299 * 300 * For now, we just assert that the caller does this. We might want to 301 * automatically add a write to the workaround BO... 302 */ 303 assert(non_lri_post_sync_flags != 0); 304 } 305 306 if (flags & PIPE_CONTROL_SYNC_GFDT) { 307 /* Project: All / Argument: Sync GFDT 308 * 309 * "Post-Sync Operation ([15:14] of DW1) must be set to something other 310 * than '0' or 0x2520[13] must be set." 311 * 312 * For now, we just assert that the caller does this. 313 */ 314 assert(non_lri_post_sync_flags != 0); 315 } 316 317 if (IS_GFX_VERx10_BETWEEN(60, 75) && 318 (flags & PIPE_CONTROL_TLB_INVALIDATE)) { 319 /* Project: SNB, IVB, HSW / Argument: TLB inv 320 * 321 * "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1) 322 * must be set to something other than '0'." 323 * 324 * For now, we just assert that the caller does this. 325 */ 326 assert(non_lri_post_sync_flags != 0); 327 } 328 329 if (GFX_VER >= 7 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) { 330 /* Project: IVB+ / Argument: TLB inv 331 * 332 * "Requires stall bit ([20] of DW1) set." 333 * 334 * Also, from the PIPE_CONTROL instruction table: 335 * 336 * "Project: SKL+ 337 * Post Sync Operation or CS stall must be set to ensure a TLB 338 * invalidation occurs. Otherwise no cycle will occur to the TLB 339 * cache to invalidate." 340 * 341 * This is not a subset of the earlier rule, so there's nothing to do. 342 */ 343 flags |= PIPE_CONTROL_CS_STALL; 344 } 345 346 if (GFX_VER == 9 && devinfo->gt == 4) { 347 /* TODO: The big Skylake GT4 post sync op workaround */ 348 } 349 350 /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */ 351 352 if (IS_COMPUTE_PIPELINE(brw)) { 353 if (GFX_VER >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) { 354 /* Project: SKL+ / Argument: Tex Invalidate 355 * "Requires stall bit ([20] of DW) set for all GPGPU Workloads." 356 */ 357 flags |= PIPE_CONTROL_CS_STALL; 358 } 359 360 if (GFX_VER == 8 && (post_sync_flags || 361 (flags & (PIPE_CONTROL_NOTIFY_ENABLE | 362 PIPE_CONTROL_DEPTH_STALL | 363 PIPE_CONTROL_RENDER_TARGET_FLUSH | 364 PIPE_CONTROL_DEPTH_CACHE_FLUSH | 365 PIPE_CONTROL_DATA_CACHE_FLUSH)))) { 366 /* Project: BDW / Arguments: 367 * 368 * - LRI Post Sync Operation [23] 369 * - Post Sync Op [15:14] 370 * - Notify En [8] 371 * - Depth Stall [13] 372 * - Render Target Cache Flush [12] 373 * - Depth Cache Flush [0] 374 * - DC Flush Enable [5] 375 * 376 * "Requires stall bit ([20] of DW) set for all GPGPU and Media 377 * Workloads." 378 * 379 * (The docs have separate table rows for each bit, with essentially 380 * the same workaround text. We've combined them here.) 381 */ 382 flags |= PIPE_CONTROL_CS_STALL; 383 384 /* Also, from the PIPE_CONTROL instruction table, bit 20: 385 * 386 * "Project: BDW 387 * This bit must be always set when PIPE_CONTROL command is 388 * programmed by GPGPU and MEDIA workloads, except for the cases 389 * when only Read Only Cache Invalidation bits are set (State 390 * Cache Invalidation Enable, Instruction cache Invalidation 391 * Enable, Texture Cache Invalidation Enable, Constant Cache 392 * Invalidation Enable). This is to WA FFDOP CG issue, this WA 393 * need not implemented when FF_DOP_CG is disable via "Fixed 394 * Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register." 395 * 396 * It sounds like we could avoid CS stalls in some cases, but we 397 * don't currently bother. This list isn't exactly the list above, 398 * either... 399 */ 400 } 401 } 402 403 /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT: 404 * 405 * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with 406 * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set." 407 * 408 * Note that the kernel does CS stalls between batches, so we only need 409 * to count them within a batch. We currently naively count every 4, and 410 * don't skip the ones with only read-cache-invalidate bits set. This 411 * may or may not be a problem... 412 */ 413 if (GFX_VERx10 == 70) { 414 if (flags & PIPE_CONTROL_CS_STALL) { 415 /* If we're doing a CS stall, reset the counter and carry on. */ 416 brw->pipe_controls_since_last_cs_stall = 0; 417 } 418 419 /* If this is the fourth pipe control without a CS stall, do one now. */ 420 if (++brw->pipe_controls_since_last_cs_stall == 4) { 421 brw->pipe_controls_since_last_cs_stall = 0; 422 flags |= PIPE_CONTROL_CS_STALL; 423 } 424 } 425 426 /* "Stall" workarounds ---------------------------------------------- 427 * These have to come after the earlier ones because we may have added 428 * some additional CS stalls above. 429 */ 430 431 if (GFX_VER < 9 && (flags & PIPE_CONTROL_CS_STALL)) { 432 /* Project: PRE-SKL, VLV, CHV 433 * 434 * "[All Stepping][All SKUs]: 435 * 436 * One of the following must also be set: 437 * 438 * - Render Target Cache Flush Enable ([12] of DW1) 439 * - Depth Cache Flush Enable ([0] of DW1) 440 * - Stall at Pixel Scoreboard ([1] of DW1) 441 * - Depth Stall ([13] of DW1) 442 * - Post-Sync Operation ([13] of DW1) 443 * - DC Flush Enable ([5] of DW1)" 444 * 445 * If we don't already have one of those bits set, we choose to add 446 * "Stall at Pixel Scoreboard". Some of the other bits require a 447 * CS stall as a workaround (see above), which would send us into 448 * an infinite recursion of PIPE_CONTROLs. "Stall at Pixel Scoreboard" 449 * appears to be safe, so we choose that. 450 */ 451 const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH | 452 PIPE_CONTROL_DEPTH_CACHE_FLUSH | 453 PIPE_CONTROL_WRITE_IMMEDIATE | 454 PIPE_CONTROL_WRITE_DEPTH_COUNT | 455 PIPE_CONTROL_WRITE_TIMESTAMP | 456 PIPE_CONTROL_STALL_AT_SCOREBOARD | 457 PIPE_CONTROL_DEPTH_STALL | 458 PIPE_CONTROL_DATA_CACHE_FLUSH; 459 if (!(flags & wa_bits)) 460 flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; 461 } 462 463 /* Emit --------------------------------------------------------------- */ 464 465 brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) { 466 #if GFX_VER >= 9 467 pc.FlushLLC = 0; 468 #endif 469 #if GFX_VER >= 7 470 pc.LRIPostSyncOperation = NoLRIOperation; 471 pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE; 472 pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH; 473 #endif 474 #if GFX_VER >= 6 475 pc.StoreDataIndex = 0; 476 pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL; 477 pc.GlobalSnapshotCountReset = 478 flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET; 479 pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE; 480 pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR; 481 pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD; 482 pc.RenderTargetCacheFlushEnable = 483 flags & PIPE_CONTROL_RENDER_TARGET_FLUSH; 484 pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH; 485 pc.StateCacheInvalidationEnable = 486 flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE; 487 pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE; 488 pc.ConstantCacheInvalidationEnable = 489 flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE; 490 #else 491 pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH; 492 #endif 493 pc.PostSyncOperation = flags_to_post_sync_op(flags); 494 pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL; 495 pc.InstructionCacheInvalidateEnable = 496 flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE; 497 pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE; 498 #if GFX_VERx10 >= 45 499 pc.IndirectStatePointersDisable = 500 flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE; 501 #endif 502 #if GFX_VER >= 6 503 pc.TextureCacheInvalidationEnable = 504 flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 505 #elif GFX_VER == 5 || GFX_VERx10 == 45 506 pc.TextureCacheFlushEnable = 507 flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 508 #endif 509 pc.Address = ggtt_bo(bo, offset); 510 if (GFX_VER < 7 && bo) 511 pc.DestinationAddressType = DAT_GGTT; 512 pc.ImmediateData = imm; 513 } 514} 515