1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "genX_boilerplate.h"
25#include "brw_defines.h"
26#include "brw_state.h"
27
28static unsigned
29flags_to_post_sync_op(uint32_t flags)
30{
31   if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
32      return WriteImmediateData;
33
34   if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
35      return WritePSDepthCount;
36
37   if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
38      return WriteTimestamp;
39
40   return 0;
41}
42
43/**
44 * Do the given flags have a Post Sync or LRI Post Sync operation?
45 */
46static enum pipe_control_flags
47get_post_sync_flags(enum pipe_control_flags flags)
48{
49   flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
50            PIPE_CONTROL_WRITE_DEPTH_COUNT |
51            PIPE_CONTROL_WRITE_TIMESTAMP |
52            PIPE_CONTROL_LRI_POST_SYNC_OP;
53
54   /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with
55    * "LRI Post Sync Operation".  So more than one bit set would be illegal.
56    */
57   assert(util_bitcount(flags) <= 1);
58
59   return flags;
60}
61
/* True on Gfx7+ when the context's last_pipeline is BRW_COMPUTE_PIPELINE.
 * The argument is parenthesized so any pointer expression may be passed.
 */
#define IS_COMPUTE_PIPELINE(brw) \
   (GFX_VER >= 7 && (brw)->last_pipeline == BRW_COMPUTE_PIPELINE)

/* Closed interval - GFX_VER \in [x, y].  Arguments are parenthesized so
 * that compound expressions (e.g. a ?: expression) are compared as a whole.
 */
#define IS_GFX_VER_BETWEEN(x, y) (GFX_VER >= (x) && GFX_VER <= (y))
/* Same closed-interval test, against GFX_VERx10. */
#define IS_GFX_VERx10_BETWEEN(x, y) \
   (GFX_VERx10 >= (x) && GFX_VERx10 <= (y))
69
/**
 * Emit a series of PIPE_CONTROL commands, taking into account any
 * workarounds necessary to actually accomplish the caller's request.
 *
 * Unless otherwise noted, spec quotations in this function come from:
 *
 * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming
 * Restrictions for PIPE_CONTROL.
 *
 * You should not use this function directly.  Use the helpers in
 * brw_pipe_control.c instead, which may split the pipe control further.
 *
 * Parameters:
 *  - brw:    the context.  Besides being used for batch emission, its
 *            last_pipeline, workaround_bo/workaround_bo_offset and (when
 *            GFX_VERx10 == 70) pipe_controls_since_last_cs_stall fields
 *            are consulted; the latter is also updated.
 *  - flags:  bitmask of PIPE_CONTROL_* flags describing the request.  The
 *            workarounds below may add bits (CS stall, stall at
 *            scoreboard, write immediate) before the command is emitted.
 *  - bo:     buffer for the post-sync write; may be NULL.  When NULL and a
 *            workaround needs a post-sync write, brw->workaround_bo is
 *            substituted.
 *  - offset: offset passed together with bo to form the command's Address.
 *  - imm:    value programmed into the command's ImmediateData field.
 *
 * The function may call itself (with a reduced flag set) to emit the
 * extra PIPE_CONTROLs that some workarounds require.
 */
void
genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
                            struct brw_bo *bo, uint32_t offset, uint64_t imm)
{
   /* devinfo is only referenced by the (currently empty) Gfx9 GT4 check
    * below, hence UNUSED.
    */
   UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
   /* Post-sync bits requested by the caller, with and without the LRI
    * variant.  Both are updated if a workaround adds a post-sync write.
    */
   enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags);
   enum pipe_control_flags non_lri_post_sync_flags =
      post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP;

   /* Recursive PIPE_CONTROL workarounds --------------------------------
    * (http://knowyourmeme.com/memes/xzibit-yo-dawg)
    *
    * We do these first because we want to look at the original operation,
    * rather than any workarounds we set.
    */
   if (GFX_VER == 6 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
      /* Hardware workaround: SNB B-Spec says:
       *
       *    "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
       *     Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
       *     required."
       */
      brw_emit_post_sync_nonzero_flush(brw);
   }

   if (GFX_VER == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
      /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
       * lists several workarounds:
       *
       *    "Project: SKL, KBL, BXT
       *
       *     If the VF Cache Invalidation Enable is set to a 1 in a
       *     PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
       *     sets to 0, with the VF Cache Invalidation Enable set to 0
       *     needs to be sent prior to the PIPE_CONTROL with VF Cache
       *     Invalidation Enable set to a 1."
       *
       * The null PIPE_CONTROL is produced by recursing with no flags, no
       * buffer and no immediate.
       */
      genX(emit_raw_pipe_control)(brw, 0, NULL, 0, 0);
   }

   if (GFX_VER == 9 && IS_COMPUTE_PIPELINE(brw) && post_sync_flags) {
      /* Project: SKL / Argument: LRI Post Sync Operation [23]
       *
       * "PIPECONTROL command with “Command Streamer Stall Enable” must be
       *  programmed prior to programming a PIPECONTROL command with "LRI
       *  Post Sync Operation" in GPGPU mode of operation (i.e when
       *  PIPELINE_SELECT command is set to GPGPU mode of operation)."
       *
       * The same text exists a few rows below for Post Sync Op.
       *
       * The recursive call carries only CS stall and no post-sync bits, so
       * it does not re-enter this branch.
       */
      genX(emit_raw_pipe_control)(brw, PIPE_CONTROL_CS_STALL, NULL, 0, 0);
   }

   /* "Flush Types" workarounds ---------------------------------------------
    * We do these now because they may add post-sync operations or CS stalls.
    */

   if (IS_GFX_VER_BETWEEN(8, 10) &&
       (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
      /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate
       *
       * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or
       *  'Write PS Depth Count' or 'Write Timestamp'."
       *
       * If the caller supplied no buffer, add a write-immediate targeting
       * the context's workaround buffer, and keep the cached post-sync
       * flag copies in sync with the modified flags.
       */
      if (!bo) {
         flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         bo = brw->workaround_bo;
         offset = brw->workaround_bo_offset;
      }
   }

   if (GFX_VERx10 < 75 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
      /* Project: PRE-HSW / Argument: Depth Stall
       *
       * "The following bits must be clear:
       *  - Render Target Cache Flush Enable ([12] of DW1)
       *  - Depth Cache Flush Enable ([0] of DW1)"
       */
      assert(!(flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
                        PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
   }

   if (GFX_VER >= 6 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
      /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):
       *
       *    "This bit must be DISABLED for operations other than writing
       *     PS_DEPTH_COUNT."
       *
       * This seems like nonsense.  An Ivybridge workaround requires us to
       * emit a PIPE_CONTROL with a depth stall and write immediate post-sync
       * operation.  Gfx8+ requires us to emit depth stalls and depth cache
       * flushes together.  So, it's hard to imagine this means anything other
       * than "we originally intended this to be used for PS_DEPTH_COUNT".
       *
       * We ignore the supposed restriction and do nothing.  (This block is
       * intentionally empty; it exists only to document the decision.)
       */
   }

   if (GFX_VERx10 < 75 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) {
      /* Project: PRE-HSW / Argument: Depth Cache Flush
       *
       * "Depth Stall must be clear ([13] of DW1)."
       */
      assert(!(flags & PIPE_CONTROL_DEPTH_STALL));
   }

   if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
                PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
      /* From the PIPE_CONTROL instruction table, bit 12 and bit 1:
       *
       *    "This bit must be DISABLED for End-of-pipe (Read) fences,
       *     PS_DEPTH_COUNT or TIMESTAMP queries."
       *
       * Only the PS_DEPTH_COUNT and TIMESTAMP cases are checked here.
       *
       * TODO: Implement end-of-pipe checking.
       */
      assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT |
                                  PIPE_CONTROL_WRITE_TIMESTAMP)));
   }

   if (GFX_VER < 11 && (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
      /* From the PIPE_CONTROL instruction table, bit 1:
       *
       *    "This bit is ignored if Depth Stall Enable is set.
       *     Further, the render cache is not flushed even if Write Cache
       *     Flush Enable bit is set."
       *
       * We assert that the caller doesn't do this combination, to try and
       * prevent mistakes.  It shouldn't hurt the GPU, though.
       *
       * We skip this check on Gfx11+ as the "Stall and Pixel Scoreboard"
       * and "Render Target Flush" combo is explicitly required for BTI
       * update workarounds.
       */
      assert(!(flags & (PIPE_CONTROL_DEPTH_STALL |
                        PIPE_CONTROL_RENDER_TARGET_FLUSH)));
   }

   /* PIPE_CONTROL page workarounds ------------------------------------- */

   if (IS_GFX_VER_BETWEEN(7, 8) &&
       (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) {
      /* From the PIPE_CONTROL page itself:
       *
       *    "IVB, HSW, BDW
       *     Restriction: Pipe_control with CS-stall bit set must be issued
       *     before a pipe-control command that has the State Cache
       *     Invalidate bit set."
       *
       * NOTE(review): this sets CS stall on the same PIPE_CONTROL rather
       * than emitting a separate one beforehand.
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (GFX_VERx10 == 75) {
      /* From the PIPE_CONTROL page itself:
       *
       *    "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation:
       *     Prior to programming a PIPECONTROL command with any of the RO
       *     cache invalidation bit set, program a PIPECONTROL flush command
       *     with “CS stall” bit and “HDC Flush” bit set."
       *
       * TODO: Actually implement this.  What's an HDC Flush?
       *
       * (Intentionally empty until the TODO is resolved.)
       */
   }

   if (flags & PIPE_CONTROL_FLUSH_LLC) {
      /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):
       *
       *    "Project: ALL
       *     SW must always program Post-Sync Operation to "Write Immediate
       *     Data" when Flush LLC is set."
       *
       * For now, we just require the caller to do it.
       */
      assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE);
   }

   /* "Post-Sync Operation" workarounds -------------------------------- */

   /* Project: All / Argument: Global Snapshot Count Reset [19]
    *
    * "This bit must not be exercised on any product.
    *  Requires stall bit ([20] of DW1) set."
    *
    * We don't use this, so we just assert that it isn't used.  The
    * PIPE_CONTROL instruction page indicates that they intended this
    * as a debug feature and don't think it is useful in production,
    * but it may actually be usable, should we ever want to.
    */
   assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0);

   if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR |
                PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) {
      /* Project: All / Arguments:
       *
       * - Generic Media State Clear [16]
       * - Indirect State Pointers Disable [16]
       *
       *    "Requires stall bit ([20] of DW1) set."
       *
       * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media
       * State Clear) says:
       *
       *    "PIPECONTROL command with “Command Streamer Stall Enable” must be
       *     programmed prior to programming a PIPECONTROL command with "Media
       *     State Clear" set in GPGPU mode of operation"
       *
       * We satisfy the first requirement by setting the CS stall bit on
       * this PIPE_CONTROL.  The second statement is treated as a subset of
       * that rule, so nothing further is done for it.
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (flags & PIPE_CONTROL_STORE_DATA_INDEX) {
      /* Project: All / Argument: Store Data Index
       *
       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
       *  than '0'."
       *
       * For now, we just assert that the caller does this.  We might want to
       * automatically add a write to the workaround BO...
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (flags & PIPE_CONTROL_SYNC_GFDT) {
      /* Project: All / Argument: Sync GFDT
       *
       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
       *  than '0' or 0x2520[13] must be set."
       *
       * For now, we just assert that the caller does this.  Only the
       * post-sync half of the condition is checked; the register
       * alternative is not considered.
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (IS_GFX_VERx10_BETWEEN(60, 75) &&
       (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
      /* Project: SNB, IVB, HSW / Argument: TLB inv
       *
       * "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1)
       *  must be set to something other than '0'."
       *
       * For now, we just assert that the caller does this.
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (GFX_VER >= 7 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
      /* Project: IVB+ / Argument: TLB inv
       *
       *    "Requires stall bit ([20] of DW1) set."
       *
       * Also, from the PIPE_CONTROL instruction table:
       *
       *    "Project: SKL+
       *     Post Sync Operation or CS stall must be set to ensure a TLB
       *     invalidation occurs.  Otherwise no cycle will occur to the TLB
       *     cache to invalidate."
       *
       * Setting the CS stall bit satisfies both statements, so that is
       * all we do here.
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (GFX_VER == 9 && devinfo->gt == 4) {
      /* TODO: The big Skylake GT4 post sync op workaround */
   }

   /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */

   if (IS_COMPUTE_PIPELINE(brw)) {
      if (GFX_VER >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) {
         /* Project: SKL+ / Argument: Tex Invalidate
          * "Requires stall bit ([20] of DW) set for all GPGPU Workloads."
          */
         flags |= PIPE_CONTROL_CS_STALL;
      }

      if (GFX_VER == 8 && (post_sync_flags ||
                           (flags & (PIPE_CONTROL_NOTIFY_ENABLE |
                                     PIPE_CONTROL_DEPTH_STALL |
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                     PIPE_CONTROL_DATA_CACHE_FLUSH)))) {
         /* Project: BDW / Arguments:
          *
          * - LRI Post Sync Operation   [23]
          * - Post Sync Op              [15:14]
          * - Notify En                 [8]
          * - Depth Stall               [13]
          * - Render Target Cache Flush [12]
          * - Depth Cache Flush         [0]
          * - DC Flush Enable           [5]
          *
          *    "Requires stall bit ([20] of DW) set for all GPGPU and Media
          *     Workloads."
          *
          * (The docs have separate table rows for each bit, with essentially
          * the same workaround text.  We've combined them here.)
          */
         flags |= PIPE_CONTROL_CS_STALL;

         /* Also, from the PIPE_CONTROL instruction table, bit 20:
          *
          *    "Project: BDW
          *     This bit must be always set when PIPE_CONTROL command is
          *     programmed by GPGPU and MEDIA workloads, except for the cases
          *     when only Read Only Cache Invalidation bits are set (State
          *     Cache Invalidation Enable, Instruction cache Invalidation
          *     Enable, Texture Cache Invalidation Enable, Constant Cache
          *     Invalidation Enable). This is to WA FFDOP CG issue, this WA
          *     need not implemented when FF_DOP_CG is disable via "Fixed
          *     Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."
          *
          * It sounds like we could avoid CS stalls in some cases, but we
          * don't currently bother.  This list isn't exactly the list above,
          * either...
          */
      }
   }

   /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
    *
    * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
    *  only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
    *
    * Note that the kernel does CS stalls between batches, so we only need
    * to count them within a batch.  We currently naively count every 4, and
    * don't skip the ones with only read-cache-invalidate bits set.  This
    * may or may not be a problem...
    */
   if (GFX_VERx10 == 70) {
      if (flags & PIPE_CONTROL_CS_STALL) {
         /* If we're doing a CS stall, reset the counter and carry on. */
         brw->pipe_controls_since_last_cs_stall = 0;
      }

      /* If this is the fourth pipe control without a CS stall, do one now.
       *
       * Note that the increment below also runs when this PIPE_CONTROL
       * already had a CS stall, so the counter is 1 (not 0) after a
       * stalling PIPE_CONTROL.  That only makes the forced stall happen
       * one command earlier than strictly required.
       */
      if (++brw->pipe_controls_since_last_cs_stall == 4) {
         brw->pipe_controls_since_last_cs_stall = 0;
         flags |= PIPE_CONTROL_CS_STALL;
      }
   }

   /* "Stall" workarounds ----------------------------------------------
    * These have to come after the earlier ones because we may have added
    * some additional CS stalls above.
    */

   if (GFX_VER < 9 && (flags & PIPE_CONTROL_CS_STALL)) {
      /* Project: PRE-SKL, VLV, CHV
       *
       * "[All Stepping][All SKUs]:
       *
       *  One of the following must also be set:
       *
       *  - Render Target Cache Flush Enable ([12] of DW1)
       *  - Depth Cache Flush Enable ([0] of DW1)
       *  - Stall at Pixel Scoreboard ([1] of DW1)
       *  - Depth Stall ([13] of DW1)
       *  - Post-Sync Operation ([13] of DW1)
       *  - DC Flush Enable ([5] of DW1)"
       *
       * If we don't already have one of those bits set, we choose to add
       * "Stall at Pixel Scoreboard".  Some of the other bits require a
       * CS stall as a workaround (see above), which would send us into
       * an infinite recursion of PIPE_CONTROLs.  "Stall at Pixel Scoreboard"
       * appears to be safe, so we choose that.
       */
      /* Any of these bits already satisfies the requirement quoted above;
       * the three WRITE_* flags stand for "Post-Sync Operation".
       */
      const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
                               PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                               PIPE_CONTROL_WRITE_IMMEDIATE |
                               PIPE_CONTROL_WRITE_DEPTH_COUNT |
                               PIPE_CONTROL_WRITE_TIMESTAMP |
                               PIPE_CONTROL_STALL_AT_SCOREBOARD |
                               PIPE_CONTROL_DEPTH_STALL |
                               PIPE_CONTROL_DATA_CACHE_FLUSH;
      if (!(flags & wa_bits))
         flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
   }

   /* Emit ---------------------------------------------------------------
    * Pack the final flag set into a single PIPE_CONTROL.  Fields that only
    * exist on some generations are guarded by the preprocessor.
    */

   brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) {
   #if GFX_VER >= 9
      /* Always programmed to 0, even if PIPE_CONTROL_FLUSH_LLC was passed. */
      pc.FlushLLC = 0;
   #endif
   #if GFX_VER >= 7
      /* Always NoLRIOperation, regardless of PIPE_CONTROL_LRI_POST_SYNC_OP. */
      pc.LRIPostSyncOperation = NoLRIOperation;
      pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
      pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
   #endif
   #if GFX_VER >= 6
      /* Always 0, regardless of PIPE_CONTROL_STORE_DATA_INDEX. */
      pc.StoreDataIndex = 0;
      pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
      pc.GlobalSnapshotCountReset =
         flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
      pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;
      pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
      pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;
      pc.RenderTargetCacheFlushEnable =
         flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
      pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
      pc.StateCacheInvalidationEnable =
         flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
      pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
      pc.ConstantCacheInvalidationEnable =
         flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
   #else
      /* Before Gfx6 the render target flush request maps to the single
       * WriteCacheFlush field.
       */
      pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
   #endif
      pc.PostSyncOperation = flags_to_post_sync_op(flags);
      pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
      pc.InstructionCacheInvalidateEnable =
         flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
      pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
   #if GFX_VERx10 >= 45
      pc.IndirectStatePointersDisable =
         flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
   #endif
   #if GFX_VER >= 6
      pc.TextureCacheInvalidationEnable =
         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
   #elif GFX_VER == 5 || GFX_VERx10 == 45
      /* On these generations the same flag drives the field named
       * TextureCacheFlushEnable instead.
       */
      pc.TextureCacheFlushEnable =
         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
   #endif
      /* bo may still be NULL here; ggtt_bo() is presumably expected to cope
       * with that -- TODO confirm against its definition.
       */
      pc.Address = ggtt_bo(bo, offset);
      /* Before Gfx7, select the GGTT destination address type explicitly
       * whenever a buffer is being written.
       */
      if (GFX_VER < 7 && bo)
         pc.DestinationAddressType = DAT_GGTT;
      pc.ImmediateData = imm;
   }
}
515