101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2011 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg/**
2501e04c3fSmrg * @file brw_vue_map.c
2601e04c3fSmrg *
2701e04c3fSmrg * This file computes the "VUE map" for a (non-fragment) shader stage, which
2801e04c3fSmrg * describes the layout of its output varyings.  The VUE map is used to match
2901e04c3fSmrg * outputs from one stage with the inputs of the next.
3001e04c3fSmrg *
3101e04c3fSmrg * Largely, varyings can be placed however we like - producers/consumers simply
3201e04c3fSmrg * have to agree on the layout.  However, there is also a "VUE Header" that
3301e04c3fSmrg * prescribes a fixed-layout for items that interact with fixed function
3401e04c3fSmrg * hardware, such as the clipper and rasterizer.
3501e04c3fSmrg *
3601e04c3fSmrg * Authors:
3701e04c3fSmrg *   Paul Berry <stereotype441@gmail.com>
3801e04c3fSmrg *   Chris Forbes <chrisf@ijw.co.nz>
3901e04c3fSmrg *   Eric Anholt <eric@anholt.net>
4001e04c3fSmrg */
4101e04c3fSmrg
4201e04c3fSmrg
4301e04c3fSmrg#include "brw_compiler.h"
441463c08dSmrg#include "dev/intel_debug.h"
4501e04c3fSmrg
4601e04c3fSmrgstatic inline void
4701e04c3fSmrgassign_vue_slot(struct brw_vue_map *vue_map, int varying, int slot)
4801e04c3fSmrg{
4901e04c3fSmrg   /* Make sure this varying hasn't been assigned a slot already */
5001e04c3fSmrg   assert (vue_map->varying_to_slot[varying] == -1);
5101e04c3fSmrg
5201e04c3fSmrg   vue_map->varying_to_slot[varying] = slot;
5301e04c3fSmrg   vue_map->slot_to_varying[slot] = varying;
5401e04c3fSmrg}
5501e04c3fSmrg
5601e04c3fSmrg/**
5701e04c3fSmrg * Compute the VUE map for a shader stage.
5801e04c3fSmrg */
5901e04c3fSmrgvoid
601463c08dSmrgbrw_compute_vue_map(const struct intel_device_info *devinfo,
6101e04c3fSmrg                    struct brw_vue_map *vue_map,
6201e04c3fSmrg                    uint64_t slots_valid,
631463c08dSmrg                    bool separate,
641463c08dSmrg                    uint32_t pos_slots)
6501e04c3fSmrg{
6601e04c3fSmrg   /* Keep using the packed/contiguous layout on old hardware - we only need
6701e04c3fSmrg    * the SSO layout when using geometry/tessellation shaders or 32 FS input
6801e04c3fSmrg    * varyings, which only exist on Gen >= 6.  It's also a bit more efficient.
6901e04c3fSmrg    */
701463c08dSmrg   if (devinfo->ver < 6)
7101e04c3fSmrg      separate = false;
7201e04c3fSmrg
7301e04c3fSmrg   if (separate) {
7401e04c3fSmrg      /* In SSO mode, we don't know whether the adjacent stage will
7501e04c3fSmrg       * read/write gl_ClipDistance, which has a fixed slot location.
7601e04c3fSmrg       * We have to assume the worst and reserve a slot for it, or else
7701e04c3fSmrg       * the rest of our varyings will be off by a slot.
7801e04c3fSmrg       *
7901e04c3fSmrg       * Note that we don't have to worry about COL/BFC, as those built-in
8001e04c3fSmrg       * variables only exist in legacy GL, which only supports VS and FS.
8101e04c3fSmrg       */
8201e04c3fSmrg      slots_valid |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
8301e04c3fSmrg      slots_valid |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
8401e04c3fSmrg   }
8501e04c3fSmrg
8601e04c3fSmrg   vue_map->slots_valid = slots_valid;
8701e04c3fSmrg   vue_map->separate = separate;
8801e04c3fSmrg
8901e04c3fSmrg   /* gl_Layer and gl_ViewportIndex don't get their own varying slots -- they
9001e04c3fSmrg    * are stored in the first VUE slot (VARYING_SLOT_PSIZ).
9101e04c3fSmrg    */
9201e04c3fSmrg   slots_valid &= ~(VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
9301e04c3fSmrg
9401e04c3fSmrg   /* Make sure that the values we store in vue_map->varying_to_slot and
9501e04c3fSmrg    * vue_map->slot_to_varying won't overflow the signed chars that are used
9601e04c3fSmrg    * to store them.  Note that since vue_map->slot_to_varying sometimes holds
9701e04c3fSmrg    * values equal to BRW_VARYING_SLOT_COUNT, we need to ensure that
9801e04c3fSmrg    * BRW_VARYING_SLOT_COUNT is <= 127, not 128.
9901e04c3fSmrg    */
10001e04c3fSmrg   STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 127);
10101e04c3fSmrg
10201e04c3fSmrg   for (int i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) {
10301e04c3fSmrg      vue_map->varying_to_slot[i] = -1;
10401e04c3fSmrg      vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_PAD;
10501e04c3fSmrg   }
10601e04c3fSmrg
10701e04c3fSmrg   int slot = 0;
10801e04c3fSmrg
10901e04c3fSmrg   /* VUE header: format depends on chip generation and whether clipping is
11001e04c3fSmrg    * enabled.
11101e04c3fSmrg    *
11201e04c3fSmrg    * See the Sandybridge PRM, Volume 2 Part 1, section 1.5.1 (page 30),
11301e04c3fSmrg    * "Vertex URB Entry (VUE) Formats" which describes the VUE header layout.
11401e04c3fSmrg    */
1151463c08dSmrg   if (devinfo->ver < 6) {
11601e04c3fSmrg      /* There are 8 dwords in VUE header pre-Ironlake:
11701e04c3fSmrg       * dword 0-3 is indices, point width, clip flags.
11801e04c3fSmrg       * dword 4-7 is ndc position
11901e04c3fSmrg       * dword 8-11 is the first vertex data.
12001e04c3fSmrg       *
12101e04c3fSmrg       * On Ironlake the VUE header is nominally 20 dwords, but the hardware
1221463c08dSmrg       * will accept the same header layout as Gfx4 [and should be a bit faster]
12301e04c3fSmrg       */
12401e04c3fSmrg      assign_vue_slot(vue_map, VARYING_SLOT_PSIZ, slot++);
12501e04c3fSmrg      assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC, slot++);
12601e04c3fSmrg      assign_vue_slot(vue_map, VARYING_SLOT_POS, slot++);
12701e04c3fSmrg   } else {
12801e04c3fSmrg      /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
12901e04c3fSmrg       * dword 0-3 of the header is indices, point width, clip flags.
13001e04c3fSmrg       * dword 4-7 is the 4D space position
13101e04c3fSmrg       * dword 8-15 of the vertex header is the user clip distance if
13201e04c3fSmrg       * enabled.
13301e04c3fSmrg       * dword 8-11 or 16-19 is the first vertex element data we fill.
13401e04c3fSmrg       */
13501e04c3fSmrg      assign_vue_slot(vue_map, VARYING_SLOT_PSIZ, slot++);
13601e04c3fSmrg      assign_vue_slot(vue_map, VARYING_SLOT_POS, slot++);
1371463c08dSmrg
1381463c08dSmrg      /* When using Primitive Replication, multiple slots are used for storing
1391463c08dSmrg       * positions for each view.
1401463c08dSmrg       */
1411463c08dSmrg      assert(pos_slots >= 1);
1421463c08dSmrg      if (pos_slots > 1) {
1431463c08dSmrg         for (int i = 1; i < pos_slots; i++) {
1441463c08dSmrg            vue_map->slot_to_varying[slot++] = VARYING_SLOT_POS;
1451463c08dSmrg         }
1461463c08dSmrg      }
1471463c08dSmrg
14801e04c3fSmrg      if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0))
14901e04c3fSmrg         assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0, slot++);
15001e04c3fSmrg      if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1))
15101e04c3fSmrg         assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1, slot++);
15201e04c3fSmrg
1531463c08dSmrg      /* Vertex URB Formats table says: "Vertex Header shall be padded at the
1541463c08dSmrg       * end so that the header ends on a 32-byte boundary".
1551463c08dSmrg       */
1561463c08dSmrg      slot += slot % 2;
1571463c08dSmrg
15801e04c3fSmrg      /* front and back colors need to be consecutive so that we can use
15901e04c3fSmrg       * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing
16001e04c3fSmrg       * two-sided color.
16101e04c3fSmrg       */
16201e04c3fSmrg      if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL0))
16301e04c3fSmrg         assign_vue_slot(vue_map, VARYING_SLOT_COL0, slot++);
16401e04c3fSmrg      if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC0))
16501e04c3fSmrg         assign_vue_slot(vue_map, VARYING_SLOT_BFC0, slot++);
16601e04c3fSmrg      if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL1))
16701e04c3fSmrg         assign_vue_slot(vue_map, VARYING_SLOT_COL1, slot++);
16801e04c3fSmrg      if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC1))
16901e04c3fSmrg         assign_vue_slot(vue_map, VARYING_SLOT_BFC1, slot++);
17001e04c3fSmrg   }
17101e04c3fSmrg
17201e04c3fSmrg   /* The hardware doesn't care about the rest of the vertex outputs, so we
17301e04c3fSmrg    * can assign them however we like.  For normal programs, we simply assign
17401e04c3fSmrg    * them contiguously.
17501e04c3fSmrg    *
17601e04c3fSmrg    * For separate shader pipelines, we first assign built-in varyings
17701e04c3fSmrg    * contiguous slots.  This works because ARB_separate_shader_objects
17801e04c3fSmrg    * requires that all shaders have matching built-in varying interface
17901e04c3fSmrg    * blocks.  Next, we assign generic varyings based on their location
18001e04c3fSmrg    * (either explicit or linker assigned).  This guarantees a fixed layout.
18101e04c3fSmrg    *
18201e04c3fSmrg    * We generally don't need to assign a slot for VARYING_SLOT_CLIP_VERTEX,
18301e04c3fSmrg    * since it's encoded as the clip distances by emit_clip_distances().
18401e04c3fSmrg    * However, it may be output by transform feedback, and we'd rather not
18501e04c3fSmrg    * recompute state when TF changes, so we just always include it.
18601e04c3fSmrg    */
18701e04c3fSmrg   uint64_t builtins = slots_valid & BITFIELD64_MASK(VARYING_SLOT_VAR0);
18801e04c3fSmrg   while (builtins != 0) {
18901e04c3fSmrg      const int varying = ffsll(builtins) - 1;
19001e04c3fSmrg      if (vue_map->varying_to_slot[varying] == -1) {
19101e04c3fSmrg         assign_vue_slot(vue_map, varying, slot++);
19201e04c3fSmrg      }
19301e04c3fSmrg      builtins &= ~BITFIELD64_BIT(varying);
19401e04c3fSmrg   }
19501e04c3fSmrg
19601e04c3fSmrg   const int first_generic_slot = slot;
19701e04c3fSmrg   uint64_t generics = slots_valid & ~BITFIELD64_MASK(VARYING_SLOT_VAR0);
19801e04c3fSmrg   while (generics != 0) {
19901e04c3fSmrg      const int varying = ffsll(generics) - 1;
20001e04c3fSmrg      if (separate) {
20101e04c3fSmrg         slot = first_generic_slot + varying - VARYING_SLOT_VAR0;
20201e04c3fSmrg      }
20301e04c3fSmrg      assign_vue_slot(vue_map, varying, slot++);
20401e04c3fSmrg      generics &= ~BITFIELD64_BIT(varying);
20501e04c3fSmrg   }
20601e04c3fSmrg
20701e04c3fSmrg   vue_map->num_slots = slot;
20801e04c3fSmrg   vue_map->num_per_vertex_slots = 0;
20901e04c3fSmrg   vue_map->num_per_patch_slots = 0;
21001e04c3fSmrg}
21101e04c3fSmrg
21201e04c3fSmrg/**
21301e04c3fSmrg * Compute the VUE map for tessellation control shader outputs and
21401e04c3fSmrg * tessellation evaluation shader inputs.
21501e04c3fSmrg */
21601e04c3fSmrgvoid
21701e04c3fSmrgbrw_compute_tess_vue_map(struct brw_vue_map *vue_map,
21801e04c3fSmrg                         uint64_t vertex_slots,
21901e04c3fSmrg                         uint32_t patch_slots)
22001e04c3fSmrg{
22101e04c3fSmrg   /* I don't think anything actually uses this... */
22201e04c3fSmrg   vue_map->slots_valid = vertex_slots;
22301e04c3fSmrg
22401e04c3fSmrg   /* separate isn't really meaningful, but make sure it's initialized */
22501e04c3fSmrg   vue_map->separate = false;
22601e04c3fSmrg
22701e04c3fSmrg   vertex_slots &= ~(VARYING_BIT_TESS_LEVEL_OUTER |
22801e04c3fSmrg                     VARYING_BIT_TESS_LEVEL_INNER);
22901e04c3fSmrg
23001e04c3fSmrg   /* Make sure that the values we store in vue_map->varying_to_slot and
23101e04c3fSmrg    * vue_map->slot_to_varying won't overflow the signed chars that are used
23201e04c3fSmrg    * to store them.  Note that since vue_map->slot_to_varying sometimes holds
23301e04c3fSmrg    * values equal to VARYING_SLOT_TESS_MAX , we need to ensure that
23401e04c3fSmrg    * VARYING_SLOT_TESS_MAX is <= 127, not 128.
23501e04c3fSmrg    */
23601e04c3fSmrg   STATIC_ASSERT(VARYING_SLOT_TESS_MAX <= 127);
23701e04c3fSmrg
23801e04c3fSmrg   for (int i = 0; i < VARYING_SLOT_TESS_MAX ; ++i) {
23901e04c3fSmrg      vue_map->varying_to_slot[i] = -1;
24001e04c3fSmrg      vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_PAD;
24101e04c3fSmrg   }
24201e04c3fSmrg
24301e04c3fSmrg   int slot = 0;
24401e04c3fSmrg
24501e04c3fSmrg   /* The first 8 DWords are reserved for the "Patch Header".
24601e04c3fSmrg    *
24701e04c3fSmrg    * VARYING_SLOT_TESS_LEVEL_OUTER / INNER live here, but the exact layout
24801e04c3fSmrg    * depends on the domain type.  They might not be in slots 0 and 1 as
24901e04c3fSmrg    * described here, but pretending they're separate allows us to uniquely
25001e04c3fSmrg    * identify them by distinct slot locations.
25101e04c3fSmrg    */
25201e04c3fSmrg   assign_vue_slot(vue_map, VARYING_SLOT_TESS_LEVEL_INNER, slot++);
25301e04c3fSmrg   assign_vue_slot(vue_map, VARYING_SLOT_TESS_LEVEL_OUTER, slot++);
25401e04c3fSmrg
25501e04c3fSmrg   /* first assign per-patch varyings */
25601e04c3fSmrg   while (patch_slots != 0) {
25701e04c3fSmrg      const int varying = ffsll(patch_slots) - 1;
25801e04c3fSmrg      if (vue_map->varying_to_slot[varying + VARYING_SLOT_PATCH0] == -1) {
25901e04c3fSmrg         assign_vue_slot(vue_map, varying + VARYING_SLOT_PATCH0, slot++);
26001e04c3fSmrg      }
26101e04c3fSmrg      patch_slots &= ~BITFIELD64_BIT(varying);
26201e04c3fSmrg   }
26301e04c3fSmrg
26401e04c3fSmrg   /* apparently, including the patch header... */
26501e04c3fSmrg   vue_map->num_per_patch_slots = slot;
26601e04c3fSmrg
26701e04c3fSmrg   /* then assign per-vertex varyings for each vertex in our patch */
26801e04c3fSmrg   while (vertex_slots != 0) {
26901e04c3fSmrg      const int varying = ffsll(vertex_slots) - 1;
27001e04c3fSmrg      if (vue_map->varying_to_slot[varying] == -1) {
27101e04c3fSmrg         assign_vue_slot(vue_map, varying, slot++);
27201e04c3fSmrg      }
27301e04c3fSmrg      vertex_slots &= ~BITFIELD64_BIT(varying);
27401e04c3fSmrg   }
27501e04c3fSmrg
27601e04c3fSmrg   vue_map->num_per_vertex_slots = slot - vue_map->num_per_patch_slots;
27701e04c3fSmrg   vue_map->num_slots = slot;
27801e04c3fSmrg}
27901e04c3fSmrg
28001e04c3fSmrgstatic const char *
2811463c08dSmrgvarying_name(brw_varying_slot slot, gl_shader_stage stage)
28201e04c3fSmrg{
28301e04c3fSmrg   assume(slot < BRW_VARYING_SLOT_COUNT);
28401e04c3fSmrg
28501e04c3fSmrg   if (slot < VARYING_SLOT_MAX)
2861463c08dSmrg      return gl_varying_slot_name_for_stage((gl_varying_slot)slot, stage);
28701e04c3fSmrg
28801e04c3fSmrg   static const char *brw_names[] = {
28901e04c3fSmrg      [BRW_VARYING_SLOT_NDC - VARYING_SLOT_MAX] = "BRW_VARYING_SLOT_NDC",
29001e04c3fSmrg      [BRW_VARYING_SLOT_PAD - VARYING_SLOT_MAX] = "BRW_VARYING_SLOT_PAD",
29101e04c3fSmrg      [BRW_VARYING_SLOT_PNTC - VARYING_SLOT_MAX] = "BRW_VARYING_SLOT_PNTC",
29201e04c3fSmrg   };
29301e04c3fSmrg
29401e04c3fSmrg   return brw_names[slot - VARYING_SLOT_MAX];
29501e04c3fSmrg}
29601e04c3fSmrg
29701e04c3fSmrgvoid
2981463c08dSmrgbrw_print_vue_map(FILE *fp, const struct brw_vue_map *vue_map,
2991463c08dSmrg                  gl_shader_stage stage)
30001e04c3fSmrg{
30101e04c3fSmrg   if (vue_map->num_per_vertex_slots > 0 || vue_map->num_per_patch_slots > 0) {
30201e04c3fSmrg      fprintf(fp, "PUE map (%d slots, %d/patch, %d/vertex, %s)\n",
30301e04c3fSmrg              vue_map->num_slots,
30401e04c3fSmrg              vue_map->num_per_patch_slots,
30501e04c3fSmrg              vue_map->num_per_vertex_slots,
30601e04c3fSmrg              vue_map->separate ? "SSO" : "non-SSO");
30701e04c3fSmrg      for (int i = 0; i < vue_map->num_slots; i++) {
30801e04c3fSmrg         if (vue_map->slot_to_varying[i] >= VARYING_SLOT_PATCH0) {
30901e04c3fSmrg            fprintf(fp, "  [%d] VARYING_SLOT_PATCH%d\n", i,
31001e04c3fSmrg                    vue_map->slot_to_varying[i] - VARYING_SLOT_PATCH0);
31101e04c3fSmrg         } else {
31201e04c3fSmrg            fprintf(fp, "  [%d] %s\n", i,
3131463c08dSmrg                    varying_name(vue_map->slot_to_varying[i], stage));
31401e04c3fSmrg         }
31501e04c3fSmrg      }
31601e04c3fSmrg   } else {
31701e04c3fSmrg      fprintf(fp, "VUE map (%d slots, %s)\n",
31801e04c3fSmrg              vue_map->num_slots, vue_map->separate ? "SSO" : "non-SSO");
31901e04c3fSmrg      for (int i = 0; i < vue_map->num_slots; i++) {
32001e04c3fSmrg         fprintf(fp, "  [%d] %s\n", i,
3211463c08dSmrg                 varying_name(vue_map->slot_to_varying[i], stage));
32201e04c3fSmrg      }
32301e04c3fSmrg   }
32401e04c3fSmrg   fprintf(fp, "\n");
32501e04c3fSmrg}
326