1b8e80941Smrg/*
2b8e80941Smrg * Copyright (c) 2011 Intel Corporation
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21b8e80941Smrg * IN THE SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg#include <stdlib.h>
25b8e80941Smrg#include <math.h>
26b8e80941Smrg
27b8e80941Smrg#include "util/macros.h"
28b8e80941Smrg#include "main/macros.h"
29b8e80941Smrg#include "compiler/shader_enums.h"
30b8e80941Smrg
31b8e80941Smrg#include "gen_l3_config.h"
32b8e80941Smrg
33b8e80941Smrg/**
34b8e80941Smrg * The following diagram shows how we partition the URB:
35b8e80941Smrg *
36b8e80941Smrg *        16kb or 32kb               Rest of the URB space
37b8e80941Smrg *   __________-__________   _________________-_________________
38b8e80941Smrg *  /                     \ /                                   \
39b8e80941Smrg * +-------------------------------------------------------------+
40b8e80941Smrg * |  VS/HS/DS/GS/FS Push  |           VS/HS/DS/GS URB           |
41b8e80941Smrg * |       Constants       |               Entries               |
42b8e80941Smrg * +-------------------------------------------------------------+
43b8e80941Smrg *
44b8e80941Smrg * Push constants must be stored at the beginning of the URB space,
45b8e80941Smrg * while URB entries can be stored anywhere.  We choose to lay them
46b8e80941Smrg * out in pipeline order (VS -> HS -> DS -> GS).
47b8e80941Smrg */
48b8e80941Smrg
49b8e80941Smrg/**
50b8e80941Smrg * Decide how to partition the URB among the various stages.
51b8e80941Smrg *
52b8e80941Smrg * \param[in] push_constant_bytes - space allocate for push constants.
53b8e80941Smrg * \param[in] urb_size_bytes - total size of the URB (from L3 config).
54b8e80941Smrg * \param[in] tess_present - are tessellation shaders active?
55b8e80941Smrg * \param[in] gs_present - are geometry shaders active?
56b8e80941Smrg * \param[in] entry_size - the URB entry size (from the shader compiler)
57b8e80941Smrg * \param[out] entries - the number of URB entries for each stage
58b8e80941Smrg * \param[out] start - the starting offset for each stage
59b8e80941Smrg */
60b8e80941Smrgvoid
61b8e80941Smrggen_get_urb_config(const struct gen_device_info *devinfo,
62b8e80941Smrg                   unsigned push_constant_bytes, unsigned urb_size_bytes,
63b8e80941Smrg                   bool tess_present, bool gs_present,
64b8e80941Smrg                   const unsigned entry_size[4],
65b8e80941Smrg                   unsigned entries[4], unsigned start[4])
66b8e80941Smrg{
67b8e80941Smrg   const bool active[4] = { true, tess_present, tess_present, gs_present };
68b8e80941Smrg
69b8e80941Smrg   /* URB allocations must be done in 8k chunks. */
70b8e80941Smrg   const unsigned chunk_size_bytes = 8192;
71b8e80941Smrg
72b8e80941Smrg   const unsigned push_constant_chunks =
73b8e80941Smrg      push_constant_bytes / chunk_size_bytes;
74b8e80941Smrg   const unsigned urb_chunks = urb_size_bytes / chunk_size_bytes;
75b8e80941Smrg
76b8e80941Smrg   /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
77b8e80941Smrg    *
78b8e80941Smrg    *     VS Number of URB Entries must be divisible by 8 if the VS URB Entry
79b8e80941Smrg    *     Allocation Size is less than 9 512-bit URB entries.
80b8e80941Smrg    *
81b8e80941Smrg    * Similar text exists for HS, DS and GS.
82b8e80941Smrg    */
83b8e80941Smrg   unsigned granularity[4];
84b8e80941Smrg   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
85b8e80941Smrg      granularity[i] = (entry_size[i] < 9) ? 8 : 1;
86b8e80941Smrg   }
87b8e80941Smrg
88b8e80941Smrg   unsigned min_entries[4] = {
89b8e80941Smrg      /* VS has a lower limit on the number of URB entries.
90b8e80941Smrg       *
91b8e80941Smrg       * From the Broadwell PRM, 3DSTATE_URB_VS instruction:
92b8e80941Smrg       * "When tessellation is enabled, the VS Number of URB Entries must be
93b8e80941Smrg       *  greater than or equal to 192."
94b8e80941Smrg       */
95b8e80941Smrg      [MESA_SHADER_VERTEX] = tess_present && devinfo->gen == 8 ?
96b8e80941Smrg         192 : devinfo->urb.min_entries[MESA_SHADER_VERTEX],
97b8e80941Smrg
98b8e80941Smrg      /* There are two constraints on the minimum amount of URB space we can
99b8e80941Smrg       * allocate:
100b8e80941Smrg       *
101b8e80941Smrg       * (1) We need room for at least 2 URB entries, since we always operate
102b8e80941Smrg       * the GS in DUAL_OBJECT mode.
103b8e80941Smrg       *
104b8e80941Smrg       * (2) We can't allocate less than nr_gs_entries_granularity.
105b8e80941Smrg       */
106b8e80941Smrg      [MESA_SHADER_GEOMETRY] = gs_present ? 2 : 0,
107b8e80941Smrg
108b8e80941Smrg      [MESA_SHADER_TESS_CTRL] = tess_present ? 1 : 0,
109b8e80941Smrg
110b8e80941Smrg      [MESA_SHADER_TESS_EVAL] = tess_present ?
111b8e80941Smrg         devinfo->urb.min_entries[MESA_SHADER_TESS_EVAL] : 0,
112b8e80941Smrg   };
113b8e80941Smrg
114b8e80941Smrg   /* Min VS Entries isn't a multiple of 8 on Cherryview/Broxton; round up.
115b8e80941Smrg    * Round them all up.
116b8e80941Smrg    */
117b8e80941Smrg   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
118b8e80941Smrg      min_entries[i] = ALIGN(min_entries[i], granularity[i]);
119b8e80941Smrg   }
120b8e80941Smrg
121b8e80941Smrg   unsigned entry_size_bytes[4];
122b8e80941Smrg   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
123b8e80941Smrg      entry_size_bytes[i] = 64 * entry_size[i];
124b8e80941Smrg   }
125b8e80941Smrg
126b8e80941Smrg   /* Initially, assign each stage the minimum amount of URB space it needs,
127b8e80941Smrg    * and make a note of how much additional space it "wants" (the amount of
128b8e80941Smrg    * additional space it could actually make use of).
129b8e80941Smrg    */
130b8e80941Smrg   unsigned chunks[4];
131b8e80941Smrg   unsigned wants[4];
132b8e80941Smrg   unsigned total_needs = push_constant_chunks;
133b8e80941Smrg   unsigned total_wants = 0;
134b8e80941Smrg
135b8e80941Smrg   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
136b8e80941Smrg      if (active[i]) {
137b8e80941Smrg         chunks[i] = DIV_ROUND_UP(min_entries[i] * entry_size_bytes[i],
138b8e80941Smrg                                  chunk_size_bytes);
139b8e80941Smrg
140b8e80941Smrg         wants[i] =
141b8e80941Smrg            DIV_ROUND_UP(devinfo->urb.max_entries[i] * entry_size_bytes[i],
142b8e80941Smrg                         chunk_size_bytes) - chunks[i];
143b8e80941Smrg      } else {
144b8e80941Smrg         chunks[i] = 0;
145b8e80941Smrg         wants[i] = 0;
146b8e80941Smrg      }
147b8e80941Smrg
148b8e80941Smrg      total_needs += chunks[i];
149b8e80941Smrg      total_wants += wants[i];
150b8e80941Smrg   }
151b8e80941Smrg
152b8e80941Smrg   assert(total_needs <= urb_chunks);
153b8e80941Smrg
154b8e80941Smrg   /* Mete out remaining space (if any) in proportion to "wants". */
155b8e80941Smrg   unsigned remaining_space = MIN2(urb_chunks - total_needs, total_wants);
156b8e80941Smrg
157b8e80941Smrg   if (remaining_space > 0) {
158b8e80941Smrg      for (int i = MESA_SHADER_VERTEX;
159b8e80941Smrg           total_wants > 0 && i <= MESA_SHADER_TESS_EVAL; i++) {
160b8e80941Smrg         unsigned additional = (unsigned)
161b8e80941Smrg            roundf(wants[i] * (((float) remaining_space) / total_wants));
162b8e80941Smrg         chunks[i] += additional;
163b8e80941Smrg         remaining_space -= additional;
164b8e80941Smrg         total_wants -= wants[i];
165b8e80941Smrg      }
166b8e80941Smrg
167b8e80941Smrg      chunks[MESA_SHADER_GEOMETRY] += remaining_space;
168b8e80941Smrg   }
169b8e80941Smrg
170b8e80941Smrg   /* Sanity check that we haven't over-allocated. */
171b8e80941Smrg   unsigned total_chunks = push_constant_chunks;
172b8e80941Smrg   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
173b8e80941Smrg      total_chunks += chunks[i];
174b8e80941Smrg   }
175b8e80941Smrg   assert(total_chunks <= urb_chunks);
176b8e80941Smrg
177b8e80941Smrg   /* Finally, compute the number of entries that can fit in the space
178b8e80941Smrg    * allocated to each stage.
179b8e80941Smrg    */
180b8e80941Smrg   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
181b8e80941Smrg      entries[i] = chunks[i] * chunk_size_bytes / entry_size_bytes[i];
182b8e80941Smrg
183b8e80941Smrg      /* Since we rounded up when computing wants[], this may be slightly
184b8e80941Smrg       * more than the maximum allowed amount, so correct for that.
185b8e80941Smrg       */
186b8e80941Smrg      entries[i] = MIN2(entries[i], devinfo->urb.max_entries[i]);
187b8e80941Smrg
188b8e80941Smrg      /* Ensure that we program a multiple of the granularity. */
189b8e80941Smrg      entries[i] = ROUND_DOWN_TO(entries[i], granularity[i]);
190b8e80941Smrg
191b8e80941Smrg      /* Finally, sanity check to make sure we have at least the minimum
192b8e80941Smrg       * number of entries needed for each stage.
193b8e80941Smrg       */
194b8e80941Smrg      assert(entries[i] >= min_entries[i]);
195b8e80941Smrg   }
196b8e80941Smrg
197b8e80941Smrg   /* Lay out the URB in pipeline order: push constants, VS, HS, DS, GS. */
198b8e80941Smrg   int next = push_constant_chunks;
199b8e80941Smrg   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
200b8e80941Smrg      if (entries[i]) {
201b8e80941Smrg         start[i] = next;
202b8e80941Smrg         next += chunks[i];
203b8e80941Smrg      } else {
204b8e80941Smrg         /* Just put disabled stages at the beginning. */
205b8e80941Smrg         start[i] = 0;
206b8e80941Smrg      }
207b8e80941Smrg   }
208b8e80941Smrg}
209