1b8e80941Smrg/* 2b8e80941Smrg * Copyright (c) 2011 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include <stdlib.h> 25b8e80941Smrg#include <math.h> 26b8e80941Smrg 27b8e80941Smrg#include "util/macros.h" 28b8e80941Smrg#include "main/macros.h" 29b8e80941Smrg#include "compiler/shader_enums.h" 30b8e80941Smrg 31b8e80941Smrg#include "gen_l3_config.h" 32b8e80941Smrg 33b8e80941Smrg/** 34b8e80941Smrg * The following diagram shows how we partition the URB: 35b8e80941Smrg * 36b8e80941Smrg * 16kb or 32kb Rest of the URB space 37b8e80941Smrg * __________-__________ _________________-_________________ 38b8e80941Smrg * / \ / \ 39b8e80941Smrg * +-------------------------------------------------------------+ 40b8e80941Smrg * | VS/HS/DS/GS/FS Push | VS/HS/DS/GS URB | 41b8e80941Smrg * | Constants | Entries | 42b8e80941Smrg * +-------------------------------------------------------------+ 43b8e80941Smrg * 44b8e80941Smrg * Push constants must be stored at the beginning of the URB space, 45b8e80941Smrg * while URB entries can be stored anywhere. We choose to lay them 46b8e80941Smrg * out in pipeline order (VS -> HS -> DS -> GS). 47b8e80941Smrg */ 48b8e80941Smrg 49b8e80941Smrg/** 50b8e80941Smrg * Decide how to partition the URB among the various stages. 51b8e80941Smrg * 52b8e80941Smrg * \param[in] push_constant_bytes - space allocate for push constants. 53b8e80941Smrg * \param[in] urb_size_bytes - total size of the URB (from L3 config). 54b8e80941Smrg * \param[in] tess_present - are tessellation shaders active? 55b8e80941Smrg * \param[in] gs_present - are geometry shaders active? 56b8e80941Smrg * \param[in] entry_size - the URB entry size (from the shader compiler) 57b8e80941Smrg * \param[out] entries - the number of URB entries for each stage 58b8e80941Smrg * \param[out] start - the starting offset for each stage 59b8e80941Smrg */ 60b8e80941Smrgvoid 61b8e80941Smrggen_get_urb_config(const struct gen_device_info *devinfo, 62b8e80941Smrg unsigned push_constant_bytes, unsigned urb_size_bytes, 63b8e80941Smrg bool tess_present, bool gs_present, 64b8e80941Smrg const unsigned entry_size[4], 65b8e80941Smrg unsigned entries[4], unsigned start[4]) 66b8e80941Smrg{ 67b8e80941Smrg const bool active[4] = { true, tess_present, tess_present, gs_present }; 68b8e80941Smrg 69b8e80941Smrg /* URB allocations must be done in 8k chunks. */ 70b8e80941Smrg const unsigned chunk_size_bytes = 8192; 71b8e80941Smrg 72b8e80941Smrg const unsigned push_constant_chunks = 73b8e80941Smrg push_constant_bytes / chunk_size_bytes; 74b8e80941Smrg const unsigned urb_chunks = urb_size_bytes / chunk_size_bytes; 75b8e80941Smrg 76b8e80941Smrg /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): 77b8e80941Smrg * 78b8e80941Smrg * VS Number of URB Entries must be divisible by 8 if the VS URB Entry 79b8e80941Smrg * Allocation Size is less than 9 512-bit URB entries. 80b8e80941Smrg * 81b8e80941Smrg * Similar text exists for HS, DS and GS. 82b8e80941Smrg */ 83b8e80941Smrg unsigned granularity[4]; 84b8e80941Smrg for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 85b8e80941Smrg granularity[i] = (entry_size[i] < 9) ? 8 : 1; 86b8e80941Smrg } 87b8e80941Smrg 88b8e80941Smrg unsigned min_entries[4] = { 89b8e80941Smrg /* VS has a lower limit on the number of URB entries. 90b8e80941Smrg * 91b8e80941Smrg * From the Broadwell PRM, 3DSTATE_URB_VS instruction: 92b8e80941Smrg * "When tessellation is enabled, the VS Number of URB Entries must be 93b8e80941Smrg * greater than or equal to 192." 94b8e80941Smrg */ 95b8e80941Smrg [MESA_SHADER_VERTEX] = tess_present && devinfo->gen == 8 ? 96b8e80941Smrg 192 : devinfo->urb.min_entries[MESA_SHADER_VERTEX], 97b8e80941Smrg 98b8e80941Smrg /* There are two constraints on the minimum amount of URB space we can 99b8e80941Smrg * allocate: 100b8e80941Smrg * 101b8e80941Smrg * (1) We need room for at least 2 URB entries, since we always operate 102b8e80941Smrg * the GS in DUAL_OBJECT mode. 103b8e80941Smrg * 104b8e80941Smrg * (2) We can't allocate less than nr_gs_entries_granularity. 105b8e80941Smrg */ 106b8e80941Smrg [MESA_SHADER_GEOMETRY] = gs_present ? 2 : 0, 107b8e80941Smrg 108b8e80941Smrg [MESA_SHADER_TESS_CTRL] = tess_present ? 1 : 0, 109b8e80941Smrg 110b8e80941Smrg [MESA_SHADER_TESS_EVAL] = tess_present ? 111b8e80941Smrg devinfo->urb.min_entries[MESA_SHADER_TESS_EVAL] : 0, 112b8e80941Smrg }; 113b8e80941Smrg 114b8e80941Smrg /* Min VS Entries isn't a multiple of 8 on Cherryview/Broxton; round up. 115b8e80941Smrg * Round them all up. 116b8e80941Smrg */ 117b8e80941Smrg for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 118b8e80941Smrg min_entries[i] = ALIGN(min_entries[i], granularity[i]); 119b8e80941Smrg } 120b8e80941Smrg 121b8e80941Smrg unsigned entry_size_bytes[4]; 122b8e80941Smrg for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 123b8e80941Smrg entry_size_bytes[i] = 64 * entry_size[i]; 124b8e80941Smrg } 125b8e80941Smrg 126b8e80941Smrg /* Initially, assign each stage the minimum amount of URB space it needs, 127b8e80941Smrg * and make a note of how much additional space it "wants" (the amount of 128b8e80941Smrg * additional space it could actually make use of). 129b8e80941Smrg */ 130b8e80941Smrg unsigned chunks[4]; 131b8e80941Smrg unsigned wants[4]; 132b8e80941Smrg unsigned total_needs = push_constant_chunks; 133b8e80941Smrg unsigned total_wants = 0; 134b8e80941Smrg 135b8e80941Smrg for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 136b8e80941Smrg if (active[i]) { 137b8e80941Smrg chunks[i] = DIV_ROUND_UP(min_entries[i] * entry_size_bytes[i], 138b8e80941Smrg chunk_size_bytes); 139b8e80941Smrg 140b8e80941Smrg wants[i] = 141b8e80941Smrg DIV_ROUND_UP(devinfo->urb.max_entries[i] * entry_size_bytes[i], 142b8e80941Smrg chunk_size_bytes) - chunks[i]; 143b8e80941Smrg } else { 144b8e80941Smrg chunks[i] = 0; 145b8e80941Smrg wants[i] = 0; 146b8e80941Smrg } 147b8e80941Smrg 148b8e80941Smrg total_needs += chunks[i]; 149b8e80941Smrg total_wants += wants[i]; 150b8e80941Smrg } 151b8e80941Smrg 152b8e80941Smrg assert(total_needs <= urb_chunks); 153b8e80941Smrg 154b8e80941Smrg /* Mete out remaining space (if any) in proportion to "wants". */ 155b8e80941Smrg unsigned remaining_space = MIN2(urb_chunks - total_needs, total_wants); 156b8e80941Smrg 157b8e80941Smrg if (remaining_space > 0) { 158b8e80941Smrg for (int i = MESA_SHADER_VERTEX; 159b8e80941Smrg total_wants > 0 && i <= MESA_SHADER_TESS_EVAL; i++) { 160b8e80941Smrg unsigned additional = (unsigned) 161b8e80941Smrg roundf(wants[i] * (((float) remaining_space) / total_wants)); 162b8e80941Smrg chunks[i] += additional; 163b8e80941Smrg remaining_space -= additional; 164b8e80941Smrg total_wants -= wants[i]; 165b8e80941Smrg } 166b8e80941Smrg 167b8e80941Smrg chunks[MESA_SHADER_GEOMETRY] += remaining_space; 168b8e80941Smrg } 169b8e80941Smrg 170b8e80941Smrg /* Sanity check that we haven't over-allocated. */ 171b8e80941Smrg unsigned total_chunks = push_constant_chunks; 172b8e80941Smrg for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 173b8e80941Smrg total_chunks += chunks[i]; 174b8e80941Smrg } 175b8e80941Smrg assert(total_chunks <= urb_chunks); 176b8e80941Smrg 177b8e80941Smrg /* Finally, compute the number of entries that can fit in the space 178b8e80941Smrg * allocated to each stage. 179b8e80941Smrg */ 180b8e80941Smrg for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 181b8e80941Smrg entries[i] = chunks[i] * chunk_size_bytes / entry_size_bytes[i]; 182b8e80941Smrg 183b8e80941Smrg /* Since we rounded up when computing wants[], this may be slightly 184b8e80941Smrg * more than the maximum allowed amount, so correct for that. 185b8e80941Smrg */ 186b8e80941Smrg entries[i] = MIN2(entries[i], devinfo->urb.max_entries[i]); 187b8e80941Smrg 188b8e80941Smrg /* Ensure that we program a multiple of the granularity. */ 189b8e80941Smrg entries[i] = ROUND_DOWN_TO(entries[i], granularity[i]); 190b8e80941Smrg 191b8e80941Smrg /* Finally, sanity check to make sure we have at least the minimum 192b8e80941Smrg * number of entries needed for each stage. 193b8e80941Smrg */ 194b8e80941Smrg assert(entries[i] >= min_entries[i]); 195b8e80941Smrg } 196b8e80941Smrg 197b8e80941Smrg /* Lay out the URB in pipeline order: push constants, VS, HS, DS, GS. */ 198b8e80941Smrg int next = push_constant_chunks; 199b8e80941Smrg for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 200b8e80941Smrg if (entries[i]) { 201b8e80941Smrg start[i] = next; 202b8e80941Smrg next += chunks[i]; 203b8e80941Smrg } else { 204b8e80941Smrg /* Just put disabled stages at the beginning. */ 205b8e80941Smrg start[i] = 0; 206b8e80941Smrg } 207b8e80941Smrg } 208b8e80941Smrg} 209