1/* 2 * Copyright © 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
 */

#include <assert.h>

#include "anv_private.h"

/* These are defined in anv_private.h and blorp_genX_exec.h */
#undef __gen_address_type
#undef __gen_user_data
#undef __gen_combine_address

#include "common/gen_l3_config.h"
#include "common/gen_sample_positions.h"
#include "blorp/blorp_genX_exec.h"

/* This file provides the driver hooks that blorp (Mesa's blit/clear/resolve
 * helper library) needs in order to build commands into an anv command
 * buffer.  Every callback recovers the anv_cmd_buffer from
 * batch->driver_batch and forwards to the corresponding anv primitive.
 */

/* Reserve space for 'n' dwords in the command buffer's batch and return a
 * CPU pointer to the reserved space.
 */
static void *
blorp_emit_dwords(struct blorp_batch *batch, unsigned n)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   return anv_batch_emit_dwords(&cmd_buffer->batch, n);
}

/* Record a relocation for 'address' (+ delta) at 'location', which must lie
 * inside the current batch, and return the value to be written into the
 * batch at that location.
 */
static uint64_t
blorp_emit_reloc(struct blorp_batch *batch,
                 void *location, struct blorp_address address, uint32_t delta)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   assert(cmd_buffer->batch.start <= location &&
          location < cmd_buffer->batch.end);
   return anv_batch_emit_reloc(&cmd_buffer->batch, location,
                               address.buffer, address.offset + delta);
}

/* Record a relocation for a surface state at offset 'ss_offset' into the
 * surface state pool.  Any failure to grow the relocation list is latched on
 * the batch via anv_batch_set_error() rather than returned.  We also write
 * the BO's current presumed address (offset + delta) directly into the
 * mapped surface state so it is correct if the kernel honors the presumed
 * location.
 */
static void
blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
                    struct blorp_address address, uint32_t delta)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   VkResult result =
      anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc,
                         ss_offset, address.buffer, address.offset + delta);
   if (result != VK_SUCCESS)
      anv_batch_set_error(&cmd_buffer->batch, result);

   void *dest = anv_block_pool_map(
      &cmd_buffer->device->surface_state_pool.block_pool, ss_offset);
   uint64_t val = ((struct anv_bo*)address.buffer)->offset + address.offset +
                  delta;
   write_reloc(cmd_buffer->device, dest, val, false);
}

/* anv fills in surface addresses via relocations, so there is no meaningful
 * address to hand back here.
 */
static uint64_t
blorp_get_surface_address(struct blorp_batch *blorp_batch,
                          struct blorp_address address)
{
   /* We'll let blorp_surface_reloc write the address. */
   return 0ull;
}

#if GEN_GEN >= 7 && GEN_GEN < 10
/* Base address of the pool holding all surface states; surface-state
 * offsets handed to blorp are relative to this.
 */
static struct blorp_address
blorp_get_surface_base_address(struct blorp_batch *batch)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   return (struct blorp_address) {
      .buffer = cmd_buffer->device->surface_state_pool.block_pool.bo,
      .offset = 0,
   };
}
#endif

/* Allocate 'size' bytes of dynamic state with the given alignment.  Returns
 * the CPU map and stores the state's pool offset in *offset.
 */
static void *
blorp_alloc_dynamic_state(struct blorp_batch *batch,
                          uint32_t size,
                          uint32_t alignment,
                          uint32_t *offset)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   struct anv_state state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);

   *offset = state.offset;
   return state.map;
}

/* Allocate a binding table with 'num_entries' slots plus one surface state
 * per slot.  On success, *bt_offset receives the binding table's offset and
 * surface_offsets[i]/surface_maps[i] describe each surface state.
 *
 * NOTE(review): on allocation failure we return with the outputs untouched;
 * presumably anv_cmd_buffer_alloc_blorp_binding_table has already recorded
 * the error on the command buffer — confirm against its definition.
 */
static void
blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,
                          unsigned state_size, unsigned state_alignment,
                          uint32_t *bt_offset,
                          uint32_t *surface_offsets, void **surface_maps)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   uint32_t state_offset;
   struct anv_state bt_state;

   VkResult result =
      anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, num_entries,
                                               &state_offset, &bt_state);
   if (result != VK_SUCCESS)
      return;

   uint32_t *bt_map = bt_state.map;
   *bt_offset = bt_state.offset;

   for (unsigned i = 0; i < num_entries; i++) {
      struct anv_state surface_state =
         anv_cmd_buffer_alloc_surface_state(cmd_buffer);
      /* Binding-table entries are biased by state_offset; the raw pool
       * offset is what blorp needs for surface_offsets.
       */
      bt_map[i] = surface_state.offset + state_offset;
      surface_offsets[i] = surface_state.offset;
      surface_maps[i] = surface_state.map;
   }
}

/* Allocate 'size' bytes of vertex-buffer space from the dynamic state pool
 * and fill *addr with its GPU address.  Returns the CPU map.
 */
static void *
blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
                          struct blorp_address *addr)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   /* From the Skylake PRM, 3DSTATE_VERTEX_BUFFERS:
    *
    *    "The VF cache needs to be invalidated before binding and then using
    *    Vertex Buffers that overlap with any previously bound Vertex Buffer
    *    (at a 64B granularity) since the last invalidation.  A VF cache
    *    invalidate is performed by setting the "VF Cache Invalidation Enable"
    *    bit in PIPE_CONTROL."
    *
    * This restriction first appears in the Skylake PRM but the internal docs
    * also list it as being an issue on Broadwell.  In order to avoid this
    * problem, we align all vertex buffer allocations to 64 bytes.
    */
   struct anv_state vb_state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 64);

   *addr = (struct blorp_address) {
      .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
      .offset = vb_state.offset,
      .mocs = cmd_buffer->device->default_mocs,
   };

   return vb_state.map;
}

/* Hook for invalidating the VF cache when vertex buffers cross a 48-bit
 * address-space transition; intentionally a no-op for anv (see comment).
 */
static void
blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
                                           const struct blorp_address *addrs,
                                           unsigned num_vbs)
{
   /* anv forces all vertex buffers into the low 4GB so there are never any
    * transitions that require a VF invalidation.
    */
}

#if GEN_GEN >= 8
/* Hand blorp the device's dedicated workaround BO (offset 0). */
static struct blorp_address
blorp_get_workaround_page(struct blorp_batch *batch)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   return (struct blorp_address) {
      .buffer = &cmd_buffer->device->workaround_bo,
   };
}
#endif

/* Hook for flushing CPU writes to state buffers; a no-op here (see
 * comment).
 */
static void
blorp_flush_range(struct blorp_batch *batch, void *start, size_t size)
{
   /* We don't need to flush states anymore, since everything will be
    * snooped.
    */
}

/* Emit URB configuration for blorp's minimal pipeline.  Blorp only uses the
 * VS and FS stages; it never allocates SF entries (asserted), and the other
 * three entry sizes are set to the minimum of 1.
 */
static void
blorp_emit_urb_config(struct blorp_batch *batch,
                      unsigned vs_entry_size, unsigned sf_entry_size)
{
   struct anv_device *device = batch->blorp->driver_ctx;
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   assert(sf_entry_size == 0);

   const unsigned entry_size[4] = { vs_entry_size, 1, 1, 1 };

   genX(emit_urb_setup)(device, &cmd_buffer->batch,
                        cmd_buffer->state.current_l3_config,
                        VK_SHADER_STAGE_VERTEX_BIT |
                        VK_SHADER_STAGE_FRAGMENT_BIT,
                        entry_size);
}

/* Entry point: execute one blorp operation ('params') into 'batch's command
 * buffer.  Sets up the state blorp depends on (L3 config, pending pipe
 * flushes, 3D pipeline select, depth flush, PMA fix off), runs blorp, then
 * marks all tracked graphics state dirty since blorp has clobbered it.
 */
void
genX(blorp_exec)(struct blorp_batch *batch,
                 const struct blorp_params *params)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   /* Make sure an L3 configuration is programmed before emitting URB setup
    * below; fall back to the device default if none is set yet.
    */
   if (!cmd_buffer->state.current_l3_config) {
      const struct gen_l3_config *cfg =
         gen_get_default_l3_config(&cmd_buffer->device->info);
      genX(cmd_buffer_config_l3)(cmd_buffer, cfg);
   }

#if GEN_GEN >= 11
   /* The PIPE_CONTROL command description says:
    *
    *    "Whenever a Binding Table Index (BTI) used by a Render Taget Message
    *    points to a different RENDER_SURFACE_STATE, SW must issue a Render
    *    Target Cache Flush by enabling this bit.  When render target flush
    *    is set due to new association of BTI, PS Scoreboard Stall bit must
    *    be set in this packet."
    */
   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
      ANV_PIPE_STALL_AT_SCOREBOARD_BIT;
#endif

#if GEN_GEN == 7
   /* The MI_LOAD/STORE_REGISTER_MEM commands which BLORP uses to implement
    * indirect fast-clear colors can cause GPU hangs if we don't stall first.
    * See genX(cmd_buffer_mi_memcpy) for more details.
    */
   if (params->src.clear_color_addr.buffer ||
       params->dst.clear_color_addr.buffer)
      cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
#endif

   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   genX(flush_pipeline_select_3d)(cmd_buffer);

   genX(cmd_buffer_emit_gen7_depth_flush)(cmd_buffer);

   /* BLORP doesn't do anything fancy with depth such as discards, so we want
    * the PMA fix off.  Also, off is always the safe option.
    */
   genX(cmd_buffer_enable_pma_fix)(cmd_buffer, false);

   blorp_exec(batch, params);

   /* Blorp emitted its own pipeline state; dirty everything so the next
    * real draw re-emits the application's state.
    */
   cmd_buffer->state.gfx.vb_dirty = ~0;
   cmd_buffer->state.gfx.dirty = ~0;
   cmd_buffer->state.push_constants_dirty = ~0;
}