1/* 2 * Copyright © 2018 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 * DEALINGS IN THE SOFTWARE. 21 */ 22 23/** 24 * @file crocus_blorp.c 25 * 26 * ============================= GENXML CODE ============================= 27 * [This file is compiled once per generation.] 28 * ======================================================================= 29 * 30 * GenX specific code for working with BLORP (blitting, resolves, clears 31 * on the 3D engine). This provides the driver-specific hooks needed to 32 * implement the BLORP API. 33 * 34 * See crocus_blit.c, crocus_clear.c, and so on. 35 */ 36 37#include <assert.h> 38 39#include "crocus_batch.h" 40#include "crocus_resource.h" 41#include "crocus_context.h" 42 43#include "util/u_upload_mgr.h" 44#include "intel/common/intel_l3_config.h" 45 46#include "blorp/blorp_genX_exec.h" 47 48#if GFX_VER <= 5 49#include "gen4_blorp_exec.h" 50#endif 51 52static uint32_t * 53stream_state(struct crocus_batch *batch, 54 unsigned size, 55 unsigned alignment, 56 uint32_t *out_offset, 57 struct crocus_bo **out_bo) 58{ 59 uint32_t offset = ALIGN(batch->state.used, alignment); 60 61 if (offset + size >= STATE_SZ && !batch->no_wrap) { 62 crocus_batch_flush(batch); 63 offset = ALIGN(batch->state.used, alignment); 64 } else if (offset + size >= batch->state.bo->size) { 65 const unsigned new_size = 66 MIN2(batch->state.bo->size + batch->state.bo->size / 2, 67 MAX_STATE_SIZE); 68 crocus_grow_buffer(batch, true, batch->state.used, new_size); 69 assert(offset + size < batch->state.bo->size); 70 } 71 72 crocus_record_state_size(batch->state_sizes, offset, size); 73 74 batch->state.used = offset + size; 75 *out_offset = offset; 76 77 /* If the caller has asked for a BO, we leave them the responsibility of 78 * adding bo->gtt_offset (say, by handing an address to genxml). If not, 79 * we assume they want the offset from a base address. 80 */ 81 if (out_bo) 82 *out_bo = batch->state.bo; 83 84 return (uint32_t *)batch->state.map + (offset >> 2); 85} 86 87static void * 88blorp_emit_dwords(struct blorp_batch *blorp_batch, unsigned n) 89{ 90 struct crocus_batch *batch = blorp_batch->driver_batch; 91 return crocus_get_command_space(batch, n * sizeof(uint32_t)); 92} 93 94static uint64_t 95blorp_emit_reloc(struct blorp_batch *blorp_batch, UNUSED void *location, 96 struct blorp_address addr, uint32_t delta) 97{ 98 struct crocus_batch *batch = blorp_batch->driver_batch; 99 uint32_t offset; 100 101 if (GFX_VER < 6 && crocus_ptr_in_state_buffer(batch, location)) { 102 offset = (char *)location - (char *)batch->state.map; 103 return crocus_state_reloc(batch, offset, 104 addr.buffer, addr.offset + delta, 105 addr.reloc_flags); 106 } 107 108 assert(!crocus_ptr_in_state_buffer(batch, location)); 109 110 offset = (char *)location - (char *)batch->command.map; 111 return crocus_command_reloc(batch, offset, 112 addr.buffer, addr.offset + delta, 113 addr.reloc_flags); 114} 115 116static void 117blorp_surface_reloc(struct blorp_batch *blorp_batch, uint32_t ss_offset, 118 struct blorp_address addr, uint32_t delta) 119{ 120 struct crocus_batch *batch = blorp_batch->driver_batch; 121 struct crocus_bo *bo = addr.buffer; 122 123 uint64_t reloc_val = 124 crocus_state_reloc(batch, ss_offset, bo, addr.offset + delta, 125 addr.reloc_flags); 126 127 void *reloc_ptr = (void *)batch->state.map + ss_offset; 128 *(uint32_t *)reloc_ptr = reloc_val; 129} 130 131static uint64_t 132blorp_get_surface_address(struct blorp_batch *blorp_batch, 133 struct blorp_address addr) 134{ 135 /* We'll let blorp_surface_reloc write the address. */ 136 return 0ull; 137} 138 139#if GFX_VER >= 7 140static struct blorp_address 141blorp_get_surface_base_address(struct blorp_batch *blorp_batch) 142{ 143 struct crocus_batch *batch = blorp_batch->driver_batch; 144 return (struct blorp_address) { 145 .buffer = batch->state.bo, 146 .offset = 0 147 }; 148} 149#endif 150 151static void * 152blorp_alloc_dynamic_state(struct blorp_batch *blorp_batch, 153 uint32_t size, 154 uint32_t alignment, 155 uint32_t *offset) 156{ 157 struct crocus_batch *batch = blorp_batch->driver_batch; 158 159 return stream_state(batch, size, alignment, offset, NULL); 160} 161 162UNUSED static void * 163blorp_alloc_general_state(struct blorp_batch *blorp_batch, 164 uint32_t size, 165 uint32_t alignment, 166 uint32_t *offset) 167{ 168 /* Use dynamic state range for general state on crocus. */ 169 return blorp_alloc_dynamic_state(blorp_batch, size, alignment, offset); 170} 171 172static void 173blorp_alloc_binding_table(struct blorp_batch *blorp_batch, 174 unsigned num_entries, 175 unsigned state_size, 176 unsigned state_alignment, 177 uint32_t *bt_offset, 178 uint32_t *surface_offsets, 179 void **surface_maps) 180{ 181 struct crocus_batch *batch = blorp_batch->driver_batch; 182 uint32_t *bt_map = stream_state(batch, num_entries * sizeof(uint32_t), 32, 183 bt_offset, NULL); 184 185 for (unsigned i = 0; i < num_entries; i++) { 186 surface_maps[i] = stream_state(batch, 187 state_size, state_alignment, 188 &(surface_offsets)[i], NULL); 189 bt_map[i] = surface_offsets[i]; 190 } 191} 192 193static void * 194blorp_alloc_vertex_buffer(struct blorp_batch *blorp_batch, 195 uint32_t size, 196 struct blorp_address *addr) 197{ 198 struct crocus_batch *batch = blorp_batch->driver_batch; 199 struct crocus_bo *bo; 200 uint32_t offset; 201 202 void *map = stream_state(batch, size, 64, 203 &offset, &bo); 204 205 *addr = (struct blorp_address) { 206 .buffer = bo, 207 .offset = offset, 208 .reloc_flags = RELOC_32BIT, 209#if GFX_VER >= 7 210 .mocs = crocus_mocs(bo, &batch->screen->isl_dev), 211#endif 212 }; 213 214 return map; 215} 216 217/** 218 */ 219static void 220blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *blorp_batch, 221 const struct blorp_address *addrs, 222 UNUSED uint32_t *sizes, 223 unsigned num_vbs) 224{ 225} 226 227static struct blorp_address 228blorp_get_workaround_address(struct blorp_batch *blorp_batch) 229{ 230 struct crocus_batch *batch = blorp_batch->driver_batch; 231 232 return (struct blorp_address) { 233 .buffer = batch->ice->workaround_bo, 234 .offset = batch->ice->workaround_offset, 235 }; 236} 237 238static void 239blorp_flush_range(UNUSED struct blorp_batch *blorp_batch, 240 UNUSED void *start, 241 UNUSED size_t size) 242{ 243 /* All allocated states come from the batch which we will flush before we 244 * submit it. There's nothing for us to do here. 245 */ 246} 247 248#if GFX_VER >= 7 249static const struct intel_l3_config * 250blorp_get_l3_config(struct blorp_batch *blorp_batch) 251{ 252 struct crocus_batch *batch = blorp_batch->driver_batch; 253 return batch->screen->l3_config_3d; 254} 255#else /* GFX_VER < 7 */ 256static void 257blorp_emit_urb_config(struct blorp_batch *blorp_batch, 258 unsigned vs_entry_size, 259 UNUSED unsigned sf_entry_size) 260{ 261 struct crocus_batch *batch = blorp_batch->driver_batch; 262#if GFX_VER <= 5 263 batch->screen->vtbl.calculate_urb_fence(batch, 0, vs_entry_size, sf_entry_size); 264#else 265 genX(crocus_upload_urb)(batch, vs_entry_size, false, vs_entry_size); 266#endif 267} 268#endif 269 270static void 271crocus_blorp_exec(struct blorp_batch *blorp_batch, 272 const struct blorp_params *params) 273{ 274 struct crocus_context *ice = blorp_batch->blorp->driver_ctx; 275 struct crocus_batch *batch = blorp_batch->driver_batch; 276 277 /* Flush the sampler and render caches. We definitely need to flush the 278 * sampler cache so that we get updated contents from the render cache for 279 * the glBlitFramebuffer() source. Also, we are sometimes warned in the 280 * docs to flush the cache between reinterpretations of the same surface 281 * data with different formats, which blorp does for stencil and depth 282 * data. 283 */ 284 if (params->src.enabled) 285 crocus_cache_flush_for_read(batch, params->src.addr.buffer); 286 if (params->dst.enabled) { 287 crocus_cache_flush_for_render(batch, params->dst.addr.buffer, 288 params->dst.view.format, 289 params->dst.aux_usage); 290 } 291 if (params->depth.enabled) 292 crocus_cache_flush_for_depth(batch, params->depth.addr.buffer); 293 if (params->stencil.enabled) 294 crocus_cache_flush_for_depth(batch, params->stencil.addr.buffer); 295 296 crocus_require_command_space(batch, 1400); 297 crocus_require_statebuffer_space(batch, 600); 298 batch->no_wrap = true; 299 300#if GFX_VER == 8 301 genX(crocus_update_pma_fix)(ice, batch, false); 302#endif 303 304#if GFX_VER == 6 305 /* Emit workaround flushes when we switch from drawing to blorping. */ 306 crocus_emit_post_sync_nonzero_flush(batch); 307#endif 308 309#if GFX_VER >= 6 310 crocus_emit_depth_stall_flushes(batch); 311#endif 312 313 blorp_emit(blorp_batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { 314 rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1; 315 rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1; 316 } 317 318 batch->screen->vtbl.update_surface_base_address(batch); 319 crocus_handle_always_flush_cache(batch); 320 321 batch->contains_draw = true; 322 blorp_exec(blorp_batch, params); 323 324 batch->no_wrap = false; 325 crocus_handle_always_flush_cache(batch); 326 327 /* We've smashed all state compared to what the normal 3D pipeline 328 * rendering tracks for GL. 329 */ 330 331 uint64_t skip_bits = (CROCUS_DIRTY_POLYGON_STIPPLE | 332 CROCUS_DIRTY_GEN7_SO_BUFFERS | 333 CROCUS_DIRTY_SO_DECL_LIST | 334 CROCUS_DIRTY_LINE_STIPPLE | 335 CROCUS_ALL_DIRTY_FOR_COMPUTE | 336 CROCUS_DIRTY_GEN6_SCISSOR_RECT | 337 CROCUS_DIRTY_GEN75_VF | 338 CROCUS_DIRTY_SF_CL_VIEWPORT); 339 340 uint64_t skip_stage_bits = (CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE | 341 CROCUS_STAGE_DIRTY_UNCOMPILED_VS | 342 CROCUS_STAGE_DIRTY_UNCOMPILED_TCS | 343 CROCUS_STAGE_DIRTY_UNCOMPILED_TES | 344 CROCUS_STAGE_DIRTY_UNCOMPILED_GS | 345 CROCUS_STAGE_DIRTY_UNCOMPILED_FS | 346 CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS | 347 CROCUS_STAGE_DIRTY_SAMPLER_STATES_TCS | 348 CROCUS_STAGE_DIRTY_SAMPLER_STATES_TES | 349 CROCUS_STAGE_DIRTY_SAMPLER_STATES_GS); 350 351 if (!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]) { 352 /* BLORP disabled tessellation, that's fine for the next draw */ 353 skip_stage_bits |= CROCUS_STAGE_DIRTY_TCS | 354 CROCUS_STAGE_DIRTY_TES | 355 CROCUS_STAGE_DIRTY_CONSTANTS_TCS | 356 CROCUS_STAGE_DIRTY_CONSTANTS_TES | 357 CROCUS_STAGE_DIRTY_BINDINGS_TCS | 358 CROCUS_STAGE_DIRTY_BINDINGS_TES; 359 } 360 361 if (!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]) { 362 /* BLORP disabled geometry shaders, that's fine for the next draw */ 363 skip_stage_bits |= CROCUS_STAGE_DIRTY_GS | 364 CROCUS_STAGE_DIRTY_CONSTANTS_GS | 365 CROCUS_STAGE_DIRTY_BINDINGS_GS; 366 } 367 368 /* we can skip flagging CROCUS_DIRTY_DEPTH_BUFFER, if 369 * BLORP_BATCH_NO_EMIT_DEPTH_STENCIL is set. 370 */ 371 if (blorp_batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL) 372 skip_bits |= CROCUS_DIRTY_DEPTH_BUFFER; 373 374 if (!params->wm_prog_data) 375 skip_bits |= CROCUS_DIRTY_GEN6_BLEND_STATE; 376 377 ice->state.dirty |= ~skip_bits; 378 ice->state.stage_dirty |= ~skip_stage_bits; 379 380 ice->urb.vsize = 0; 381 ice->urb.gs_present = false; 382 ice->urb.gsize = 0; 383 ice->urb.tess_present = false; 384 ice->urb.hsize = 0; 385 ice->urb.dsize = 0; 386 387 if (params->dst.enabled) { 388 crocus_render_cache_add_bo(batch, params->dst.addr.buffer, 389 params->dst.view.format, 390 params->dst.aux_usage); 391 } 392 if (params->depth.enabled) 393 crocus_depth_cache_add_bo(batch, params->depth.addr.buffer); 394 if (params->stencil.enabled) 395 crocus_depth_cache_add_bo(batch, params->stencil.addr.buffer); 396} 397 398static void 399blorp_measure_start(struct blorp_batch *blorp_batch, 400 const struct blorp_params *params) 401{ 402} 403 404void 405genX(crocus_init_blorp)(struct crocus_context *ice) 406{ 407 struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 408 409 blorp_init(&ice->blorp, ice, &screen->isl_dev); 410 ice->blorp.compiler = screen->compiler; 411 ice->blorp.lookup_shader = crocus_blorp_lookup_shader; 412 ice->blorp.upload_shader = crocus_blorp_upload_shader; 413 ice->blorp.exec = crocus_blorp_exec; 414} 415