1/* 2 * Copyright © 2017 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "util/u_format.h" 25#include "v3d_context.h" 26#include "v3d_tiling.h" 27#include "broadcom/common/v3d_macros.h" 28#include "broadcom/cle/v3dx_pack.h" 29 30#define PIPE_CLEAR_COLOR_BUFFERS (PIPE_CLEAR_COLOR0 | \ 31 PIPE_CLEAR_COLOR1 | \ 32 PIPE_CLEAR_COLOR2 | \ 33 PIPE_CLEAR_COLOR3) \ 34 35#define PIPE_FIRST_COLOR_BUFFER_BIT (ffs(PIPE_CLEAR_COLOR0) - 1) 36 37/* The HW queues up the load until the tile coordinates show up, but can only 38 * track one at a time. If we need to do more than one load, then we need to 39 * flush out the previous load by emitting the tile coordinates and doing a 40 * dummy store. 41 */ 42static void 43flush_last_load(struct v3d_cl *cl) 44{ 45 if (V3D_VERSION >= 40) 46 return; 47 48 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); 49 cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { 50 store.buffer_to_store = NONE; 51 } 52} 53 54static void 55load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer, 56 uint32_t pipe_bit, uint32_t *loads_pending) 57{ 58 struct v3d_surface *surf = v3d_surface(psurf); 59 bool separate_stencil = surf->separate_stencil && buffer == STENCIL; 60 if (separate_stencil) { 61 psurf = surf->separate_stencil; 62 surf = v3d_surface(psurf); 63 } 64 65 struct v3d_resource *rsc = v3d_resource(psurf->texture); 66 67 cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { 68 load.buffer_to_load = buffer; 69 load.address = cl_address(rsc->bo, surf->offset); 70 71#if V3D_VERSION >= 40 72 load.memory_format = surf->tiling; 73 if (separate_stencil) 74 load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8; 75 else 76 load.input_image_format = surf->format; 77 load.r_b_swap = surf->swap_rb; 78 79 if (surf->tiling == VC5_TILING_UIF_NO_XOR || 80 surf->tiling == VC5_TILING_UIF_XOR) { 81 load.height_in_ub_or_stride = 82 surf->padded_height_of_output_image_in_uif_blocks; 83 } else if (surf->tiling == VC5_TILING_RASTER) { 84 struct v3d_resource_slice *slice = 85 &rsc->slices[psurf->u.tex.level]; 86 load.height_in_ub_or_stride = slice->stride; 87 } 88 89 if (psurf->texture->nr_samples > 1) 90 load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; 91 else 92 load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; 93 94#else /* V3D_VERSION < 40 */ 95 /* Can't do raw ZSTENCIL loads -- need to load/store them to 96 * separate buffers for Z and stencil. 97 */ 98 assert(buffer != ZSTENCIL); 99 load.raw_mode = true; 100 load.padded_height_of_output_image_in_uif_blocks = 101 surf->padded_height_of_output_image_in_uif_blocks; 102#endif /* V3D_VERSION < 40 */ 103 } 104 105 *loads_pending &= ~pipe_bit; 106 if (*loads_pending) 107 flush_last_load(cl); 108} 109 110static void 111store_general(struct v3d_job *job, 112 struct v3d_cl *cl, struct pipe_surface *psurf, int buffer, 113 int pipe_bit, uint32_t *stores_pending, bool general_color_clear) 114{ 115 struct v3d_surface *surf = v3d_surface(psurf); 116 bool separate_stencil = surf->separate_stencil && buffer == STENCIL; 117 if (separate_stencil) { 118 psurf = surf->separate_stencil; 119 surf = v3d_surface(psurf); 120 } 121 122 *stores_pending &= ~pipe_bit; 123 bool last_store = !(*stores_pending); 124 125 struct v3d_resource *rsc = v3d_resource(psurf->texture); 126 127 rsc->writes++; 128 129 cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { 130 store.buffer_to_store = buffer; 131 store.address = cl_address(rsc->bo, surf->offset); 132 133#if V3D_VERSION >= 40 134 store.clear_buffer_being_stored = false; 135 136 if (separate_stencil) 137 store.output_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8; 138 else 139 store.output_image_format = surf->format; 140 141 store.r_b_swap = surf->swap_rb; 142 store.memory_format = surf->tiling; 143 144 if (surf->tiling == VC5_TILING_UIF_NO_XOR || 145 surf->tiling == VC5_TILING_UIF_XOR) { 146 store.height_in_ub_or_stride = 147 surf->padded_height_of_output_image_in_uif_blocks; 148 } else if (surf->tiling == VC5_TILING_RASTER) { 149 struct v3d_resource_slice *slice = 150 &rsc->slices[psurf->u.tex.level]; 151 store.height_in_ub_or_stride = slice->stride; 152 } 153 154 if (psurf->texture->nr_samples > 1) 155 store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; 156 else 157 store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; 158 159#else /* V3D_VERSION < 40 */ 160 /* Can't do raw ZSTENCIL stores -- need to load/store them to 161 * separate buffers for Z and stencil. 162 */ 163 assert(buffer != ZSTENCIL); 164 store.raw_mode = true; 165 if (!last_store) { 166 store.disable_color_buffers_clear_on_write = true; 167 store.disable_z_buffer_clear_on_write = true; 168 store.disable_stencil_buffer_clear_on_write = true; 169 } else { 170 store.disable_color_buffers_clear_on_write = 171 !(((pipe_bit & PIPE_CLEAR_COLOR_BUFFERS) && 172 general_color_clear && 173 (job->clear & pipe_bit))); 174 store.disable_z_buffer_clear_on_write = 175 !(job->clear & PIPE_CLEAR_DEPTH); 176 store.disable_stencil_buffer_clear_on_write = 177 !(job->clear & PIPE_CLEAR_STENCIL); 178 } 179 store.padded_height_of_output_image_in_uif_blocks = 180 surf->padded_height_of_output_image_in_uif_blocks; 181#endif /* V3D_VERSION < 40 */ 182 } 183 184 /* There must be a TILE_COORDINATES_IMPLICIT between each store. */ 185 if (V3D_VERSION < 40 && !last_store) { 186 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); 187 } 188} 189 190static int 191zs_buffer_from_pipe_bits(int pipe_clear_bits) 192{ 193 switch (pipe_clear_bits & PIPE_CLEAR_DEPTHSTENCIL) { 194 case PIPE_CLEAR_DEPTHSTENCIL: 195 return ZSTENCIL; 196 case PIPE_CLEAR_DEPTH: 197 return Z; 198 case PIPE_CLEAR_STENCIL: 199 return STENCIL; 200 default: 201 return NONE; 202 } 203} 204 205static void 206v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl) 207{ 208 uint32_t loads_pending = job->load; 209 210 for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { 211 uint32_t bit = PIPE_CLEAR_COLOR0 << i; 212 if (!(loads_pending & bit)) 213 continue; 214 215 struct pipe_surface *psurf = job->cbufs[i]; 216 if (!psurf || (V3D_VERSION < 40 && 217 psurf->texture->nr_samples <= 1)) { 218 continue; 219 } 220 221 load_general(cl, psurf, RENDER_TARGET_0 + i, 222 bit, &loads_pending); 223 } 224 225 if ((loads_pending & PIPE_CLEAR_DEPTHSTENCIL) && 226 (V3D_VERSION >= 40 || 227 (job->zsbuf && job->zsbuf->texture->nr_samples > 1))) { 228 struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture); 229 230 if (rsc->separate_stencil && 231 (loads_pending & PIPE_CLEAR_STENCIL)) { 232 load_general(cl, job->zsbuf, 233 STENCIL, 234 PIPE_CLEAR_STENCIL, 235 &loads_pending); 236 } 237 238 if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) { 239 load_general(cl, job->zsbuf, 240 zs_buffer_from_pipe_bits(loads_pending), 241 loads_pending & PIPE_CLEAR_DEPTHSTENCIL, 242 &loads_pending); 243 } 244 } 245 246#if V3D_VERSION < 40 247 /* The initial reload will be queued until we get the 248 * tile coordinates. 249 */ 250 if (loads_pending) { 251 cl_emit(cl, RELOAD_TILE_COLOR_BUFFER, load) { 252 load.disable_color_buffer_load = 253 (~loads_pending & 254 PIPE_CLEAR_COLOR_BUFFERS) >> 255 PIPE_FIRST_COLOR_BUFFER_BIT; 256 load.enable_z_load = 257 loads_pending & PIPE_CLEAR_DEPTH; 258 load.enable_stencil_load = 259 loads_pending & PIPE_CLEAR_STENCIL; 260 } 261 } 262#else /* V3D_VERSION >= 40 */ 263 assert(!loads_pending); 264 cl_emit(cl, END_OF_LOADS, end); 265#endif 266} 267 268static void 269v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl) 270{ 271#if V3D_VERSION < 40 272 MAYBE_UNUSED bool needs_color_clear = job->clear & PIPE_CLEAR_COLOR_BUFFERS; 273 MAYBE_UNUSED bool needs_z_clear = job->clear & PIPE_CLEAR_DEPTH; 274 MAYBE_UNUSED bool needs_s_clear = job->clear & PIPE_CLEAR_STENCIL; 275 276 /* For clearing color in a TLB general on V3D 3.3: 277 * 278 * - NONE buffer store clears all TLB color buffers. 279 * - color buffer store clears just the TLB color buffer being stored. 280 * - Z/S buffers store may not clear the TLB color buffer. 281 * 282 * And on V3D 4.1, we only have one flag for "clear the buffer being 283 * stored" in the general packet, and a separate packet to clear all 284 * color TLB buffers. 285 * 286 * As a result, we only bother flagging TLB color clears in a general 287 * packet when we don't have to emit a separate packet to clear all 288 * TLB color buffers. 289 */ 290 bool general_color_clear = (needs_color_clear && 291 (job->clear & PIPE_CLEAR_COLOR_BUFFERS) == 292 (job->store & PIPE_CLEAR_COLOR_BUFFERS)); 293#else 294 bool general_color_clear = false; 295#endif 296 297 uint32_t stores_pending = job->store; 298 299 /* For V3D 4.1, use general stores for all TLB stores. 300 * 301 * For V3D 3.3, we only use general stores to do raw stores for any 302 * MSAA surfaces. These output UIF tiled images where each 4x MSAA 303 * pixel is a 2x2 quad, and the format will be that of the 304 * internal_type/internal_bpp, rather than the format from GL's 305 * perspective. Non-MSAA surfaces will use 306 * STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED. 307 */ 308 for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { 309 uint32_t bit = PIPE_CLEAR_COLOR0 << i; 310 if (!(job->store & bit)) 311 continue; 312 313 struct pipe_surface *psurf = job->cbufs[i]; 314 if (!psurf || 315 (V3D_VERSION < 40 && psurf->texture->nr_samples <= 1)) { 316 continue; 317 } 318 319 store_general(job, cl, psurf, RENDER_TARGET_0 + i, bit, 320 &stores_pending, general_color_clear); 321 } 322 323 if (job->store & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf && 324 !(V3D_VERSION < 40 && job->zsbuf->texture->nr_samples <= 1)) { 325 struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture); 326 if (rsc->separate_stencil) { 327 if (job->store & PIPE_CLEAR_DEPTH) { 328 store_general(job, cl, job->zsbuf, Z, 329 PIPE_CLEAR_DEPTH, 330 &stores_pending, 331 general_color_clear); 332 } 333 334 if (job->store & PIPE_CLEAR_STENCIL) { 335 store_general(job, cl, job->zsbuf, STENCIL, 336 PIPE_CLEAR_STENCIL, 337 &stores_pending, 338 general_color_clear); 339 } 340 } else { 341 store_general(job, cl, job->zsbuf, 342 zs_buffer_from_pipe_bits(job->store), 343 job->store & PIPE_CLEAR_DEPTHSTENCIL, 344 &stores_pending, general_color_clear); 345 } 346 } 347 348#if V3D_VERSION < 40 349 if (stores_pending) { 350 cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) { 351 352 store.disable_color_buffer_write = 353 (~stores_pending >> 354 PIPE_FIRST_COLOR_BUFFER_BIT) & 0xf; 355 store.enable_z_write = stores_pending & PIPE_CLEAR_DEPTH; 356 store.enable_stencil_write = stores_pending & PIPE_CLEAR_STENCIL; 357 358 /* Note that when set this will clear all of the color 359 * buffers. 360 */ 361 store.disable_color_buffers_clear_on_write = 362 !needs_color_clear; 363 store.disable_z_buffer_clear_on_write = 364 !needs_z_clear; 365 store.disable_stencil_buffer_clear_on_write = 366 !needs_s_clear; 367 }; 368 } else if (needs_color_clear && !general_color_clear) { 369 /* If we didn't do our color clears in the general packet, 370 * then emit a packet to clear all the TLB color buffers now. 371 */ 372 cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { 373 store.buffer_to_store = NONE; 374 } 375 } 376#else /* V3D_VERSION >= 40 */ 377 /* If we're emitting an RCL with GL_ARB_framebuffer_no_attachments, 378 * we still need to emit some sort of store. 379 */ 380 if (!job->store) { 381 cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { 382 store.buffer_to_store = NONE; 383 } 384 } 385 386 assert(!stores_pending); 387 388 /* GFXH-1461/GFXH-1689: The per-buffer store command's clear 389 * buffer bit is broken for depth/stencil. In addition, the 390 * clear packet's Z/S bit is broken, but the RTs bit ends up 391 * clearing Z/S. 392 */ 393 if (job->clear) { 394 cl_emit(cl, CLEAR_TILE_BUFFERS, clear) { 395 clear.clear_z_stencil_buffer = true; 396 clear.clear_all_render_targets = true; 397 } 398 } 399#endif /* V3D_VERSION >= 40 */ 400} 401 402static void 403v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int last_cbuf) 404{ 405 /* Emit the generic list in our indirect state -- the rcl will just 406 * have pointers into it. 407 */ 408 struct v3d_cl *cl = &job->indirect; 409 v3d_cl_ensure_space(cl, 200, 1); 410 struct v3d_cl_reloc tile_list_start = cl_get_address(cl); 411 412 if (V3D_VERSION >= 40) { 413 /* V3D 4.x only requires a single tile coordinates, and 414 * END_OF_LOADS switches us between loading and rendering. 415 */ 416 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); 417 } 418 419 v3d_rcl_emit_loads(job, cl); 420 421 if (V3D_VERSION < 40) { 422 /* Tile Coordinates triggers the last reload and sets where 423 * the stores go. There must be one per store packet. 424 */ 425 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); 426 } 427 428 /* The binner starts out writing tiles assuming that the initial mode 429 * is triangles, so make sure that's the case. 430 */ 431 cl_emit(cl, PRIM_LIST_FORMAT, fmt) { 432 fmt.primitive_type = LIST_TRIANGLES; 433 } 434 435 cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); 436 437 v3d_rcl_emit_stores(job, cl); 438 439#if V3D_VERSION >= 40 440 cl_emit(cl, END_OF_TILE_MARKER, end); 441#endif 442 443 cl_emit(cl, RETURN_FROM_SUB_LIST, ret); 444 445 cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { 446 branch.start = tile_list_start; 447 branch.end = cl_get_address(cl); 448 } 449} 450 451#if V3D_VERSION >= 40 452static void 453v3d_setup_render_target(struct v3d_job *job, int cbuf, 454 uint32_t *rt_bpp, uint32_t *rt_type, uint32_t *rt_clamp) 455{ 456 if (!job->cbufs[cbuf]) 457 return; 458 459 struct v3d_surface *surf = v3d_surface(job->cbufs[cbuf]); 460 *rt_bpp = surf->internal_bpp; 461 *rt_type = surf->internal_type; 462 *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE; 463} 464 465#else /* V3D_VERSION < 40 */ 466 467static void 468v3d_emit_z_stencil_config(struct v3d_job *job, struct v3d_surface *surf, 469 struct v3d_resource *rsc, bool is_separate_stencil) 470{ 471 cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_Z_STENCIL, zs) { 472 zs.address = cl_address(rsc->bo, surf->offset); 473 474 if (!is_separate_stencil) { 475 zs.internal_type = surf->internal_type; 476 zs.output_image_format = surf->format; 477 } else { 478 zs.z_stencil_id = 1; /* Separate stencil */ 479 } 480 481 zs.padded_height_of_output_image_in_uif_blocks = 482 surf->padded_height_of_output_image_in_uif_blocks; 483 484 assert(surf->tiling != VC5_TILING_RASTER); 485 zs.memory_format = surf->tiling; 486 } 487 488 if (job->store & (is_separate_stencil ? 489 PIPE_CLEAR_STENCIL : 490 PIPE_CLEAR_DEPTHSTENCIL)) { 491 rsc->writes++; 492 } 493} 494#endif /* V3D_VERSION < 40 */ 495 496#define div_round_up(a, b) (((a) + (b) - 1) / b) 497 498void 499v3dX(emit_rcl)(struct v3d_job *job) 500{ 501 /* The RCL list should be empty. */ 502 assert(!job->rcl.bo); 503 504 v3d_cl_ensure_space_with_branch(&job->rcl, 200 + 256 * 505 cl_packet_length(SUPERTILE_COORDINATES)); 506 job->submit.rcl_start = job->rcl.bo->offset; 507 v3d_job_add_bo(job, job->rcl.bo); 508 509 int nr_cbufs = 0; 510 for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { 511 if (job->cbufs[i]) 512 nr_cbufs = i + 1; 513 } 514 515 /* Comon config must be the first TILE_RENDERING_MODE_CFG 516 * and Z_STENCIL_CLEAR_VALUES must be last. The ones in between are 517 * optional updates to the previous HW state. 518 */ 519 cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COMMON, config) { 520#if V3D_VERSION < 40 521 config.enable_z_store = job->store & PIPE_CLEAR_DEPTH; 522 config.enable_stencil_store = job->store & PIPE_CLEAR_STENCIL; 523#else /* V3D_VERSION >= 40 */ 524 if (job->zsbuf) { 525 struct v3d_surface *surf = v3d_surface(job->zsbuf); 526 config.internal_depth_type = surf->internal_type; 527 } 528#endif /* V3D_VERSION >= 40 */ 529 530 /* XXX: Early D/S clear */ 531 532 switch (job->first_ez_state) { 533 case VC5_EZ_UNDECIDED: 534 case VC5_EZ_LT_LE: 535 config.early_z_disable = false; 536 config.early_z_test_and_update_direction = 537 EARLY_Z_DIRECTION_LT_LE; 538 break; 539 case VC5_EZ_GT_GE: 540 config.early_z_disable = false; 541 config.early_z_test_and_update_direction = 542 EARLY_Z_DIRECTION_GT_GE; 543 break; 544 case VC5_EZ_DISABLED: 545 config.early_z_disable = true; 546 } 547 548 config.image_width_pixels = job->draw_width; 549 config.image_height_pixels = job->draw_height; 550 551 config.number_of_render_targets = MAX2(nr_cbufs, 1); 552 553 config.multisample_mode_4x = job->msaa; 554 555 config.maximum_bpp_of_all_render_targets = job->internal_bpp; 556 } 557 558 for (int i = 0; i < nr_cbufs; i++) { 559 struct pipe_surface *psurf = job->cbufs[i]; 560 if (!psurf) 561 continue; 562 struct v3d_surface *surf = v3d_surface(psurf); 563 struct v3d_resource *rsc = v3d_resource(psurf->texture); 564 565 MAYBE_UNUSED uint32_t config_pad = 0; 566 uint32_t clear_pad = 0; 567 568 /* XXX: Set the pad for raster. */ 569 if (surf->tiling == VC5_TILING_UIF_NO_XOR || 570 surf->tiling == VC5_TILING_UIF_XOR) { 571 int uif_block_height = v3d_utile_height(rsc->cpp) * 2; 572 uint32_t implicit_padded_height = (align(job->draw_height, uif_block_height) / 573 uif_block_height); 574 if (surf->padded_height_of_output_image_in_uif_blocks - 575 implicit_padded_height < 15) { 576 config_pad = (surf->padded_height_of_output_image_in_uif_blocks - 577 implicit_padded_height); 578 } else { 579 config_pad = 15; 580 clear_pad = surf->padded_height_of_output_image_in_uif_blocks; 581 } 582 } 583 584#if V3D_VERSION < 40 585 cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { 586 rt.address = cl_address(rsc->bo, surf->offset); 587 rt.internal_type = surf->internal_type; 588 rt.output_image_format = surf->format; 589 rt.memory_format = surf->tiling; 590 rt.internal_bpp = surf->internal_bpp; 591 rt.render_target_number = i; 592 rt.pad = config_pad; 593 594 if (job->store & PIPE_CLEAR_COLOR0 << i) 595 rsc->writes++; 596 } 597#endif /* V3D_VERSION < 40 */ 598 599 cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, 600 clear) { 601 clear.clear_color_low_32_bits = job->clear_color[i][0]; 602 clear.clear_color_next_24_bits = job->clear_color[i][1] & 0xffffff; 603 clear.render_target_number = i; 604 }; 605 606 if (surf->internal_bpp >= V3D_INTERNAL_BPP_64) { 607 cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, 608 clear) { 609 clear.clear_color_mid_low_32_bits = 610 ((job->clear_color[i][1] >> 24) | 611 (job->clear_color[i][2] << 8)); 612 clear.clear_color_mid_high_24_bits = 613 ((job->clear_color[i][2] >> 24) | 614 ((job->clear_color[i][3] & 0xffff) << 8)); 615 clear.render_target_number = i; 616 }; 617 } 618 619 if (surf->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { 620 cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, 621 clear) { 622 clear.uif_padded_height_in_uif_blocks = clear_pad; 623 clear.clear_color_high_16_bits = job->clear_color[i][3] >> 16; 624 clear.render_target_number = i; 625 }; 626 } 627 } 628 629#if V3D_VERSION >= 40 630 cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { 631 v3d_setup_render_target(job, 0, 632 &rt.render_target_0_internal_bpp, 633 &rt.render_target_0_internal_type, 634 &rt.render_target_0_clamp); 635 v3d_setup_render_target(job, 1, 636 &rt.render_target_1_internal_bpp, 637 &rt.render_target_1_internal_type, 638 &rt.render_target_1_clamp); 639 v3d_setup_render_target(job, 2, 640 &rt.render_target_2_internal_bpp, 641 &rt.render_target_2_internal_type, 642 &rt.render_target_2_clamp); 643 v3d_setup_render_target(job, 3, 644 &rt.render_target_3_internal_bpp, 645 &rt.render_target_3_internal_type, 646 &rt.render_target_3_clamp); 647 } 648#endif 649 650#if V3D_VERSION < 40 651 /* TODO: Don't bother emitting if we don't load/clear Z/S. */ 652 if (job->zsbuf) { 653 struct pipe_surface *psurf = job->zsbuf; 654 struct v3d_surface *surf = v3d_surface(psurf); 655 struct v3d_resource *rsc = v3d_resource(psurf->texture); 656 657 v3d_emit_z_stencil_config(job, surf, rsc, false); 658 659 /* Emit the separate stencil packet if we have a resource for 660 * it. The HW will only load/store this buffer if the 661 * Z/Stencil config doesn't have stencil in its format. 662 */ 663 if (surf->separate_stencil) { 664 v3d_emit_z_stencil_config(job, 665 v3d_surface(surf->separate_stencil), 666 rsc->separate_stencil, true); 667 } 668 } 669#endif /* V3D_VERSION < 40 */ 670 671 /* Ends rendering mode config. */ 672 cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, 673 clear) { 674 clear.z_clear_value = job->clear_z; 675 clear.stencil_clear_value = job->clear_s; 676 }; 677 678 /* Always set initial block size before the first branch, which needs 679 * to match the value from binning mode config. 680 */ 681 cl_emit(&job->rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) { 682 init.use_auto_chained_tile_lists = true; 683 init.size_of_first_block_in_chained_tile_lists = 684 TILE_ALLOCATION_BLOCK_SIZE_64B; 685 } 686 687 uint32_t supertile_w = 1, supertile_h = 1; 688 689 /* If doing multicore binning, we would need to initialize each core's 690 * tile list here. 691 */ 692 cl_emit(&job->rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) { 693 list.address = cl_address(job->tile_alloc, 0); 694 } 695 696 cl_emit(&job->rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) { 697 uint32_t frame_w_in_supertiles, frame_h_in_supertiles; 698 const uint32_t max_supertiles = 256; 699 700 /* Size up our supertiles until we get under the limit. */ 701 for (;;) { 702 frame_w_in_supertiles = div_round_up(job->draw_tiles_x, 703 supertile_w); 704 frame_h_in_supertiles = div_round_up(job->draw_tiles_y, 705 supertile_h); 706 if (frame_w_in_supertiles * frame_h_in_supertiles < 707 max_supertiles) { 708 break; 709 } 710 711 if (supertile_w < supertile_h) 712 supertile_w++; 713 else 714 supertile_h++; 715 } 716 717 config.number_of_bin_tile_lists = 1; 718 config.total_frame_width_in_tiles = job->draw_tiles_x; 719 config.total_frame_height_in_tiles = job->draw_tiles_y; 720 721 config.supertile_width_in_tiles = supertile_w; 722 config.supertile_height_in_tiles = supertile_h; 723 724 config.total_frame_width_in_supertiles = frame_w_in_supertiles; 725 config.total_frame_height_in_supertiles = frame_h_in_supertiles; 726 } 727 728 /* Start by clearing the tile buffer. */ 729 cl_emit(&job->rcl, TILE_COORDINATES, coords) { 730 coords.tile_column_number = 0; 731 coords.tile_row_number = 0; 732 } 733 734 /* Emit an initial clear of the tile buffers. This is necessary for 735 * any buffers that should be cleared (since clearing normally happens 736 * at the *end* of the generic tile list), but it's also nice to clear 737 * everything so the first tile doesn't inherit any contents from some 738 * previous frame. 739 * 740 * Also, implement the GFXH-1742 workaround. There's a race in the HW 741 * between the RCL updating the TLB's internal type/size and the 742 * spawning of the QPU instances using the TLB's current internal 743 * type/size. To make sure the QPUs get the right state,, we need 1 744 * dummy store in between internal type/size changes on V3D 3.x, and 2 745 * dummy stores on 4.x. 746 */ 747#if V3D_VERSION < 40 748 cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) { 749 store.buffer_to_store = NONE; 750 } 751#else 752 for (int i = 0; i < 2; i++) { 753 if (i > 0) 754 cl_emit(&job->rcl, TILE_COORDINATES, coords); 755 cl_emit(&job->rcl, END_OF_LOADS, end); 756 cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) { 757 store.buffer_to_store = NONE; 758 } 759 if (i == 0) { 760 cl_emit(&job->rcl, CLEAR_TILE_BUFFERS, clear) { 761 clear.clear_z_stencil_buffer = true; 762 clear.clear_all_render_targets = true; 763 } 764 } 765 cl_emit(&job->rcl, END_OF_TILE_MARKER, end); 766 } 767#endif 768 769 cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush); 770 771 v3d_rcl_emit_generic_per_tile_list(job, nr_cbufs - 1); 772 773 /* XXX perf: We should expose GL_MESA_tile_raster_order to improve X11 774 * performance, but we should use Morton order otherwise to improve 775 * cache locality. 776 */ 777 uint32_t supertile_w_in_pixels = job->tile_width * supertile_w; 778 uint32_t supertile_h_in_pixels = job->tile_height * supertile_h; 779 uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels; 780 uint32_t min_y_supertile = job->draw_min_y / supertile_h_in_pixels; 781 782 uint32_t max_x_supertile = 0; 783 uint32_t max_y_supertile = 0; 784 if (job->draw_max_x != 0 && job->draw_max_y != 0) { 785 max_x_supertile = (job->draw_max_x - 1) / supertile_w_in_pixels; 786 max_y_supertile = (job->draw_max_y - 1) / supertile_h_in_pixels; 787 } 788 789 for (int y = min_y_supertile; y <= max_y_supertile; y++) { 790 for (int x = min_x_supertile; x <= max_x_supertile; x++) { 791 cl_emit(&job->rcl, SUPERTILE_COORDINATES, coords) { 792 coords.column_number_in_supertiles = x; 793 coords.row_number_in_supertiles = y; 794 } 795 } 796 } 797 798 if (job->tmu_dirty_rcl) { 799 cl_emit(&job->rcl, L1_CACHE_FLUSH_CONTROL, flush) { 800 flush.tmu_config_cache_clear = 0xf; 801 flush.tmu_data_cache_clear = 0xf; 802 flush.uniforms_cache_clear = 0xf; 803 flush.instruction_cache_clear = 0xf; 804 } 805 806 cl_emit(&job->rcl, L2T_CACHE_FLUSH_CONTROL, flush) { 807 flush.l2t_flush_mode = L2T_FLUSH_MODE_CLEAN; 808 flush.l2t_flush_start = cl_address(NULL, 0); 809 flush.l2t_flush_end = cl_address(NULL, ~0); 810 } 811 } 812 813 cl_emit(&job->rcl, END_OF_RENDERING, end); 814} 815