/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 * Copyright © 2018 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_helpers.h"
#include "util/u_format.h"
#include "util/u_viewport.h"

#include "freedreno_resource.h"
#include "freedreno_query_hw.h"

#include "fd6_emit.h"
#include "fd6_blend.h"
#include "fd6_context.h"
#include "fd6_image.h"
#include "fd6_program.h"
#include "fd6_rasterizer.h"
#include "fd6_texture.h"
#include "fd6_format.h"
#include "fd6_zsa.h"

static uint32_t
shader_t_to_opcode(gl_shader_stage type)
{
	switch (type) {
	case MESA_SHADER_VERTEX:
	case MESA_SHADER_TESS_CTRL:
	case MESA_SHADER_TESS_EVAL:
	case MESA_SHADER_GEOMETRY:
		return CP_LOAD_STATE6_GEOM;
	case MESA_SHADER_FRAGMENT:
	case MESA_SHADER_COMPUTE:
	case MESA_SHADER_KERNEL:
		return CP_LOAD_STATE6_FRAG;
	default:
		unreachable("bad shader type");
	}
}

/* regid:          base const register
 * prsc or dwords: buffer containing constant values
 * sizedwords:     size of const value buffer
 */
static void
fd6_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type,
		uint32_t regid, uint32_t offset, uint32_t sizedwords,
		const uint32_t *dwords, struct pipe_resource *prsc)
{
	uint32_t i, sz, align_sz;
	enum a6xx_state_src src;

	debug_assert((regid % 4) == 0);

	if (prsc) {
		sz = 0;
		src = SS6_INDIRECT;
	} else {
		sz = sizedwords;
		src = SS6_DIRECT;
	}

	align_sz = align(sz, 4);

	OUT_PKT7(ring, shader_t_to_opcode(type), 3 + align_sz);
	OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) |
			CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
			CP_LOAD_STATE6_0_STATE_SRC(src) |
			CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type)) |
			CP_LOAD_STATE6_0_NUM_UNIT(DIV_ROUND_UP(sizedwords, 4)));
	if (prsc) {
		struct fd_bo *bo = fd_resource(prsc)->bo;
		OUT_RELOC(ring, bo, offset, 0, 0);
	} else {
		OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
		OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
		dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
	}

	for (i = 0; i < sz; i++) {
		OUT_RING(ring, dwords[i]);
	}

	/* Zero-pad to multiple of 4 dwords */
	for (i = sz; i < align_sz; i++) {
		OUT_RING(ring, 0);
	}
}

static void
fd6_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write,
		uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets)
{
	uint32_t anum = align(num, 2);
	uint32_t i;

	debug_assert((regid % 4) == 0);

	OUT_PKT7(ring, shader_t_to_opcode(type), 3 + (2 * anum));
	OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) |
			CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
			CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
			CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type)) |
			CP_LOAD_STATE6_0_NUM_UNIT(anum/2));
	OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
	OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));

	for (i = 0; i < num; i++) {
		if (prscs[i]) {
			if (write) {
				OUT_RELOCW(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
			} else {
				OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
			}
		} else {
			OUT_RING(ring, 0xbad00000 | (i << 16));
			OUT_RING(ring, 0xbad00000 | (i << 16));
		}
	}

	for (; i < anum; i++) {
		OUT_RING(ring, 0xffffffff);
		OUT_RING(ring, 0xffffffff);
	}
}

/* Border color layout is diff from a4xx/a5xx.. if it turns out to be
 * the same on later gens then move this somewhere common ;-)
 *
 * Entry layout looks like (total size, 0x80 bytes):
 */

struct PACKED bcolor_entry {
	uint32_t fp32[4];
	uint16_t ui16[4];
	int16_t  si16[4];
	uint16_t fp16[4];
	uint16_t rgb565;
	uint16_t rgb5a1;
	uint16_t rgba4;
	uint8_t  __pad0[2];
	uint8_t  ui8[4];
	int8_t   si8[4];
	uint32_t rgb10a2;
	uint32_t z24; /* also s8? */
	uint16_t srgb[4];      /* appears to duplicate fp16[], but clamped, used for srgb */
	uint8_t  __pad1[56];
};

#define FD6_BORDER_COLOR_SIZE        sizeof(struct bcolor_entry)
#define FD6_BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * FD6_BORDER_COLOR_SIZE)

static void
setup_border_colors(struct fd_texture_stateobj *tex, struct bcolor_entry *entries)
{
	unsigned i, j;
	STATIC_ASSERT(sizeof(struct bcolor_entry) == FD6_BORDER_COLOR_SIZE);

	for (i = 0; i < tex->num_samplers; i++) {
		struct bcolor_entry *e = &entries[i];
		struct pipe_sampler_state *sampler = tex->samplers[i];
		union pipe_color_union *bc;

		if (!sampler)
			continue;

		bc = &sampler->border_color;

		/*
		 * XXX HACK ALERT XXX
		 *
		 * The border colors need to be swizzled in a particular
		 * format-dependent order. Even though samplers don't know about
		 * formats, we can assume that with a GL state tracker, there's a
		 * 1:1 correspondence between sampler and texture. Take advantage
		 * of that knowledge.
		 */
		if ((i >= tex->num_textures) || !tex->textures[i])
			continue;

		struct pipe_sampler_view *view = tex->textures[i];
		enum pipe_format format = view->format;
		const struct util_format_description *desc =
				util_format_description(format);

		e->rgb565 = 0;
		e->rgb5a1 = 0;
		e->rgba4 = 0;
		e->rgb10a2 = 0;
		e->z24 = 0;

		unsigned char swiz[4];

		fd6_tex_swiz(format, swiz,
				view->swizzle_r, view->swizzle_g,
				view->swizzle_b, view->swizzle_a);

		for (j = 0; j < 4; j++) {
			int c = swiz[j];
			int cd = c;

			/*
			 * HACK: for PIPE_FORMAT_X24S8_UINT we end up w/ the
			 * stencil border color value in bc->ui[0] but according
			 * to desc->swizzle and desc->channel, the .x/.w component
			 * is NONE and the stencil value is in the y component.
			 * Meanwhile the hardware wants this in the .w component
			 * for x24s8 and the .x component for x32_s8x24.
			 */
			if ((format == PIPE_FORMAT_X24S8_UINT) ||
					(format == PIPE_FORMAT_X32_S8X24_UINT)) {
				if (j == 0) {
					c = 1;
					cd = (format == PIPE_FORMAT_X32_S8X24_UINT) ? 0 : 3;
				} else {
					continue;
				}
			}

			if (c >= 4)
				continue;

			if (desc->channel[c].pure_integer) {
				uint16_t clamped;
				switch (desc->channel[c].size) {
				case 2:
					assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED);
					clamped = CLAMP(bc->ui[j], 0, 0x3);
					break;
				case 8:
					if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
						clamped = CLAMP(bc->i[j], -128, 127);
					else
						clamped = CLAMP(bc->ui[j], 0, 255);
					break;
				case 10:
					assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED);
					clamped = CLAMP(bc->ui[j], 0, 0x3ff);
					break;
				case 16:
					if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
						clamped = CLAMP(bc->i[j], -32768, 32767);
					else
						clamped = CLAMP(bc->ui[j], 0, 65535);
					break;
				default:
					assert(!"Unexpected bit size");
				case 32:
					clamped = 0;
					break;
				}
				e->fp32[cd] = bc->ui[j];
				e->fp16[cd] = clamped;
			} else {
				float f = bc->f[j];
				float f_u = CLAMP(f, 0, 1);
				float f_s = CLAMP(f, -1, 1);

				e->fp32[c] = fui(f);
				e->fp16[c] = util_float_to_half(f);
				e->srgb[c] = util_float_to_half(f_u);
				e->ui16[c] = f_u * 0xffff;
				e->si16[c] = f_s * 0x7fff;
				e->ui8[c]  = f_u * 0xff;
				e->si8[c]  = f_s * 0x7f;
				if (c == 1)
					e->rgb565 |= (int)(f_u * 0x3f) << 5;
				else if (c < 3)
					e->rgb565 |= (int)(f_u * 0x1f) << (c ? 11 : 0);
				if (c == 3)
					e->rgb5a1 |= (f_u > 0.5) ? 0x8000 : 0;
				else
					e->rgb5a1 |= (int)(f_u * 0x1f) << (c * 5);
				if (c == 3)
					e->rgb10a2 |= (int)(f_u * 0x3) << 30;
				else
					e->rgb10a2 |= (int)(f_u * 0x3ff) << (c * 10);
				e->rgba4 |= (int)(f_u * 0xf) << (c * 4);
				if (c == 0)
					e->z24 = f_u * 0xffffff;
			}
		}

#ifdef DEBUG
		memset(&e->__pad0, 0, sizeof(e->__pad0));
		memset(&e->__pad1, 0, sizeof(e->__pad1));
#endif
	}
}

static void
emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
	struct fd6_context *fd6_ctx = fd6_context(ctx);
	struct bcolor_entry *entries;
	unsigned off;
	void *ptr;

	STATIC_ASSERT(sizeof(struct bcolor_entry) == FD6_BORDER_COLOR_SIZE);

	u_upload_alloc(fd6_ctx->border_color_uploader,
			0, FD6_BORDER_COLOR_UPLOAD_SIZE,
			FD6_BORDER_COLOR_UPLOAD_SIZE, &off,
			&fd6_ctx->border_color_buf,
			&ptr);

	entries = ptr;

	setup_border_colors(&ctx->tex[PIPE_SHADER_VERTEX], &entries[0]);
	setup_border_colors(&ctx->tex[PIPE_SHADER_FRAGMENT],
			&entries[ctx->tex[PIPE_SHADER_VERTEX].num_samplers]);

	OUT_PKT4(ring, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_LO, 2);
	OUT_RELOC(ring, fd_resource(fd6_ctx->border_color_buf)->bo, off, 0, 0);

	u_upload_unmap(fd6_ctx->border_color_uploader);
}

static void
fd6_emit_fb_tex(struct fd_ringbuffer *state, struct fd_context *ctx)
{
	struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
	struct pipe_surface *psurf = pfb->cbufs[0];
	struct fd_resource *rsc = fd_resource(psurf->texture);

	uint32_t texconst0 = fd6_tex_const_0(psurf->texture, psurf->u.tex.level,
			psurf->format, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
			PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);

	/* always TILE6_2 mode in GMEM.. which also means no swap: */
	texconst0 &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
	texconst0 |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);

	OUT_RING(state, texconst0);
	OUT_RING(state, A6XX_TEX_CONST_1_WIDTH(pfb->width) |
			A6XX_TEX_CONST_1_HEIGHT(pfb->height));
	OUT_RINGP(state, A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
			A6XX_TEX_CONST_2_FETCHSIZE(TFETCH6_2_BYTE),
			&ctx->batch->fb_read_patches);
	OUT_RING(state, A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size));

	OUT_RING(state, A6XX_TEX_CONST_4_BASE_LO(ctx->screen->gmem_base));
	OUT_RING(state, A6XX_TEX_CONST_5_BASE_HI(ctx->screen->gmem_base >> 32) |
			A6XX_TEX_CONST_5_DEPTH(1));
	OUT_RING(state, 0);   /* texconst6 */
	OUT_RING(state, 0);   /* texconst7 */
	OUT_RING(state, 0);   /* texconst8 */
	OUT_RING(state, 0);   /* texconst9 */
	OUT_RING(state, 0);   /* texconst10 */
	OUT_RING(state, 0);   /* texconst11 */
	OUT_RING(state, 0);
	OUT_RING(state, 0);
	OUT_RING(state, 0);
	OUT_RING(state, 0);
}

bool
fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
		enum pipe_shader_type type, struct fd_texture_stateobj *tex,
		unsigned bcolor_offset,
		/* can be NULL if no image/SSBO/fb state to merge in: */
		const struct ir3_shader_variant *v, struct fd_context *ctx)
{
	bool needs_border = false;
	unsigned opcode, tex_samp_reg, tex_const_reg, tex_count_reg;
	enum a6xx_state_block sb;

	switch (type) {
	case PIPE_SHADER_VERTEX:
		sb = SB6_VS_TEX;
		opcode = CP_LOAD_STATE6_GEOM;
		tex_samp_reg = REG_A6XX_SP_VS_TEX_SAMP_LO;
		tex_const_reg = REG_A6XX_SP_VS_TEX_CONST_LO;
		tex_count_reg = REG_A6XX_SP_VS_TEX_COUNT;
		break;
	case PIPE_SHADER_FRAGMENT:
		sb = SB6_FS_TEX;
		opcode = CP_LOAD_STATE6_FRAG;
		tex_samp_reg = REG_A6XX_SP_FS_TEX_SAMP_LO;
		tex_const_reg = REG_A6XX_SP_FS_TEX_CONST_LO;
		tex_count_reg = REG_A6XX_SP_FS_TEX_COUNT;
		break;
	case PIPE_SHADER_COMPUTE:
		sb = SB6_CS_TEX;
		opcode = CP_LOAD_STATE6_FRAG;
		tex_samp_reg = REG_A6XX_SP_CS_TEX_SAMP_LO;
		tex_const_reg = REG_A6XX_SP_CS_TEX_CONST_LO;
		tex_count_reg = REG_A6XX_SP_CS_TEX_COUNT;
		break;
	default:
		unreachable("bad state block");
	}

	if (tex->num_samplers > 0) {
		struct fd_ringbuffer *state =
			fd_ringbuffer_new_object(pipe, tex->num_samplers * 4 * 4);
		for (unsigned i = 0; i < tex->num_samplers; i++) {
			static const struct fd6_sampler_stateobj dummy_sampler = {};
			const struct fd6_sampler_stateobj *sampler = tex->samplers[i] ?
					fd6_sampler_stateobj(tex->samplers[i]) : &dummy_sampler;
			OUT_RING(state, sampler->texsamp0);
			OUT_RING(state, sampler->texsamp1);
			OUT_RING(state, sampler->texsamp2 |
					A6XX_TEX_SAMP_2_BCOLOR_OFFSET((i + bcolor_offset) * sizeof(struct bcolor_entry)));
			OUT_RING(state, sampler->texsamp3);
			needs_border |= sampler->needs_border;
		}

		/* output sampler state: */
		OUT_PKT7(ring, opcode, 3);
		OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
				CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
				CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
				CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
				CP_LOAD_STATE6_0_NUM_UNIT(tex->num_samplers));
		OUT_RB(ring, state); /* SRC_ADDR_LO/HI */

		OUT_PKT4(ring, tex_samp_reg, 2);
		OUT_RB(ring, state); /* SRC_ADDR_LO/HI */

		fd_ringbuffer_del(state);
	}

	unsigned num_merged_textures = tex->num_textures;
	unsigned num_textures = tex->num_textures;
	if (v) {
		num_merged_textures += v->image_mapping.num_tex;

		if (v->fb_read)
			num_merged_textures++;

		/* There could be more bound textures than what the shader uses,
		 * which isn't known at shader compile time.  So when we are
		 * merging tex state, only emit the textures that the shader uses
		 * (since the image/SSBO related tex state comes immediately
		 * after).
		 */
		num_textures = v->image_mapping.tex_base;
	}

	if (num_merged_textures > 0) {
		struct fd_ringbuffer *state =
			fd_ringbuffer_new_object(pipe, num_merged_textures * 16 * 4);
		for (unsigned i = 0; i < num_textures; i++) {
			static const struct fd6_pipe_sampler_view dummy_view = {};
			const struct fd6_pipe_sampler_view *view = tex->textures[i] ?
					fd6_pipe_sampler_view(tex->textures[i]) : &dummy_view;
			struct fd_resource *rsc = NULL;

			if (view->base.texture)
				rsc = fd_resource(view->base.texture);

			OUT_RING(state, view->texconst0);
			OUT_RING(state, view->texconst1);
			OUT_RING(state, view->texconst2);
			OUT_RING(state, view->texconst3);

			if (rsc) {
				if (view->base.format == PIPE_FORMAT_X32_S8X24_UINT)
					rsc = rsc->stencil;
				OUT_RELOC(state, rsc->bo, view->offset,
						(uint64_t)view->texconst5 << 32, 0);
			} else {
				OUT_RING(state, 0x00000000);
				OUT_RING(state, view->texconst5);
			}

			OUT_RING(state, view->texconst6);

			if (rsc && view->ubwc_enabled) {
				OUT_RELOC(state, rsc->bo, view->ubwc_offset, 0, 0);
			} else {
				OUT_RING(state, 0);
				OUT_RING(state, 0);
			}

			OUT_RING(state, view->texconst9);
			OUT_RING(state, view->texconst10);
			OUT_RING(state, view->texconst11);
			OUT_RING(state, 0);
			OUT_RING(state, 0);
			OUT_RING(state, 0);
			OUT_RING(state, 0);
		}

		if (v) {
			const struct ir3_ibo_mapping *mapping = &v->image_mapping;
			struct fd_shaderbuf_stateobj *buf = &ctx->shaderbuf[type];
			struct fd_shaderimg_stateobj *img = &ctx->shaderimg[type];

			for (unsigned i = 0; i < mapping->num_tex; i++) {
				unsigned idx = mapping->tex_to_image[i];
				if (idx & IBO_SSBO) {
					fd6_emit_ssbo_tex(state, &buf->sb[idx & ~IBO_SSBO]);
				} else {
					fd6_emit_image_tex(state, &img->si[idx]);
				}
			}

			if (v->fb_read) {
				fd6_emit_fb_tex(state, ctx);
			}
		}

		/* emit texture state: */
		OUT_PKT7(ring, opcode, 3);
		OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
				CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
				CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
				CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
				CP_LOAD_STATE6_0_NUM_UNIT(num_merged_textures));
		OUT_RB(ring, state); /* SRC_ADDR_LO/HI */

		OUT_PKT4(ring, tex_const_reg, 2);
		OUT_RB(ring, state); /* SRC_ADDR_LO/HI */

		fd_ringbuffer_del(state);
	}

	OUT_PKT4(ring, tex_count_reg, 1);
	OUT_RING(ring, num_merged_textures);

	return needs_border;
}

/* Emits combined texture state, which also includes any Image/SSBO
 * related texture state merged in (because we must have all texture
 * state for a given stage in a single buffer).  In the fast-path, if
 * we don't need to merge in any image/ssbo related texture state, we
 * just use cached texture stateobj.  Otherwise we generate a single-
 * use stateobj.
 *
 * TODO Is there some sane way we can still use cached texture stateobj
 * with image/ssbo in use?
 *
 * returns whether border_color is required:
 */
static bool
fd6_emit_combined_textures(struct fd_ringbuffer *ring, struct fd6_emit *emit,
		enum pipe_shader_type type, const struct ir3_shader_variant *v)
{
	struct fd_context *ctx = emit->ctx;
	bool needs_border = false;

	static const enum fd6_state_id state_id[PIPE_SHADER_TYPES] = {
		[PIPE_SHADER_VERTEX]   = FD6_GROUP_VS_TEX,
		[PIPE_SHADER_FRAGMENT] = FD6_GROUP_FS_TEX,
	};

	debug_assert(state_id[type]);

	if (!v->image_mapping.num_tex && !v->fb_read) {
		/* in the fast-path, when we don't have to mix in any image/SSBO
		 * related texture state, we can just lookup the stateobj and
		 * re-emit that:
		 *
		 * Also, framebuffer-read is a slow-path because an extra
		 * texture needs to be inserted.
		 *
		 * TODO we can probably simplify things if we also treated
		 * border_color as a slow-path.. this way the tex state key
		 * wouldn't depend on bcolor_offset.. but fb_read might rather
		 * be *somehow* a fast-path if we eventually used it for PLS.
		 * I suppose there would be no harm in just *always* inserting
		 * an fb_read texture?
		 */
		if ((ctx->dirty_shader[type] & FD_DIRTY_SHADER_TEX) &&
				ctx->tex[type].num_textures > 0) {
			struct fd6_texture_state *tex = fd6_texture_state(ctx,
					type, &ctx->tex[type]);

			needs_border |= tex->needs_border;

			fd6_emit_add_group(emit, tex->stateobj, state_id[type], 0x7);
		}
	} else {
		/* In the slow-path, create a one-shot texture state object
		 * if either TEX|PROG|SSBO|IMAGE state is dirty:
		 */
		if ((ctx->dirty_shader[type] &
				(FD_DIRTY_SHADER_TEX | FD_DIRTY_SHADER_PROG |
				 FD_DIRTY_SHADER_IMAGE | FD_DIRTY_SHADER_SSBO)) ||
				v->fb_read) {
			struct fd_texture_stateobj *tex = &ctx->tex[type];
			struct fd_ringbuffer *stateobj =
				fd_submit_new_ringbuffer(ctx->batch->submit,
					0x1000, FD_RINGBUFFER_STREAMING);
			unsigned bcolor_offset =
				fd6_border_color_offset(ctx, type, tex);

			needs_border |= fd6_emit_textures(ctx->pipe, stateobj, type, tex,
					bcolor_offset, v, ctx);

			fd6_emit_add_group(emit, stateobj, state_id[type], 0x7);

			fd_ringbuffer_del(stateobj);
		}
	}

	return needs_border;
}

static struct fd_ringbuffer *
build_vbo_state(struct fd6_emit *emit, const struct ir3_shader_variant *vp)
{
	const struct fd_vertex_state *vtx = emit->vtx;
	int32_t i, j;

	struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(emit->ctx->batch->submit,
			4 * (10 * vp->inputs_count + 2), FD_RINGBUFFER_STREAMING);

	for (i = 0, j = 0; i <= vp->inputs_count; i++) {
		if (vp->inputs[i].sysval)
			continue;
		if (vp->inputs[i].compmask) {
			struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
			const struct pipe_vertex_buffer *vb =
					&vtx->vertexbuf.vb[elem->vertex_buffer_index];
			struct fd_resource *rsc = fd_resource(vb->buffer.resource);
			enum pipe_format pfmt = elem->src_format;
			enum a6xx_vtx_fmt fmt = fd6_pipe2vtx(pfmt);
			bool isint = util_format_is_pure_integer(pfmt);
			uint32_t off = vb->buffer_offset + elem->src_offset;
			uint32_t size = fd_bo_size(rsc->bo) - off;
			debug_assert(fmt != ~0);

#ifdef DEBUG
			/* see dEQP-GLES31.stress.vertex_attribute_binding.buffer_bounds.bind_vertex_buffer_offset_near_wrap_10
			 */
			if (off > fd_bo_size(rsc->bo))
				continue;
#endif

			OUT_PKT4(ring, REG_A6XX_VFD_FETCH(j), 4);
			OUT_RELOC(ring, rsc->bo, off, 0, 0);
			OUT_RING(ring, size);           /* VFD_FETCH[j].SIZE */
			OUT_RING(ring, vb->stride);     /* VFD_FETCH[j].STRIDE */

			OUT_PKT4(ring, REG_A6XX_VFD_DECODE(j), 2);
			OUT_RING(ring, A6XX_VFD_DECODE_INSTR_IDX(j) |
					A6XX_VFD_DECODE_INSTR_FORMAT(fmt) |
					COND(elem->instance_divisor, A6XX_VFD_DECODE_INSTR_INSTANCED) |
					A6XX_VFD_DECODE_INSTR_SWAP(fd6_pipe2swap(pfmt)) |
					A6XX_VFD_DECODE_INSTR_UNK30 |
					COND(!isint, A6XX_VFD_DECODE_INSTR_FLOAT));
			OUT_RING(ring, MAX2(1, elem->instance_divisor)); /* VFD_DECODE[j].STEP_RATE */

			OUT_PKT4(ring, REG_A6XX_VFD_DEST_CNTL(j), 1);
			OUT_RING(ring, A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vp->inputs[i].compmask) |
					A6XX_VFD_DEST_CNTL_INSTR_REGID(vp->inputs[i].regid));

			j++;
		}
	}

	OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_0, 1);
	OUT_RING(ring, A6XX_VFD_CONTROL_0_VTXCNT(j) | (j << 8));

	return ring;
}

static struct fd_ringbuffer *
build_lrz(struct fd6_emit *emit, bool binning_pass)
{
	struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(emit->ctx->zsa);
	struct pipe_framebuffer_state *pfb = &emit->ctx->batch->framebuffer;
	struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
	uint32_t gras_lrz_cntl = zsa->gras_lrz_cntl;
	uint32_t rb_lrz_cntl = zsa->rb_lrz_cntl;

	struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(emit->ctx->batch->submit,
			16, FD_RINGBUFFER_STREAMING);

	if (emit->no_lrz_write || !rsc->lrz || !rsc->lrz_valid) {
		gras_lrz_cntl = 0;
		rb_lrz_cntl = 0;
	} else if (binning_pass && zsa->lrz_write) {
		gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_LRZ_WRITE;
	}

	OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
	OUT_RING(ring, gras_lrz_cntl);

	OUT_PKT4(ring, REG_A6XX_RB_LRZ_CNTL, 1);
	OUT_RING(ring, rb_lrz_cntl);

	return ring;
}

static void
fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit, struct ir3_stream_output_info *info)
{
	struct fd_context *ctx = emit->ctx;
	const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
	struct fd_streamout_stateobj *so = &ctx->streamout;

	emit->streamout_mask = 0;

	for (unsigned i = 0; i < so->num_targets; i++) {
		struct pipe_stream_output_target *target = so->targets[i];

		if (!target)
			continue;

		unsigned offset = (so->offsets[i] * info->stride[i] * 4) +
				target->buffer_offset;

		OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_BASE_LO(i), 3);
		/* VPC_SO[i].BUFFER_BASE_LO: */
		OUT_RELOCW(ring, fd_resource(target->buffer)->bo, 0, 0, 0);
		OUT_RING(ring, target->buffer_size + offset);

		OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(i), 3);
		OUT_RING(ring, offset);
		/* VPC_SO[i].FLUSH_BASE_LO/HI: */
		// TODO just give hw a dummy addr for now.. we should
		// be using this and then CP_MEM_TO_REG to set the
		// VPC_SO[i].BUFFER_OFFSET for the next draw..
		OUT_RELOCW(ring, fd6_context(ctx)->blit_mem, 0x100, 0, 0);

		emit->streamout_mask |= (1 << i);
	}

	if (emit->streamout_mask) {
		const struct fd6_streamout_state *tf = &prog->tf;

		OUT_PKT7(ring, CP_CONTEXT_REG_BUNCH, 12 + (2 * tf->prog_count));
		OUT_RING(ring, REG_A6XX_VPC_SO_BUF_CNTL);
		OUT_RING(ring, tf->vpc_so_buf_cntl);
		OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(0));
		OUT_RING(ring, tf->ncomp[0]);
		OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(1));
		OUT_RING(ring, tf->ncomp[1]);
		OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(2));
		OUT_RING(ring, tf->ncomp[2]);
		OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(3));
		OUT_RING(ring, tf->ncomp[3]);
		OUT_RING(ring, REG_A6XX_VPC_SO_CNTL);
		OUT_RING(ring, A6XX_VPC_SO_CNTL_ENABLE);
		for (unsigned i = 0; i < tf->prog_count; i++) {
			OUT_RING(ring, REG_A6XX_VPC_SO_PROG);
			OUT_RING(ring, tf->prog[i]);
		}

		OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1);
		OUT_RING(ring, 0x0);
	} else {
		OUT_PKT7(ring, CP_CONTEXT_REG_BUNCH, 4);
		OUT_RING(ring, REG_A6XX_VPC_SO_CNTL);
		OUT_RING(ring, 0);
		OUT_RING(ring, REG_A6XX_VPC_SO_BUF_CNTL);
		OUT_RING(ring, 0);

		OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1);
		OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE);
	}
}

void
fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
{
	struct fd_context *ctx = emit->ctx;
	struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
	const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
	const struct ir3_shader_variant *vp = emit->vs;
	const struct ir3_shader_variant *fp = emit->fs;
	const enum fd_dirty_3d_state dirty = emit->dirty;
	bool needs_border = false;

	emit_marker6(ring, 5);

	/* NOTE: we track fb_read differently than _BLEND_ENABLED since
	 * we might at some point decide to do sysmem in some cases when
	 * blend is enabled:
	 */
	if (fp->fb_read)
		ctx->batch->gmem_reason |= FD_GMEM_FB_READ;

	if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE)) {
		struct fd_ringbuffer *state;

		state = build_vbo_state(emit, emit->vs);
		fd6_emit_add_group(emit, state, FD6_GROUP_VBO, 0x6);
		fd_ringbuffer_del(state);

		state = build_vbo_state(emit, emit->bs);
		fd6_emit_add_group(emit, state, FD6_GROUP_VBO_BINNING, 0x1);
		fd_ringbuffer_del(state);
	}

	if (dirty & FD_DIRTY_ZSA) {
		struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa);

		if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
			fd6_emit_add_group(emit, zsa->stateobj_no_alpha, FD6_GROUP_ZSA, 0x7);
		else
			fd6_emit_add_group(emit, zsa->stateobj, FD6_GROUP_ZSA, 0x7);
	}

	if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && pfb->zsbuf) {
		struct fd_ringbuffer *state;

		state = build_lrz(emit, false);
		fd6_emit_add_group(emit, state, FD6_GROUP_LRZ, 0x6);
		fd_ringbuffer_del(state);

		state = build_lrz(emit, true);
		fd6_emit_add_group(emit, state, FD6_GROUP_LRZ_BINNING, 0x1);
		fd_ringbuffer_del(state);
	}

	if (dirty & FD_DIRTY_STENCIL_REF) {
		struct pipe_stencil_ref *sr = &ctx->stencil_ref;

		OUT_PKT4(ring, REG_A6XX_RB_STENCILREF, 1);
		OUT_RING(ring, A6XX_RB_STENCILREF_REF(sr->ref_value[0]) |
				A6XX_RB_STENCILREF_BFREF(sr->ref_value[1]));
	}

	/* NOTE: scissor enabled bit is part of rasterizer state: */
	if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER)) {
		struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);

		OUT_PKT4(ring, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0, 2);
		OUT_RING(ring, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->minx) |
				A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->miny));
		OUT_RING(ring, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->maxx - 1) |
				A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->maxy - 1));

		ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx);
		ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny);
		ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
		ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
	}

	if (dirty & FD_DIRTY_VIEWPORT) {
		struct pipe_scissor_state *scissor = &ctx->viewport_scissor;

		OUT_PKT4(ring, REG_A6XX_GRAS_CL_VPORT_XOFFSET_0, 6);
		OUT_RING(ring, A6XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0]));
		OUT_RING(ring, A6XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0]));
		OUT_RING(ring, A6XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1]));
		OUT_RING(ring, A6XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1]));
		OUT_RING(ring, A6XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2]));
		OUT_RING(ring, A6XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));

		OUT_PKT4(ring, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2);
		OUT_RING(ring, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->minx) |
				A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->miny));
		OUT_RING(ring, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->maxx - 1) |
				A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->maxy - 1));

		unsigned guardband_x = fd_calc_guardband(scissor->maxx - scissor->minx);
		unsigned guardband_y = fd_calc_guardband(scissor->maxy - scissor->miny);
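		/* The guardband clip adjust is derived from the viewport scissor
		 * extents computed above (via fd_calc_guardband()) and written to
		 * GRAS_CL_GUARDBAND_CLIP_ADJ just below.
		 */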

		OUT_PKT4(ring, REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ, 1);
		OUT_RING(ring, A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(guardband_x) |
				A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(guardband_y));
	}

	if (dirty & FD_DIRTY_PROG) {
		fd6_emit_add_group(emit, prog->stateobj, FD6_GROUP_PROG, 0x6);
		fd6_emit_add_group(emit, prog->binning_stateobj,
				FD6_GROUP_PROG_BINNING, 0x1);

		/* emit remaining non-stateobj program state, i.e. what depends
		 * on other emit state, so cannot be pre-baked.  This could
		 * be moved to a separate stateobj which is dynamically
		 * created.
		 */
		fd6_program_emit(ring, emit);
	}

	if (dirty & FD_DIRTY_RASTERIZER) {
		struct fd6_rasterizer_stateobj *rasterizer =
				fd6_rasterizer_stateobj(ctx->rasterizer);
		fd6_emit_add_group(emit, rasterizer->stateobj,
				FD6_GROUP_RASTERIZER, 0x7);
	}

	/* Since the primitive restart state is not part of a tracked object, we
	 * re-emit this register every time.
	 */
	if (emit->info && ctx->rasterizer) {
		struct fd6_rasterizer_stateobj *rasterizer =
				fd6_rasterizer_stateobj(ctx->rasterizer);
		OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9806, 1);
		OUT_RING(ring, 0);
		OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9990, 1);
		OUT_RING(ring, 0);
		OUT_PKT4(ring, REG_A6XX_VFD_UNKNOWN_A008, 1);
		OUT_RING(ring, 0);

		OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_0, 1);
		OUT_RING(ring, rasterizer->pc_primitive_cntl |
				COND(emit->info->primitive_restart && emit->info->index_size,
					A6XX_PC_PRIMITIVE_CNTL_0_PRIMITIVE_RESTART));
	}

	if (dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
		unsigned nr = pfb->nr_cbufs;

		if (ctx->rasterizer->rasterizer_discard)
			nr = 0;

		OUT_PKT4(ring, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2);
		OUT_RING(ring, COND(fp->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z) |
				COND(fp->writes_smask && pfb->samples > 1,
					A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_SAMPMASK));
		OUT_RING(ring, A6XX_RB_FS_OUTPUT_CNTL1_MRT(nr));

		OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL1, 1);
		OUT_RING(ring, A6XX_SP_FS_OUTPUT_CNTL1_MRT(nr));
	}

#define DIRTY_CONST (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST | \
		FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE)

	if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & DIRTY_CONST) {
		struct fd_ringbuffer *vsconstobj = fd_submit_new_ringbuffer(
				ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);

		OUT_WFI5(vsconstobj);
		ir3_emit_vs_consts(vp, vsconstobj, ctx, emit->info);
		fd6_emit_add_group(emit, vsconstobj, FD6_GROUP_VS_CONST, 0x7);
		fd_ringbuffer_del(vsconstobj);
	}

	if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & DIRTY_CONST) {
		struct fd_ringbuffer *fsconstobj = fd_submit_new_ringbuffer(
				ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);

		OUT_WFI5(fsconstobj);
		ir3_emit_fs_consts(fp, fsconstobj, ctx);
		fd6_emit_add_group(emit, fsconstobj, FD6_GROUP_FS_CONST, 0x6);
		fd_ringbuffer_del(fsconstobj);
	}

	struct ir3_stream_output_info *info = &vp->shader->stream_output;
	if (info->num_outputs)
		fd6_emit_streamout(ring, emit, info);

	if (dirty & FD_DIRTY_BLEND) {
		struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend);
		uint32_t i;

		for (i = 0; i < pfb->nr_cbufs; i++) {
			enum pipe_format format = pipe_surface_format(pfb->cbufs[i]);
			bool is_int = util_format_is_pure_integer(format);
			bool has_alpha = util_format_has_alpha(format);
			uint32_t control = blend->rb_mrt[i].control;
			uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha;

			if (is_int) {
				control &= A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
				control |= A6XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
			}

			if (has_alpha) {
				blend_control |= blend->rb_mrt[i].blend_control_rgb;
			} else {
				blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb;
				control &= ~A6XX_RB_MRT_CONTROL_BLEND2;
			}

			OUT_PKT4(ring, REG_A6XX_RB_MRT_CONTROL(i), 1);
			OUT_RING(ring, control);

			OUT_PKT4(ring, REG_A6XX_RB_MRT_BLEND_CONTROL(i), 1);
			OUT_RING(ring, blend_control);
		}

		OUT_PKT4(ring, REG_A6XX_SP_BLEND_CNTL, 1);
		OUT_RING(ring, blend->sp_blend_cntl);
	}

	if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_SAMPLE_MASK)) {
		struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend);

		OUT_PKT4(ring, REG_A6XX_RB_BLEND_CNTL, 1);
		OUT_RING(ring, blend->rb_blend_cntl |
				A6XX_RB_BLEND_CNTL_SAMPLE_MASK(ctx->sample_mask));
	}

	if (dirty & FD_DIRTY_BLEND_COLOR) {
		struct pipe_blend_color *bcolor = &ctx->blend_color;

		OUT_PKT4(ring, REG_A6XX_RB_BLEND_RED_F32, 4);
		OUT_RING(ring, A6XX_RB_BLEND_RED_F32(bcolor->color[0]));
		OUT_RING(ring, A6XX_RB_BLEND_GREEN_F32(bcolor->color[1]));
		OUT_RING(ring, A6XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
		OUT_RING(ring, A6XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
	}

	needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_VERTEX, vp);
	needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_FRAGMENT, fp);

	if (needs_border)
		emit_border_color(ctx, ring);

	if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] &
			(FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE)) {
		struct fd_ringbuffer *state =
			fd6_build_ibo_state(ctx, fp, PIPE_SHADER_FRAGMENT);
		struct fd_ringbuffer *obj = fd_submit_new_ringbuffer(
				ctx->batch->submit, 9 * 4, FD_RINGBUFFER_STREAMING);
		const struct ir3_ibo_mapping *mapping = &fp->image_mapping;

		OUT_PKT7(obj, CP_LOAD_STATE6, 3);
		OUT_RING(obj, CP_LOAD_STATE6_0_DST_OFF(0) |
				CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
				CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
				CP_LOAD_STATE6_0_STATE_BLOCK(SB6_IBO) |
				CP_LOAD_STATE6_0_NUM_UNIT(mapping->num_ibo));
		OUT_RB(obj, state);

		OUT_PKT4(obj, REG_A6XX_SP_IBO_LO, 2);
		OUT_RB(obj, state);

		OUT_PKT4(obj, REG_A6XX_SP_IBO_COUNT, 1);
		OUT_RING(obj, mapping->num_ibo);

		fd6_emit_add_group(emit, obj, FD6_GROUP_IBO, 0x7);
		fd_ringbuffer_del(obj);
		fd_ringbuffer_del(state);
	}

	if (emit->num_groups > 0) {
		OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * emit->num_groups);
		for (unsigned i = 0; i < emit->num_groups; i++) {
			struct fd6_state_group *g = &emit->groups[i];
			unsigned n = fd_ringbuffer_size(g->stateobj) / 4;

			if (n == 0) {
				OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
						CP_SET_DRAW_STATE__0_DISABLE |
						CP_SET_DRAW_STATE__0_ENABLE_MASK(g->enable_mask) |
						CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
				OUT_RING(ring, 0x00000000);
				OUT_RING(ring, 0x00000000);
			} else {
				OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(n) |
						CP_SET_DRAW_STATE__0_ENABLE_MASK(g->enable_mask) |
						CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
				OUT_RB(ring, g->stateobj);
			}

			fd_ringbuffer_del(g->stateobj);
		}
		emit->num_groups = 0;
	}
}

void
fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
		struct ir3_shader_variant *cp)
{
	enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_COMPUTE];

	if (dirty & (FD_DIRTY_SHADER_TEX | FD_DIRTY_SHADER_PROG |
			FD_DIRTY_SHADER_IMAGE | FD_DIRTY_SHADER_SSBO)) {
		struct fd_texture_stateobj *tex = &ctx->tex[PIPE_SHADER_COMPUTE];
		unsigned bcolor_offset = fd6_border_color_offset(ctx, PIPE_SHADER_COMPUTE, tex);

		bool needs_border = fd6_emit_textures(ctx->pipe, ring, PIPE_SHADER_COMPUTE, tex,
				bcolor_offset, cp, ctx);

		if (needs_border)
			emit_border_color(ctx, ring);

		OUT_PKT4(ring, REG_A6XX_SP_VS_TEX_COUNT, 1);
		OUT_RING(ring, 0);

		OUT_PKT4(ring, REG_A6XX_SP_HS_TEX_COUNT, 1);
		OUT_RING(ring, 0);

		OUT_PKT4(ring, REG_A6XX_SP_DS_TEX_COUNT, 1);
		OUT_RING(ring, 0);

		OUT_PKT4(ring, REG_A6XX_SP_GS_TEX_COUNT, 1);
		OUT_RING(ring, 0);

		OUT_PKT4(ring, REG_A6XX_SP_FS_TEX_COUNT, 1);
		OUT_RING(ring, 0);
	}

	if (dirty & (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE)) {
		struct fd_ringbuffer *state =
			fd6_build_ibo_state(ctx, cp, PIPE_SHADER_COMPUTE);
		const struct ir3_ibo_mapping *mapping = &cp->image_mapping;

		OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3);
		OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
				CP_LOAD_STATE6_0_STATE_TYPE(ST6_IBO) |
				CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
				CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) |
				CP_LOAD_STATE6_0_NUM_UNIT(mapping->num_ibo));
		OUT_RB(ring, state);

		OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_LO, 2);
		OUT_RB(ring, state);

		OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_COUNT, 1);
		OUT_RING(ring, mapping->num_ibo);

		fd_ringbuffer_del(state);
	}
}

/* emit setup at the beginning of a new cmdstream buffer (don't rely on
 * previous state, there could have been a context switch between ioctls):
 */
void
fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
	//struct fd_context *ctx = batch->ctx;

	fd6_cache_inv(batch, ring);

	OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
	OUT_RING(ring, 0xfffff);

/*
t7		opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords)
0000000500024048:	70d08003 00000000 001c5000 00000005
t7		opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords)
0000000500024058:	70d08003 00000010 001c7000 00000005

t7		opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
0000000500024068:	70268000
*/

	WRITE(REG_A6XX_RB_CCU_CNTL, 0x7c400004);
	WRITE(REG_A6XX_RB_UNKNOWN_8E04, 0x00100000);
	WRITE(REG_A6XX_SP_UNKNOWN_AE04, 0x8);
	WRITE(REG_A6XX_SP_UNKNOWN_AE00, 0);
	WRITE(REG_A6XX_SP_UNKNOWN_AE0F, 0x3f);
	WRITE(REG_A6XX_SP_UNKNOWN_B605, 0x44);
	WRITE(REG_A6XX_SP_UNKNOWN_B600, 0x100000);
	WRITE(REG_A6XX_HLSQ_UNKNOWN_BE00, 0x80);
	WRITE(REG_A6XX_HLSQ_UNKNOWN_BE01, 0);

	WRITE(REG_A6XX_VPC_UNKNOWN_9600, 0);
	WRITE(REG_A6XX_GRAS_UNKNOWN_8600, 0x880);
	WRITE(REG_A6XX_HLSQ_UNKNOWN_BE04, 0x80000);
	WRITE(REG_A6XX_SP_UNKNOWN_AE03, 0x1430);
	WRITE(REG_A6XX_SP_IBO_COUNT, 0);
	WRITE(REG_A6XX_SP_UNKNOWN_B182, 0);
	WRITE(REG_A6XX_HLSQ_UNKNOWN_BB11, 0);
	WRITE(REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000);
	WRITE(REG_A6XX_UCHE_CLIENT_PF, 4);
	WRITE(REG_A6XX_RB_UNKNOWN_8E01, 0x1);
	WRITE(REG_A6XX_SP_UNKNOWN_AB00, 0x5);
	WRITE(REG_A6XX_VFD_UNKNOWN_A009, 0x00000001);
	WRITE(REG_A6XX_RB_UNKNOWN_8811, 0x00000010);
	WRITE(REG_A6XX_PC_MODE_CNTL, 0x1f);

	OUT_PKT4(ring, REG_A6XX_RB_SRGB_CNTL, 1);
	OUT_RING(ring, 0);

	WRITE(REG_A6XX_GRAS_UNKNOWN_8101, 0);
	WRITE(REG_A6XX_GRAS_SAMPLE_CNTL, 0);
	WRITE(REG_A6XX_GRAS_UNKNOWN_8110, 0x2);

	WRITE(REG_A6XX_RB_RENDER_CONTROL0, 0x401);
	WRITE(REG_A6XX_RB_RENDER_CONTROL1, 0);
	WRITE(REG_A6XX_RB_FS_OUTPUT_CNTL0, 0);
	WRITE(REG_A6XX_RB_SAMPLE_CNTL, 0);
	WRITE(REG_A6XX_RB_UNKNOWN_8818, 0);
	WRITE(REG_A6XX_RB_UNKNOWN_8819, 0);
	WRITE(REG_A6XX_RB_UNKNOWN_881A, 0);
	WRITE(REG_A6XX_RB_UNKNOWN_881B, 0);
	WRITE(REG_A6XX_RB_UNKNOWN_881C, 0);
	WRITE(REG_A6XX_RB_UNKNOWN_881D, 0);
	WRITE(REG_A6XX_RB_UNKNOWN_881E, 0);
	WRITE(REG_A6XX_RB_UNKNOWN_88F0, 0);

	WRITE(REG_A6XX_VPC_UNKNOWN_9101, 0xffff00);
	WRITE(REG_A6XX_VPC_UNKNOWN_9107, 0);

	WRITE(REG_A6XX_VPC_UNKNOWN_9236,
			A6XX_VPC_UNKNOWN_9236_POINT_COORD_INVERT(0));
	WRITE(REG_A6XX_VPC_UNKNOWN_9300, 0);

	WRITE(REG_A6XX_VPC_SO_OVERRIDE, A6XX_VPC_SO_OVERRIDE_SO_DISABLE);

	WRITE(REG_A6XX_PC_UNKNOWN_9801, 0);
	WRITE(REG_A6XX_PC_UNKNOWN_9806, 0);
	WRITE(REG_A6XX_PC_UNKNOWN_9980, 0);

	WRITE(REG_A6XX_PC_UNKNOWN_9B06, 0);
	WRITE(REG_A6XX_PC_UNKNOWN_9B06, 0);

	WRITE(REG_A6XX_SP_UNKNOWN_A81B, 0);

	WRITE(REG_A6XX_SP_UNKNOWN_B183, 0);

	WRITE(REG_A6XX_GRAS_UNKNOWN_8099, 0);
	WRITE(REG_A6XX_GRAS_UNKNOWN_809B, 0);
	WRITE(REG_A6XX_GRAS_UNKNOWN_80A0, 2);
	WRITE(REG_A6XX_GRAS_UNKNOWN_80AF, 0);
	WRITE(REG_A6XX_VPC_UNKNOWN_9210, 0);
	WRITE(REG_A6XX_VPC_UNKNOWN_9211, 0);
	WRITE(REG_A6XX_VPC_UNKNOWN_9602, 0);
	WRITE(REG_A6XX_PC_UNKNOWN_9981, 0x3);
	WRITE(REG_A6XX_PC_UNKNOWN_9E72, 0);
	WRITE(REG_A6XX_VPC_UNKNOWN_9108, 0x3);
	WRITE(REG_A6XX_SP_TP_UNKNOWN_B304, 0);
	/* NOTE blob seems to (mostly?) use 0xb2 for SP_TP_UNKNOWN_B309
	 * but this seems to kill texture gather offsets.
	 */
	WRITE(REG_A6XX_SP_TP_UNKNOWN_B309, 0xa2);
	WRITE(REG_A6XX_RB_UNKNOWN_8804, 0);
	WRITE(REG_A6XX_GRAS_UNKNOWN_80A4, 0);
	WRITE(REG_A6XX_GRAS_UNKNOWN_80A5, 0);
	WRITE(REG_A6XX_GRAS_UNKNOWN_80A6, 0);
	WRITE(REG_A6XX_RB_UNKNOWN_8805, 0);
	WRITE(REG_A6XX_RB_UNKNOWN_8806, 0);
	WRITE(REG_A6XX_RB_UNKNOWN_8878, 0);
	WRITE(REG_A6XX_RB_UNKNOWN_8879, 0);
	WRITE(REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc);

	emit_marker6(ring, 7);

	OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1);
	OUT_RING(ring, 0x00000000);   /* VFD_MODE_CNTL */

	WRITE(REG_A6XX_VFD_UNKNOWN_A008, 0);

	OUT_PKT4(ring, REG_A6XX_PC_MODE_CNTL, 1);
	OUT_RING(ring, 0x0000001f);   /* PC_MODE_CNTL */

	/* we don't use this yet.. probably best to disable.. */
	OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
	OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
			CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
			CP_SET_DRAW_STATE__0_GROUP_ID(0));
	OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
	OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

	OUT_PKT4(ring, REG_A6XX_VPC_SO_BUF_CNTL, 1);
	OUT_RING(ring, 0x00000000);   /* VPC_SO_BUF_CNTL */

	OUT_PKT4(ring, REG_A6XX_SP_HS_CTRL_REG0, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT4(ring, REG_A6XX_SP_GS_CTRL_REG0, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT4(ring, REG_A6XX_RB_LRZ_CNTL, 1);
	OUT_RING(ring, 0x00000000);
}

static void
fd6_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst,
		unsigned dst_off, struct pipe_resource *src, unsigned src_off,
		unsigned sizedwords)
{
	struct fd_bo *src_bo = fd_resource(src)->bo;
	struct fd_bo *dst_bo = fd_resource(dst)->bo;
	unsigned i;

	for (i = 0; i < sizedwords; i++) {
		OUT_PKT7(ring, CP_MEM_TO_MEM, 5);
		OUT_RING(ring, 0x00000000);
		OUT_RELOCW(ring, dst_bo, dst_off, 0, 0);
		OUT_RELOC (ring, src_bo, src_off, 0, 0);

		dst_off += 4;
		src_off += 4;
	}
}

/* this is *almost* the same as fd6_cache_flush().. which I guess
 * could be re-worked to be something a bit more generic w/ param
 * indicating what needs to be flushed.. although that would mean
 * figuring out which events trigger what state to flush..
 */
static void
fd6_framebuffer_barrier(struct fd_context *ctx)
{
	struct fd6_context *fd6_ctx = fd6_context(ctx);
	struct fd_batch *batch = ctx->batch;
	struct fd_ringbuffer *ring = batch->draw;
	unsigned seqno;

	seqno = fd6_event_write(batch, ring, CACHE_FLUSH_AND_INV_EVENT, true);

	OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
	OUT_RING(ring, 0x00000013);
	OUT_RELOC(ring, fd6_ctx->blit_mem, 0, 0, 0);
	OUT_RING(ring, seqno);
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0x00000010);

	fd6_event_write(batch, ring, UNK_1D, true);
	fd6_event_write(batch, ring, UNK_1C, true);

	seqno = fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);

	fd6_event_write(batch, ring, 0x31, false);

	OUT_PKT7(ring, CP_UNK_A6XX_14, 4);
	OUT_RING(ring, 0x00000000);
	OUT_RELOC(ring, fd6_ctx->blit_mem, 0, 0, 0);
	OUT_RING(ring, seqno);
}

void
fd6_emit_init(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);
	ctx->emit_const = fd6_emit_const;
	ctx->emit_const_bo = fd6_emit_const_bo;
	ctx->emit_ib = fd6_emit_ib;
	ctx->mem_to_mem = fd6_mem_to_mem;
	ctx->framebuffer_barrier = fd6_framebuffer_barrier;
}