panvk_vX_cs.c revision 7ec681f3
1/* 2 * Copyright (C) 2021 Collabora Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24#include "genxml/gen_macros.h" 25 26#include "util/macros.h" 27#include "compiler/shader_enums.h" 28 29#include "vk_util.h" 30 31#include "panfrost-quirks.h" 32#include "pan_cs.h" 33#include "pan_encoder.h" 34#include "pan_pool.h" 35#include "pan_shader.h" 36 37#include "panvk_cs.h" 38#include "panvk_private.h" 39#include "panvk_varyings.h" 40 41static enum mali_mipmap_mode 42panvk_translate_sampler_mipmap_mode(VkSamplerMipmapMode mode) 43{ 44 switch (mode) { 45 case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MALI_MIPMAP_MODE_NEAREST; 46 case VK_SAMPLER_MIPMAP_MODE_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR; 47 default: unreachable("Invalid mipmap mode"); 48 } 49} 50 51static unsigned 52panvk_translate_sampler_address_mode(VkSamplerAddressMode mode) 53{ 54 switch (mode) { 55 case VK_SAMPLER_ADDRESS_MODE_REPEAT: return MALI_WRAP_MODE_REPEAT; 56 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT; 57 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE; 58 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER; 59 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE; 60 default: unreachable("Invalid wrap"); 61 } 62} 63 64static void 65panvk_translate_sampler_border_color(const VkSamplerCreateInfo *pCreateInfo, 66 uint32_t border_color[4]) 67{ 68 const VkSamplerCustomBorderColorCreateInfoEXT *pBorderColor = 69 vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT); 70 71 switch (pCreateInfo->borderColor) { 72 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK: 73 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK: 74 border_color[0] = border_color[1] = border_color[2] = fui(0.0); 75 border_color[3] = 76 pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK ? 77 fui(1.0) : fui(0.0); 78 break; 79 case VK_BORDER_COLOR_INT_OPAQUE_BLACK: 80 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK: 81 border_color[0] = border_color[1] = border_color[2] = 0; 82 border_color[3] = 83 pCreateInfo->borderColor == VK_BORDER_COLOR_INT_OPAQUE_BLACK ? 84 UINT_MAX : 0; 85 break; 86 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE: 87 border_color[0] = border_color[1] = border_color[2] = border_color[3] = fui(1.0); 88 break; 89 case VK_BORDER_COLOR_INT_OPAQUE_WHITE: 90 border_color[0] = border_color[1] = border_color[2] = border_color[3] = UINT_MAX; 91 break; 92 case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT: 93 case VK_BORDER_COLOR_INT_CUSTOM_EXT: 94 memcpy(border_color, pBorderColor->customBorderColor.int32, sizeof(uint32_t) * 4); 95 break; 96 default: 97 unreachable("Invalid border color"); 98 } 99} 100 101static mali_pixel_format 102panvk_varying_hw_format(const struct panvk_device *dev, 103 const struct panvk_varyings_info *varyings, 104 gl_shader_stage stage, unsigned idx) 105{ 106 const struct panfrost_device *pdev = &dev->physical_device->pdev; 107 gl_varying_slot loc = varyings->stage[stage].loc[idx]; 108 bool fs = stage == MESA_SHADER_FRAGMENT; 109 110 switch (loc) { 111 case VARYING_SLOT_PNTC: 112 case VARYING_SLOT_PSIZ: 113#if PAN_ARCH <= 6 114 return (MALI_R16F << 12) | panfrost_get_default_swizzle(1); 115#else 116 return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000; 117#endif 118 case VARYING_SLOT_POS: 119#if PAN_ARCH <= 6 120 return ((fs ? MALI_RGBA32F : MALI_SNAP_4) << 12) | 121 panfrost_get_default_swizzle(4); 122#else 123 return ((fs ? MALI_RGBA32F : MALI_SNAP_4) << 12) | 124 MALI_RGB_COMPONENT_ORDER_RGBA; 125#endif 126 default: 127 assert(!panvk_varying_is_builtin(stage, loc)); 128 if (varyings->varying[loc].format != PIPE_FORMAT_NONE) 129 return pdev->formats[varyings->varying[loc].format].hw; 130#if PAN_ARCH >= 7 131 return (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000; 132#else 133 return (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0); 134#endif 135 } 136} 137 138static void 139panvk_emit_varying(const struct panvk_device *dev, 140 const struct panvk_varyings_info *varyings, 141 gl_shader_stage stage, unsigned idx, 142 void *attrib) 143{ 144 gl_varying_slot loc = varyings->stage[stage].loc[idx]; 145 bool fs = stage == MESA_SHADER_FRAGMENT; 146 147 pan_pack(attrib, ATTRIBUTE, cfg) { 148 if (!panvk_varying_is_builtin(stage, loc)) { 149 cfg.buffer_index = varyings->varying[loc].buf; 150 cfg.offset = varyings->varying[loc].offset; 151 } else { 152 cfg.buffer_index = 153 panvk_varying_buf_index(varyings, 154 panvk_varying_buf_id(fs, loc)); 155 } 156 cfg.offset_enable = PAN_ARCH == 5; 157 cfg.format = panvk_varying_hw_format(dev, varyings, stage, idx); 158 } 159} 160 161void 162panvk_per_arch(emit_varyings)(const struct panvk_device *dev, 163 const struct panvk_varyings_info *varyings, 164 gl_shader_stage stage, 165 void *descs) 166{ 167 struct mali_attribute_packed *attrib = descs; 168 169 for (unsigned i = 0; i < varyings->stage[stage].count; i++) 170 panvk_emit_varying(dev, varyings, stage, i, attrib++); 171} 172 173static void 174panvk_emit_varying_buf(const struct panvk_varyings_info *varyings, 175 enum panvk_varying_buf_id id, void *buf) 176{ 177 unsigned buf_idx = panvk_varying_buf_index(varyings, id); 178 179 pan_pack(buf, ATTRIBUTE_BUFFER, cfg) { 180#if PAN_ARCH == 5 181 enum mali_attribute_special special_id = panvk_varying_special_buf_id(id); 182 if (special_id) { 183 cfg.type = 0; 184 cfg.special = special_id; 185 continue; 186 } 187#endif 188 unsigned offset = varyings->buf[buf_idx].address & 63; 189 190 cfg.stride = varyings->buf[buf_idx].stride; 191 cfg.size = varyings->buf[buf_idx].size + offset; 192 cfg.pointer = varyings->buf[buf_idx].address & ~63ULL; 193 } 194} 195 196void 197panvk_per_arch(emit_varying_bufs)(const struct panvk_varyings_info *varyings, 198 void *descs) 199{ 200 struct mali_attribute_buffer_packed *buf = descs; 201 202 for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) { 203 if (varyings->buf_mask & (1 << i)) 204 panvk_emit_varying_buf(varyings, i, buf++); 205 } 206 207 /* We need an empty entry to stop prefetching on Bifrost */ 208#if PAN_ARCH >= 6 209 memset(buf, 0, sizeof(*buf)); 210#endif 211} 212 213static void 214panvk_emit_attrib_buf(const struct panvk_attribs_info *info, 215 const struct panvk_draw_info *draw, 216 const struct panvk_attrib_buf *bufs, 217 unsigned buf_count, 218 unsigned idx, void *desc) 219{ 220 const struct panvk_attrib_buf_info *buf_info = &info->buf[idx]; 221 222#if PAN_ARCH == 5 223 if (buf_info->special) { 224 switch (buf_info->special_id) { 225 case PAN_VERTEX_ID: 226 panfrost_vertex_id(draw->padded_vertex_count, desc, 227 draw->instance_count > 1); 228 return; 229 case PAN_INSTANCE_ID: 230 panfrost_instance_id(draw->padded_vertex_count, desc, 231 draw->instance_count > 1); 232 return; 233 default: 234 unreachable("Invalid attribute ID"); 235 } 236 } 237#endif 238 239 assert(idx < buf_count); 240 const struct panvk_attrib_buf *buf = &bufs[idx]; 241 unsigned divisor = buf_info->per_instance ? 242 draw->padded_vertex_count : 0; 243 unsigned stride = divisor && draw->instance_count == 1 ? 244 0 : buf_info->stride; 245 mali_ptr addr = buf->address & ~63ULL; 246 unsigned size = buf->size + (buf->address & 63); 247 248 /* TODO: support instanced arrays */ 249 pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { 250 if (draw->instance_count > 1 && divisor) { 251 cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS; 252 cfg.divisor = divisor; 253 } 254 255 cfg.pointer = addr; 256 cfg.stride = stride; 257 cfg.size = size; 258 } 259} 260 261void 262panvk_per_arch(emit_attrib_bufs)(const struct panvk_attribs_info *info, 263 const struct panvk_attrib_buf *bufs, 264 unsigned buf_count, 265 const struct panvk_draw_info *draw, 266 void *descs) 267{ 268 struct mali_attribute_buffer_packed *buf = descs; 269 270 for (unsigned i = 0; i < info->buf_count; i++) 271 panvk_emit_attrib_buf(info, draw, bufs, buf_count, i, buf++); 272 273 /* A NULL entry is needed to stop prefecting on Bifrost */ 274#if PAN_ARCH >= 6 275 memset(buf, 0, sizeof(*buf)); 276#endif 277} 278 279void 280panvk_per_arch(emit_sampler)(const VkSamplerCreateInfo *pCreateInfo, 281 void *desc) 282{ 283 uint32_t border_color[4]; 284 285 panvk_translate_sampler_border_color(pCreateInfo, border_color); 286 287 pan_pack(desc, SAMPLER, cfg) { 288 cfg.magnify_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST; 289 cfg.minify_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST; 290 cfg.mipmap_mode = panvk_translate_sampler_mipmap_mode(pCreateInfo->mipmapMode); 291 cfg.normalized_coordinates = !pCreateInfo->unnormalizedCoordinates; 292 293 cfg.lod_bias = FIXED_16(pCreateInfo->mipLodBias, true); 294 cfg.minimum_lod = FIXED_16(pCreateInfo->minLod, false); 295 cfg.maximum_lod = FIXED_16(pCreateInfo->maxLod, false); 296 cfg.wrap_mode_s = panvk_translate_sampler_address_mode(pCreateInfo->addressModeU); 297 cfg.wrap_mode_t = panvk_translate_sampler_address_mode(pCreateInfo->addressModeV); 298 cfg.wrap_mode_r = panvk_translate_sampler_address_mode(pCreateInfo->addressModeW); 299 cfg.compare_function = panvk_per_arch(translate_sampler_compare_func)(pCreateInfo); 300 cfg.border_color_r = border_color[0]; 301 cfg.border_color_g = border_color[1]; 302 cfg.border_color_b = border_color[2]; 303 cfg.border_color_a = border_color[3]; 304 } 305} 306 307static void 308panvk_emit_attrib(const struct panvk_device *dev, 309 const struct panvk_attribs_info *attribs, 310 const struct panvk_attrib_buf *bufs, 311 unsigned buf_count, 312 unsigned idx, void *attrib) 313{ 314 const struct panfrost_device *pdev = &dev->physical_device->pdev; 315 316 pan_pack(attrib, ATTRIBUTE, cfg) { 317 cfg.buffer_index = attribs->attrib[idx].buf; 318 cfg.offset = attribs->attrib[idx].offset + 319 (bufs[cfg.buffer_index].address & 63); 320 cfg.format = pdev->formats[attribs->attrib[idx].format].hw; 321 } 322} 323 324void 325panvk_per_arch(emit_attribs)(const struct panvk_device *dev, 326 const struct panvk_attribs_info *attribs, 327 const struct panvk_attrib_buf *bufs, 328 unsigned buf_count, 329 void *descs) 330{ 331 struct mali_attribute_packed *attrib = descs; 332 333 for (unsigned i = 0; i < attribs->attrib_count; i++) 334 panvk_emit_attrib(dev, attribs, bufs, buf_count, i, attrib++); 335} 336 337void 338panvk_per_arch(emit_ubo)(mali_ptr address, size_t size, void *desc) 339{ 340 pan_pack(desc, UNIFORM_BUFFER, cfg) { 341 cfg.pointer = address; 342 cfg.entries = DIV_ROUND_UP(size, 16); 343 } 344} 345 346void 347panvk_per_arch(emit_ubos)(const struct panvk_pipeline *pipeline, 348 const struct panvk_descriptor_state *state, 349 void *descs) 350{ 351 struct mali_uniform_buffer_packed *ubos = descs; 352 353 for (unsigned i = 0; i < ARRAY_SIZE(state->sets); i++) { 354 const struct panvk_descriptor_set_layout *set_layout = 355 pipeline->layout->sets[i].layout; 356 const struct panvk_descriptor_set *set = state->sets[i].set; 357 unsigned offset = pipeline->layout->sets[i].ubo_offset; 358 359 if (!set_layout) 360 continue; 361 362 if (!set) { 363 unsigned num_ubos = (set_layout->num_dynoffsets != 0) + set_layout->num_ubos; 364 memset(&ubos[offset], 0, num_ubos * sizeof(*ubos)); 365 } else { 366 memcpy(&ubos[offset], set->ubos, set_layout->num_ubos * sizeof(*ubos)); 367 if (set_layout->num_dynoffsets) { 368 panvk_per_arch(emit_ubo)(state->sets[i].dynoffsets.gpu, 369 set->layout->num_dynoffsets * sizeof(uint32_t), 370 &ubos[offset + set_layout->num_ubos]); 371 } 372 } 373 } 374 375 for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) { 376 if (!pipeline->sysvals[i].ids.sysval_count) 377 continue; 378 379 panvk_per_arch(emit_ubo)(pipeline->sysvals[i].ubo ? : state->sysvals[i], 380 pipeline->sysvals[i].ids.sysval_count * 16, 381 &ubos[pipeline->sysvals[i].ubo_idx]); 382 } 383} 384 385void 386panvk_per_arch(emit_vertex_job)(const struct panvk_pipeline *pipeline, 387 const struct panvk_draw_info *draw, 388 void *job) 389{ 390 void *section = pan_section_ptr(job, COMPUTE_JOB, INVOCATION); 391 392 memcpy(section, &draw->invocation, pan_size(INVOCATION)); 393 394 pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) { 395 cfg.job_task_split = 5; 396 } 397 398 pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) { 399 cfg.draw_descriptor_is_64b = true; 400 cfg.state = pipeline->rsds[MESA_SHADER_VERTEX]; 401 cfg.attributes = draw->stages[MESA_SHADER_VERTEX].attributes; 402 cfg.attribute_buffers = draw->attribute_bufs; 403 cfg.varyings = draw->stages[MESA_SHADER_VERTEX].varyings; 404 cfg.varying_buffers = draw->varying_bufs; 405 cfg.thread_storage = draw->tls; 406 cfg.offset_start = draw->offset_start; 407 cfg.instance_size = draw->instance_count > 1 ? 408 draw->padded_vertex_count : 1; 409 cfg.uniform_buffers = draw->ubos; 410 cfg.push_uniforms = draw->stages[PIPE_SHADER_VERTEX].push_constants; 411 cfg.textures = draw->textures; 412 cfg.samplers = draw->samplers; 413 } 414} 415 416static void 417panvk_emit_tiler_primitive(const struct panvk_pipeline *pipeline, 418 const struct panvk_draw_info *draw, 419 void *prim) 420{ 421 pan_pack(prim, PRIMITIVE, cfg) { 422 cfg.draw_mode = pipeline->ia.topology; 423 if (pipeline->ia.writes_point_size) 424 cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16; 425 426 cfg.first_provoking_vertex = true; 427 if (pipeline->ia.primitive_restart) 428 cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT; 429 cfg.job_task_split = 6; 430 /* TODO: indexed draws */ 431 cfg.index_count = draw->vertex_count; 432 } 433} 434 435static void 436panvk_emit_tiler_primitive_size(const struct panvk_pipeline *pipeline, 437 const struct panvk_draw_info *draw, 438 void *primsz) 439{ 440 pan_pack(primsz, PRIMITIVE_SIZE, cfg) { 441 if (pipeline->ia.writes_point_size) { 442 cfg.size_array = draw->psiz; 443 } else { 444 cfg.constant = draw->line_width; 445 } 446 } 447} 448 449static void 450panvk_emit_tiler_dcd(const struct panvk_pipeline *pipeline, 451 const struct panvk_draw_info *draw, 452 void *dcd) 453{ 454 pan_pack(dcd, DRAW, cfg) { 455 cfg.four_components_per_vertex = true; 456 cfg.draw_descriptor_is_64b = true; 457 cfg.front_face_ccw = pipeline->rast.front_ccw; 458 cfg.cull_front_face = pipeline->rast.cull_front_face; 459 cfg.cull_back_face = pipeline->rast.cull_back_face; 460 cfg.position = draw->position; 461 cfg.state = draw->fs_rsd; 462 cfg.attributes = draw->stages[MESA_SHADER_FRAGMENT].attributes; 463 cfg.attribute_buffers = draw->attribute_bufs; 464 cfg.viewport = draw->viewport; 465 cfg.varyings = draw->stages[MESA_SHADER_FRAGMENT].varyings; 466 cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0; 467#if PAN_ARCH == 5 468 cfg.fbd = draw->fb; 469#else 470 cfg.thread_storage = draw->tls; 471#endif 472 473 /* For all primitives but lines DRAW.flat_shading_vertex must 474 * be set to 0 and the provoking vertex is selected with the 475 * PRIMITIVE.first_provoking_vertex field. 476 */ 477 if (pipeline->ia.topology == MALI_DRAW_MODE_LINES || 478 pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP || 479 pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) { 480 /* The logic is inverted on bifrost. */ 481#if PAN_ARCH == 5 482 cfg.flat_shading_vertex = false; 483#else 484 cfg.flat_shading_vertex = true; 485#endif 486 } 487 488 cfg.offset_start = draw->offset_start; 489 cfg.instance_size = draw->instance_count > 1 ? 490 draw->padded_vertex_count : 1; 491 cfg.uniform_buffers = draw->ubos; 492 cfg.push_uniforms = draw->stages[PIPE_SHADER_FRAGMENT].push_constants; 493 cfg.textures = draw->textures; 494 cfg.samplers = draw->samplers; 495 496 /* TODO: occlusion queries */ 497 } 498} 499 500void 501panvk_per_arch(emit_tiler_job)(const struct panvk_pipeline *pipeline, 502 const struct panvk_draw_info *draw, 503 void *job) 504{ 505 void *section; 506 507 section = pan_section_ptr(job, TILER_JOB, INVOCATION); 508 memcpy(section, &draw->invocation, pan_size(INVOCATION)); 509 510 section = pan_section_ptr(job, TILER_JOB, PRIMITIVE); 511 panvk_emit_tiler_primitive(pipeline, draw, section); 512 513 section = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE); 514 panvk_emit_tiler_primitive_size(pipeline, draw, section); 515 516 section = pan_section_ptr(job, TILER_JOB, DRAW); 517 panvk_emit_tiler_dcd(pipeline, draw, section); 518 519#if PAN_ARCH >= 6 520 pan_section_pack(job, TILER_JOB, TILER, cfg) { 521 cfg.address = draw->tiler_ctx->bifrost; 522 } 523 pan_section_pack(job, TILER_JOB, PADDING, padding); 524#endif 525} 526 527void 528panvk_per_arch(emit_viewport)(const VkViewport *viewport, 529 const VkRect2D *scissor, 530 void *vpd) 531{ 532 /* The spec says "width must be greater than 0.0" */ 533 assert(viewport->x >= 0); 534 int minx = (int)viewport->x; 535 int maxx = (int)(viewport->x + viewport->width); 536 537 /* Viewport height can be negative */ 538 int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height)); 539 int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height)); 540 541 assert(scissor->offset.x >= 0 && scissor->offset.y >= 0); 542 miny = MAX2(scissor->offset.x, minx); 543 miny = MAX2(scissor->offset.y, miny); 544 maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx); 545 maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy); 546 547 /* Make sure we don't end up with a max < min when width/height is 0 */ 548 maxx = maxx > minx ? maxx - 1 : maxx; 549 maxy = maxy > miny ? maxy - 1 : maxy; 550 551 assert(viewport->minDepth >= 0.0f && viewport->minDepth <= 1.0f); 552 assert(viewport->maxDepth >= 0.0f && viewport->maxDepth <= 1.0f); 553 554 pan_pack(vpd, VIEWPORT, cfg) { 555 cfg.scissor_minimum_x = minx; 556 cfg.scissor_minimum_y = miny; 557 cfg.scissor_maximum_x = maxx; 558 cfg.scissor_maximum_y = maxy; 559 cfg.minimum_z = MIN2(viewport->minDepth, viewport->maxDepth); 560 cfg.maximum_z = MAX2(viewport->minDepth, viewport->maxDepth); 561 } 562} 563 564#if PAN_ARCH >= 6 565static enum mali_register_file_format 566bifrost_blend_type_from_nir(nir_alu_type nir_type) 567{ 568 switch(nir_type) { 569 case 0: /* Render target not in use */ 570 return 0; 571 case nir_type_float16: 572 return MALI_REGISTER_FILE_FORMAT_F16; 573 case nir_type_float32: 574 return MALI_REGISTER_FILE_FORMAT_F32; 575 case nir_type_int32: 576 return MALI_REGISTER_FILE_FORMAT_I32; 577 case nir_type_uint32: 578 return MALI_REGISTER_FILE_FORMAT_U32; 579 case nir_type_int16: 580 return MALI_REGISTER_FILE_FORMAT_I16; 581 case nir_type_uint16: 582 return MALI_REGISTER_FILE_FORMAT_U16; 583 default: 584 unreachable("Unsupported blend shader type for NIR alu type"); 585 } 586} 587#endif 588 589void 590panvk_per_arch(emit_blend)(const struct panvk_device *dev, 591 const struct panvk_pipeline *pipeline, 592 unsigned rt, void *bd) 593{ 594 const struct pan_blend_state *blend = &pipeline->blend.state; 595 const struct pan_blend_rt_state *rts = &blend->rts[rt]; 596 bool dithered = false; 597 598 pan_pack(bd, BLEND, cfg) { 599 if (!blend->rt_count || !rts->equation.color_mask) { 600 cfg.enable = false; 601#if PAN_ARCH >= 6 602 cfg.internal.mode = MALI_BLEND_MODE_OFF; 603#endif 604 continue; 605 } 606 607 cfg.srgb = util_format_is_srgb(rts->format); 608 cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation); 609 cfg.round_to_fb_precision = !dithered; 610 611#if PAN_ARCH <= 5 612 cfg.blend_shader = false; 613 pan_blend_to_fixed_function_equation(blend->rts[rt].equation, 614 &cfg.equation); 615 cfg.constant = 616 pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation), 617 blend->constants); 618#else 619 const struct panfrost_device *pdev = &dev->physical_device->pdev; 620 const struct util_format_description *format_desc = 621 util_format_description(rts->format); 622 unsigned chan_size = 0; 623 for (unsigned i = 0; i < format_desc->nr_channels; i++) 624 chan_size = MAX2(format_desc->channel[i].size, chan_size); 625 626 pan_blend_to_fixed_function_equation(blend->rts[rt].equation, 627 &cfg.equation); 628 629 /* Fixed point constant */ 630 float fconst = 631 pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation), 632 blend->constants); 633 u16 constant = fconst * ((1 << chan_size) - 1); 634 constant <<= 16 - chan_size; 635 cfg.constant = constant; 636 637 if (pan_blend_is_opaque(blend->rts[rt].equation)) 638 cfg.internal.mode = MALI_BLEND_MODE_OPAQUE; 639 else 640 cfg.internal.mode = MALI_BLEND_MODE_FIXED_FUNCTION; 641 642 /* If we want the conversion to work properly, 643 * num_comps must be set to 4 644 */ 645 cfg.internal.fixed_function.num_comps = 4; 646 cfg.internal.fixed_function.conversion.memory_format = 647 panfrost_format_to_bifrost_blend(pdev, rts->format, dithered); 648 cfg.internal.fixed_function.conversion.register_format = 649 bifrost_blend_type_from_nir(pipeline->fs.info.bifrost.blend[rt].type); 650 cfg.internal.fixed_function.rt = rt; 651#endif 652 } 653} 654 655void 656panvk_per_arch(emit_blend_constant)(const struct panvk_device *dev, 657 const struct panvk_pipeline *pipeline, 658 unsigned rt, const float *constants, 659 void *bd) 660{ 661 float constant = constants[pipeline->blend.constant[rt].index]; 662 663 pan_pack(bd, BLEND, cfg) { 664 cfg.enable = false; 665#if PAN_ARCH == 5 666 cfg.constant = constant; 667#else 668 cfg.constant = constant * pipeline->blend.constant[rt].bifrost_factor; 669#endif 670 } 671} 672 673void 674panvk_per_arch(emit_dyn_fs_rsd)(const struct panvk_pipeline *pipeline, 675 const struct panvk_cmd_state *state, 676 void *rsd) 677{ 678 pan_pack(rsd, RENDERER_STATE, cfg) { 679 if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) { 680 cfg.depth_units = state->rast.depth_bias.constant_factor * 2.0f; 681 cfg.depth_factor = state->rast.depth_bias.slope_factor; 682 cfg.depth_bias_clamp = state->rast.depth_bias.clamp; 683 } 684 685 if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { 686 cfg.stencil_front.mask = state->zs.s_front.compare_mask; 687 cfg.stencil_back.mask = state->zs.s_back.compare_mask; 688 } 689 690 if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { 691 cfg.stencil_mask_misc.stencil_mask_front = state->zs.s_front.write_mask; 692 cfg.stencil_mask_misc.stencil_mask_back = state->zs.s_back.write_mask; 693 } 694 695 if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { 696 cfg.stencil_front.reference_value = state->zs.s_front.ref; 697 cfg.stencil_back.reference_value = state->zs.s_back.ref; 698 } 699 } 700} 701 702void 703panvk_per_arch(emit_base_fs_rsd)(const struct panvk_device *dev, 704 const struct panvk_pipeline *pipeline, 705 void *rsd) 706{ 707 const struct pan_shader_info *info = &pipeline->fs.info; 708 709 pan_pack(rsd, RENDERER_STATE, cfg) { 710 if (pipeline->fs.required) { 711 pan_shader_prepare_rsd(info, pipeline->fs.address, &cfg); 712 713#if PAN_ARCH == 5 714 /* If either depth or stencil is enabled, discard matters */ 715 bool zs_enabled = 716 (pipeline->zs.z_test && pipeline->zs.z_compare_func != MALI_FUNC_ALWAYS) || 717 pipeline->zs.s_test; 718 719 cfg.properties.work_register_count = info->work_reg_count; 720 cfg.properties.force_early_z = 721 info->fs.can_early_z && !pipeline->ms.alpha_to_coverage && 722 pipeline->zs.z_compare_func == MALI_FUNC_ALWAYS; 723 724 725 /* Workaround a hardware errata where early-z cannot be enabled 726 * when discarding even when the depth buffer is read-only, by 727 * lying to the hardware about the discard and setting the 728 * reads tilebuffer? flag to compensate */ 729 cfg.properties.shader_reads_tilebuffer = 730 info->fs.outputs_read || 731 (!zs_enabled && info->fs.can_discard); 732 cfg.properties.shader_contains_discard = 733 zs_enabled && info->fs.can_discard; 734#else 735 uint8_t rt_written = pipeline->fs.info.outputs_written >> FRAG_RESULT_DATA0; 736 uint8_t rt_mask = pipeline->fs.rt_mask; 737 cfg.properties.allow_forward_pixel_to_kill = 738 pipeline->fs.info.fs.can_fpk && 739 !(rt_mask & ~rt_written) && 740 !pipeline->ms.alpha_to_coverage && 741 !pipeline->blend.reads_dest; 742#endif 743 } else { 744#if PAN_ARCH == 5 745 cfg.shader.shader = 0x1; 746 cfg.properties.work_register_count = 1; 747 cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION; 748 cfg.properties.force_early_z = true; 749#else 750 cfg.properties.shader_modifies_coverage = true; 751 cfg.properties.allow_forward_pixel_to_kill = true; 752 cfg.properties.allow_forward_pixel_to_be_killed = true; 753 cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY; 754#endif 755 } 756 757 bool msaa = pipeline->ms.rast_samples > 1; 758 cfg.multisample_misc.multisample_enable = msaa; 759 cfg.multisample_misc.sample_mask = 760 msaa ? pipeline->ms.sample_mask : UINT16_MAX; 761 762 cfg.multisample_misc.depth_function = 763 pipeline->zs.z_test ? pipeline->zs.z_compare_func : MALI_FUNC_ALWAYS; 764 765 cfg.multisample_misc.depth_write_mask = pipeline->zs.z_write; 766 cfg.multisample_misc.fixed_function_near_discard = !pipeline->rast.clamp_depth; 767 cfg.multisample_misc.fixed_function_far_discard = !pipeline->rast.clamp_depth; 768 cfg.multisample_misc.shader_depth_range_fixed = true; 769 770 cfg.stencil_mask_misc.stencil_enable = pipeline->zs.s_test; 771 cfg.stencil_mask_misc.alpha_to_coverage = pipeline->ms.alpha_to_coverage; 772 cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS; 773 cfg.stencil_mask_misc.depth_range_1 = pipeline->rast.depth_bias.enable; 774 cfg.stencil_mask_misc.depth_range_2 = pipeline->rast.depth_bias.enable; 775 cfg.stencil_mask_misc.single_sampled_lines = pipeline->ms.rast_samples <= 1; 776 777 if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS))) { 778 cfg.depth_units = pipeline->rast.depth_bias.constant_factor * 2.0f; 779 cfg.depth_factor = pipeline->rast.depth_bias.slope_factor; 780 cfg.depth_bias_clamp = pipeline->rast.depth_bias.clamp; 781 } 782 783 if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK))) { 784 cfg.stencil_front.mask = pipeline->zs.s_front.compare_mask; 785 cfg.stencil_back.mask = pipeline->zs.s_back.compare_mask; 786 } 787 788 if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK))) { 789 cfg.stencil_mask_misc.stencil_mask_front = pipeline->zs.s_front.write_mask; 790 cfg.stencil_mask_misc.stencil_mask_back = pipeline->zs.s_back.write_mask; 791 } 792 793 if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))) { 794 cfg.stencil_front.reference_value = pipeline->zs.s_front.ref; 795 cfg.stencil_back.reference_value = pipeline->zs.s_back.ref; 796 } 797 798 cfg.stencil_front.compare_function = pipeline->zs.s_front.compare_func; 799 cfg.stencil_front.stencil_fail = pipeline->zs.s_front.fail_op; 800 cfg.stencil_front.depth_fail = pipeline->zs.s_front.z_fail_op; 801 cfg.stencil_front.depth_pass = pipeline->zs.s_front.pass_op; 802 cfg.stencil_back.compare_function = pipeline->zs.s_back.compare_func; 803 cfg.stencil_back.stencil_fail = pipeline->zs.s_back.fail_op; 804 cfg.stencil_back.depth_fail = pipeline->zs.s_back.z_fail_op; 805 cfg.stencil_back.depth_pass = pipeline->zs.s_back.pass_op; 806 } 807} 808 809void 810panvk_per_arch(emit_non_fs_rsd)(const struct panvk_device *dev, 811 const struct pan_shader_info *shader_info, 812 mali_ptr shader_ptr, 813 void *rsd) 814{ 815 assert(shader_info->stage != MESA_SHADER_FRAGMENT); 816 817 pan_pack(rsd, RENDERER_STATE, cfg) { 818 pan_shader_prepare_rsd(shader_info, shader_ptr, &cfg); 819 } 820} 821 822void 823panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev, 824 unsigned width, unsigned height, 825 const struct panfrost_ptr *descs) 826{ 827#if PAN_ARCH == 5 828 unreachable("Not supported on v5"); 829#else 830 const struct panfrost_device *pdev = &dev->physical_device->pdev; 831 832 pan_pack(descs->cpu + pan_size(TILER_CONTEXT), TILER_HEAP, cfg) { 833 cfg.size = pdev->tiler_heap->size; 834 cfg.base = pdev->tiler_heap->ptr.gpu; 835 cfg.bottom = pdev->tiler_heap->ptr.gpu; 836 cfg.top = pdev->tiler_heap->ptr.gpu + pdev->tiler_heap->size; 837 } 838 839 pan_pack(descs->cpu, TILER_CONTEXT, cfg) { 840 cfg.hierarchy_mask = 0x28; 841 cfg.fb_width = width; 842 cfg.fb_height = height; 843 cfg.heap = descs->gpu + pan_size(TILER_CONTEXT); 844 } 845#endif 846} 847