pan_context.c revision 9f464c52
1/* 2 * © Copyright 2018 Alyssa Rosenzweig 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
 *
 */

#include <sys/poll.h>
#include <errno.h>

#include "pan_context.h"
#include "pan_swizzle.h"
#include "pan_format.h"

#include "util/macros.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "util/u_memory.h"
#include "util/u_vbuf.h"
#include "util/half_float.h"
#include "util/u_helpers.h"
#include "util/u_format.h"
#include "indices/u_primconvert.h"
#include "tgsi/tgsi_parse.h"
#include "util/u_math.h"

#include "pan_screen.h"
#include "pan_blending.h"
#include "pan_blend_shaders.h"
#include "pan_util.h"
#include "pan_wallpaper.h"

/* Index of the next performance counter dump (used elsewhere in the file) */
static int performance_counter_number = 0;

/* Base path for performance counter dumps; defined in another TU */
extern const char *pan_counters_base;

/* Do not actually send anything to the GPU; merely generate the cmdstream as fast as possible. Disables framebuffer writes */
//#define DRY_RUN

/* Can a given format support AFBC? Not all can. Currently only RGBA8-style
 * variants are accepted; everything else takes the uncompressed path. */

static bool
panfrost_can_afbc(enum pipe_format format)
{
        const struct util_format_description *desc =
                util_format_description(format);

        if (util_format_is_rgba8_variant(desc))
                return true;

        /* TODO: AFBC of other formats */

        return false;
}

/* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically
 * independent between color buffers and depth/stencil). To enable, we allocate
 * the AFBC metadata buffer and mark that it is enabled. We do -not- actually
 * edit the fragment job here. This routine should be called ONCE per
 * AFBC-compressed buffer, rather than on every frame.
 */

/* Switch @rsrc to the AFBC layout: size and allocate the metadata+body slab
 * and repoint the BO at it. @ds is true for depth/stencil resources, which
 * take an untagged pointer; color buffers get the low tag bit set. */
static void
panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsrc, bool ds)
{
        /* Only the MFBD path supports AFBC here */
        if (ctx->require_sfbd) {
                DBG("AFBC not supported yet on SFBD\n");
                assert(0);
        }

        struct pipe_context *gallium = (struct pipe_context *) ctx;
        struct panfrost_screen *screen = pan_screen(gallium->screen);
        /* AFBC metadata is 16 bytes per tile */
        int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;
        int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;
        int bytes_per_pixel = util_format_get_blocksize(rsrc->base.format);
        int stride = bytes_per_pixel * ALIGN(rsrc->base.width0, 16);

        stride *= 2;  /* TODO: Should this be carried over? */
        int main_size = stride * rsrc->base.height0;
        rsrc->bo->afbc_metadata_size = tile_w * tile_h * 16;

        /* Allocate the AFBC slab itself, large enough to hold the above
         * (size is in 4k pages, rounded up) */
        screen->driver->allocate_slab(screen, &rsrc->bo->afbc_slab,
                               (rsrc->bo->afbc_metadata_size + main_size + 4095) / 4096,
                               true, 0, 0, 0);

        rsrc->bo->layout = PAN_AFBC;

        /* Compressed textured reads use a tagged pointer to the metadata */

        rsrc->bo->gpu = rsrc->bo->afbc_slab.gpu | (ds ? 0 : 1);
        rsrc->bo->cpu = rsrc->bo->afbc_slab.cpu;
        rsrc->bo->gem_handle = rsrc->bo->afbc_slab.gem_handle;
}

/* Allocate the per-tile checksum (transaction elimination) buffer for @rsrc
 * and flag the resource as checksummed. */
static void
panfrost_enable_checksum(struct panfrost_context *ctx, struct panfrost_resource *rsrc)
{
        struct pipe_context *gallium = (struct pipe_context *) ctx;
        struct panfrost_screen *screen = pan_screen(gallium->screen);
        int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;
        int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;

        /* 8 byte checksum per tile */
        rsrc->bo->checksum_stride = tile_w * 8;
        int pages = (((rsrc->bo->checksum_stride * tile_h) + 4095) / 4096);
        screen->driver->allocate_slab(screen, &rsrc->bo->checksum_slab, pages, false, 0, 0, 0);

        rsrc->bo->has_checksum = true;
}

/* Framebuffer descriptor */

/* Write width/height (biased per MALI_POSITIVE) and the derived
 * resolution_check field into a single-framebuffer descriptor. */
static void
panfrost_set_framebuffer_resolution(struct mali_single_framebuffer *fb, int w, int h)
{
        fb->width = MALI_POSITIVE(w);
        fb->height = MALI_POSITIVE(h);

        /* No idea why this is needed, but it's how resolution_check is
         * calculated. It's not clear to us yet why the hardware wants this.
         * The formula itself was discovered mostly by manual bruteforce and
         * aggressive algebraic simplification. */

        fb->resolution_check = ((w + h) / 3) << 4;
}

/* Build the single-framebuffer (SFBD) descriptor template for the current
 * pipe framebuffer state. Magic constants are as captured from traces. */
struct mali_single_framebuffer
panfrost_emit_sfbd(struct panfrost_context *ctx)
{
        struct mali_single_framebuffer framebuffer = {
                .unknown2 = 0x1f,
                .format = 0x30000000,
                .clear_flags = 0x1000,
                .unknown_address_0 = ctx->scratchpad.gpu,
                .unknown_address_1 = ctx->misc_0.gpu,
                .unknown_address_2 = ctx->misc_0.gpu + 40960,
                .tiler_flags = 0xf0,
                .tiler_heap_free = ctx->tiler_heap.gpu,
                .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
        };

        panfrost_set_framebuffer_resolution(&framebuffer, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height);

        return framebuffer;
}

/* Build the multi-framebuffer (MFBD) descriptor template for the current
 * pipe framebuffer state. */
struct bifrost_framebuffer
panfrost_emit_mfbd(struct panfrost_context *ctx)
{
        struct bifrost_framebuffer framebuffer = {
                /* It is not yet clear what tiler_meta means or how it's
                 * calculated, but we can tell the lower 32-bits are a
                 * (monotonically increasing?) function of tile count and
                 * geometry complexity; I suspect it defines a memory size of
                 * some kind? for the tiler. It's really unclear at the
                 * moment... but to add to the confusion, the hardware is happy
                 * enough to accept a zero in this field, so we don't even have
                 * to worry about it right now.
                 *
                 * The byte (just after the 32-bit mark) is much more
                 * interesting. The higher nibble I've only ever seen as 0xF,
                 * but the lower one I've seen as 0x0 or 0xF, and it's not
                 * obvious what the difference is. But what -is- obvious is
                 * that when the lower nibble is zero, performance is severely
                 * degraded compared to when the lower nibble is set.
                 * Evidently, that nibble enables some sort of fast path,
                 * perhaps relating to caching or tile flush? Regardless, at
                 * this point there's no clear reason not to set it, aside from
                 * substantially increased memory requirements (of the misc_0
                 * buffer) */

                .tiler_meta = ((uint64_t) 0xff << 32) | 0x0,

                .width1 = MALI_POSITIVE(ctx->pipe_framebuffer.width),
                .height1 = MALI_POSITIVE(ctx->pipe_framebuffer.height),
                .width2 = MALI_POSITIVE(ctx->pipe_framebuffer.width),
                .height2 = MALI_POSITIVE(ctx->pipe_framebuffer.height),

                .unk1 = 0x1080,

                /* TODO: MRT */
                .rt_count_1 = MALI_POSITIVE(1),
                .rt_count_2 = 4,

                .unknown2 = 0x1f,

                /* Corresponds to unknown_address_X of SFBD */
                .scratchpad = ctx->scratchpad.gpu,
                .tiler_scratch_start = ctx->misc_0.gpu,

                /* The constant added here is, like the lower word of
                 * tiler_meta, (loosely) another product of framebuffer size
                 * and geometry complexity. It must be sufficiently large for
                 * the tiler_meta fast path to work; if it's too small, there
                 * will be DATA_INVALID_FAULTs. Conversely, it must be less
                 * than the total size of misc_0, or else there's no room. It's
                 * possible this constant configures a partition between two
                 * parts of misc_0? We haven't investigated the functionality,
                 * as these buffers are internally used by the hardware
                 * (presumably by the tiler) but not seemingly touched by the driver
                 */

                .tiler_scratch_middle = ctx->misc_0.gpu + 0xf0000,

                .tiler_heap_start = ctx->tiler_heap.gpu,
                .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
        };

        return framebuffer;
}

/* Are we currently rendering to the screen (rather than an FBO)? */

bool
panfrost_is_scanout(struct panfrost_context *ctx)
{
        /* If there is no color buffer, it's an FBO */
        if (!ctx->pipe_framebuffer.nr_cbufs)
                return false;

        /* If we're too early that no framebuffer was sent, it's scanout */
        if (!ctx->pipe_framebuffer.cbufs[0])
                return true;

        return ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_DISPLAY_TARGET ||
               ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_SCANOUT ||
               ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_SHARED;
}

/* Pack a clear color into the 32-bit word the hardware expects for @format.
 * Only RGBA8 variants and B5G6R5 are handled; anything else asserts. */
static uint32_t
pan_pack_color(const union pipe_color_union *color, enum pipe_format format)
{
        /* Alpha magicked to 1.0 if there is no alpha */

        bool has_alpha = util_format_has_alpha(format);
        float clear_alpha = has_alpha ? color->f[3] : 1.0f;

        /* Packed color depends on the framebuffer format */

        const struct util_format_description *desc =
                util_format_description(format);

        if (util_format_is_rgba8_variant(desc)) {
                return (float_to_ubyte(clear_alpha) << 24) |
                       (float_to_ubyte(color->f[2]) << 16) |
                       (float_to_ubyte(color->f[1]) << 8) |
                       (float_to_ubyte(color->f[0]) << 0);
        } else if (format == PIPE_FORMAT_B5G6R5_UNORM) {
                /* First, we convert the components to R5, G6, B5 separately */
                unsigned r5 = CLAMP(color->f[0], 0.0, 1.0) * 31.0;
                unsigned g6 = CLAMP(color->f[1], 0.0, 1.0) * 63.0;
                unsigned b5 = CLAMP(color->f[2], 0.0, 1.0) * 31.0;

                /* Then we pack into a sparse u32. TODO: Why these shifts? */
                return (b5 << 25) | (g6 << 14) | (r5 << 5);
        } else {
                /* Unknown format */
                assert(0);
        }

        return 0;
}

/* Gallium clear entry point. Clears are deferred: we just record the packed
 * clear values and requested buffers on the current job. */
static void
panfrost_clear(
        struct pipe_context *pipe,
        unsigned buffers,
        const union pipe_color_union *color,
        double depth, unsigned stencil)
{
        struct panfrost_context *ctx = pan_context(pipe);
        struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);

        if (buffers & PIPE_CLEAR_COLOR) {
                enum pipe_format format = ctx->pipe_framebuffer.cbufs[0]->format;
                job->clear_color = pan_pack_color(color, format);
        }

        if (buffers & PIPE_CLEAR_DEPTH) {
                job->clear_depth = depth;
        }

        if (buffers & PIPE_CLEAR_STENCIL) {
                job->clear_stencil = stencil;
        }

        job->clear |= buffers;
}

/* Upload the MFBD template (plus an opaque 1024-byte block and a render
 * target list) to transient memory and return a tagged pointer to it. */
static mali_ptr
panfrost_attach_vt_mfbd(struct panfrost_context *ctx)
{
        /* MFBD needs a sequential semi-render target upload, but what exactly this is, is beyond me for now */
        struct bifrost_render_target rts_list[] = {
                {
                        .chunknown = {
                                .unk = 0x30005,
                        },
                        .framebuffer = ctx->misc_0.gpu,
                        .zero2 = 0x3,
                },
        };

        /* Allocate memory for the three components */
        int size = 1024 + sizeof(ctx->vt_framebuffer_mfbd) + sizeof(rts_list);
        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);

        /* Opaque 1024-block */
        rts_list[0].chunknown.pointer = transfer.gpu;

        memcpy(transfer.cpu + 1024, &ctx->vt_framebuffer_mfbd, sizeof(ctx->vt_framebuffer_mfbd));
        memcpy(transfer.cpu + 1024 + sizeof(ctx->vt_framebuffer_mfbd), rts_list, sizeof(rts_list));

        return (transfer.gpu + 1024) | MALI_MFBD;
}

/* Upload the SFBD template to transient memory; return a tagged pointer. */
static mali_ptr
panfrost_attach_vt_sfbd(struct panfrost_context *ctx)
{
        return panfrost_upload_transient(ctx, &ctx->vt_framebuffer_sfbd, sizeof(ctx->vt_framebuffer_sfbd)) | MALI_SFBD;
}

/* Upload whichever framebuffer descriptor the chip needs and point both the
 * vertex and tiler payloads at it. */
static void
panfrost_attach_vt_framebuffer(struct panfrost_context *ctx)
{
mali_ptr framebuffer = ctx->require_sfbd ? 345 panfrost_attach_vt_sfbd(ctx) : 346 panfrost_attach_vt_mfbd(ctx); 347 348 ctx->payload_vertex.postfix.framebuffer = framebuffer; 349 ctx->payload_tiler.postfix.framebuffer = framebuffer; 350} 351 352/* Reset per-frame context, called on context initialisation as well as after 353 * flushing a frame */ 354 355static void 356panfrost_invalidate_frame(struct panfrost_context *ctx) 357{ 358 unsigned transient_count = ctx->transient_pools[ctx->cmdstream_i].entry_index*ctx->transient_pools[0].entry_size + ctx->transient_pools[ctx->cmdstream_i].entry_offset; 359 DBG("Uploaded transient %d bytes\n", transient_count); 360 361 /* Rotate cmdstream */ 362 if ((++ctx->cmdstream_i) == (sizeof(ctx->transient_pools) / sizeof(ctx->transient_pools[0]))) 363 ctx->cmdstream_i = 0; 364 365 if (ctx->require_sfbd) 366 ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx); 367 else 368 ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx); 369 370 /* Reset varyings allocated */ 371 ctx->varying_height = 0; 372 373 /* The transient cmdstream is dirty every frame; the only bits worth preserving 374 * (textures, shaders, etc) are in other buffers anyways */ 375 376 ctx->transient_pools[ctx->cmdstream_i].entry_index = 0; 377 ctx->transient_pools[ctx->cmdstream_i].entry_offset = 0; 378 379 /* Regenerate payloads */ 380 panfrost_attach_vt_framebuffer(ctx); 381 382 if (ctx->rasterizer) 383 ctx->dirty |= PAN_DIRTY_RASTERIZER; 384 385 /* XXX */ 386 ctx->dirty |= PAN_DIRTY_SAMPLERS | PAN_DIRTY_TEXTURES; 387} 388 389/* In practice, every field of these payloads should be configurable 390 * arbitrarily, which means these functions are basically catch-all's for 391 * as-of-yet unwavering unknowns */ 392 393static void 394panfrost_emit_vertex_payload(struct panfrost_context *ctx) 395{ 396 struct midgard_payload_vertex_tiler payload = { 397 .prefix = { 398 .workgroups_z_shift = 32, 399 .workgroups_x_shift_2 = 0x2, 400 .workgroups_x_shift_3 = 0x5, 401 }, 402 
.gl_enables = 0x4 | (ctx->is_t6xx ? 0 : 0x2), 403 }; 404 405 memcpy(&ctx->payload_vertex, &payload, sizeof(payload)); 406} 407 408static void 409panfrost_emit_tiler_payload(struct panfrost_context *ctx) 410{ 411 struct midgard_payload_vertex_tiler payload = { 412 .prefix = { 413 .workgroups_z_shift = 32, 414 .workgroups_x_shift_2 = 0x2, 415 .workgroups_x_shift_3 = 0x6, 416 417 .zero1 = 0xffff, /* Why is this only seen on test-quad-textured? */ 418 }, 419 }; 420 421 memcpy(&ctx->payload_tiler, &payload, sizeof(payload)); 422} 423 424static unsigned 425translate_tex_wrap(enum pipe_tex_wrap w) 426{ 427 switch (w) { 428 case PIPE_TEX_WRAP_REPEAT: 429 return MALI_WRAP_REPEAT; 430 431 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 432 return MALI_WRAP_CLAMP_TO_EDGE; 433 434 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 435 return MALI_WRAP_CLAMP_TO_BORDER; 436 437 case PIPE_TEX_WRAP_MIRROR_REPEAT: 438 return MALI_WRAP_MIRRORED_REPEAT; 439 440 default: 441 assert(0); 442 return 0; 443 } 444} 445 446static unsigned 447translate_tex_filter(enum pipe_tex_filter f) 448{ 449 switch (f) { 450 case PIPE_TEX_FILTER_NEAREST: 451 return MALI_NEAREST; 452 453 case PIPE_TEX_FILTER_LINEAR: 454 return MALI_LINEAR; 455 456 default: 457 assert(0); 458 return 0; 459 } 460} 461 462static unsigned 463translate_mip_filter(enum pipe_tex_mipfilter f) 464{ 465 return (f == PIPE_TEX_MIPFILTER_LINEAR) ? 
MALI_MIP_LINEAR : 0; 466} 467 468static unsigned 469panfrost_translate_compare_func(enum pipe_compare_func in) 470{ 471 switch (in) { 472 case PIPE_FUNC_NEVER: 473 return MALI_FUNC_NEVER; 474 475 case PIPE_FUNC_LESS: 476 return MALI_FUNC_LESS; 477 478 case PIPE_FUNC_EQUAL: 479 return MALI_FUNC_EQUAL; 480 481 case PIPE_FUNC_LEQUAL: 482 return MALI_FUNC_LEQUAL; 483 484 case PIPE_FUNC_GREATER: 485 return MALI_FUNC_GREATER; 486 487 case PIPE_FUNC_NOTEQUAL: 488 return MALI_FUNC_NOTEQUAL; 489 490 case PIPE_FUNC_GEQUAL: 491 return MALI_FUNC_GEQUAL; 492 493 case PIPE_FUNC_ALWAYS: 494 return MALI_FUNC_ALWAYS; 495 } 496 497 assert (0); 498 return 0; /* Unreachable */ 499} 500 501static unsigned 502panfrost_translate_alt_compare_func(enum pipe_compare_func in) 503{ 504 switch (in) { 505 case PIPE_FUNC_NEVER: 506 return MALI_ALT_FUNC_NEVER; 507 508 case PIPE_FUNC_LESS: 509 return MALI_ALT_FUNC_LESS; 510 511 case PIPE_FUNC_EQUAL: 512 return MALI_ALT_FUNC_EQUAL; 513 514 case PIPE_FUNC_LEQUAL: 515 return MALI_ALT_FUNC_LEQUAL; 516 517 case PIPE_FUNC_GREATER: 518 return MALI_ALT_FUNC_GREATER; 519 520 case PIPE_FUNC_NOTEQUAL: 521 return MALI_ALT_FUNC_NOTEQUAL; 522 523 case PIPE_FUNC_GEQUAL: 524 return MALI_ALT_FUNC_GEQUAL; 525 526 case PIPE_FUNC_ALWAYS: 527 return MALI_ALT_FUNC_ALWAYS; 528 } 529 530 assert (0); 531 return 0; /* Unreachable */ 532} 533 534static unsigned 535panfrost_translate_stencil_op(enum pipe_stencil_op in) 536{ 537 switch (in) { 538 case PIPE_STENCIL_OP_KEEP: 539 return MALI_STENCIL_KEEP; 540 541 case PIPE_STENCIL_OP_ZERO: 542 return MALI_STENCIL_ZERO; 543 544 case PIPE_STENCIL_OP_REPLACE: 545 return MALI_STENCIL_REPLACE; 546 547 case PIPE_STENCIL_OP_INCR: 548 return MALI_STENCIL_INCR; 549 550 case PIPE_STENCIL_OP_DECR: 551 return MALI_STENCIL_DECR; 552 553 case PIPE_STENCIL_OP_INCR_WRAP: 554 return MALI_STENCIL_INCR_WRAP; 555 556 case PIPE_STENCIL_OP_DECR_WRAP: 557 return MALI_STENCIL_DECR_WRAP; 558 559 case PIPE_STENCIL_OP_INVERT: 560 return 
MALI_STENCIL_INVERT; 561 } 562 563 assert (0); 564 return 0; /* Unreachable */ 565} 566 567static void 568panfrost_make_stencil_state(const struct pipe_stencil_state *in, struct mali_stencil_test *out) 569{ 570 out->ref = 0; /* Gallium gets it from elsewhere */ 571 572 out->mask = in->valuemask; 573 out->func = panfrost_translate_compare_func(in->func); 574 out->sfail = panfrost_translate_stencil_op(in->fail_op); 575 out->dpfail = panfrost_translate_stencil_op(in->zfail_op); 576 out->dppass = panfrost_translate_stencil_op(in->zpass_op); 577} 578 579static void 580panfrost_default_shader_backend(struct panfrost_context *ctx) 581{ 582 struct mali_shader_meta shader = { 583 .alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000), 584 585 .unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010, 586 .unknown2_4 = MALI_NO_MSAA | 0x4e0, 587 }; 588 589 if (ctx->is_t6xx) { 590 shader.unknown2_4 |= 0x10; 591 } 592 593 struct pipe_stencil_state default_stencil = { 594 .enabled = 0, 595 .func = PIPE_FUNC_ALWAYS, 596 .fail_op = MALI_STENCIL_KEEP, 597 .zfail_op = MALI_STENCIL_KEEP, 598 .zpass_op = MALI_STENCIL_KEEP, 599 .writemask = 0xFF, 600 .valuemask = 0xFF 601 }; 602 603 panfrost_make_stencil_state(&default_stencil, &shader.stencil_front); 604 shader.stencil_mask_front = default_stencil.writemask; 605 606 panfrost_make_stencil_state(&default_stencil, &shader.stencil_back); 607 shader.stencil_mask_back = default_stencil.writemask; 608 609 if (default_stencil.enabled) 610 shader.unknown2_4 |= MALI_STENCIL_TEST; 611 612 memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader)); 613} 614 615/* Generates a vertex/tiler job. This is, in some sense, the heart of the 616 * graphics command stream. It should be called once per draw, accordding to 617 * presentations. Set is_tiler for "tiler" jobs (fragment shader jobs, but in 618 * Mali parlance, "fragment" refers to framebuffer writeout). Clear it for 619 * vertex jobs. 
 */

struct panfrost_transfer
panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler, bool is_elided_tiler)
{
        /* Each draw call corresponds to two jobs, and we want to offset to leave room for the set-value job */
        int draw_job_index = 1 + (2 * ctx->draw_count);

        struct mali_job_descriptor_header job = {
                .job_type = is_tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX,
                .job_index = draw_job_index + (is_tiler ? 1 : 0),
#ifdef __LP64__
                .job_descriptor_size = 1,
#endif
        };

        /* Only non-elided tiler jobs have dependencies which are known at this point */

        if (is_tiler && !is_elided_tiler) {
                /* Tiler jobs depend on vertex jobs */

                job.job_dependency_index_1 = draw_job_index;

                /* Tiler jobs also depend on the previous tiler job */

                if (ctx->draw_count)
                        job.job_dependency_index_2 = draw_job_index - 1;
        }

        struct midgard_payload_vertex_tiler *payload = is_tiler ? &ctx->payload_tiler : &ctx->payload_vertex;

        /* There's some padding hacks on 32-bit */

#ifdef __LP64__
        int offset = 0;
#else
        int offset = 4;
#endif
        /* Upload header followed by payload; on 32-bit the payload is shifted
         * back by 4 bytes to overlap the header padding */
        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(*payload));
        memcpy(transfer.cpu, &job, sizeof(job));
        memcpy(transfer.cpu + sizeof(job) - offset, payload, sizeof(*payload));
        return transfer;
}

/* Generates a set value job. It's unclear what exactly this does, why it's
 * necessary, and when to call it.
 */

static void
panfrost_set_value_job(struct panfrost_context *ctx)
{
        struct mali_job_descriptor_header job = {
                .job_type = JOB_TYPE_SET_VALUE,
                .job_descriptor_size = 1,
                .job_index = 1 + (2 * ctx->draw_count),
        };

        struct mali_payload_set_value payload = {
                .out = ctx->misc_0.gpu,
                .unknown = 0x3,
        };

        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(payload));
        memcpy(transfer.cpu, &job, sizeof(job));
        memcpy(transfer.cpu + sizeof(job), &payload, sizeof(payload));

        /* Remember both CPU and GPU views so the job chain can be patched later */
        ctx->u_set_value_job = (struct mali_job_descriptor_header *) transfer.cpu;
        ctx->set_value_job = transfer.gpu;
}

/* Carve a linear varying buffer of @stride bytes x @count invocations out of
 * varying_mem, fill in @slot, and return its GPU address. */
static mali_ptr
panfrost_emit_varyings(
        struct panfrost_context *ctx,
        union mali_attr *slot,
        unsigned stride,
        unsigned count)
{
        mali_ptr varying_address = ctx->varying_mem.gpu + ctx->varying_height;

        /* Fill out the descriptor */
        slot->elements = varying_address | MALI_ATTR_LINEAR;
        slot->stride = stride;
        slot->size = stride * count;

        ctx->varying_height += ALIGN(slot->size, 64);
        assert(ctx->varying_height < ctx->varying_mem.size);

        return varying_address;
}

/* Fill @slot with the special gl_PointCoord descriptor (no backing memory). */
static void
panfrost_emit_point_coord(union mali_attr *slot)
{
        slot->elements = MALI_VARYING_POINT_COORD | MALI_ATTR_LINEAR;
        slot->stride = slot->size = 0;
}

/* Upload varying metadata for both shader stages and allocate the varying
 * buffers (general varyings, gl_Position, optionally gl_PointSize and
 * gl_PointCoord) for @invocation_count invocations. */
static void
panfrost_emit_varying_descriptor(
        struct panfrost_context *ctx,
        unsigned invocation_count)
{
        /* Load the shaders */

        struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];
        struct panfrost_shader_state *fs = &ctx->fs->variants[ctx->fs->active_variant];

        /* Allocate the varying descriptor */

        size_t vs_size = sizeof(struct mali_attr_meta) * vs->tripipe->varying_count;
        size_t fs_size = sizeof(struct mali_attr_meta) * fs->tripipe->varying_count;

        struct panfrost_transfer trans = panfrost_allocate_transient(ctx,
                                        vs_size + fs_size);

        memcpy(trans.cpu, vs->varyings, vs_size);
        memcpy(trans.cpu + vs_size, fs->varyings, fs_size);

        ctx->payload_vertex.postfix.varying_meta = trans.gpu;
        ctx->payload_tiler.postfix.varying_meta = trans.gpu + vs_size;

        /* Buffer indices must be in this order per our convention */
        union mali_attr varyings[PIPE_MAX_ATTRIBS];
        unsigned idx = 0;

        /* General varyings -- use the VS's, since those are more likely to be
         * accurate on desktop */

        panfrost_emit_varyings(ctx, &varyings[idx++],
                               vs->general_varying_stride, invocation_count);

        /* fp32 vec4 gl_Position */
        ctx->payload_tiler.postfix.position_varying =
                panfrost_emit_varyings(ctx, &varyings[idx++],
                                       sizeof(float) * 4, invocation_count);


        if (vs->writes_point_size || fs->reads_point_coord) {
                /* fp16 vec1 gl_PointSize */
                ctx->payload_tiler.primitive_size.pointer =
                        panfrost_emit_varyings(ctx, &varyings[idx++],
                                               2, invocation_count);
        }

        if (fs->reads_point_coord) {
                /* Special descriptor */
                panfrost_emit_point_coord(&varyings[idx++]);
        }

        mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, idx * sizeof(union mali_attr));
        ctx->payload_vertex.postfix.varyings = varyings_p;
        ctx->payload_tiler.postfix.varyings = varyings_p;
}

/* GPU address of vertex buffer @i, including its pipe-level offset. */
static mali_ptr
panfrost_vertex_buffer_address(struct panfrost_context *ctx, unsigned i)
{
        struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i];
        struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);

        return rsrc->bo->gpu + buf->buffer_offset;
}

/* Emits attributes and varying descriptors, which should be called every draw,
 * excepting some obscure circumstances */

static void
panfrost_emit_vertex_data(struct panfrost_context *ctx, struct panfrost_job *job)
{
        /* Staged mali_attr, and index into them. i =/= k, depending on the
         * vertex buffer mask */
        union mali_attr attrs[PIPE_MAX_ATTRIBS];
        unsigned k = 0;

        unsigned invocation_count = MALI_NEGATIVE(ctx->payload_tiler.prefix.invocation_count);

        for (int i = 0; i < ARRAY_SIZE(ctx->vertex_buffers); ++i) {
                if (!(ctx->vb_mask & (1 << i))) continue;

                struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i];
                struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);

                if (!rsrc) continue;

                /* Align to 64 bytes by masking off the lower bits. This
                 * will be adjusted back when we fixup the src_offset in
                 * mali_attr_meta */

                mali_ptr addr = panfrost_vertex_buffer_address(ctx, i) & ~63;

                /* Offset vertex count by draw_start to make sure we upload enough */
                attrs[k].stride = buf->stride;
                attrs[k].size = rsrc->base.width0;

                panfrost_job_add_bo(job, rsrc->bo);
                attrs[k].elements = addr | MALI_ATTR_LINEAR;

                ++k;
        }

        ctx->payload_vertex.postfix.attributes = panfrost_upload_transient(ctx, attrs, k * sizeof(union mali_attr));

        panfrost_emit_varying_descriptor(ctx, invocation_count);
}

/* True when the bound VS writes gl_PointSize and we are drawing points, so
 * the per-vertex size varying must be linked rather than a constant. */
static bool
panfrost_writes_point_size(struct panfrost_context *ctx)
{
        assert(ctx->vs);
        struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];

        return vs->writes_point_size && ctx->payload_tiler.prefix.draw_mode == MALI_POINTS;
}

/* Stage the attribute descriptors so we can adjust src_offset
 * to let BOs align nicely */

static void
panfrost_stage_attributes(struct panfrost_context *ctx)
{
        struct panfrost_vertex_state *so = ctx->vertex;

        size_t sz = sizeof(struct mali_attr_meta) * so->num_elements;
        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sz);
        struct mali_attr_meta *target = (struct mali_attr_meta *) transfer.cpu;

        /* Copy as-is for the first pass */
        memcpy(target, so->hw, sz);

        /* Fixup offsets for the second pass. Recall that the hardware
         * calculates attribute addresses as:
         *
         *      addr = base + (stride * vtx) + src_offset;
         *
         * However, on Mali, base must be aligned to 64-bytes, so we
         * instead let:
         *
         *      base' = base & ~63 = base - (base & 63)
         *
         * To compensate when using base' (see emit_vertex_data), we have
         * to adjust src_offset by the masked off piece:
         *
         *      addr' = base' + (stride * vtx) + (src_offset + (base & 63))
         *            = base - (base & 63) + (stride * vtx) + src_offset + (base & 63)
         *            = base + (stride * vtx) + src_offset
         *            = addr;
         *
         * QED.
         */

        for (unsigned i = 0; i < so->num_elements; ++i) {
                unsigned vbi = so->pipe[i].vertex_buffer_index;
                mali_ptr addr = panfrost_vertex_buffer_address(ctx, vbi);

                /* Adjust by the masked off bits of the offset */
                target[i].src_offset += (addr & 63);
        }

        ctx->payload_vertex.postfix.attribute_meta = transfer.gpu;
}

/* Go through dirty flags and actualise them in the cmdstream.
*/ 880 881void 882panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) 883{ 884 struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); 885 886 if (with_vertex_data) { 887 panfrost_emit_vertex_data(ctx, job); 888 } 889 890 bool msaa = ctx->rasterizer->base.multisample; 891 892 if (ctx->dirty & PAN_DIRTY_RASTERIZER) { 893 ctx->payload_tiler.gl_enables = ctx->rasterizer->tiler_gl_enables; 894 895 /* TODO: Sample size */ 896 SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa); 897 SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !msaa); 898 } 899 900 /* Enable job requirements at draw-time */ 901 902 if (msaa) 903 job->requirements |= PAN_REQ_MSAA; 904 905 if (ctx->depth_stencil->depth.writemask) 906 job->requirements |= PAN_REQ_DEPTH_WRITE; 907 908 if (ctx->occlusion_query) { 909 ctx->payload_tiler.gl_enables |= MALI_OCCLUSION_QUERY | MALI_OCCLUSION_PRECISE; 910 ctx->payload_tiler.postfix.occlusion_counter = ctx->occlusion_query->transfer.gpu; 911 } 912 913 if (ctx->dirty & PAN_DIRTY_VS) { 914 assert(ctx->vs); 915 916 struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant]; 917 918 /* Late shader descriptor assignments */ 919 920 vs->tripipe->texture_count = ctx->sampler_view_count[PIPE_SHADER_VERTEX]; 921 vs->tripipe->sampler_count = ctx->sampler_count[PIPE_SHADER_VERTEX]; 922 923 /* Who knows */ 924 vs->tripipe->midgard1.unknown1 = 0x2201; 925 926 ctx->payload_vertex.postfix._shader_upper = vs->tripipe_gpu >> 4; 927 } 928 929 if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) { 930 /* Check if we need to link the gl_PointSize varying */ 931 if (!panfrost_writes_point_size(ctx)) { 932 /* If the size is constant, write it out. Otherwise, 933 * don't touch primitive_size (since we would clobber 934 * the pointer there) */ 935 936 ctx->payload_tiler.primitive_size.constant = ctx->rasterizer->base.line_width; 937 } 938 } 939 940 /* TODO: Maybe dirty track FS, maybe not. 
For now, it's transient. */ 941 if (ctx->fs) 942 ctx->dirty |= PAN_DIRTY_FS; 943 944 if (ctx->dirty & PAN_DIRTY_FS) { 945 assert(ctx->fs); 946 struct panfrost_shader_state *variant = &ctx->fs->variants[ctx->fs->active_variant]; 947 948#define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name 949 950 COPY(shader); 951 COPY(attribute_count); 952 COPY(varying_count); 953 COPY(midgard1.uniform_count); 954 COPY(midgard1.work_count); 955 COPY(midgard1.unknown2); 956 957#undef COPY 958 /* If there is a blend shader, work registers are shared */ 959 960 if (ctx->blend->has_blend_shader) 961 ctx->fragment_shader_core.midgard1.work_count = /*MAX2(ctx->fragment_shader_core.midgard1.work_count, ctx->blend->blend_work_count)*/16; 962 963 /* Set late due to depending on render state */ 964 /* The one at the end seems to mean "1 UBO" */ 965 ctx->fragment_shader_core.midgard1.unknown1 = MALI_NO_ALPHA_TO_COVERAGE | 0x200 | 0x2201; 966 967 /* Assign texture/sample count right before upload */ 968 ctx->fragment_shader_core.texture_count = ctx->sampler_view_count[PIPE_SHADER_FRAGMENT]; 969 ctx->fragment_shader_core.sampler_count = ctx->sampler_count[PIPE_SHADER_FRAGMENT]; 970 971 /* Assign the stencil refs late */ 972 ctx->fragment_shader_core.stencil_front.ref = ctx->stencil_ref.ref_value[0]; 973 ctx->fragment_shader_core.stencil_back.ref = ctx->stencil_ref.ref_value[1]; 974 975 /* CAN_DISCARD should be set if the fragment shader possibly 976 * contains a 'discard' instruction. It is likely this is 977 * related to optimizations related to forward-pixel kill, as 978 * per "Mali Performance 3: Is EGL_BUFFER_PRESERVED a good 979 * thing?" 
by Peter Harris 980 */ 981 982 if (variant->can_discard) { 983 ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD; 984 ctx->fragment_shader_core.midgard1.unknown1 &= ~MALI_NO_ALPHA_TO_COVERAGE; 985 ctx->fragment_shader_core.midgard1.unknown1 |= 0x4000; 986 ctx->fragment_shader_core.midgard1.unknown1 = 0x4200; 987 } 988 989 /* Check if we're using the default blend descriptor (fast path) */ 990 991 bool no_blending = 992 !ctx->blend->has_blend_shader && 993 (ctx->blend->equation.rgb_mode == 0x122) && 994 (ctx->blend->equation.alpha_mode == 0x122) && 995 (ctx->blend->equation.color_mask == 0xf); 996 997 /* Even on MFBD, the shader descriptor gets blend shaders. It's 998 * *also* copied to the blend_meta appended (by convention), 999 * but this is the field actually read by the hardware. (Or 1000 * maybe both are read...?) */ 1001 1002 if (ctx->blend->has_blend_shader) { 1003 ctx->fragment_shader_core.blend_shader = ctx->blend->blend_shader; 1004 } 1005 1006 if (ctx->require_sfbd) { 1007 /* When only a single render target platform is used, the blend 1008 * information is inside the shader meta itself. 
We 1009 * additionally need to signal CAN_DISCARD for nontrivial blend 1010 * modes (so we're able to read back the destination buffer) */ 1011 1012 if (!ctx->blend->has_blend_shader) { 1013 memcpy(&ctx->fragment_shader_core.blend_equation, &ctx->blend->equation, sizeof(ctx->blend->equation)); 1014 } 1015 1016 if (!no_blending) { 1017 ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD; 1018 } 1019 } 1020 1021 size_t size = sizeof(struct mali_shader_meta) + sizeof(struct mali_blend_meta); 1022 struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size); 1023 memcpy(transfer.cpu, &ctx->fragment_shader_core, sizeof(struct mali_shader_meta)); 1024 1025 ctx->payload_tiler.postfix._shader_upper = (transfer.gpu) >> 4; 1026 1027 if (!ctx->require_sfbd) { 1028 /* Additional blend descriptor tacked on for jobs using MFBD */ 1029 1030 unsigned blend_count = 0; 1031 1032 if (ctx->blend->has_blend_shader) { 1033 /* For a blend shader, the bottom nibble corresponds to 1034 * the number of work registers used, which signals the 1035 * -existence- of a blend shader */ 1036 1037 assert(ctx->blend->blend_work_count >= 2); 1038 blend_count |= MIN2(ctx->blend->blend_work_count, 3); 1039 } else { 1040 /* Otherwise, the bottom bit simply specifies if 1041 * blending (anything other than REPLACE) is enabled */ 1042 1043 1044 if (!no_blending) 1045 blend_count |= 0x1; 1046 } 1047 1048 /* Second blend equation is always a simple replace */ 1049 1050 uint64_t replace_magic = 0xf0122122; 1051 struct mali_blend_equation replace_mode; 1052 memcpy(&replace_mode, &replace_magic, sizeof(replace_mode)); 1053 1054 struct mali_blend_meta blend_meta[] = { 1055 { 1056 .unk1 = 0x200 | blend_count, 1057 .blend_equation_1 = ctx->blend->equation, 1058 .blend_equation_2 = replace_mode 1059 }, 1060 }; 1061 1062 if (ctx->blend->has_blend_shader) { 1063 blend_meta[0].blend_shader = ctx->blend->blend_shader; 1064 } 1065 1066 memcpy(transfer.cpu + sizeof(struct mali_shader_meta), 
blend_meta, sizeof(blend_meta)); 1067 } 1068 } 1069 1070 /* We stage to transient, so always dirty.. */ 1071 panfrost_stage_attributes(ctx); 1072 1073 if (ctx->dirty & PAN_DIRTY_SAMPLERS) { 1074 /* Upload samplers back to back, no padding */ 1075 1076 for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) { 1077 if (!ctx->sampler_count[t]) continue; 1078 1079 struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(struct mali_sampler_descriptor) * ctx->sampler_count[t]); 1080 struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *) transfer.cpu; 1081 1082 for (int i = 0; i < ctx->sampler_count[t]; ++i) { 1083 desc[i] = ctx->samplers[t][i]->hw; 1084 } 1085 1086 if (t == PIPE_SHADER_FRAGMENT) 1087 ctx->payload_tiler.postfix.sampler_descriptor = transfer.gpu; 1088 else if (t == PIPE_SHADER_VERTEX) 1089 ctx->payload_vertex.postfix.sampler_descriptor = transfer.gpu; 1090 else 1091 assert(0); 1092 } 1093 } 1094 1095 if (ctx->dirty & PAN_DIRTY_TEXTURES) { 1096 for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) { 1097 /* Shortcircuit */ 1098 if (!ctx->sampler_view_count[t]) continue; 1099 1100 uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS]; 1101 1102 for (int i = 0; i < ctx->sampler_view_count[t]; ++i) { 1103 if (!ctx->sampler_views[t][i]) 1104 continue; 1105 1106 struct pipe_resource *tex_rsrc = ctx->sampler_views[t][i]->base.texture; 1107 struct panfrost_resource *rsrc = (struct panfrost_resource *) tex_rsrc; 1108 1109 /* Inject the addresses in, interleaving cube 1110 * faces and mip levels appropriately. 
*/ 1111 1112 for (int l = 0; l <= tex_rsrc->last_level; ++l) { 1113 for (int f = 0; f < tex_rsrc->array_size; ++f) { 1114 unsigned idx = (l * tex_rsrc->array_size) + f; 1115 1116 ctx->sampler_views[t][i]->hw.swizzled_bitmaps[idx] = 1117 rsrc->bo->gpu + 1118 rsrc->bo->slices[l].offset + 1119 f * rsrc->bo->cubemap_stride; 1120 } 1121 } 1122 1123 trampolines[i] = panfrost_upload_transient(ctx, &ctx->sampler_views[t][i]->hw, sizeof(struct mali_texture_descriptor)); 1124 } 1125 1126 mali_ptr trampoline = panfrost_upload_transient(ctx, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]); 1127 1128 if (t == PIPE_SHADER_FRAGMENT) 1129 ctx->payload_tiler.postfix.texture_trampoline = trampoline; 1130 else if (t == PIPE_SHADER_VERTEX) 1131 ctx->payload_vertex.postfix.texture_trampoline = trampoline; 1132 else 1133 assert(0); 1134 } 1135 } 1136 1137 const struct pipe_viewport_state *vp = &ctx->pipe_viewport; 1138 1139 /* For flipped-Y buffers (signaled by negative scale), the translate is 1140 * flipped as well */ 1141 1142 bool invert_y = vp->scale[1] < 0.0; 1143 float translate_y = vp->translate[1]; 1144 1145 if (invert_y) 1146 translate_y = ctx->pipe_framebuffer.height - translate_y; 1147 1148 for (int i = 0; i <= PIPE_SHADER_FRAGMENT; ++i) { 1149 struct panfrost_constant_buffer *buf = &ctx->constant_buffer[i]; 1150 1151 struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant]; 1152 struct panfrost_shader_state *fs = &ctx->fs->variants[ctx->fs->active_variant]; 1153 struct panfrost_shader_state *ss = (i == PIPE_SHADER_FRAGMENT) ? 
fs : vs; 1154 1155 /* Allocate room for the sysval and the uniforms */ 1156 size_t sys_size = sizeof(float) * 4 * ss->sysval_count; 1157 size_t size = sys_size + buf->size; 1158 struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size); 1159 1160 /* Upload sysvals requested by the shader */ 1161 float *uniforms = (float *) transfer.cpu; 1162 for (unsigned i = 0; i < ss->sysval_count; ++i) { 1163 int sysval = ss->sysval[i]; 1164 1165 if (sysval == PAN_SYSVAL_VIEWPORT_SCALE) { 1166 uniforms[4*i + 0] = vp->scale[0]; 1167 uniforms[4*i + 1] = fabsf(vp->scale[1]); 1168 uniforms[4*i + 2] = vp->scale[2]; 1169 } else if (sysval == PAN_SYSVAL_VIEWPORT_OFFSET) { 1170 uniforms[4*i + 0] = vp->translate[0]; 1171 uniforms[4*i + 1] = translate_y; 1172 uniforms[4*i + 2] = vp->translate[2]; 1173 } else { 1174 assert(0); 1175 } 1176 } 1177 1178 /* Upload uniforms */ 1179 memcpy(transfer.cpu + sys_size, buf->buffer, buf->size); 1180 1181 int uniform_count = 0; 1182 1183 struct mali_vertex_tiler_postfix *postfix; 1184 1185 switch (i) { 1186 case PIPE_SHADER_VERTEX: 1187 uniform_count = ctx->vs->variants[ctx->vs->active_variant].uniform_count; 1188 postfix = &ctx->payload_vertex.postfix; 1189 break; 1190 1191 case PIPE_SHADER_FRAGMENT: 1192 uniform_count = ctx->fs->variants[ctx->fs->active_variant].uniform_count; 1193 postfix = &ctx->payload_tiler.postfix; 1194 break; 1195 1196 default: 1197 DBG("Unknown shader stage %d in uniform upload\n", i); 1198 assert(0); 1199 } 1200 1201 /* Also attach the same buffer as a UBO for extended access */ 1202 1203 struct mali_uniform_buffer_meta uniform_buffers[] = { 1204 { 1205 .size = MALI_POSITIVE((2 + uniform_count)), 1206 .ptr = transfer.gpu >> 2, 1207 }, 1208 }; 1209 1210 mali_ptr ubufs = panfrost_upload_transient(ctx, uniform_buffers, sizeof(uniform_buffers)); 1211 postfix->uniforms = transfer.gpu; 1212 postfix->uniform_buffers = ubufs; 1213 1214 buf->dirty = 0; 1215 } 1216 1217 /* TODO: Upload the viewport somewhere more 
appropriate */ 1218 1219 /* Clip bounds are encoded as floats. The viewport itself is encoded as 1220 * (somewhat) asymmetric ints. */ 1221 const struct pipe_scissor_state *ss = &ctx->scissor; 1222 1223 struct mali_viewport view = { 1224 /* By default, do no viewport clipping, i.e. clip to (-inf, 1225 * inf) in each direction. Clipping to the viewport in theory 1226 * should work, but in practice causes issues when we're not 1227 * explicitly trying to scissor */ 1228 1229 .clip_minx = -inff, 1230 .clip_miny = -inff, 1231 .clip_maxx = inff, 1232 .clip_maxy = inff, 1233 1234 .clip_minz = 0.0, 1235 .clip_maxz = 1.0, 1236 }; 1237 1238 /* Always scissor to the viewport by default. */ 1239 view.viewport0[0] = (int) (vp->translate[0] - vp->scale[0]); 1240 view.viewport1[0] = MALI_POSITIVE((int) (vp->translate[0] + vp->scale[0])); 1241 1242 view.viewport0[1] = (int) (translate_y - fabs(vp->scale[1])); 1243 view.viewport1[1] = MALI_POSITIVE((int) (translate_y + fabs(vp->scale[1]))); 1244 1245 if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) { 1246 /* Invert scissor if needed */ 1247 unsigned miny = invert_y ? 1248 ctx->pipe_framebuffer.height - ss->maxy : ss->miny; 1249 1250 unsigned maxy = invert_y ? 1251 ctx->pipe_framebuffer.height - ss->miny : ss->maxy; 1252 1253 /* Set the actual scissor */ 1254 view.viewport0[0] = ss->minx; 1255 view.viewport0[1] = miny; 1256 view.viewport1[0] = MALI_POSITIVE(ss->maxx); 1257 view.viewport1[1] = MALI_POSITIVE(maxy); 1258 } 1259 1260 ctx->payload_tiler.postfix.viewport = 1261 panfrost_upload_transient(ctx, 1262 &view, 1263 sizeof(struct mali_viewport)); 1264 1265 ctx->dirty = 0; 1266} 1267 1268/* Corresponds to exactly one draw, but does not submit anything */ 1269 1270static void 1271panfrost_queue_draw(struct panfrost_context *ctx) 1272{ 1273 /* TODO: Expand the array? 
*/ 1274 if (ctx->draw_count >= MAX_DRAW_CALLS) { 1275 DBG("Job buffer overflow, ignoring draw\n"); 1276 assert(0); 1277 } 1278 1279 /* Handle dirty flags now */ 1280 panfrost_emit_for_draw(ctx, true); 1281 1282 struct panfrost_transfer vertex = panfrost_vertex_tiler_job(ctx, false, false); 1283 struct panfrost_transfer tiler = panfrost_vertex_tiler_job(ctx, true, false); 1284 1285 ctx->u_vertex_jobs[ctx->vertex_job_count] = (struct mali_job_descriptor_header *) vertex.cpu; 1286 ctx->vertex_jobs[ctx->vertex_job_count++] = vertex.gpu; 1287 1288 ctx->u_tiler_jobs[ctx->tiler_job_count] = (struct mali_job_descriptor_header *) tiler.cpu; 1289 ctx->tiler_jobs[ctx->tiler_job_count++] = tiler.gpu; 1290 1291 ctx->draw_count++; 1292} 1293 1294/* At the end of the frame, the vertex and tiler jobs are linked together and 1295 * then the fragment job is plonked at the end. Set value job is first for 1296 * unknown reasons. */ 1297 1298static void 1299panfrost_link_job_pair(struct mali_job_descriptor_header *first, mali_ptr next) 1300{ 1301 if (first->job_descriptor_size) 1302 first->next_job_64 = (u64) (uintptr_t) next; 1303 else 1304 first->next_job_32 = (u32) (uintptr_t) next; 1305} 1306 1307static void 1308panfrost_link_jobs(struct panfrost_context *ctx) 1309{ 1310 if (ctx->draw_count) { 1311 /* Generate the set_value_job */ 1312 panfrost_set_value_job(ctx); 1313 1314 /* Have the first vertex job depend on the set value job */ 1315 ctx->u_vertex_jobs[0]->job_dependency_index_1 = ctx->u_set_value_job->job_index; 1316 1317 /* SV -> V */ 1318 panfrost_link_job_pair(ctx->u_set_value_job, ctx->vertex_jobs[0]); 1319 } 1320 1321 /* V -> V/T ; T -> T/null */ 1322 for (int i = 0; i < ctx->vertex_job_count; ++i) { 1323 bool isLast = (i + 1) == ctx->vertex_job_count; 1324 1325 panfrost_link_job_pair(ctx->u_vertex_jobs[i], isLast ? 
ctx->tiler_jobs[0] : ctx->vertex_jobs[i + 1]); 1326 } 1327 1328 /* T -> T/null */ 1329 for (int i = 0; i < ctx->tiler_job_count; ++i) { 1330 bool isLast = (i + 1) == ctx->tiler_job_count; 1331 panfrost_link_job_pair(ctx->u_tiler_jobs[i], isLast ? 0 : ctx->tiler_jobs[i + 1]); 1332 } 1333} 1334 1335/* The entire frame is in memory -- send it off to the kernel! */ 1336 1337static void 1338panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate, 1339 struct pipe_fence_handle **fence, 1340 struct panfrost_job *job) 1341{ 1342 struct pipe_context *gallium = (struct pipe_context *) ctx; 1343 struct panfrost_screen *screen = pan_screen(gallium->screen); 1344 1345 /* Edge case if screen is cleared and nothing else */ 1346 bool has_draws = ctx->draw_count > 0; 1347 1348 /* Workaround a bizarre lockup (a hardware errata?) */ 1349 if (!has_draws) 1350 flush_immediate = true; 1351 1352 /* A number of jobs are batched -- this must be linked and cleared */ 1353 panfrost_link_jobs(ctx); 1354 1355 ctx->draw_count = 0; 1356 ctx->vertex_job_count = 0; 1357 ctx->tiler_job_count = 0; 1358 1359#ifndef DRY_RUN 1360 1361 bool is_scanout = panfrost_is_scanout(ctx); 1362 screen->driver->submit_vs_fs_job(ctx, has_draws, is_scanout); 1363 1364 /* If visual, we can stall a frame */ 1365 1366 if (!flush_immediate) 1367 screen->driver->force_flush_fragment(ctx, fence); 1368 1369 screen->last_fragment_flushed = false; 1370 screen->last_job = job; 1371 1372 /* If readback, flush now (hurts the pipelined performance) */ 1373 if (flush_immediate) 1374 screen->driver->force_flush_fragment(ctx, fence); 1375 1376 if (screen->driver->dump_counters && pan_counters_base) { 1377 screen->driver->dump_counters(screen); 1378 1379 char filename[128]; 1380 snprintf(filename, sizeof(filename), "%s/frame%d.mdgprf", pan_counters_base, ++performance_counter_number); 1381 FILE *fp = fopen(filename, "wb"); 1382 fwrite(screen->perf_counters.cpu, 4096, sizeof(uint32_t), fp); 1383 fclose(fp); 1384 } 
1385 1386#endif 1387} 1388 1389void 1390panfrost_flush( 1391 struct pipe_context *pipe, 1392 struct pipe_fence_handle **fence, 1393 unsigned flags) 1394{ 1395 struct panfrost_context *ctx = pan_context(pipe); 1396 struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); 1397 1398 /* Nothing to do! */ 1399 if (!ctx->draw_count && !job->clear) return; 1400 1401 /* Whether to stall the pipeline for immediately correct results */ 1402 bool flush_immediate = flags & PIPE_FLUSH_END_OF_FRAME; 1403 1404 /* Submit the frame itself */ 1405 panfrost_submit_frame(ctx, flush_immediate, fence, job); 1406 1407 /* Prepare for the next frame */ 1408 panfrost_invalidate_frame(ctx); 1409} 1410 1411#define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_##c; 1412 1413static int 1414g2m_draw_mode(enum pipe_prim_type mode) 1415{ 1416 switch (mode) { 1417 DEFINE_CASE(POINTS); 1418 DEFINE_CASE(LINES); 1419 DEFINE_CASE(LINE_LOOP); 1420 DEFINE_CASE(LINE_STRIP); 1421 DEFINE_CASE(TRIANGLES); 1422 DEFINE_CASE(TRIANGLE_STRIP); 1423 DEFINE_CASE(TRIANGLE_FAN); 1424 DEFINE_CASE(QUADS); 1425 DEFINE_CASE(QUAD_STRIP); 1426 DEFINE_CASE(POLYGON); 1427 1428 default: 1429 DBG("Illegal draw mode %d\n", mode); 1430 assert(0); 1431 return MALI_LINE_LOOP; 1432 } 1433} 1434 1435#undef DEFINE_CASE 1436 1437static unsigned 1438panfrost_translate_index_size(unsigned size) 1439{ 1440 switch (size) { 1441 case 1: 1442 return MALI_DRAW_INDEXED_UINT8; 1443 1444 case 2: 1445 return MALI_DRAW_INDEXED_UINT16; 1446 1447 case 4: 1448 return MALI_DRAW_INDEXED_UINT32; 1449 1450 default: 1451 DBG("Unknown index size %d\n", size); 1452 assert(0); 1453 return 0; 1454 } 1455} 1456 1457/* Gets a GPU address for the associated index buffer. 
Only guaranteed to be
 * good for the duration of the draw (transient), could last longer */

static mali_ptr
panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe_draw_info *info)
{
        struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource);

        /* Byte offset of the first index used by this draw */
        off_t offset = info->start * info->index_size;

        if (!info->has_user_indices) {
                /* Only resources can be directly mapped */
                return rsrc->bo->gpu + offset;
        } else {
                /* Otherwise, we need to upload to transient memory */
                const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
                return panfrost_upload_transient(ctx, ibuf8 + offset, info->count * info->index_size);
        }
}

/* Gallium draw entry point: translates the pipe_draw_info into the vertex and
 * tiler payloads and queues one draw. Unsupported primitive modes are either
 * converted (single quad -> triangle fan) or handed to u_primconvert. */

static void
panfrost_draw_vbo(
        struct pipe_context *pipe,
        const struct pipe_draw_info *info)
{
        struct panfrost_context *ctx = pan_context(pipe);

        ctx->payload_vertex.draw_start = info->start;
        ctx->payload_tiler.draw_start = info->start;

        int mode = info->mode;

        /* Fallback for unsupported modes */

        if (!(ctx->draw_modes & (1 << mode))) {
                if (mode == PIPE_PRIM_QUADS && info->count == 4 && ctx->rasterizer && !ctx->rasterizer->base.flatshade) {
                        /* A single non-flatshaded quad renders identically as
                         * a triangle fan */
                        mode = PIPE_PRIM_TRIANGLE_FAN;
                } else {
                        if (info->count < 4) {
                                /* Degenerate case? */
                                return;
                        }

                        util_primconvert_save_rasterizer_state(ctx->primconvert, &ctx->rasterizer->base);
                        util_primconvert_draw_vbo(ctx->primconvert, info);
                        return;
                }
        }

        /* Now that we have a guaranteed terminating path, find the job.
         * Assignment commented out to prevent unused warning */

        /* struct panfrost_job *job = */ panfrost_get_job_for_fbo(ctx);

        ctx->payload_tiler.prefix.draw_mode = g2m_draw_mode(mode);

        ctx->vertex_count = info->count;

        /* For non-indexed draws, they're the same */
        unsigned invocation_count = ctx->vertex_count;

        unsigned draw_flags = 0;

        /* The draw flags interpret how primitive size is interpreted */

        if (panfrost_writes_point_size(ctx))
                draw_flags |= MALI_DRAW_VARYING_SIZE;

        /* For higher amounts of vertices (greater than what fits in a 16-bit
         * short), the other value is needed, otherwise there will be bizarre
         * rendering artefacts. It's not clear what these values mean yet. */

        draw_flags |= (mode == PIPE_PRIM_POINTS || ctx->vertex_count > 65535) ? 0x3000 : 0x18000;

        if (info->index_size) {
                /* Calculate the min/max index used so we can figure out how
                 * many times to invoke the vertex shader */

                /* Fetch / calculate index bounds */
                unsigned min_index = 0, max_index = 0;

                if (info->max_index == ~0u) {
                        u_vbuf_get_minmax_index(pipe, info, &min_index, &max_index);
                } else {
                        min_index = info->min_index;
                        max_index = info->max_index;
                }

                /* Use the corresponding values */
                invocation_count = max_index - min_index + 1;
                ctx->payload_vertex.draw_start = min_index;
                ctx->payload_tiler.draw_start = min_index;

                ctx->payload_tiler.prefix.negative_start = -min_index;
                ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(info->count);

                //assert(!info->restart_index); /* TODO: Research */
                assert(!info->index_bias);

                draw_flags |= panfrost_translate_index_size(info->index_size);
                ctx->payload_tiler.prefix.indices = panfrost_get_index_buffer_mapped(ctx, info);
        } else {
                /* Index count == vertex count, if no indexing is applied, as
                 * if it is internally indexed in the expected order */

                ctx->payload_tiler.prefix.negative_start = 0;
                ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(ctx->vertex_count);

                /* Reverse index state */
                ctx->payload_tiler.prefix.indices = (uintptr_t) NULL;
        }

        ctx->payload_vertex.prefix.invocation_count = MALI_POSITIVE(invocation_count);
        ctx->payload_tiler.prefix.invocation_count = MALI_POSITIVE(invocation_count);
        ctx->payload_tiler.prefix.unknown_draw = draw_flags;

        /* Fire off the draw itself */
        panfrost_queue_draw(ctx);
}

/* CSO state */

/* Generic destructor for CSOs that own no resources beyond their own
 * allocation. */

static void
panfrost_generic_cso_delete(struct pipe_context *pctx, void *hwcso)
{
        free(hwcso);
}

/* Create a rasterizer CSO: stores the Gallium state and precomputes the
 * hardware tiler enable bitmask (front-face winding and culling). */

static void *
panfrost_create_rasterizer_state(
        struct pipe_context *pctx,
        const struct pipe_rasterizer_state *cso)
{
        struct panfrost_context *ctx = pan_context(pctx);
        struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer);

        so->base = *cso;

        /* Bitmask, unknown meaning of the start value */
        so->tiler_gl_enables = ctx->is_t6xx ? 0x105 : 0x7;

        so->tiler_gl_enables |= MALI_FRONT_FACE(
                cso->front_ccw ? MALI_CCW : MALI_CW);

        if (cso->cull_face & PIPE_FACE_FRONT)
                so->tiler_gl_enables |= MALI_CULL_FACE_FRONT;

        if (cso->cull_face & PIPE_FACE_BACK)
                so->tiler_gl_enables |= MALI_CULL_FACE_BACK;

        return so;
}

static void
panfrost_bind_rasterizer_state(
        struct pipe_context *pctx,
        void *hwcso)
{
        struct panfrost_context *ctx = pan_context(pctx);

        /* TODO: Why can't rasterizer be NULL ever? Other drivers are fine..
*/ 1618 if (!hwcso) 1619 return; 1620 1621 ctx->rasterizer = hwcso; 1622 ctx->dirty |= PAN_DIRTY_RASTERIZER; 1623} 1624 1625static void * 1626panfrost_create_vertex_elements_state( 1627 struct pipe_context *pctx, 1628 unsigned num_elements, 1629 const struct pipe_vertex_element *elements) 1630{ 1631 struct panfrost_vertex_state *so = CALLOC_STRUCT(panfrost_vertex_state); 1632 1633 so->num_elements = num_elements; 1634 memcpy(so->pipe, elements, sizeof(*elements) * num_elements); 1635 1636 /* XXX: What the cornball? This is totally, 100%, unapologetically 1637 * nonsense. And yet it somehow fixes a regression in -bshadow 1638 * (previously, we allocated the descriptor here... a newer commit 1639 * removed that allocation, and then memory corruption led to 1640 * shader_meta getting overwritten in bad ways and then the whole test 1641 * case falling apart . TODO: LOOK INTO PLEASE XXX XXX BAD XXX XXX XXX 1642 */ 1643 panfrost_allocate_chunk(pan_context(pctx), 0, HEAP_DESCRIPTOR); 1644 1645 for (int i = 0; i < num_elements; ++i) { 1646 so->hw[i].index = elements[i].vertex_buffer_index; 1647 1648 enum pipe_format fmt = elements[i].src_format; 1649 const struct util_format_description *desc = util_format_description(fmt); 1650 so->hw[i].unknown1 = 0x2; 1651 so->hw[i].swizzle = panfrost_get_default_swizzle(desc->nr_channels); 1652 1653 so->hw[i].format = panfrost_find_format(desc); 1654 1655 /* The field itself should probably be shifted over */ 1656 so->hw[i].src_offset = elements[i].src_offset; 1657 } 1658 1659 return so; 1660} 1661 1662static void 1663panfrost_bind_vertex_elements_state( 1664 struct pipe_context *pctx, 1665 void *hwcso) 1666{ 1667 struct panfrost_context *ctx = pan_context(pctx); 1668 1669 ctx->vertex = hwcso; 1670 ctx->dirty |= PAN_DIRTY_VERTEX; 1671} 1672 1673static void 1674panfrost_delete_vertex_elements_state(struct pipe_context *pctx, void *hwcso) 1675{ 1676 struct panfrost_vertex_state *so = (struct panfrost_vertex_state *) hwcso; 1677 unsigned 
bytes = sizeof(struct mali_attr_meta) * so->num_elements; 1678 DBG("Vertex elements delete leaks descriptor (%d bytes)\n", bytes); 1679 free(hwcso); 1680} 1681 1682static void * 1683panfrost_create_shader_state( 1684 struct pipe_context *pctx, 1685 const struct pipe_shader_state *cso) 1686{ 1687 struct panfrost_shader_variants *so = CALLOC_STRUCT(panfrost_shader_variants); 1688 so->base = *cso; 1689 1690 /* Token deep copy to prevent memory corruption */ 1691 1692 if (cso->type == PIPE_SHADER_IR_TGSI) 1693 so->base.tokens = tgsi_dup_tokens(so->base.tokens); 1694 1695 return so; 1696} 1697 1698static void 1699panfrost_delete_shader_state( 1700 struct pipe_context *pctx, 1701 void *so) 1702{ 1703 struct panfrost_shader_variants *cso = (struct panfrost_shader_variants *) so; 1704 1705 if (cso->base.type == PIPE_SHADER_IR_TGSI) { 1706 DBG("Deleting TGSI shader leaks duplicated tokens\n"); 1707 } 1708 1709 unsigned leak = cso->variant_count * sizeof(struct mali_shader_meta); 1710 DBG("Deleting shader state leaks descriptors (%d bytes), and shader bytecode\n", leak); 1711 1712 free(so); 1713} 1714 1715static void * 1716panfrost_create_sampler_state( 1717 struct pipe_context *pctx, 1718 const struct pipe_sampler_state *cso) 1719{ 1720 struct panfrost_sampler_state *so = CALLOC_STRUCT(panfrost_sampler_state); 1721 so->base = *cso; 1722 1723 /* sampler_state corresponds to mali_sampler_descriptor, which we can generate entirely here */ 1724 1725 struct mali_sampler_descriptor sampler_descriptor = { 1726 .filter_mode = MALI_TEX_MIN(translate_tex_filter(cso->min_img_filter)) 1727 | MALI_TEX_MAG(translate_tex_filter(cso->mag_img_filter)) 1728 | translate_mip_filter(cso->min_mip_filter) 1729 | 0x20, 1730 1731 .wrap_s = translate_tex_wrap(cso->wrap_s), 1732 .wrap_t = translate_tex_wrap(cso->wrap_t), 1733 .wrap_r = translate_tex_wrap(cso->wrap_r), 1734 .compare_func = panfrost_translate_alt_compare_func(cso->compare_func), 1735 .border_color = { 1736 cso->border_color.f[0], 1737 
                        cso->border_color.f[1],
                        cso->border_color.f[2],
                        cso->border_color.f[3]
                },
                .min_lod = FIXED_16(cso->min_lod),
                .max_lod = FIXED_16(cso->max_lod),
                .unknown2 = 1,
        };

        so->hw = sampler_descriptor;

        return so;
}

/* Bind sampler CSOs for a shader stage: copies the CSO pointers into the
 * context; the descriptors themselves are uploaded at draw time. */

static void
panfrost_bind_sampler_states(
        struct pipe_context *pctx,
        enum pipe_shader_type shader,
        unsigned start_slot, unsigned num_sampler,
        void **sampler)
{
        assert(start_slot == 0);

        struct panfrost_context *ctx = pan_context(pctx);

        /* XXX: Should upload, not just copy? */
        ctx->sampler_count[shader] = num_sampler;
        memcpy(ctx->samplers[shader], sampler, num_sampler * sizeof (void *));

        ctx->dirty |= PAN_DIRTY_SAMPLERS;
}

/* Check whether a compiled fragment shader variant is compatible with the
 * current render state. Currently the only state baked into a variant is the
 * alpha test (enable, func, ref). */

static bool
panfrost_variant_matches(struct panfrost_context *ctx, struct panfrost_shader_state *variant)
{
        struct pipe_alpha_state *alpha = &ctx->depth_stencil->alpha;

        if (alpha->enabled || variant->alpha_state.enabled) {
                /* Make sure enable state is at least the same */
                if (alpha->enabled != variant->alpha_state.enabled) {
                        return false;
                }

                /* Check that the contents of the test are the same */
                bool same_func = alpha->func == variant->alpha_state.func;
                bool same_ref = alpha->ref_value == variant->alpha_state.ref_value;

                if (!(same_func && same_ref)) {
                        return false;
                }
        }

        /* Otherwise, we're good to go */
        return true;
}

/* Bind a fragment shader CSO: selects an existing variant matching the
 * current render state, or creates (and lazily compiles) a new one. */

static void
panfrost_bind_fs_state(
        struct pipe_context *pctx,
        void *hwcso)
{
        struct panfrost_context *ctx = pan_context(pctx);

        ctx->fs = hwcso;

        if (hwcso) {
                /* Match the appropriate variant */

                signed variant = -1;

                struct panfrost_shader_variants *variants = (struct panfrost_shader_variants *) hwcso;

                for (unsigned i = 0; i < variants->variant_count; ++i) {
                        if (panfrost_variant_matches(ctx, &variants->variants[i])) {
                                variant = i;
                                break;
                        }
                }

                if (variant == -1) {
                        /* No variant matched, so create a new one */
                        variant = variants->variant_count++;

                        /* NOTE(review): asserts after the increment, so the
                         * last array slot can never be reached in debug
                         * builds -- TODO confirm intent */
                        assert(variants->variant_count < MAX_SHADER_VARIANTS);

                        variants->variants[variant].base = hwcso;
                        variants->variants[variant].alpha_state = ctx->depth_stencil->alpha;

                        /* Allocate the mapped descriptor ahead-of-time. TODO: Use for FS as well as VS */
                        /* NOTE(review): this declaration shadows the outer
                         * `ctx` with the same value; harmless but redundant */
                        struct panfrost_context *ctx = pan_context(pctx);
                        struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_shader_meta), HEAP_DESCRIPTOR);

                        variants->variants[variant].tripipe = (struct mali_shader_meta *) transfer.cpu;
                        variants->variants[variant].tripipe_gpu = transfer.gpu;

                }

                /* Select this variant */
                variants->active_variant = variant;

                struct panfrost_shader_state *shader_state = &variants->variants[variant];
                assert(panfrost_variant_matches(ctx, shader_state));

                /* Now we have a variant selected, so compile and go */

                if (!shader_state->compiled) {
                        panfrost_shader_compile(ctx, shader_state->tripipe, NULL, JOB_TYPE_TILER, shader_state);
                        shader_state->compiled = true;
                }
        }

        ctx->dirty |= PAN_DIRTY_FS;
}

/* Bind a vertex shader CSO. Vertex shaders have a single variant (variant 0),
 * compiled on first bind. */

static void
panfrost_bind_vs_state(
        struct pipe_context *pctx,
        void *hwcso)
{
        struct panfrost_context *ctx = pan_context(pctx);

        ctx->vs = hwcso;

        if (hwcso) {
                if (!ctx->vs->variants[0].compiled) {
                        ctx->vs->variants[0].base = hwcso;

                        /* TODO DRY from above */
                        struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_shader_meta), HEAP_DESCRIPTOR);
                        ctx->vs->variants[0].tripipe = (struct mali_shader_meta *) transfer.cpu;
                        ctx->vs->variants[0].tripipe_gpu = transfer.gpu;

                        panfrost_shader_compile(ctx, ctx->vs->variants[0].tripipe, NULL,
JOB_TYPE_VERTEX, &ctx->vs->variants[0]); 1868 ctx->vs->variants[0].compiled = true; 1869 } 1870 } 1871 1872 ctx->dirty |= PAN_DIRTY_VS; 1873} 1874 1875static void 1876panfrost_set_vertex_buffers( 1877 struct pipe_context *pctx, 1878 unsigned start_slot, 1879 unsigned num_buffers, 1880 const struct pipe_vertex_buffer *buffers) 1881{ 1882 struct panfrost_context *ctx = pan_context(pctx); 1883 1884 util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, start_slot, num_buffers); 1885} 1886 1887static void 1888panfrost_set_constant_buffer( 1889 struct pipe_context *pctx, 1890 enum pipe_shader_type shader, uint index, 1891 const struct pipe_constant_buffer *buf) 1892{ 1893 struct panfrost_context *ctx = pan_context(pctx); 1894 struct panfrost_constant_buffer *pbuf = &ctx->constant_buffer[shader]; 1895 1896 size_t sz = buf ? buf->buffer_size : 0; 1897 1898 /* Free previous buffer */ 1899 1900 pbuf->dirty = true; 1901 pbuf->size = sz; 1902 1903 if (pbuf->buffer) { 1904 free(pbuf->buffer); 1905 pbuf->buffer = NULL; 1906 } 1907 1908 /* If unbinding, we're done */ 1909 1910 if (!buf) 1911 return; 1912 1913 /* Multiple constant buffers not yet supported */ 1914 assert(index == 0); 1915 1916 const uint8_t *cpu; 1917 1918 struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer); 1919 1920 if (rsrc) { 1921 cpu = rsrc->bo->cpu; 1922 } else if (buf->user_buffer) { 1923 cpu = buf->user_buffer; 1924 } else { 1925 DBG("No constant buffer?\n"); 1926 return; 1927 } 1928 1929 /* Copy the constant buffer into the driver context for later upload */ 1930 1931 pbuf->buffer = malloc(sz); 1932 memcpy(pbuf->buffer, cpu + buf->buffer_offset, sz); 1933} 1934 1935static void 1936panfrost_set_stencil_ref( 1937 struct pipe_context *pctx, 1938 const struct pipe_stencil_ref *ref) 1939{ 1940 struct panfrost_context *ctx = pan_context(pctx); 1941 ctx->stencil_ref = *ref; 1942 1943 /* Shader core dirty */ 1944 ctx->dirty |= PAN_DIRTY_FS; 1945} 1946 1947static struct 
pipe_sampler_view * 1948panfrost_create_sampler_view( 1949 struct pipe_context *pctx, 1950 struct pipe_resource *texture, 1951 const struct pipe_sampler_view *template) 1952{ 1953 struct panfrost_sampler_view *so = CALLOC_STRUCT(panfrost_sampler_view); 1954 int bytes_per_pixel = util_format_get_blocksize(texture->format); 1955 1956 pipe_reference(NULL, &texture->reference); 1957 1958 struct panfrost_resource *prsrc = (struct panfrost_resource *) texture; 1959 1960 so->base = *template; 1961 so->base.texture = texture; 1962 so->base.reference.count = 1; 1963 so->base.context = pctx; 1964 1965 /* sampler_views correspond to texture descriptors, minus the texture 1966 * (data) itself. So, we serialise the descriptor here and cache it for 1967 * later. */ 1968 1969 /* Make sure it's something with which we're familiar */ 1970 assert(bytes_per_pixel >= 1 && bytes_per_pixel <= 4); 1971 1972 /* TODO: Detect from format better */ 1973 const struct util_format_description *desc = util_format_description(prsrc->base.format); 1974 1975 unsigned char user_swizzle[4] = { 1976 template->swizzle_r, 1977 template->swizzle_g, 1978 template->swizzle_b, 1979 template->swizzle_a 1980 }; 1981 1982 enum mali_format format = panfrost_find_format(desc); 1983 1984 bool is_depth = desc->format == PIPE_FORMAT_Z32_UNORM; 1985 1986 unsigned usage2_layout = 0x10; 1987 1988 switch (prsrc->bo->layout) { 1989 case PAN_AFBC: 1990 usage2_layout |= 0x8 | 0x4; 1991 break; 1992 case PAN_TILED: 1993 usage2_layout |= 0x1; 1994 break; 1995 case PAN_LINEAR: 1996 usage2_layout |= is_depth ? 
0x1 : 0x2;
                break;
        default:
                assert(0);
                break;
        }

        struct mali_texture_descriptor texture_descriptor = {
                .width = MALI_POSITIVE(texture->width0),
                .height = MALI_POSITIVE(texture->height0),
                .depth = MALI_POSITIVE(texture->depth0),

                /* TODO: Decode */
                .format = {
                        .swizzle = panfrost_translate_swizzle_4(desc->swizzle),
                        .format = format,

                        .usage1 = 0x0,
                        .is_not_cubemap = texture->target != PIPE_TEXTURE_CUBE,

                        .usage2 = usage2_layout
                },

                .swizzle = panfrost_translate_swizzle_4(user_swizzle)
        };

        /* TODO: Other base levels require adjusting dimensions / level numbers / etc */
        assert (template->u.tex.first_level == 0);

        /* Disable mipmapping for now to avoid regressions while automipmapping
         * is being implemented. TODO: Remove me once automipmaps work */

        //texture_descriptor.nr_mipmap_levels = template->u.tex.last_level - template->u.tex.first_level;
        texture_descriptor.nr_mipmap_levels = 0;

        so->hw = texture_descriptor;

        return (struct pipe_sampler_view *) so;
}

/* Bind sampler views for a shader stage; the cached descriptors are uploaded
 * at draw time. */

static void
panfrost_set_sampler_views(
        struct pipe_context *pctx,
        enum pipe_shader_type shader,
        unsigned start_slot, unsigned num_views,
        struct pipe_sampler_view **views)
{
        struct panfrost_context *ctx = pan_context(pctx);

        assert(start_slot == 0);

        ctx->sampler_view_count[shader] = num_views;
        memcpy(ctx->sampler_views[shader], views, num_views * sizeof (void *));

        ctx->dirty |= PAN_DIRTY_TEXTURES;
}

/* Destroy a sampler view. NOTE(review): the texture reference taken in
 * panfrost_create_sampler_view is never released here -- presumably a leak;
 * TODO confirm and use pipe_resource_reference(..., NULL) */

static void
panfrost_sampler_view_destroy(
        struct pipe_context *pctx,
        struct pipe_sampler_view *views)
{
        //struct panfrost_context *ctx = pan_context(pctx);

        /* TODO */

        free(views);
}

/* Switch render targets: flushes pending work for the old FBO, re-references
 * the new surfaces, regenerates the vertex/tiler framebuffer descriptor, and
 * opportunistically enables AFBC and checksumming on non-scanout targets. */

static void
panfrost_set_framebuffer_state(struct pipe_context *pctx,
                               const struct pipe_framebuffer_state *fb)
{
        struct panfrost_context *ctx = pan_context(pctx);

        /* Flush when switching away from an FBO */

        if (!panfrost_is_scanout(ctx)) {
                panfrost_flush(pctx, NULL, 0);
        }

        ctx->pipe_framebuffer.nr_cbufs = fb->nr_cbufs;
        ctx->pipe_framebuffer.samples = fb->samples;
        ctx->pipe_framebuffer.layers = fb->layers;
        ctx->pipe_framebuffer.width = fb->width;
        ctx->pipe_framebuffer.height = fb->height;

        for (int i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
                struct pipe_surface *cb = i < fb->nr_cbufs ? fb->cbufs[i] : NULL;

                /* check if changing cbuf */
                if (ctx->pipe_framebuffer.cbufs[i] == cb) continue;

                if (cb && (i != 0)) {
                        DBG("XXX: Multiple render targets not supported before t7xx!\n");
                        assert(0);
                }

                /* assign new */
                pipe_surface_reference(&ctx->pipe_framebuffer.cbufs[i], cb);

                if (!cb)
                        continue;

                /* Regenerate the framebuffer descriptor for the new target */
                if (ctx->require_sfbd)
                        ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx);
                else
                        ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx);

                panfrost_attach_vt_framebuffer(ctx);

                struct panfrost_resource *tex = ((struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[i]->texture);
                enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
                bool is_scanout = panfrost_is_scanout(ctx);

                if (!is_scanout && tex->bo->layout != PAN_AFBC && panfrost_can_afbc(format)) {
                        /* The blob is aggressive about enabling AFBC. As such,
                         * it's pretty much necessary to use it here, since we
                         * have no traces of non-compressed FBO. */

                        panfrost_enable_afbc(ctx, tex, false);
                }

                if (!is_scanout && !tex->bo->has_checksum) {
                        /* Enable transaction elimination if we can */
                        panfrost_enable_checksum(ctx, tex);
                }
        }

        {
                struct pipe_surface *zb = fb->zsbuf;

                if (ctx->pipe_framebuffer.zsbuf != zb) {
                        pipe_surface_reference(&ctx->pipe_framebuffer.zsbuf, zb);

                        if (zb) {
                                /* FBO has depth */

                                if (ctx->require_sfbd)
                                        ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx);
                                else
                                        ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx);

                                panfrost_attach_vt_framebuffer(ctx);

                                /* Keep the depth FBO linear */
                        }
                }
        }
}

/* Create a blend CSO: tries fixed-function blending first, falling back to a
 * compiled blend shader when the equation can't be expressed directly. */

static void *
panfrost_create_blend_state(struct pipe_context *pipe,
                            const struct pipe_blend_state *blend)
{
        struct panfrost_context *ctx = pan_context(pipe);
        struct panfrost_blend_state *so = CALLOC_STRUCT(panfrost_blend_state);
        so->base = *blend;

        /* TODO: The following features are not yet implemented */
        assert(!blend->logicop_enable);
        assert(!blend->alpha_to_coverage);
        assert(!blend->alpha_to_one);

        /* Compile the blend state, first as fixed-function if we can */

        if (panfrost_make_fixed_blend_mode(&blend->rt[0], &so->equation, blend->rt[0].colormask, &ctx->blend_color))
                return so;

        /* If we can't, compile a blend shader instead */

        panfrost_make_blend_shader(ctx, so, &ctx->blend_color);

        return so;
}

static void
panfrost_bind_blend_state(struct pipe_context *pipe,
                          void *cso)
{
        struct panfrost_context *ctx = pan_context(pipe);
        struct pipe_blend_state *blend = (struct pipe_blend_state *) cso;
        struct panfrost_blend_state *pblend = (struct panfrost_blend_state *) cso;
        ctx->blend = pblend;

        if (!blend)
                return;

        SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_DITHER, !blend->dither);
2186 /* TODO: Attach color */ 2187 2188 /* Shader itself is not dirty, but the shader core is */ 2189 ctx->dirty |= PAN_DIRTY_FS; 2190} 2191 2192static void 2193panfrost_delete_blend_state(struct pipe_context *pipe, 2194 void *blend) 2195{ 2196 struct panfrost_blend_state *so = (struct panfrost_blend_state *) blend; 2197 2198 if (so->has_blend_shader) { 2199 DBG("Deleting blend state leak blend shaders bytecode\n"); 2200 } 2201 2202 free(blend); 2203} 2204 2205static void 2206panfrost_set_blend_color(struct pipe_context *pipe, 2207 const struct pipe_blend_color *blend_color) 2208{ 2209 struct panfrost_context *ctx = pan_context(pipe); 2210 2211 /* If blend_color is we're unbinding, so ctx->blend_color is now undefined -> nothing to do */ 2212 2213 if (blend_color) { 2214 ctx->blend_color = *blend_color; 2215 2216 /* The blend mode depends on the blend constant color, due to the 2217 * fixed/programmable split. So, we're forced to regenerate the blend 2218 * equation */ 2219 2220 /* TODO: Attach color */ 2221 } 2222} 2223 2224static void * 2225panfrost_create_depth_stencil_state(struct pipe_context *pipe, 2226 const struct pipe_depth_stencil_alpha_state *depth_stencil) 2227{ 2228 return mem_dup(depth_stencil, sizeof(*depth_stencil)); 2229} 2230 2231static void 2232panfrost_bind_depth_stencil_state(struct pipe_context *pipe, 2233 void *cso) 2234{ 2235 struct panfrost_context *ctx = pan_context(pipe); 2236 struct pipe_depth_stencil_alpha_state *depth_stencil = cso; 2237 ctx->depth_stencil = depth_stencil; 2238 2239 if (!depth_stencil) 2240 return; 2241 2242 /* Alpha does not exist in the hardware (it's not in ES3), so it's 2243 * emulated in the fragment shader */ 2244 2245 if (depth_stencil->alpha.enabled) { 2246 /* We need to trigger a new shader (maybe) */ 2247 ctx->base.bind_fs_state(&ctx->base, ctx->fs); 2248 } 2249 2250 /* Stencil state */ 2251 SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_STENCIL_TEST, depth_stencil->stencil[0].enabled); /* XXX: which one? 
*/ 2252 2253 panfrost_make_stencil_state(&depth_stencil->stencil[0], &ctx->fragment_shader_core.stencil_front); 2254 ctx->fragment_shader_core.stencil_mask_front = depth_stencil->stencil[0].writemask; 2255 2256 panfrost_make_stencil_state(&depth_stencil->stencil[1], &ctx->fragment_shader_core.stencil_back); 2257 ctx->fragment_shader_core.stencil_mask_back = depth_stencil->stencil[1].writemask; 2258 2259 /* Depth state (TODO: Refactor) */ 2260 SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_DEPTH_TEST, depth_stencil->depth.enabled); 2261 2262 int func = depth_stencil->depth.enabled ? depth_stencil->depth.func : PIPE_FUNC_ALWAYS; 2263 2264 ctx->fragment_shader_core.unknown2_3 &= ~MALI_DEPTH_FUNC_MASK; 2265 ctx->fragment_shader_core.unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(func)); 2266 2267 /* Bounds test not implemented */ 2268 assert(!depth_stencil->depth.bounds_test); 2269 2270 ctx->dirty |= PAN_DIRTY_FS; 2271} 2272 2273static void 2274panfrost_delete_depth_stencil_state(struct pipe_context *pipe, void *depth) 2275{ 2276 free( depth ); 2277} 2278 2279static void 2280panfrost_set_sample_mask(struct pipe_context *pipe, 2281 unsigned sample_mask) 2282{ 2283} 2284 2285static void 2286panfrost_set_clip_state(struct pipe_context *pipe, 2287 const struct pipe_clip_state *clip) 2288{ 2289 //struct panfrost_context *panfrost = pan_context(pipe); 2290} 2291 2292static void 2293panfrost_set_viewport_states(struct pipe_context *pipe, 2294 unsigned start_slot, 2295 unsigned num_viewports, 2296 const struct pipe_viewport_state *viewports) 2297{ 2298 struct panfrost_context *ctx = pan_context(pipe); 2299 2300 assert(start_slot == 0); 2301 assert(num_viewports == 1); 2302 2303 ctx->pipe_viewport = *viewports; 2304 2305#if 0 2306 /* TODO: What if not centered? 
*/ 2307 float w = abs(viewports->scale[0]) * 2.0; 2308 float h = abs(viewports->scale[1]) * 2.0; 2309 2310 ctx->viewport.viewport1[0] = MALI_POSITIVE((int) w); 2311 ctx->viewport.viewport1[1] = MALI_POSITIVE((int) h); 2312#endif 2313} 2314 2315static void 2316panfrost_set_scissor_states(struct pipe_context *pipe, 2317 unsigned start_slot, 2318 unsigned num_scissors, 2319 const struct pipe_scissor_state *scissors) 2320{ 2321 struct panfrost_context *ctx = pan_context(pipe); 2322 2323 assert(start_slot == 0); 2324 assert(num_scissors == 1); 2325 2326 ctx->scissor = *scissors; 2327} 2328 2329static void 2330panfrost_set_polygon_stipple(struct pipe_context *pipe, 2331 const struct pipe_poly_stipple *stipple) 2332{ 2333 //struct panfrost_context *panfrost = pan_context(pipe); 2334} 2335 2336static void 2337panfrost_set_active_query_state(struct pipe_context *pipe, 2338 boolean enable) 2339{ 2340 //struct panfrost_context *panfrost = pan_context(pipe); 2341} 2342 2343static void 2344panfrost_destroy(struct pipe_context *pipe) 2345{ 2346 struct panfrost_context *panfrost = pan_context(pipe); 2347 struct panfrost_screen *screen = pan_screen(pipe->screen); 2348 2349 if (panfrost->blitter) 2350 util_blitter_destroy(panfrost->blitter); 2351 2352 screen->driver->free_slab(screen, &panfrost->scratchpad); 2353 screen->driver->free_slab(screen, &panfrost->varying_mem); 2354 screen->driver->free_slab(screen, &panfrost->shaders); 2355 screen->driver->free_slab(screen, &panfrost->tiler_heap); 2356 screen->driver->free_slab(screen, &panfrost->misc_0); 2357} 2358 2359static struct pipe_query * 2360panfrost_create_query(struct pipe_context *pipe, 2361 unsigned type, 2362 unsigned index) 2363{ 2364 struct panfrost_query *q = CALLOC_STRUCT(panfrost_query); 2365 2366 q->type = type; 2367 q->index = index; 2368 2369 return (struct pipe_query *) q; 2370} 2371 2372static void 2373panfrost_destroy_query(struct pipe_context *pipe, struct pipe_query *q) 2374{ 2375 FREE(q); 2376} 2377 2378static 
boolean 2379panfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q) 2380{ 2381 struct panfrost_context *ctx = pan_context(pipe); 2382 struct panfrost_query *query = (struct panfrost_query *) q; 2383 2384 switch (query->type) { 2385 case PIPE_QUERY_OCCLUSION_COUNTER: 2386 case PIPE_QUERY_OCCLUSION_PREDICATE: 2387 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 2388 { 2389 /* Allocate a word for the query results to be stored */ 2390 query->transfer = panfrost_allocate_chunk(ctx, sizeof(unsigned), HEAP_DESCRIPTOR); 2391 2392 ctx->occlusion_query = query; 2393 2394 break; 2395 } 2396 2397 default: 2398 DBG("Skipping query %d\n", query->type); 2399 break; 2400 } 2401 2402 return true; 2403} 2404 2405static bool 2406panfrost_end_query(struct pipe_context *pipe, struct pipe_query *q) 2407{ 2408 struct panfrost_context *ctx = pan_context(pipe); 2409 ctx->occlusion_query = NULL; 2410 return true; 2411} 2412 2413static boolean 2414panfrost_get_query_result(struct pipe_context *pipe, 2415 struct pipe_query *q, 2416 boolean wait, 2417 union pipe_query_result *vresult) 2418{ 2419 /* STUB */ 2420 struct panfrost_query *query = (struct panfrost_query *) q; 2421 2422 /* We need to flush out the jobs to actually run the counter, TODO 2423 * check wait, TODO wallpaper after if needed */ 2424 2425 panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME); 2426 2427 switch (query->type) { 2428 case PIPE_QUERY_OCCLUSION_COUNTER: 2429 case PIPE_QUERY_OCCLUSION_PREDICATE: 2430 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: { 2431 /* Read back the query results */ 2432 unsigned *result = (unsigned *) query->transfer.cpu; 2433 unsigned passed = *result; 2434 2435 if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) { 2436 vresult->u64 = passed; 2437 } else { 2438 vresult->b = !!passed; 2439 } 2440 2441 break; 2442 } 2443 default: 2444 DBG("Skipped query get %d\n", query->type); 2445 break; 2446 } 2447 2448 return true; 2449} 2450 2451static struct pipe_stream_output_target * 
2452panfrost_create_stream_output_target(struct pipe_context *pctx, 2453 struct pipe_resource *prsc, 2454 unsigned buffer_offset, 2455 unsigned buffer_size) 2456{ 2457 struct pipe_stream_output_target *target; 2458 2459 target = CALLOC_STRUCT(pipe_stream_output_target); 2460 2461 if (!target) 2462 return NULL; 2463 2464 pipe_reference_init(&target->reference, 1); 2465 pipe_resource_reference(&target->buffer, prsc); 2466 2467 target->context = pctx; 2468 target->buffer_offset = buffer_offset; 2469 target->buffer_size = buffer_size; 2470 2471 return target; 2472} 2473 2474static void 2475panfrost_stream_output_target_destroy(struct pipe_context *pctx, 2476 struct pipe_stream_output_target *target) 2477{ 2478 pipe_resource_reference(&target->buffer, NULL); 2479 free(target); 2480} 2481 2482static void 2483panfrost_set_stream_output_targets(struct pipe_context *pctx, 2484 unsigned num_targets, 2485 struct pipe_stream_output_target **targets, 2486 const unsigned *offsets) 2487{ 2488 /* STUB */ 2489} 2490 2491static void 2492panfrost_setup_hardware(struct panfrost_context *ctx) 2493{ 2494 struct pipe_context *gallium = (struct pipe_context *) ctx; 2495 struct panfrost_screen *screen = pan_screen(gallium->screen); 2496 2497 for (int i = 0; i < ARRAY_SIZE(ctx->transient_pools); ++i) { 2498 /* Allocate the beginning of the transient pool */ 2499 int entry_size = (1 << 22); /* 4MB */ 2500 2501 ctx->transient_pools[i].entry_size = entry_size; 2502 ctx->transient_pools[i].entry_count = 1; 2503 2504 ctx->transient_pools[i].entries[0] = (struct panfrost_memory_entry *) pb_slab_alloc(&screen->slabs, entry_size, HEAP_TRANSIENT); 2505 } 2506 2507 screen->driver->allocate_slab(screen, &ctx->scratchpad, 64, false, 0, 0, 0); 2508 screen->driver->allocate_slab(screen, &ctx->varying_mem, 16384, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_COHERENT_LOCAL, 0, 0); 2509 screen->driver->allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0); 2510 
screen->driver->allocate_slab(screen, &ctx->tiler_heap, 32768, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128); 2511 screen->driver->allocate_slab(screen, &ctx->misc_0, 128*128, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128); 2512 2513} 2514 2515/* New context creation, which also does hardware initialisation since I don't 2516 * know the better way to structure this :smirk: */ 2517 2518struct pipe_context * 2519panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) 2520{ 2521 struct panfrost_context *ctx = CALLOC_STRUCT(panfrost_context); 2522 struct panfrost_screen *pscreen = pan_screen(screen); 2523 memset(ctx, 0, sizeof(*ctx)); 2524 struct pipe_context *gallium = (struct pipe_context *) ctx; 2525 unsigned gpu_id; 2526 2527 gpu_id = pscreen->driver->query_gpu_version(pscreen); 2528 2529 ctx->is_t6xx = gpu_id <= 0x0750; /* For now, this flag means T760 or less */ 2530 ctx->require_sfbd = gpu_id < 0x0750; /* T760 is the first to support MFBD */ 2531 2532 gallium->screen = screen; 2533 2534 gallium->destroy = panfrost_destroy; 2535 2536 gallium->set_framebuffer_state = panfrost_set_framebuffer_state; 2537 2538 gallium->flush = panfrost_flush; 2539 gallium->clear = panfrost_clear; 2540 gallium->draw_vbo = panfrost_draw_vbo; 2541 2542 gallium->set_vertex_buffers = panfrost_set_vertex_buffers; 2543 gallium->set_constant_buffer = panfrost_set_constant_buffer; 2544 2545 gallium->set_stencil_ref = panfrost_set_stencil_ref; 2546 2547 gallium->create_sampler_view = panfrost_create_sampler_view; 2548 gallium->set_sampler_views = panfrost_set_sampler_views; 2549 gallium->sampler_view_destroy = panfrost_sampler_view_destroy; 2550 2551 gallium->create_rasterizer_state = panfrost_create_rasterizer_state; 2552 gallium->bind_rasterizer_state = panfrost_bind_rasterizer_state; 2553 gallium->delete_rasterizer_state = panfrost_generic_cso_delete; 2554 2555 gallium->create_vertex_elements_state = 
panfrost_create_vertex_elements_state; 2556 gallium->bind_vertex_elements_state = panfrost_bind_vertex_elements_state; 2557 gallium->delete_vertex_elements_state = panfrost_delete_vertex_elements_state; 2558 2559 gallium->create_fs_state = panfrost_create_shader_state; 2560 gallium->delete_fs_state = panfrost_delete_shader_state; 2561 gallium->bind_fs_state = panfrost_bind_fs_state; 2562 2563 gallium->create_vs_state = panfrost_create_shader_state; 2564 gallium->delete_vs_state = panfrost_delete_shader_state; 2565 gallium->bind_vs_state = panfrost_bind_vs_state; 2566 2567 gallium->create_sampler_state = panfrost_create_sampler_state; 2568 gallium->delete_sampler_state = panfrost_generic_cso_delete; 2569 gallium->bind_sampler_states = panfrost_bind_sampler_states; 2570 2571 gallium->create_blend_state = panfrost_create_blend_state; 2572 gallium->bind_blend_state = panfrost_bind_blend_state; 2573 gallium->delete_blend_state = panfrost_delete_blend_state; 2574 2575 gallium->set_blend_color = panfrost_set_blend_color; 2576 2577 gallium->create_depth_stencil_alpha_state = panfrost_create_depth_stencil_state; 2578 gallium->bind_depth_stencil_alpha_state = panfrost_bind_depth_stencil_state; 2579 gallium->delete_depth_stencil_alpha_state = panfrost_delete_depth_stencil_state; 2580 2581 gallium->set_sample_mask = panfrost_set_sample_mask; 2582 2583 gallium->set_clip_state = panfrost_set_clip_state; 2584 gallium->set_viewport_states = panfrost_set_viewport_states; 2585 gallium->set_scissor_states = panfrost_set_scissor_states; 2586 gallium->set_polygon_stipple = panfrost_set_polygon_stipple; 2587 gallium->set_active_query_state = panfrost_set_active_query_state; 2588 2589 gallium->create_query = panfrost_create_query; 2590 gallium->destroy_query = panfrost_destroy_query; 2591 gallium->begin_query = panfrost_begin_query; 2592 gallium->end_query = panfrost_end_query; 2593 gallium->get_query_result = panfrost_get_query_result; 2594 2595 gallium->create_stream_output_target = 
panfrost_create_stream_output_target; 2596 gallium->stream_output_target_destroy = panfrost_stream_output_target_destroy; 2597 gallium->set_stream_output_targets = panfrost_set_stream_output_targets; 2598 2599 panfrost_resource_context_init(gallium); 2600 2601 pscreen->driver->init_context(ctx); 2602 2603 panfrost_setup_hardware(ctx); 2604 2605 /* XXX: leaks */ 2606 gallium->stream_uploader = u_upload_create_default(gallium); 2607 gallium->const_uploader = gallium->stream_uploader; 2608 assert(gallium->stream_uploader); 2609 2610 /* Midgard supports ES modes, plus QUADS/QUAD_STRIPS/POLYGON */ 2611 ctx->draw_modes = (1 << (PIPE_PRIM_POLYGON + 1)) - 1; 2612 2613 ctx->primconvert = util_primconvert_create(gallium, ctx->draw_modes); 2614 2615 ctx->blitter = util_blitter_create(gallium); 2616 assert(ctx->blitter); 2617 2618 /* Prepare for render! */ 2619 2620 panfrost_job_init(ctx); 2621 panfrost_emit_vertex_payload(ctx); 2622 panfrost_emit_tiler_payload(ctx); 2623 panfrost_invalidate_frame(ctx); 2624 panfrost_default_shader_backend(ctx); 2625 panfrost_generate_space_filler_indices(); 2626 2627 return gallium; 2628} 2629