1b8e80941Smrg/* 2b8e80941Smrg * © Copyright 2018 Alyssa Rosenzweig 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20b8e80941Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21b8e80941Smrg * SOFTWARE. 
22b8e80941Smrg * 23b8e80941Smrg */ 24b8e80941Smrg 25b8e80941Smrg#include <sys/poll.h> 26b8e80941Smrg#include <errno.h> 27b8e80941Smrg 28b8e80941Smrg#include "pan_context.h" 29b8e80941Smrg#include "pan_swizzle.h" 30b8e80941Smrg#include "pan_format.h" 31b8e80941Smrg 32b8e80941Smrg#include "util/macros.h" 33b8e80941Smrg#include "util/u_format.h" 34b8e80941Smrg#include "util/u_inlines.h" 35b8e80941Smrg#include "util/u_upload_mgr.h" 36b8e80941Smrg#include "util/u_memory.h" 37b8e80941Smrg#include "util/u_vbuf.h" 38b8e80941Smrg#include "util/half_float.h" 39b8e80941Smrg#include "util/u_helpers.h" 40b8e80941Smrg#include "util/u_format.h" 41b8e80941Smrg#include "indices/u_primconvert.h" 42b8e80941Smrg#include "tgsi/tgsi_parse.h" 43b8e80941Smrg#include "util/u_math.h" 44b8e80941Smrg 45b8e80941Smrg#include "pan_screen.h" 46b8e80941Smrg#include "pan_blending.h" 47b8e80941Smrg#include "pan_blend_shaders.h" 48b8e80941Smrg#include "pan_util.h" 49b8e80941Smrg#include "pan_wallpaper.h" 50b8e80941Smrg 51b8e80941Smrgstatic int performance_counter_number = 0; 52b8e80941Smrgextern const char *pan_counters_base; 53b8e80941Smrg 54b8e80941Smrg/* Do not actually send anything to the GPU; merely generate the cmdstream as fast as possible. Disables framebuffer writes */ 55b8e80941Smrg//#define DRY_RUN 56b8e80941Smrg 57b8e80941Smrg/* Can a given format support AFBC? Not all can. */ 58b8e80941Smrg 59b8e80941Smrgstatic bool 60b8e80941Smrgpanfrost_can_afbc(enum pipe_format format) 61b8e80941Smrg{ 62b8e80941Smrg const struct util_format_description *desc = 63b8e80941Smrg util_format_description(format); 64b8e80941Smrg 65b8e80941Smrg if (util_format_is_rgba8_variant(desc)) 66b8e80941Smrg return true; 67b8e80941Smrg 68b8e80941Smrg /* TODO: AFBC of other formats */ 69b8e80941Smrg 70b8e80941Smrg return false; 71b8e80941Smrg} 72b8e80941Smrg 73b8e80941Smrg/* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically 74b8e80941Smrg * indepdent between color buffers and depth/stencil). 
To enable, we allocate 75b8e80941Smrg * the AFBC metadata buffer and mark that it is enabled. We do -not- actually 76b8e80941Smrg * edit the fragment job here. This routine should be called ONCE per 77b8e80941Smrg * AFBC-compressed buffer, rather than on every frame. */ 78b8e80941Smrg 79b8e80941Smrgstatic void 80b8e80941Smrgpanfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsrc, bool ds) 81b8e80941Smrg{ 82b8e80941Smrg if (ctx->require_sfbd) { 83b8e80941Smrg DBG("AFBC not supported yet on SFBD\n"); 84b8e80941Smrg assert(0); 85b8e80941Smrg } 86b8e80941Smrg 87b8e80941Smrg struct pipe_context *gallium = (struct pipe_context *) ctx; 88b8e80941Smrg struct panfrost_screen *screen = pan_screen(gallium->screen); 89b8e80941Smrg /* AFBC metadata is 16 bytes per tile */ 90b8e80941Smrg int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT; 91b8e80941Smrg int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT; 92b8e80941Smrg int bytes_per_pixel = util_format_get_blocksize(rsrc->base.format); 93b8e80941Smrg int stride = bytes_per_pixel * ALIGN(rsrc->base.width0, 16); 94b8e80941Smrg 95b8e80941Smrg stride *= 2; /* TODO: Should this be carried over? */ 96b8e80941Smrg int main_size = stride * rsrc->base.height0; 97b8e80941Smrg rsrc->bo->afbc_metadata_size = tile_w * tile_h * 16; 98b8e80941Smrg 99b8e80941Smrg /* Allocate the AFBC slab itself, large enough to hold the above */ 100b8e80941Smrg screen->driver->allocate_slab(screen, &rsrc->bo->afbc_slab, 101b8e80941Smrg (rsrc->bo->afbc_metadata_size + main_size + 4095) / 4096, 102b8e80941Smrg true, 0, 0, 0); 103b8e80941Smrg 104b8e80941Smrg rsrc->bo->layout = PAN_AFBC; 105b8e80941Smrg 106b8e80941Smrg /* Compressed textured reads use a tagged pointer to the metadata */ 107b8e80941Smrg 108b8e80941Smrg rsrc->bo->gpu = rsrc->bo->afbc_slab.gpu | (ds ? 
0 : 1); 109b8e80941Smrg rsrc->bo->cpu = rsrc->bo->afbc_slab.cpu; 110b8e80941Smrg rsrc->bo->gem_handle = rsrc->bo->afbc_slab.gem_handle; 111b8e80941Smrg} 112b8e80941Smrg 113b8e80941Smrgstatic void 114b8e80941Smrgpanfrost_enable_checksum(struct panfrost_context *ctx, struct panfrost_resource *rsrc) 115b8e80941Smrg{ 116b8e80941Smrg struct pipe_context *gallium = (struct pipe_context *) ctx; 117b8e80941Smrg struct panfrost_screen *screen = pan_screen(gallium->screen); 118b8e80941Smrg int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT; 119b8e80941Smrg int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT; 120b8e80941Smrg 121b8e80941Smrg /* 8 byte checksum per tile */ 122b8e80941Smrg rsrc->bo->checksum_stride = tile_w * 8; 123b8e80941Smrg int pages = (((rsrc->bo->checksum_stride * tile_h) + 4095) / 4096); 124b8e80941Smrg screen->driver->allocate_slab(screen, &rsrc->bo->checksum_slab, pages, false, 0, 0, 0); 125b8e80941Smrg 126b8e80941Smrg rsrc->bo->has_checksum = true; 127b8e80941Smrg} 128b8e80941Smrg 129b8e80941Smrg/* Framebuffer descriptor */ 130b8e80941Smrg 131b8e80941Smrgstatic void 132b8e80941Smrgpanfrost_set_framebuffer_resolution(struct mali_single_framebuffer *fb, int w, int h) 133b8e80941Smrg{ 134b8e80941Smrg fb->width = MALI_POSITIVE(w); 135b8e80941Smrg fb->height = MALI_POSITIVE(h); 136b8e80941Smrg 137b8e80941Smrg /* No idea why this is needed, but it's how resolution_check is 138b8e80941Smrg * calculated. It's not clear to us yet why the hardware wants this. 139b8e80941Smrg * The formula itself was discovered mostly by manual bruteforce and 140b8e80941Smrg * aggressive algebraic simplification. 
*/ 141b8e80941Smrg 142b8e80941Smrg fb->resolution_check = ((w + h) / 3) << 4; 143b8e80941Smrg} 144b8e80941Smrg 145b8e80941Smrgstruct mali_single_framebuffer 146b8e80941Smrgpanfrost_emit_sfbd(struct panfrost_context *ctx) 147b8e80941Smrg{ 148b8e80941Smrg struct mali_single_framebuffer framebuffer = { 149b8e80941Smrg .unknown2 = 0x1f, 150b8e80941Smrg .format = 0x30000000, 151b8e80941Smrg .clear_flags = 0x1000, 152b8e80941Smrg .unknown_address_0 = ctx->scratchpad.gpu, 153b8e80941Smrg .unknown_address_1 = ctx->misc_0.gpu, 154b8e80941Smrg .unknown_address_2 = ctx->misc_0.gpu + 40960, 155b8e80941Smrg .tiler_flags = 0xf0, 156b8e80941Smrg .tiler_heap_free = ctx->tiler_heap.gpu, 157b8e80941Smrg .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size, 158b8e80941Smrg }; 159b8e80941Smrg 160b8e80941Smrg panfrost_set_framebuffer_resolution(&framebuffer, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height); 161b8e80941Smrg 162b8e80941Smrg return framebuffer; 163b8e80941Smrg} 164b8e80941Smrg 165b8e80941Smrgstruct bifrost_framebuffer 166b8e80941Smrgpanfrost_emit_mfbd(struct panfrost_context *ctx) 167b8e80941Smrg{ 168b8e80941Smrg struct bifrost_framebuffer framebuffer = { 169b8e80941Smrg /* It is not yet clear what tiler_meta means or how it's 170b8e80941Smrg * calculated, but we can tell the lower 32-bits are a 171b8e80941Smrg * (monotonically increasing?) function of tile count and 172b8e80941Smrg * geometry complexity; I suspect it defines a memory size of 173b8e80941Smrg * some kind? for the tiler. It's really unclear at the 174b8e80941Smrg * moment... but to add to the confusion, the hardware is happy 175b8e80941Smrg * enough to accept a zero in this field, so we don't even have 176b8e80941Smrg * to worry about it right now. 177b8e80941Smrg * 178b8e80941Smrg * The byte (just after the 32-bit mark) is much more 179b8e80941Smrg * interesting. 
The higher nibble I've only ever seen as 0xF, 180b8e80941Smrg * but the lower one I've seen as 0x0 or 0xF, and it's not 181b8e80941Smrg * obvious what the difference is. But what -is- obvious is 182b8e80941Smrg * that when the lower nibble is zero, performance is severely 183b8e80941Smrg * degraded compared to when the lower nibble is set. 184b8e80941Smrg * Evidently, that nibble enables some sort of fast path, 185b8e80941Smrg * perhaps relating to caching or tile flush? Regardless, at 186b8e80941Smrg * this point there's no clear reason not to set it, aside from 187b8e80941Smrg * substantially increased memory requirements (of the misc_0 188b8e80941Smrg * buffer) */ 189b8e80941Smrg 190b8e80941Smrg .tiler_meta = ((uint64_t) 0xff << 32) | 0x0, 191b8e80941Smrg 192b8e80941Smrg .width1 = MALI_POSITIVE(ctx->pipe_framebuffer.width), 193b8e80941Smrg .height1 = MALI_POSITIVE(ctx->pipe_framebuffer.height), 194b8e80941Smrg .width2 = MALI_POSITIVE(ctx->pipe_framebuffer.width), 195b8e80941Smrg .height2 = MALI_POSITIVE(ctx->pipe_framebuffer.height), 196b8e80941Smrg 197b8e80941Smrg .unk1 = 0x1080, 198b8e80941Smrg 199b8e80941Smrg /* TODO: MRT */ 200b8e80941Smrg .rt_count_1 = MALI_POSITIVE(1), 201b8e80941Smrg .rt_count_2 = 4, 202b8e80941Smrg 203b8e80941Smrg .unknown2 = 0x1f, 204b8e80941Smrg 205b8e80941Smrg /* Corresponds to unknown_address_X of SFBD */ 206b8e80941Smrg .scratchpad = ctx->scratchpad.gpu, 207b8e80941Smrg .tiler_scratch_start = ctx->misc_0.gpu, 208b8e80941Smrg 209b8e80941Smrg /* The constant added here is, like the lower word of 210b8e80941Smrg * tiler_meta, (loosely) another product of framebuffer size 211b8e80941Smrg * and geometry complexity. It must be sufficiently large for 212b8e80941Smrg * the tiler_meta fast path to work; if it's too small, there 213b8e80941Smrg * will be DATA_INVALID_FAULTs. Conversely, it must be less 214b8e80941Smrg * than the total size of misc_0, or else there's no room. 
It's 215b8e80941Smrg * possible this constant configures a partition between two 216b8e80941Smrg * parts of misc_0? We haven't investigated the functionality, 217b8e80941Smrg * as these buffers are internally used by the hardware 218b8e80941Smrg * (presumably by the tiler) but not seemingly touched by the driver 219b8e80941Smrg */ 220b8e80941Smrg 221b8e80941Smrg .tiler_scratch_middle = ctx->misc_0.gpu + 0xf0000, 222b8e80941Smrg 223b8e80941Smrg .tiler_heap_start = ctx->tiler_heap.gpu, 224b8e80941Smrg .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size, 225b8e80941Smrg }; 226b8e80941Smrg 227b8e80941Smrg return framebuffer; 228b8e80941Smrg} 229b8e80941Smrg 230b8e80941Smrg/* Are we currently rendering to the screen (rather than an FBO)? */ 231b8e80941Smrg 232b8e80941Smrgbool 233b8e80941Smrgpanfrost_is_scanout(struct panfrost_context *ctx) 234b8e80941Smrg{ 235b8e80941Smrg /* If there is no color buffer, it's an FBO */ 236b8e80941Smrg if (!ctx->pipe_framebuffer.nr_cbufs) 237b8e80941Smrg return false; 238b8e80941Smrg 239b8e80941Smrg /* If we're too early that no framebuffer was sent, it's scanout */ 240b8e80941Smrg if (!ctx->pipe_framebuffer.cbufs[0]) 241b8e80941Smrg return true; 242b8e80941Smrg 243b8e80941Smrg return ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_DISPLAY_TARGET || 244b8e80941Smrg ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_SCANOUT || 245b8e80941Smrg ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_SHARED; 246b8e80941Smrg} 247b8e80941Smrg 248b8e80941Smrgstatic uint32_t 249b8e80941Smrgpan_pack_color(const union pipe_color_union *color, enum pipe_format format) 250b8e80941Smrg{ 251b8e80941Smrg /* Alpha magicked to 1.0 if there is no alpha */ 252b8e80941Smrg 253b8e80941Smrg bool has_alpha = util_format_has_alpha(format); 254b8e80941Smrg float clear_alpha = has_alpha ? 
color->f[3] : 1.0f; 255b8e80941Smrg 256b8e80941Smrg /* Packed color depends on the framebuffer format */ 257b8e80941Smrg 258b8e80941Smrg const struct util_format_description *desc = 259b8e80941Smrg util_format_description(format); 260b8e80941Smrg 261b8e80941Smrg if (util_format_is_rgba8_variant(desc)) { 262b8e80941Smrg return (float_to_ubyte(clear_alpha) << 24) | 263b8e80941Smrg (float_to_ubyte(color->f[2]) << 16) | 264b8e80941Smrg (float_to_ubyte(color->f[1]) << 8) | 265b8e80941Smrg (float_to_ubyte(color->f[0]) << 0); 266b8e80941Smrg } else if (format == PIPE_FORMAT_B5G6R5_UNORM) { 267b8e80941Smrg /* First, we convert the components to R5, G6, B5 separately */ 268b8e80941Smrg unsigned r5 = CLAMP(color->f[0], 0.0, 1.0) * 31.0; 269b8e80941Smrg unsigned g6 = CLAMP(color->f[1], 0.0, 1.0) * 63.0; 270b8e80941Smrg unsigned b5 = CLAMP(color->f[2], 0.0, 1.0) * 31.0; 271b8e80941Smrg 272b8e80941Smrg /* Then we pack into a sparse u32. TODO: Why these shifts? */ 273b8e80941Smrg return (b5 << 25) | (g6 << 14) | (r5 << 5); 274b8e80941Smrg } else { 275b8e80941Smrg /* Unknown format */ 276b8e80941Smrg assert(0); 277b8e80941Smrg } 278b8e80941Smrg 279b8e80941Smrg return 0; 280b8e80941Smrg} 281b8e80941Smrg 282b8e80941Smrgstatic void 283b8e80941Smrgpanfrost_clear( 284b8e80941Smrg struct pipe_context *pipe, 285b8e80941Smrg unsigned buffers, 286b8e80941Smrg const union pipe_color_union *color, 287b8e80941Smrg double depth, unsigned stencil) 288b8e80941Smrg{ 289b8e80941Smrg struct panfrost_context *ctx = pan_context(pipe); 290b8e80941Smrg struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); 291b8e80941Smrg 292b8e80941Smrg if (buffers & PIPE_CLEAR_COLOR) { 293b8e80941Smrg enum pipe_format format = ctx->pipe_framebuffer.cbufs[0]->format; 294b8e80941Smrg job->clear_color = pan_pack_color(color, format); 295b8e80941Smrg } 296b8e80941Smrg 297b8e80941Smrg if (buffers & PIPE_CLEAR_DEPTH) { 298b8e80941Smrg job->clear_depth = depth; 299b8e80941Smrg } 300b8e80941Smrg 301b8e80941Smrg if 
(buffers & PIPE_CLEAR_STENCIL) { 302b8e80941Smrg job->clear_stencil = stencil; 303b8e80941Smrg } 304b8e80941Smrg 305b8e80941Smrg job->clear |= buffers; 306b8e80941Smrg} 307b8e80941Smrg 308b8e80941Smrgstatic mali_ptr 309b8e80941Smrgpanfrost_attach_vt_mfbd(struct panfrost_context *ctx) 310b8e80941Smrg{ 311b8e80941Smrg /* MFBD needs a sequential semi-render target upload, but what exactly this is, is beyond me for now */ 312b8e80941Smrg struct bifrost_render_target rts_list[] = { 313b8e80941Smrg { 314b8e80941Smrg .chunknown = { 315b8e80941Smrg .unk = 0x30005, 316b8e80941Smrg }, 317b8e80941Smrg .framebuffer = ctx->misc_0.gpu, 318b8e80941Smrg .zero2 = 0x3, 319b8e80941Smrg }, 320b8e80941Smrg }; 321b8e80941Smrg 322b8e80941Smrg /* Allocate memory for the three components */ 323b8e80941Smrg int size = 1024 + sizeof(ctx->vt_framebuffer_mfbd) + sizeof(rts_list); 324b8e80941Smrg struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size); 325b8e80941Smrg 326b8e80941Smrg /* Opaque 1024-block */ 327b8e80941Smrg rts_list[0].chunknown.pointer = transfer.gpu; 328b8e80941Smrg 329b8e80941Smrg memcpy(transfer.cpu + 1024, &ctx->vt_framebuffer_mfbd, sizeof(ctx->vt_framebuffer_mfbd)); 330b8e80941Smrg memcpy(transfer.cpu + 1024 + sizeof(ctx->vt_framebuffer_mfbd), rts_list, sizeof(rts_list)); 331b8e80941Smrg 332b8e80941Smrg return (transfer.gpu + 1024) | MALI_MFBD; 333b8e80941Smrg} 334b8e80941Smrg 335b8e80941Smrgstatic mali_ptr 336b8e80941Smrgpanfrost_attach_vt_sfbd(struct panfrost_context *ctx) 337b8e80941Smrg{ 338b8e80941Smrg return panfrost_upload_transient(ctx, &ctx->vt_framebuffer_sfbd, sizeof(ctx->vt_framebuffer_sfbd)) | MALI_SFBD; 339b8e80941Smrg} 340b8e80941Smrg 341b8e80941Smrgstatic void 342b8e80941Smrgpanfrost_attach_vt_framebuffer(struct panfrost_context *ctx) 343b8e80941Smrg{ 344b8e80941Smrg mali_ptr framebuffer = ctx->require_sfbd ? 
345b8e80941Smrg panfrost_attach_vt_sfbd(ctx) : 346b8e80941Smrg panfrost_attach_vt_mfbd(ctx); 347b8e80941Smrg 348b8e80941Smrg ctx->payload_vertex.postfix.framebuffer = framebuffer; 349b8e80941Smrg ctx->payload_tiler.postfix.framebuffer = framebuffer; 350b8e80941Smrg} 351b8e80941Smrg 352b8e80941Smrg/* Reset per-frame context, called on context initialisation as well as after 353b8e80941Smrg * flushing a frame */ 354b8e80941Smrg 355b8e80941Smrgstatic void 356b8e80941Smrgpanfrost_invalidate_frame(struct panfrost_context *ctx) 357b8e80941Smrg{ 358b8e80941Smrg unsigned transient_count = ctx->transient_pools[ctx->cmdstream_i].entry_index*ctx->transient_pools[0].entry_size + ctx->transient_pools[ctx->cmdstream_i].entry_offset; 359b8e80941Smrg DBG("Uploaded transient %d bytes\n", transient_count); 360b8e80941Smrg 361b8e80941Smrg /* Rotate cmdstream */ 362b8e80941Smrg if ((++ctx->cmdstream_i) == (sizeof(ctx->transient_pools) / sizeof(ctx->transient_pools[0]))) 363b8e80941Smrg ctx->cmdstream_i = 0; 364b8e80941Smrg 365b8e80941Smrg if (ctx->require_sfbd) 366b8e80941Smrg ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx); 367b8e80941Smrg else 368b8e80941Smrg ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx); 369b8e80941Smrg 370b8e80941Smrg /* Reset varyings allocated */ 371b8e80941Smrg ctx->varying_height = 0; 372b8e80941Smrg 373b8e80941Smrg /* The transient cmdstream is dirty every frame; the only bits worth preserving 374b8e80941Smrg * (textures, shaders, etc) are in other buffers anyways */ 375b8e80941Smrg 376b8e80941Smrg ctx->transient_pools[ctx->cmdstream_i].entry_index = 0; 377b8e80941Smrg ctx->transient_pools[ctx->cmdstream_i].entry_offset = 0; 378b8e80941Smrg 379b8e80941Smrg /* Regenerate payloads */ 380b8e80941Smrg panfrost_attach_vt_framebuffer(ctx); 381b8e80941Smrg 382b8e80941Smrg if (ctx->rasterizer) 383b8e80941Smrg ctx->dirty |= PAN_DIRTY_RASTERIZER; 384b8e80941Smrg 385b8e80941Smrg /* XXX */ 386b8e80941Smrg ctx->dirty |= PAN_DIRTY_SAMPLERS | PAN_DIRTY_TEXTURES; 
387b8e80941Smrg} 388b8e80941Smrg 389b8e80941Smrg/* In practice, every field of these payloads should be configurable 390b8e80941Smrg * arbitrarily, which means these functions are basically catch-all's for 391b8e80941Smrg * as-of-yet unwavering unknowns */ 392b8e80941Smrg 393b8e80941Smrgstatic void 394b8e80941Smrgpanfrost_emit_vertex_payload(struct panfrost_context *ctx) 395b8e80941Smrg{ 396b8e80941Smrg struct midgard_payload_vertex_tiler payload = { 397b8e80941Smrg .prefix = { 398b8e80941Smrg .workgroups_z_shift = 32, 399b8e80941Smrg .workgroups_x_shift_2 = 0x2, 400b8e80941Smrg .workgroups_x_shift_3 = 0x5, 401b8e80941Smrg }, 402b8e80941Smrg .gl_enables = 0x4 | (ctx->is_t6xx ? 0 : 0x2), 403b8e80941Smrg }; 404b8e80941Smrg 405b8e80941Smrg memcpy(&ctx->payload_vertex, &payload, sizeof(payload)); 406b8e80941Smrg} 407b8e80941Smrg 408b8e80941Smrgstatic void 409b8e80941Smrgpanfrost_emit_tiler_payload(struct panfrost_context *ctx) 410b8e80941Smrg{ 411b8e80941Smrg struct midgard_payload_vertex_tiler payload = { 412b8e80941Smrg .prefix = { 413b8e80941Smrg .workgroups_z_shift = 32, 414b8e80941Smrg .workgroups_x_shift_2 = 0x2, 415b8e80941Smrg .workgroups_x_shift_3 = 0x6, 416b8e80941Smrg 417b8e80941Smrg .zero1 = 0xffff, /* Why is this only seen on test-quad-textured? 
*/ 418b8e80941Smrg }, 419b8e80941Smrg }; 420b8e80941Smrg 421b8e80941Smrg memcpy(&ctx->payload_tiler, &payload, sizeof(payload)); 422b8e80941Smrg} 423b8e80941Smrg 424b8e80941Smrgstatic unsigned 425b8e80941Smrgtranslate_tex_wrap(enum pipe_tex_wrap w) 426b8e80941Smrg{ 427b8e80941Smrg switch (w) { 428b8e80941Smrg case PIPE_TEX_WRAP_REPEAT: 429b8e80941Smrg return MALI_WRAP_REPEAT; 430b8e80941Smrg 431b8e80941Smrg case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 432b8e80941Smrg return MALI_WRAP_CLAMP_TO_EDGE; 433b8e80941Smrg 434b8e80941Smrg case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 435b8e80941Smrg return MALI_WRAP_CLAMP_TO_BORDER; 436b8e80941Smrg 437b8e80941Smrg case PIPE_TEX_WRAP_MIRROR_REPEAT: 438b8e80941Smrg return MALI_WRAP_MIRRORED_REPEAT; 439b8e80941Smrg 440b8e80941Smrg default: 441b8e80941Smrg assert(0); 442b8e80941Smrg return 0; 443b8e80941Smrg } 444b8e80941Smrg} 445b8e80941Smrg 446b8e80941Smrgstatic unsigned 447b8e80941Smrgtranslate_tex_filter(enum pipe_tex_filter f) 448b8e80941Smrg{ 449b8e80941Smrg switch (f) { 450b8e80941Smrg case PIPE_TEX_FILTER_NEAREST: 451b8e80941Smrg return MALI_NEAREST; 452b8e80941Smrg 453b8e80941Smrg case PIPE_TEX_FILTER_LINEAR: 454b8e80941Smrg return MALI_LINEAR; 455b8e80941Smrg 456b8e80941Smrg default: 457b8e80941Smrg assert(0); 458b8e80941Smrg return 0; 459b8e80941Smrg } 460b8e80941Smrg} 461b8e80941Smrg 462b8e80941Smrgstatic unsigned 463b8e80941Smrgtranslate_mip_filter(enum pipe_tex_mipfilter f) 464b8e80941Smrg{ 465b8e80941Smrg return (f == PIPE_TEX_MIPFILTER_LINEAR) ? 
MALI_MIP_LINEAR : 0; 466b8e80941Smrg} 467b8e80941Smrg 468b8e80941Smrgstatic unsigned 469b8e80941Smrgpanfrost_translate_compare_func(enum pipe_compare_func in) 470b8e80941Smrg{ 471b8e80941Smrg switch (in) { 472b8e80941Smrg case PIPE_FUNC_NEVER: 473b8e80941Smrg return MALI_FUNC_NEVER; 474b8e80941Smrg 475b8e80941Smrg case PIPE_FUNC_LESS: 476b8e80941Smrg return MALI_FUNC_LESS; 477b8e80941Smrg 478b8e80941Smrg case PIPE_FUNC_EQUAL: 479b8e80941Smrg return MALI_FUNC_EQUAL; 480b8e80941Smrg 481b8e80941Smrg case PIPE_FUNC_LEQUAL: 482b8e80941Smrg return MALI_FUNC_LEQUAL; 483b8e80941Smrg 484b8e80941Smrg case PIPE_FUNC_GREATER: 485b8e80941Smrg return MALI_FUNC_GREATER; 486b8e80941Smrg 487b8e80941Smrg case PIPE_FUNC_NOTEQUAL: 488b8e80941Smrg return MALI_FUNC_NOTEQUAL; 489b8e80941Smrg 490b8e80941Smrg case PIPE_FUNC_GEQUAL: 491b8e80941Smrg return MALI_FUNC_GEQUAL; 492b8e80941Smrg 493b8e80941Smrg case PIPE_FUNC_ALWAYS: 494b8e80941Smrg return MALI_FUNC_ALWAYS; 495b8e80941Smrg } 496b8e80941Smrg 497b8e80941Smrg assert (0); 498b8e80941Smrg return 0; /* Unreachable */ 499b8e80941Smrg} 500b8e80941Smrg 501b8e80941Smrgstatic unsigned 502b8e80941Smrgpanfrost_translate_alt_compare_func(enum pipe_compare_func in) 503b8e80941Smrg{ 504b8e80941Smrg switch (in) { 505b8e80941Smrg case PIPE_FUNC_NEVER: 506b8e80941Smrg return MALI_ALT_FUNC_NEVER; 507b8e80941Smrg 508b8e80941Smrg case PIPE_FUNC_LESS: 509b8e80941Smrg return MALI_ALT_FUNC_LESS; 510b8e80941Smrg 511b8e80941Smrg case PIPE_FUNC_EQUAL: 512b8e80941Smrg return MALI_ALT_FUNC_EQUAL; 513b8e80941Smrg 514b8e80941Smrg case PIPE_FUNC_LEQUAL: 515b8e80941Smrg return MALI_ALT_FUNC_LEQUAL; 516b8e80941Smrg 517b8e80941Smrg case PIPE_FUNC_GREATER: 518b8e80941Smrg return MALI_ALT_FUNC_GREATER; 519b8e80941Smrg 520b8e80941Smrg case PIPE_FUNC_NOTEQUAL: 521b8e80941Smrg return MALI_ALT_FUNC_NOTEQUAL; 522b8e80941Smrg 523b8e80941Smrg case PIPE_FUNC_GEQUAL: 524b8e80941Smrg return MALI_ALT_FUNC_GEQUAL; 525b8e80941Smrg 526b8e80941Smrg case PIPE_FUNC_ALWAYS: 
527b8e80941Smrg return MALI_ALT_FUNC_ALWAYS; 528b8e80941Smrg } 529b8e80941Smrg 530b8e80941Smrg assert (0); 531b8e80941Smrg return 0; /* Unreachable */ 532b8e80941Smrg} 533b8e80941Smrg 534b8e80941Smrgstatic unsigned 535b8e80941Smrgpanfrost_translate_stencil_op(enum pipe_stencil_op in) 536b8e80941Smrg{ 537b8e80941Smrg switch (in) { 538b8e80941Smrg case PIPE_STENCIL_OP_KEEP: 539b8e80941Smrg return MALI_STENCIL_KEEP; 540b8e80941Smrg 541b8e80941Smrg case PIPE_STENCIL_OP_ZERO: 542b8e80941Smrg return MALI_STENCIL_ZERO; 543b8e80941Smrg 544b8e80941Smrg case PIPE_STENCIL_OP_REPLACE: 545b8e80941Smrg return MALI_STENCIL_REPLACE; 546b8e80941Smrg 547b8e80941Smrg case PIPE_STENCIL_OP_INCR: 548b8e80941Smrg return MALI_STENCIL_INCR; 549b8e80941Smrg 550b8e80941Smrg case PIPE_STENCIL_OP_DECR: 551b8e80941Smrg return MALI_STENCIL_DECR; 552b8e80941Smrg 553b8e80941Smrg case PIPE_STENCIL_OP_INCR_WRAP: 554b8e80941Smrg return MALI_STENCIL_INCR_WRAP; 555b8e80941Smrg 556b8e80941Smrg case PIPE_STENCIL_OP_DECR_WRAP: 557b8e80941Smrg return MALI_STENCIL_DECR_WRAP; 558b8e80941Smrg 559b8e80941Smrg case PIPE_STENCIL_OP_INVERT: 560b8e80941Smrg return MALI_STENCIL_INVERT; 561b8e80941Smrg } 562b8e80941Smrg 563b8e80941Smrg assert (0); 564b8e80941Smrg return 0; /* Unreachable */ 565b8e80941Smrg} 566b8e80941Smrg 567b8e80941Smrgstatic void 568b8e80941Smrgpanfrost_make_stencil_state(const struct pipe_stencil_state *in, struct mali_stencil_test *out) 569b8e80941Smrg{ 570b8e80941Smrg out->ref = 0; /* Gallium gets it from elsewhere */ 571b8e80941Smrg 572b8e80941Smrg out->mask = in->valuemask; 573b8e80941Smrg out->func = panfrost_translate_compare_func(in->func); 574b8e80941Smrg out->sfail = panfrost_translate_stencil_op(in->fail_op); 575b8e80941Smrg out->dpfail = panfrost_translate_stencil_op(in->zfail_op); 576b8e80941Smrg out->dppass = panfrost_translate_stencil_op(in->zpass_op); 577b8e80941Smrg} 578b8e80941Smrg 579b8e80941Smrgstatic void 580b8e80941Smrgpanfrost_default_shader_backend(struct panfrost_context 
*ctx) 581b8e80941Smrg{ 582b8e80941Smrg struct mali_shader_meta shader = { 583b8e80941Smrg .alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000), 584b8e80941Smrg 585b8e80941Smrg .unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010, 586b8e80941Smrg .unknown2_4 = MALI_NO_MSAA | 0x4e0, 587b8e80941Smrg }; 588b8e80941Smrg 589b8e80941Smrg if (ctx->is_t6xx) { 590b8e80941Smrg shader.unknown2_4 |= 0x10; 591b8e80941Smrg } 592b8e80941Smrg 593b8e80941Smrg struct pipe_stencil_state default_stencil = { 594b8e80941Smrg .enabled = 0, 595b8e80941Smrg .func = PIPE_FUNC_ALWAYS, 596b8e80941Smrg .fail_op = MALI_STENCIL_KEEP, 597b8e80941Smrg .zfail_op = MALI_STENCIL_KEEP, 598b8e80941Smrg .zpass_op = MALI_STENCIL_KEEP, 599b8e80941Smrg .writemask = 0xFF, 600b8e80941Smrg .valuemask = 0xFF 601b8e80941Smrg }; 602b8e80941Smrg 603b8e80941Smrg panfrost_make_stencil_state(&default_stencil, &shader.stencil_front); 604b8e80941Smrg shader.stencil_mask_front = default_stencil.writemask; 605b8e80941Smrg 606b8e80941Smrg panfrost_make_stencil_state(&default_stencil, &shader.stencil_back); 607b8e80941Smrg shader.stencil_mask_back = default_stencil.writemask; 608b8e80941Smrg 609b8e80941Smrg if (default_stencil.enabled) 610b8e80941Smrg shader.unknown2_4 |= MALI_STENCIL_TEST; 611b8e80941Smrg 612b8e80941Smrg memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader)); 613b8e80941Smrg} 614b8e80941Smrg 615b8e80941Smrg/* Generates a vertex/tiler job. This is, in some sense, the heart of the 616b8e80941Smrg * graphics command stream. It should be called once per draw, accordding to 617b8e80941Smrg * presentations. Set is_tiler for "tiler" jobs (fragment shader jobs, but in 618b8e80941Smrg * Mali parlance, "fragment" refers to framebuffer writeout). Clear it for 619b8e80941Smrg * vertex jobs. 
*/ 620b8e80941Smrg 621b8e80941Smrgstruct panfrost_transfer 622b8e80941Smrgpanfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler, bool is_elided_tiler) 623b8e80941Smrg{ 624b8e80941Smrg /* Each draw call corresponds to two jobs, and we want to offset to leave room for the set-value job */ 625b8e80941Smrg int draw_job_index = 1 + (2 * ctx->draw_count); 626b8e80941Smrg 627b8e80941Smrg struct mali_job_descriptor_header job = { 628b8e80941Smrg .job_type = is_tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX, 629b8e80941Smrg .job_index = draw_job_index + (is_tiler ? 1 : 0), 630b8e80941Smrg#ifdef __LP64__ 631b8e80941Smrg .job_descriptor_size = 1, 632b8e80941Smrg#endif 633b8e80941Smrg }; 634b8e80941Smrg 635b8e80941Smrg /* Only non-elided tiler jobs have dependencies which are known at this point */ 636b8e80941Smrg 637b8e80941Smrg if (is_tiler && !is_elided_tiler) { 638b8e80941Smrg /* Tiler jobs depend on vertex jobs */ 639b8e80941Smrg 640b8e80941Smrg job.job_dependency_index_1 = draw_job_index; 641b8e80941Smrg 642b8e80941Smrg /* Tiler jobs also depend on the previous tiler job */ 643b8e80941Smrg 644b8e80941Smrg if (ctx->draw_count) 645b8e80941Smrg job.job_dependency_index_2 = draw_job_index - 1; 646b8e80941Smrg } 647b8e80941Smrg 648b8e80941Smrg struct midgard_payload_vertex_tiler *payload = is_tiler ? &ctx->payload_tiler : &ctx->payload_vertex; 649b8e80941Smrg 650b8e80941Smrg /* There's some padding hacks on 32-bit */ 651b8e80941Smrg 652b8e80941Smrg#ifdef __LP64__ 653b8e80941Smrg int offset = 0; 654b8e80941Smrg#else 655b8e80941Smrg int offset = 4; 656b8e80941Smrg#endif 657b8e80941Smrg struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(*payload)); 658b8e80941Smrg memcpy(transfer.cpu, &job, sizeof(job)); 659b8e80941Smrg memcpy(transfer.cpu + sizeof(job) - offset, payload, sizeof(*payload)); 660b8e80941Smrg return transfer; 661b8e80941Smrg} 662b8e80941Smrg 663b8e80941Smrg/* Generates a set value job. 
It's unclear what exactly this does, why it's
 * necessary, and when to call it. */

/* Emit the frame's SET_VALUE job, targeting ctx->misc_0. NOTE(review): the
 * exact object being initialized is unconfirmed from this file. The staged
 * CPU/GPU pointers are stashed on the context so panfrost_link_jobs() can
 * wire this up as the first job of the frame. */

static void
panfrost_set_value_job(struct panfrost_context *ctx)
{
        struct mali_job_descriptor_header job = {
                .job_type = JOB_TYPE_SET_VALUE,
                .job_descriptor_size = 1,

                /* Each queued draw emits one vertex and one tiler job, so
                 * skip past all of their indices */
                .job_index = 1 + (2 * ctx->draw_count),
        };

        struct mali_payload_set_value payload = {
                .out = ctx->misc_0.gpu,
                .unknown = 0x3,
        };

        /* Upload header and payload back-to-back in one transient slice */
        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(payload));
        memcpy(transfer.cpu, &job, sizeof(job));
        memcpy(transfer.cpu + sizeof(job), &payload, sizeof(payload));

        ctx->u_set_value_job = (struct mali_job_descriptor_header *) transfer.cpu;
        ctx->set_value_job = transfer.gpu;
}

/* Carve stride*count bytes for one varying buffer out of the per-frame
 * varying heap, fill out the given descriptor slot to point at it, and
 * return the GPU address of the allocation. */

static mali_ptr
panfrost_emit_varyings(
        struct panfrost_context *ctx,
        union mali_attr *slot,
        unsigned stride,
        unsigned count)
{
        mali_ptr varying_address = ctx->varying_mem.gpu + ctx->varying_height;

        /* Fill out the descriptor */
        slot->elements = varying_address | MALI_ATTR_LINEAR;
        slot->stride = stride;
        slot->size = stride * count;

        /* Bump the heap cursor, keeping allocations 64-byte aligned. There
         * is no overflow recovery beyond the assert. */
        ctx->varying_height += ALIGN(slot->size, 64);
        assert(ctx->varying_height < ctx->varying_mem.size);

        return varying_address;
}

/* gl_PointCoord is synthesized by the hardware; its descriptor carries a
 * magic element value and no backing memory. */

static void
panfrost_emit_point_coord(union mali_attr *slot)
{
        slot->elements = MALI_VARYING_POINT_COORD | MALI_ATTR_LINEAR;
        slot->stride = slot->size = 0;
}

/* Upload the varying metadata (attr_meta) for both shader stages and
 * allocate the varying buffers themselves, in the fixed order: general
 * varyings, gl_Position, optional gl_PointSize, optional gl_PointCoord. */

static void
panfrost_emit_varying_descriptor(
        struct panfrost_context *ctx,
        unsigned invocation_count)
{
        /* Load the shaders */

        struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];
        struct panfrost_shader_state *fs = &ctx->fs->variants[ctx->fs->active_variant];

        /* Allocate the varying descriptor */

        size_t vs_size = sizeof(struct mali_attr_meta) * vs->tripipe->varying_count;
        size_t fs_size = sizeof(struct mali_attr_meta) * fs->tripipe->varying_count;

        struct panfrost_transfer trans = panfrost_allocate_transient(ctx,
                                         vs_size + fs_size);

        /* VS and FS metadata are packed back-to-back in a single upload;
         * the tiler (FS) view starts vs_size bytes in */
        memcpy(trans.cpu, vs->varyings, vs_size);
        memcpy(trans.cpu + vs_size, fs->varyings, fs_size);

        ctx->payload_vertex.postfix.varying_meta = trans.gpu;
        ctx->payload_tiler.postfix.varying_meta = trans.gpu + vs_size;

        /* Buffer indices must be in this order per our convention */
        union mali_attr varyings[PIPE_MAX_ATTRIBS];
        unsigned idx = 0;

        /* General varyings -- use the VS's, since those are more likely to be
         * accurate on desktop */

        panfrost_emit_varyings(ctx, &varyings[idx++],
                               vs->general_varying_stride, invocation_count);

        /* fp32 vec4 gl_Position */
        ctx->payload_tiler.postfix.position_varying =
                panfrost_emit_varyings(ctx, &varyings[idx++],
                                       sizeof(float) * 4, invocation_count);

        /* NOTE(review): the point-size buffer is also allocated when only
         * gl_PointCoord is read -- confirm that is intentional */
        if (vs->writes_point_size || fs->reads_point_coord) {
                /* fp16 vec1 gl_PointSize */
                ctx->payload_tiler.primitive_size.pointer =
                        panfrost_emit_varyings(ctx, &varyings[idx++],
                                               2, invocation_count);
        }

        if (fs->reads_point_coord) {
                /* Special descriptor */
                panfrost_emit_point_coord(&varyings[idx++]);
        }

        mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, idx * sizeof(union mali_attr));
        ctx->payload_vertex.postfix.varyings = varyings_p;
        ctx->payload_tiler.postfix.varyings = varyings_p;
}

/* GPU address of vertex buffer i, including the pipe-level buffer_offset */

static mali_ptr
panfrost_vertex_buffer_address(struct panfrost_context *ctx, unsigned i)
{
        struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i];
        struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);

        return rsrc->bo->gpu + buf->buffer_offset;
}

/* Emits attributes and varying descriptors, which should be called every draw,
 * excepting some obscure circumstances */

static void
panfrost_emit_vertex_data(struct panfrost_context *ctx, struct panfrost_job *job)
{
        /* Staged mali_attr, and index into them. i =/= k, depending on the
         * vertex buffer mask */
        union mali_attr attrs[PIPE_MAX_ATTRIBS];
        unsigned k = 0;

        unsigned invocation_count = MALI_NEGATIVE(ctx->payload_tiler.prefix.invocation_count);

        for (int i = 0; i < ARRAY_SIZE(ctx->vertex_buffers); ++i) {
                /* Skip buffer slots not enabled in the mask */
                if (!(ctx->vb_mask & (1 << i))) continue;

                struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i];
                struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);

                if (!rsrc) continue;

                /* Align to 64 bytes by masking off the lower bits. This
                 * will be adjusted back when we fixup the src_offset in
                 * mali_attr_meta */

                mali_ptr addr = panfrost_vertex_buffer_address(ctx, i) & ~63;

                /* Offset vertex count by draw_start to make sure we upload enough */
                attrs[k].stride = buf->stride;
                attrs[k].size = rsrc->base.width0;

                /* Keep the BO referenced for the duration of the batch */
                panfrost_job_add_bo(job, rsrc->bo);
                attrs[k].elements = addr | MALI_ATTR_LINEAR;

                ++k;
        }

        ctx->payload_vertex.postfix.attributes = panfrost_upload_transient(ctx, attrs, k * sizeof(union mali_attr));

        panfrost_emit_varying_descriptor(ctx, invocation_count);
}

/* Variable point size is only in effect for point primitives whose vertex
 * shader actually writes gl_PointSize; otherwise the constant path is used. */

static bool
panfrost_writes_point_size(struct panfrost_context *ctx)
{
        assert(ctx->vs);
        struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];

        return vs->writes_point_size && ctx->payload_tiler.prefix.draw_mode == MALI_POINTS;
}
/* Stage the attribute descriptors so we can adjust src_offset
 * to let BOs align nicely */

static void
panfrost_stage_attributes(struct panfrost_context *ctx)
{
        struct panfrost_vertex_state *so = ctx->vertex;

        size_t sz = sizeof(struct mali_attr_meta) * so->num_elements;
        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sz);
        struct mali_attr_meta *target = (struct mali_attr_meta *) transfer.cpu;

        /* Copy as-is for the first pass */
        memcpy(target, so->hw, sz);

        /* Fixup offsets for the second pass. Recall that the hardware
         * calculates attribute addresses as:
         *
         *      addr = base + (stride * vtx) + src_offset;
         *
         * However, on Mali, base must be aligned to 64-bytes, so we
         * instead let:
         *
         *      base' = base & ~63 = base - (base & 63)
         *
         * To compensate when using base' (see emit_vertex_data), we have
         * to adjust src_offset by the masked off piece:
         *
         *      addr' = base' + (stride * vtx) + (src_offset + (base & 63))
         *            = base - (base & 63) + (stride * vtx) + src_offset + (base & 63)
         *            = base + (stride * vtx) + src_offset
         *            = addr;
         *
         * QED.
         */

        for (unsigned i = 0; i < so->num_elements; ++i) {
                unsigned vbi = so->pipe[i].vertex_buffer_index;
                mali_ptr addr = panfrost_vertex_buffer_address(ctx, vbi);

                /* Adjust by the masked off bits of the offset */
                target[i].src_offset += (addr & 63);
        }

        ctx->payload_vertex.postfix.attribute_meta = transfer.gpu;
}

/* Go through dirty flags and actualise them in the cmdstream. */

void
panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
{
        struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);

        if (with_vertex_data) {
                panfrost_emit_vertex_data(ctx, job);
        }

        bool msaa = ctx->rasterizer->base.multisample;

        if (ctx->dirty & PAN_DIRTY_RASTERIZER) {
                ctx->payload_tiler.gl_enables = ctx->rasterizer->tiler_gl_enables;

                /* TODO: Sample size */
                SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa);
                SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !msaa);
        }

        /* Enable job requirements at draw-time */

        if (msaa)
                job->requirements |= PAN_REQ_MSAA;

        if (ctx->depth_stencil->depth.writemask)
                job->requirements |= PAN_REQ_DEPTH_WRITE;

        if (ctx->occlusion_query) {
                ctx->payload_tiler.gl_enables |= MALI_OCCLUSION_QUERY | MALI_OCCLUSION_PRECISE;
                ctx->payload_tiler.postfix.occlusion_counter = ctx->occlusion_query->transfer.gpu;
        }

        if (ctx->dirty & PAN_DIRTY_VS) {
                assert(ctx->vs);

                struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];

                /* Late shader descriptor assignments */

                vs->tripipe->texture_count = ctx->sampler_view_count[PIPE_SHADER_VERTEX];
                vs->tripipe->sampler_count = ctx->sampler_count[PIPE_SHADER_VERTEX];

                /* Who knows */
                vs->tripipe->midgard1.unknown1 = 0x2201;

                ctx->payload_vertex.postfix._shader_upper = vs->tripipe_gpu >> 4;
        }

        if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) {
                /* Check if we need to link the gl_PointSize varying */
                if (!panfrost_writes_point_size(ctx)) {
                        /* If the size is constant, write it out. Otherwise,
                         * don't touch primitive_size (since we would clobber
                         * the pointer there) */

                        ctx->payload_tiler.primitive_size.constant = ctx->rasterizer->base.line_width;
                }
        }

        /* TODO: Maybe dirty track FS, maybe not. For now, it's transient. */
        if (ctx->fs)
                ctx->dirty |= PAN_DIRTY_FS;

        if (ctx->dirty & PAN_DIRTY_FS) {
                assert(ctx->fs);
                struct panfrost_shader_state *variant = &ctx->fs->variants[ctx->fs->active_variant];

#define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name

                COPY(shader);
                COPY(attribute_count);
                COPY(varying_count);
                COPY(midgard1.uniform_count);
                COPY(midgard1.work_count);
                COPY(midgard1.unknown2);

#undef COPY

                /* If there is a blend shader, work registers are shared */

                if (ctx->blend->has_blend_shader)
                        ctx->fragment_shader_core.midgard1.work_count = /*MAX2(ctx->fragment_shader_core.midgard1.work_count, ctx->blend->blend_work_count)*/16;

                /* Set late due to depending on render state */
                /* The one at the end seems to mean "1 UBO" */
                ctx->fragment_shader_core.midgard1.unknown1 = MALI_NO_ALPHA_TO_COVERAGE | 0x200 | 0x2201;

                /* Assign texture/sample count right before upload */
                ctx->fragment_shader_core.texture_count = ctx->sampler_view_count[PIPE_SHADER_FRAGMENT];
                ctx->fragment_shader_core.sampler_count = ctx->sampler_count[PIPE_SHADER_FRAGMENT];

                /* Assign the stencil refs late */
                ctx->fragment_shader_core.stencil_front.ref = ctx->stencil_ref.ref_value[0];
                ctx->fragment_shader_core.stencil_back.ref = ctx->stencil_ref.ref_value[1];

                /* CAN_DISCARD should be set if the fragment shader possibly
                 * contains a 'discard' instruction. It is likely this is
                 * related to optimizations related to forward-pixel kill, as
                 * per "Mali Performance 3: Is EGL_BUFFER_PRESERVED a good
                 * thing?" by Peter Harris
                 */

                if (variant->can_discard) {
                        ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD;
                        ctx->fragment_shader_core.midgard1.unknown1 &= ~MALI_NO_ALPHA_TO_COVERAGE;
                        ctx->fragment_shader_core.midgard1.unknown1 |= 0x4000;
                        /* NOTE(review): this plain assignment clobbers the
                         * two updates immediately above (dead stores) --
                         * confirm whether 0x4200 alone is the intended
                         * value */
                        ctx->fragment_shader_core.midgard1.unknown1 = 0x4200;
                }

                /* Check if we're using the default blend descriptor (fast path) */

                bool no_blending =
                        !ctx->blend->has_blend_shader &&
                        (ctx->blend->equation.rgb_mode == 0x122) &&
                        (ctx->blend->equation.alpha_mode == 0x122) &&
                        (ctx->blend->equation.color_mask == 0xf);

                /* Even on MFBD, the shader descriptor gets blend shaders. It's
                 * *also* copied to the blend_meta appended (by convention),
                 * but this is the field actually read by the hardware. (Or
                 * maybe both are read...?) */

                if (ctx->blend->has_blend_shader) {
                        ctx->fragment_shader_core.blend_shader = ctx->blend->blend_shader;
                }

                if (ctx->require_sfbd) {
                        /* When only a single render target platform is used, the blend
                         * information is inside the shader meta itself. We
                         * additionally need to signal CAN_DISCARD for nontrivial blend
                         * modes (so we're able to read back the destination buffer) */

                        if (!ctx->blend->has_blend_shader) {
                                memcpy(&ctx->fragment_shader_core.blend_equation, &ctx->blend->equation, sizeof(ctx->blend->equation));
                        }

                        if (!no_blending) {
                                ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD;
                        }
                }

                /* Room is reserved for the MFBD blend_meta even on the SFBD
                 * path, where only the shader meta part is filled in */
                size_t size = sizeof(struct mali_shader_meta) + sizeof(struct mali_blend_meta);
                struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);
                memcpy(transfer.cpu, &ctx->fragment_shader_core, sizeof(struct mali_shader_meta));

                ctx->payload_tiler.postfix._shader_upper = (transfer.gpu) >> 4;

                if (!ctx->require_sfbd) {
                        /* Additional blend descriptor tacked on for jobs using MFBD */

                        unsigned blend_count = 0;

                        if (ctx->blend->has_blend_shader) {
                                /* For a blend shader, the bottom nibble corresponds to
                                 * the number of work registers used, which signals the
                                 * -existence- of a blend shader */

                                assert(ctx->blend->blend_work_count >= 2);
                                blend_count |= MIN2(ctx->blend->blend_work_count, 3);
                        } else {
                                /* Otherwise, the bottom bit simply specifies if
                                 * blending (anything other than REPLACE) is enabled */

                                if (!no_blending)
                                        blend_count |= 0x1;
                        }

                        /* Second blend equation is always a simple replace.
                         * NOTE(review): only sizeof(replace_mode) low-order
                         * bytes of the 64-bit constant are copied. */

                        uint64_t replace_magic = 0xf0122122;
                        struct mali_blend_equation replace_mode;
                        memcpy(&replace_mode, &replace_magic, sizeof(replace_mode));

                        struct mali_blend_meta blend_meta[] = {
                                {
                                        .unk1 = 0x200 | blend_count,
                                        .blend_equation_1 = ctx->blend->equation,
                                        .blend_equation_2 = replace_mode
                                },
                        };

                        if (ctx->blend->has_blend_shader) {
                                blend_meta[0].blend_shader = ctx->blend->blend_shader;
                        }

                        memcpy(transfer.cpu + sizeof(struct mali_shader_meta), blend_meta, sizeof(blend_meta));
                }
        }

        /* We stage to transient, so always dirty.. */
        panfrost_stage_attributes(ctx);

        if (ctx->dirty & PAN_DIRTY_SAMPLERS) {
                /* Upload samplers back to back, no padding */

                for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) {
                        if (!ctx->sampler_count[t]) continue;

                        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(struct mali_sampler_descriptor) * ctx->sampler_count[t]);
                        struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *) transfer.cpu;

                        for (int i = 0; i < ctx->sampler_count[t]; ++i) {
                                desc[i] = ctx->samplers[t][i]->hw;
                        }

                        if (t == PIPE_SHADER_FRAGMENT)
                                ctx->payload_tiler.postfix.sampler_descriptor = transfer.gpu;
                        else if (t == PIPE_SHADER_VERTEX)
                                ctx->payload_vertex.postfix.sampler_descriptor = transfer.gpu;
                        else
                                assert(0);
                }
        }

        if (ctx->dirty & PAN_DIRTY_TEXTURES) {
                for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) {
                        /* Shortcircuit */
                        if (!ctx->sampler_view_count[t]) continue;

                        uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];

                        for (int i = 0; i < ctx->sampler_view_count[t]; ++i) {
                                if (!ctx->sampler_views[t][i])
                                        continue;

                                struct pipe_resource *tex_rsrc = ctx->sampler_views[t][i]->base.texture;
                                struct panfrost_resource *rsrc = (struct panfrost_resource *) tex_rsrc;

                                /* Inject the addresses in, interleaving cube
                                 * faces and mip levels appropriately. */

                                for (int l = 0; l <= tex_rsrc->last_level; ++l) {
                                        for (int f = 0; f < tex_rsrc->array_size; ++f) {
                                                unsigned idx = (l * tex_rsrc->array_size) + f;

                                                ctx->sampler_views[t][i]->hw.swizzled_bitmaps[idx] =
                                                        rsrc->bo->gpu +
                                                        rsrc->bo->slices[l].offset +
                                                        f * rsrc->bo->cubemap_stride;
                                        }
                                }

                                trampolines[i] = panfrost_upload_transient(ctx, &ctx->sampler_views[t][i]->hw, sizeof(struct mali_texture_descriptor));
                        }

                        mali_ptr trampoline = panfrost_upload_transient(ctx, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]);

                        if (t == PIPE_SHADER_FRAGMENT)
                                ctx->payload_tiler.postfix.texture_trampoline = trampoline;
                        else if (t == PIPE_SHADER_VERTEX)
                                ctx->payload_vertex.postfix.texture_trampoline = trampoline;
                        else
                                assert(0);
                }
        }

        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;

        /* For flipped-Y buffers (signaled by negative scale), the translate is
         * flipped as well */

        bool invert_y = vp->scale[1] < 0.0;
        float translate_y = vp->translate[1];

        if (invert_y)
                translate_y = ctx->pipe_framebuffer.height - translate_y;

        for (int i = 0; i <= PIPE_SHADER_FRAGMENT; ++i) {
                struct panfrost_constant_buffer *buf = &ctx->constant_buffer[i];

                struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];
                struct panfrost_shader_state *fs = &ctx->fs->variants[ctx->fs->active_variant];
                struct panfrost_shader_state *ss = (i == PIPE_SHADER_FRAGMENT) ? fs : vs;

                /* Allocate room for the sysval and the uniforms */
                size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
                size_t size = sys_size + buf->size;
                struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);

                /* Upload sysvals requested by the shader. NOTE(review): the
                 * inner loop variable shadows the outer shader-stage `i` --
                 * correct as written, but worth renaming. */
                float *uniforms = (float *) transfer.cpu;
                for (unsigned i = 0; i < ss->sysval_count; ++i) {
                        int sysval = ss->sysval[i];

                        if (sysval == PAN_SYSVAL_VIEWPORT_SCALE) {
                                uniforms[4*i + 0] = vp->scale[0];
                                uniforms[4*i + 1] = fabsf(vp->scale[1]);
                                uniforms[4*i + 2] = vp->scale[2];
                        } else if (sysval == PAN_SYSVAL_VIEWPORT_OFFSET) {
                                uniforms[4*i + 0] = vp->translate[0];
                                uniforms[4*i + 1] = translate_y;
                                uniforms[4*i + 2] = vp->translate[2];
                        } else {
                                assert(0);
                        }
                }

                /* Upload uniforms */
                memcpy(transfer.cpu + sys_size, buf->buffer, buf->size);

                int uniform_count = 0;

                /* NOTE(review): `postfix` is only assigned in the two valid
                 * cases; with NDEBUG the default case would fall through and
                 * use it uninitialized */
                struct mali_vertex_tiler_postfix *postfix;

                switch (i) {
                case PIPE_SHADER_VERTEX:
                        uniform_count = ctx->vs->variants[ctx->vs->active_variant].uniform_count;
                        postfix = &ctx->payload_vertex.postfix;
                        break;

                case PIPE_SHADER_FRAGMENT:
                        uniform_count = ctx->fs->variants[ctx->fs->active_variant].uniform_count;
                        postfix = &ctx->payload_tiler.postfix;
                        break;

                default:
                        DBG("Unknown shader stage %d in uniform upload\n", i);
                        assert(0);
                }

                /* Also attach the same buffer as a UBO for extended access */

                struct mali_uniform_buffer_meta uniform_buffers[] = {
                        {
                                .size = MALI_POSITIVE((2 + uniform_count)),
                                .ptr = transfer.gpu >> 2,
                        },
                };

                mali_ptr ubufs = panfrost_upload_transient(ctx, uniform_buffers, sizeof(uniform_buffers));
                postfix->uniforms = transfer.gpu;
                postfix->uniform_buffers = ubufs;

                buf->dirty = 0;
        }

        /* TODO: Upload the viewport somewhere more appropriate */

        /* Clip bounds are encoded as floats. The viewport itself is encoded as
         * (somewhat) asymmetric ints. */
        const struct pipe_scissor_state *ss = &ctx->scissor;

        struct mali_viewport view = {
                /* By default, do no viewport clipping, i.e. clip to (-inf,
                 * inf) in each direction. Clipping to the viewport in theory
                 * should work, but in practice causes issues when we're not
                 * explicitly trying to scissor */

                .clip_minx = -inff,
                .clip_miny = -inff,
                .clip_maxx = inff,
                .clip_maxy = inff,

                .clip_minz = 0.0,
                .clip_maxz = 1.0,
        };

        /* Always scissor to the viewport by default. */
        view.viewport0[0] = (int) (vp->translate[0] - vp->scale[0]);
        view.viewport1[0] = MALI_POSITIVE((int) (vp->translate[0] + vp->scale[0]));

        view.viewport0[1] = (int) (translate_y - fabs(vp->scale[1]));
        view.viewport1[1] = MALI_POSITIVE((int) (translate_y + fabs(vp->scale[1])));

        if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
                /* Invert scissor if needed */
                unsigned miny = invert_y ?
                                ctx->pipe_framebuffer.height - ss->maxy : ss->miny;

                unsigned maxy = invert_y ?
                                ctx->pipe_framebuffer.height - ss->miny : ss->maxy;

                /* Set the actual scissor */
                view.viewport0[0] = ss->minx;
                view.viewport0[1] = miny;
                view.viewport1[0] = MALI_POSITIVE(ss->maxx);
                view.viewport1[1] = MALI_POSITIVE(maxy);
        }

        ctx->payload_tiler.postfix.viewport =
                panfrost_upload_transient(ctx,
                                          &view,
                                          sizeof(struct mali_viewport));

        ctx->dirty = 0;
}

/* Corresponds to exactly one draw, but does not submit anything */

static void
panfrost_queue_draw(struct panfrost_context *ctx)
{
        /* TODO: Expand the array? */
        if (ctx->draw_count >= MAX_DRAW_CALLS) {
                DBG("Job buffer overflow, ignoring draw\n");
                assert(0);
        }

        /* Handle dirty flags now */
        panfrost_emit_for_draw(ctx, true);

        /* Emit the vertex/tiler pair for this draw; stash both CPU and GPU
         * pointers so the jobs can be linked at flush time */
        struct panfrost_transfer vertex = panfrost_vertex_tiler_job(ctx, false, false);
        struct panfrost_transfer tiler = panfrost_vertex_tiler_job(ctx, true, false);

        ctx->u_vertex_jobs[ctx->vertex_job_count] = (struct mali_job_descriptor_header *) vertex.cpu;
        ctx->vertex_jobs[ctx->vertex_job_count++] = vertex.gpu;

        ctx->u_tiler_jobs[ctx->tiler_job_count] = (struct mali_job_descriptor_header *) tiler.cpu;
        ctx->tiler_jobs[ctx->tiler_job_count++] = tiler.gpu;

        ctx->draw_count++;
}

/* At the end of the frame, the vertex and tiler jobs are linked together and
 * then the fragment job is plonked at the end. Set value job is first for
 * unknown reasons. */
fragment job is plonked at the end. Set value job is first for 1296b8e80941Smrg * unknown reasons. */ 1297b8e80941Smrg 1298b8e80941Smrgstatic void 1299b8e80941Smrgpanfrost_link_job_pair(struct mali_job_descriptor_header *first, mali_ptr next) 1300b8e80941Smrg{ 1301b8e80941Smrg if (first->job_descriptor_size) 1302b8e80941Smrg first->next_job_64 = (u64) (uintptr_t) next; 1303b8e80941Smrg else 1304b8e80941Smrg first->next_job_32 = (u32) (uintptr_t) next; 1305b8e80941Smrg} 1306b8e80941Smrg 1307b8e80941Smrgstatic void 1308b8e80941Smrgpanfrost_link_jobs(struct panfrost_context *ctx) 1309b8e80941Smrg{ 1310b8e80941Smrg if (ctx->draw_count) { 1311b8e80941Smrg /* Generate the set_value_job */ 1312b8e80941Smrg panfrost_set_value_job(ctx); 1313b8e80941Smrg 1314b8e80941Smrg /* Have the first vertex job depend on the set value job */ 1315b8e80941Smrg ctx->u_vertex_jobs[0]->job_dependency_index_1 = ctx->u_set_value_job->job_index; 1316b8e80941Smrg 1317b8e80941Smrg /* SV -> V */ 1318b8e80941Smrg panfrost_link_job_pair(ctx->u_set_value_job, ctx->vertex_jobs[0]); 1319b8e80941Smrg } 1320b8e80941Smrg 1321b8e80941Smrg /* V -> V/T ; T -> T/null */ 1322b8e80941Smrg for (int i = 0; i < ctx->vertex_job_count; ++i) { 1323b8e80941Smrg bool isLast = (i + 1) == ctx->vertex_job_count; 1324b8e80941Smrg 1325b8e80941Smrg panfrost_link_job_pair(ctx->u_vertex_jobs[i], isLast ? ctx->tiler_jobs[0] : ctx->vertex_jobs[i + 1]); 1326b8e80941Smrg } 1327b8e80941Smrg 1328b8e80941Smrg /* T -> T/null */ 1329b8e80941Smrg for (int i = 0; i < ctx->tiler_job_count; ++i) { 1330b8e80941Smrg bool isLast = (i + 1) == ctx->tiler_job_count; 1331b8e80941Smrg panfrost_link_job_pair(ctx->u_tiler_jobs[i], isLast ? 0 : ctx->tiler_jobs[i + 1]); 1332b8e80941Smrg } 1333b8e80941Smrg} 1334b8e80941Smrg 1335b8e80941Smrg/* The entire frame is in memory -- send it off to the kernel! 
*/ 1336b8e80941Smrg 1337b8e80941Smrgstatic void 1338b8e80941Smrgpanfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate, 1339b8e80941Smrg struct pipe_fence_handle **fence, 1340b8e80941Smrg struct panfrost_job *job) 1341b8e80941Smrg{ 1342b8e80941Smrg struct pipe_context *gallium = (struct pipe_context *) ctx; 1343b8e80941Smrg struct panfrost_screen *screen = pan_screen(gallium->screen); 1344b8e80941Smrg 1345b8e80941Smrg /* Edge case if screen is cleared and nothing else */ 1346b8e80941Smrg bool has_draws = ctx->draw_count > 0; 1347b8e80941Smrg 1348b8e80941Smrg /* Workaround a bizarre lockup (a hardware errata?) */ 1349b8e80941Smrg if (!has_draws) 1350b8e80941Smrg flush_immediate = true; 1351b8e80941Smrg 1352b8e80941Smrg /* A number of jobs are batched -- this must be linked and cleared */ 1353b8e80941Smrg panfrost_link_jobs(ctx); 1354b8e80941Smrg 1355b8e80941Smrg ctx->draw_count = 0; 1356b8e80941Smrg ctx->vertex_job_count = 0; 1357b8e80941Smrg ctx->tiler_job_count = 0; 1358b8e80941Smrg 1359b8e80941Smrg#ifndef DRY_RUN 1360b8e80941Smrg 1361b8e80941Smrg bool is_scanout = panfrost_is_scanout(ctx); 1362b8e80941Smrg screen->driver->submit_vs_fs_job(ctx, has_draws, is_scanout); 1363b8e80941Smrg 1364b8e80941Smrg /* If visual, we can stall a frame */ 1365b8e80941Smrg 1366b8e80941Smrg if (!flush_immediate) 1367b8e80941Smrg screen->driver->force_flush_fragment(ctx, fence); 1368b8e80941Smrg 1369b8e80941Smrg screen->last_fragment_flushed = false; 1370b8e80941Smrg screen->last_job = job; 1371b8e80941Smrg 1372b8e80941Smrg /* If readback, flush now (hurts the pipelined performance) */ 1373b8e80941Smrg if (flush_immediate) 1374b8e80941Smrg screen->driver->force_flush_fragment(ctx, fence); 1375b8e80941Smrg 1376b8e80941Smrg if (screen->driver->dump_counters && pan_counters_base) { 1377b8e80941Smrg screen->driver->dump_counters(screen); 1378b8e80941Smrg 1379b8e80941Smrg char filename[128]; 1380b8e80941Smrg snprintf(filename, sizeof(filename), "%s/frame%d.mdgprf", 
pan_counters_base, ++performance_counter_number); 1381b8e80941Smrg FILE *fp = fopen(filename, "wb"); 1382b8e80941Smrg fwrite(screen->perf_counters.cpu, 4096, sizeof(uint32_t), fp); 1383b8e80941Smrg fclose(fp); 1384b8e80941Smrg } 1385b8e80941Smrg 1386b8e80941Smrg#endif 1387b8e80941Smrg} 1388b8e80941Smrg 1389b8e80941Smrgvoid 1390b8e80941Smrgpanfrost_flush( 1391b8e80941Smrg struct pipe_context *pipe, 1392b8e80941Smrg struct pipe_fence_handle **fence, 1393b8e80941Smrg unsigned flags) 1394b8e80941Smrg{ 1395b8e80941Smrg struct panfrost_context *ctx = pan_context(pipe); 1396b8e80941Smrg struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); 1397b8e80941Smrg 1398b8e80941Smrg /* Nothing to do! */ 1399b8e80941Smrg if (!ctx->draw_count && !job->clear) return; 1400b8e80941Smrg 1401b8e80941Smrg /* Whether to stall the pipeline for immediately correct results */ 1402b8e80941Smrg bool flush_immediate = flags & PIPE_FLUSH_END_OF_FRAME; 1403b8e80941Smrg 1404b8e80941Smrg /* Submit the frame itself */ 1405b8e80941Smrg panfrost_submit_frame(ctx, flush_immediate, fence, job); 1406b8e80941Smrg 1407b8e80941Smrg /* Prepare for the next frame */ 1408b8e80941Smrg panfrost_invalidate_frame(ctx); 1409b8e80941Smrg} 1410b8e80941Smrg 1411b8e80941Smrg#define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_##c; 1412b8e80941Smrg 1413b8e80941Smrgstatic int 1414b8e80941Smrgg2m_draw_mode(enum pipe_prim_type mode) 1415b8e80941Smrg{ 1416b8e80941Smrg switch (mode) { 1417b8e80941Smrg DEFINE_CASE(POINTS); 1418b8e80941Smrg DEFINE_CASE(LINES); 1419b8e80941Smrg DEFINE_CASE(LINE_LOOP); 1420b8e80941Smrg DEFINE_CASE(LINE_STRIP); 1421b8e80941Smrg DEFINE_CASE(TRIANGLES); 1422b8e80941Smrg DEFINE_CASE(TRIANGLE_STRIP); 1423b8e80941Smrg DEFINE_CASE(TRIANGLE_FAN); 1424b8e80941Smrg DEFINE_CASE(QUADS); 1425b8e80941Smrg DEFINE_CASE(QUAD_STRIP); 1426b8e80941Smrg DEFINE_CASE(POLYGON); 1427b8e80941Smrg 1428b8e80941Smrg default: 1429b8e80941Smrg DBG("Illegal draw mode %d\n", mode); 1430b8e80941Smrg assert(0); 
1431b8e80941Smrg return MALI_LINE_LOOP; 1432b8e80941Smrg } 1433b8e80941Smrg} 1434b8e80941Smrg 1435b8e80941Smrg#undef DEFINE_CASE 1436b8e80941Smrg 1437b8e80941Smrgstatic unsigned 1438b8e80941Smrgpanfrost_translate_index_size(unsigned size) 1439b8e80941Smrg{ 1440b8e80941Smrg switch (size) { 1441b8e80941Smrg case 1: 1442b8e80941Smrg return MALI_DRAW_INDEXED_UINT8; 1443b8e80941Smrg 1444b8e80941Smrg case 2: 1445b8e80941Smrg return MALI_DRAW_INDEXED_UINT16; 1446b8e80941Smrg 1447b8e80941Smrg case 4: 1448b8e80941Smrg return MALI_DRAW_INDEXED_UINT32; 1449b8e80941Smrg 1450b8e80941Smrg default: 1451b8e80941Smrg DBG("Unknown index size %d\n", size); 1452b8e80941Smrg assert(0); 1453b8e80941Smrg return 0; 1454b8e80941Smrg } 1455b8e80941Smrg} 1456b8e80941Smrg 1457b8e80941Smrg/* Gets a GPU address for the associated index buffer. Only gauranteed to be 1458b8e80941Smrg * good for the duration of the draw (transient), could last longer */ 1459b8e80941Smrg 1460b8e80941Smrgstatic mali_ptr 1461b8e80941Smrgpanfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe_draw_info *info) 1462b8e80941Smrg{ 1463b8e80941Smrg struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource); 1464b8e80941Smrg 1465b8e80941Smrg off_t offset = info->start * info->index_size; 1466b8e80941Smrg 1467b8e80941Smrg if (!info->has_user_indices) { 1468b8e80941Smrg /* Only resources can be directly mapped */ 1469b8e80941Smrg return rsrc->bo->gpu + offset; 1470b8e80941Smrg } else { 1471b8e80941Smrg /* Otherwise, we need to upload to transient memory */ 1472b8e80941Smrg const uint8_t *ibuf8 = (const uint8_t *) info->index.user; 1473b8e80941Smrg return panfrost_upload_transient(ctx, ibuf8 + offset, info->count * info->index_size); 1474b8e80941Smrg } 1475b8e80941Smrg} 1476b8e80941Smrg 1477b8e80941Smrgstatic void 1478b8e80941Smrgpanfrost_draw_vbo( 1479b8e80941Smrg struct pipe_context *pipe, 1480b8e80941Smrg const struct pipe_draw_info *info) 1481b8e80941Smrg{ 1482b8e80941Smrg 
struct panfrost_context *ctx = pan_context(pipe); 1483b8e80941Smrg 1484b8e80941Smrg ctx->payload_vertex.draw_start = info->start; 1485b8e80941Smrg ctx->payload_tiler.draw_start = info->start; 1486b8e80941Smrg 1487b8e80941Smrg int mode = info->mode; 1488b8e80941Smrg 1489b8e80941Smrg /* Fallback for unsupported modes */ 1490b8e80941Smrg 1491b8e80941Smrg if (!(ctx->draw_modes & (1 << mode))) { 1492b8e80941Smrg if (mode == PIPE_PRIM_QUADS && info->count == 4 && ctx->rasterizer && !ctx->rasterizer->base.flatshade) { 1493b8e80941Smrg mode = PIPE_PRIM_TRIANGLE_FAN; 1494b8e80941Smrg } else { 1495b8e80941Smrg if (info->count < 4) { 1496b8e80941Smrg /* Degenerate case? */ 1497b8e80941Smrg return; 1498b8e80941Smrg } 1499b8e80941Smrg 1500b8e80941Smrg util_primconvert_save_rasterizer_state(ctx->primconvert, &ctx->rasterizer->base); 1501b8e80941Smrg util_primconvert_draw_vbo(ctx->primconvert, info); 1502b8e80941Smrg return; 1503b8e80941Smrg } 1504b8e80941Smrg } 1505b8e80941Smrg 1506b8e80941Smrg /* Now that we have a guaranteed terminating path, find the job. 
1507b8e80941Smrg * Assignment commented out to prevent unused warning */ 1508b8e80941Smrg 1509b8e80941Smrg /* struct panfrost_job *job = */ panfrost_get_job_for_fbo(ctx); 1510b8e80941Smrg 1511b8e80941Smrg ctx->payload_tiler.prefix.draw_mode = g2m_draw_mode(mode); 1512b8e80941Smrg 1513b8e80941Smrg ctx->vertex_count = info->count; 1514b8e80941Smrg 1515b8e80941Smrg /* For non-indexed draws, they're the same */ 1516b8e80941Smrg unsigned invocation_count = ctx->vertex_count; 1517b8e80941Smrg 1518b8e80941Smrg unsigned draw_flags = 0; 1519b8e80941Smrg 1520b8e80941Smrg /* The draw flags interpret how primitive size is interpreted */ 1521b8e80941Smrg 1522b8e80941Smrg if (panfrost_writes_point_size(ctx)) 1523b8e80941Smrg draw_flags |= MALI_DRAW_VARYING_SIZE; 1524b8e80941Smrg 1525b8e80941Smrg /* For higher amounts of vertices (greater than what fits in a 16-bit 1526b8e80941Smrg * short), the other value is needed, otherwise there will be bizarre 1527b8e80941Smrg * rendering artefacts. It's not clear what these values mean yet. */ 1528b8e80941Smrg 1529b8e80941Smrg draw_flags |= (mode == PIPE_PRIM_POINTS || ctx->vertex_count > 65535) ? 
0x3000 : 0x18000; 1530b8e80941Smrg 1531b8e80941Smrg if (info->index_size) { 1532b8e80941Smrg /* Calculate the min/max index used so we can figure out how 1533b8e80941Smrg * many times to invoke the vertex shader */ 1534b8e80941Smrg 1535b8e80941Smrg /* Fetch / calculate index bounds */ 1536b8e80941Smrg unsigned min_index = 0, max_index = 0; 1537b8e80941Smrg 1538b8e80941Smrg if (info->max_index == ~0u) { 1539b8e80941Smrg u_vbuf_get_minmax_index(pipe, info, &min_index, &max_index); 1540b8e80941Smrg } else { 1541b8e80941Smrg min_index = info->min_index; 1542b8e80941Smrg max_index = info->max_index; 1543b8e80941Smrg } 1544b8e80941Smrg 1545b8e80941Smrg /* Use the corresponding values */ 1546b8e80941Smrg invocation_count = max_index - min_index + 1; 1547b8e80941Smrg ctx->payload_vertex.draw_start = min_index; 1548b8e80941Smrg ctx->payload_tiler.draw_start = min_index; 1549b8e80941Smrg 1550b8e80941Smrg ctx->payload_tiler.prefix.negative_start = -min_index; 1551b8e80941Smrg ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(info->count); 1552b8e80941Smrg 1553b8e80941Smrg //assert(!info->restart_index); /* TODO: Research */ 1554b8e80941Smrg assert(!info->index_bias); 1555b8e80941Smrg 1556b8e80941Smrg draw_flags |= panfrost_translate_index_size(info->index_size); 1557b8e80941Smrg ctx->payload_tiler.prefix.indices = panfrost_get_index_buffer_mapped(ctx, info); 1558b8e80941Smrg } else { 1559b8e80941Smrg /* Index count == vertex count, if no indexing is applied, as 1560b8e80941Smrg * if it is internally indexed in the expected order */ 1561b8e80941Smrg 1562b8e80941Smrg ctx->payload_tiler.prefix.negative_start = 0; 1563b8e80941Smrg ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(ctx->vertex_count); 1564b8e80941Smrg 1565b8e80941Smrg /* Reverse index state */ 1566b8e80941Smrg ctx->payload_tiler.prefix.indices = (uintptr_t) NULL; 1567b8e80941Smrg } 1568b8e80941Smrg 1569b8e80941Smrg ctx->payload_vertex.prefix.invocation_count = MALI_POSITIVE(invocation_count); 
1570b8e80941Smrg ctx->payload_tiler.prefix.invocation_count = MALI_POSITIVE(invocation_count); 1571b8e80941Smrg ctx->payload_tiler.prefix.unknown_draw = draw_flags; 1572b8e80941Smrg 1573b8e80941Smrg /* Fire off the draw itself */ 1574b8e80941Smrg panfrost_queue_draw(ctx); 1575b8e80941Smrg} 1576b8e80941Smrg 1577b8e80941Smrg/* CSO state */ 1578b8e80941Smrg 1579b8e80941Smrgstatic void 1580b8e80941Smrgpanfrost_generic_cso_delete(struct pipe_context *pctx, void *hwcso) 1581b8e80941Smrg{ 1582b8e80941Smrg free(hwcso); 1583b8e80941Smrg} 1584b8e80941Smrg 1585b8e80941Smrgstatic void * 1586b8e80941Smrgpanfrost_create_rasterizer_state( 1587b8e80941Smrg struct pipe_context *pctx, 1588b8e80941Smrg const struct pipe_rasterizer_state *cso) 1589b8e80941Smrg{ 1590b8e80941Smrg struct panfrost_context *ctx = pan_context(pctx); 1591b8e80941Smrg struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer); 1592b8e80941Smrg 1593b8e80941Smrg so->base = *cso; 1594b8e80941Smrg 1595b8e80941Smrg /* Bitmask, unknown meaning of the start value */ 1596b8e80941Smrg so->tiler_gl_enables = ctx->is_t6xx ? 0x105 : 0x7; 1597b8e80941Smrg 1598b8e80941Smrg so->tiler_gl_enables |= MALI_FRONT_FACE( 1599b8e80941Smrg cso->front_ccw ? MALI_CCW : MALI_CW); 1600b8e80941Smrg 1601b8e80941Smrg if (cso->cull_face & PIPE_FACE_FRONT) 1602b8e80941Smrg so->tiler_gl_enables |= MALI_CULL_FACE_FRONT; 1603b8e80941Smrg 1604b8e80941Smrg if (cso->cull_face & PIPE_FACE_BACK) 1605b8e80941Smrg so->tiler_gl_enables |= MALI_CULL_FACE_BACK; 1606b8e80941Smrg 1607b8e80941Smrg return so; 1608b8e80941Smrg} 1609b8e80941Smrg 1610b8e80941Smrgstatic void 1611b8e80941Smrgpanfrost_bind_rasterizer_state( 1612b8e80941Smrg struct pipe_context *pctx, 1613b8e80941Smrg void *hwcso) 1614b8e80941Smrg{ 1615b8e80941Smrg struct panfrost_context *ctx = pan_context(pctx); 1616b8e80941Smrg 1617b8e80941Smrg /* TODO: Why can't rasterizer be NULL ever? Other drivers are fine.. 
*/ 1618b8e80941Smrg if (!hwcso) 1619b8e80941Smrg return; 1620b8e80941Smrg 1621b8e80941Smrg ctx->rasterizer = hwcso; 1622b8e80941Smrg ctx->dirty |= PAN_DIRTY_RASTERIZER; 1623b8e80941Smrg} 1624b8e80941Smrg 1625b8e80941Smrgstatic void * 1626b8e80941Smrgpanfrost_create_vertex_elements_state( 1627b8e80941Smrg struct pipe_context *pctx, 1628b8e80941Smrg unsigned num_elements, 1629b8e80941Smrg const struct pipe_vertex_element *elements) 1630b8e80941Smrg{ 1631b8e80941Smrg struct panfrost_vertex_state *so = CALLOC_STRUCT(panfrost_vertex_state); 1632b8e80941Smrg 1633b8e80941Smrg so->num_elements = num_elements; 1634b8e80941Smrg memcpy(so->pipe, elements, sizeof(*elements) * num_elements); 1635b8e80941Smrg 1636b8e80941Smrg /* XXX: What the cornball? This is totally, 100%, unapologetically 1637b8e80941Smrg * nonsense. And yet it somehow fixes a regression in -bshadow 1638b8e80941Smrg * (previously, we allocated the descriptor here... a newer commit 1639b8e80941Smrg * removed that allocation, and then memory corruption led to 1640b8e80941Smrg * shader_meta getting overwritten in bad ways and then the whole test 1641b8e80941Smrg * case falling apart . 
TODO: LOOK INTO PLEASE XXX XXX BAD XXX XXX XXX 1642b8e80941Smrg */ 1643b8e80941Smrg panfrost_allocate_chunk(pan_context(pctx), 0, HEAP_DESCRIPTOR); 1644b8e80941Smrg 1645b8e80941Smrg for (int i = 0; i < num_elements; ++i) { 1646b8e80941Smrg so->hw[i].index = elements[i].vertex_buffer_index; 1647b8e80941Smrg 1648b8e80941Smrg enum pipe_format fmt = elements[i].src_format; 1649b8e80941Smrg const struct util_format_description *desc = util_format_description(fmt); 1650b8e80941Smrg so->hw[i].unknown1 = 0x2; 1651b8e80941Smrg so->hw[i].swizzle = panfrost_get_default_swizzle(desc->nr_channels); 1652b8e80941Smrg 1653b8e80941Smrg so->hw[i].format = panfrost_find_format(desc); 1654b8e80941Smrg 1655b8e80941Smrg /* The field itself should probably be shifted over */ 1656b8e80941Smrg so->hw[i].src_offset = elements[i].src_offset; 1657b8e80941Smrg } 1658b8e80941Smrg 1659b8e80941Smrg return so; 1660b8e80941Smrg} 1661b8e80941Smrg 1662b8e80941Smrgstatic void 1663b8e80941Smrgpanfrost_bind_vertex_elements_state( 1664b8e80941Smrg struct pipe_context *pctx, 1665b8e80941Smrg void *hwcso) 1666b8e80941Smrg{ 1667b8e80941Smrg struct panfrost_context *ctx = pan_context(pctx); 1668b8e80941Smrg 1669b8e80941Smrg ctx->vertex = hwcso; 1670b8e80941Smrg ctx->dirty |= PAN_DIRTY_VERTEX; 1671b8e80941Smrg} 1672b8e80941Smrg 1673b8e80941Smrgstatic void 1674b8e80941Smrgpanfrost_delete_vertex_elements_state(struct pipe_context *pctx, void *hwcso) 1675b8e80941Smrg{ 1676b8e80941Smrg struct panfrost_vertex_state *so = (struct panfrost_vertex_state *) hwcso; 1677b8e80941Smrg unsigned bytes = sizeof(struct mali_attr_meta) * so->num_elements; 1678b8e80941Smrg DBG("Vertex elements delete leaks descriptor (%d bytes)\n", bytes); 1679b8e80941Smrg free(hwcso); 1680b8e80941Smrg} 1681b8e80941Smrg 1682b8e80941Smrgstatic void * 1683b8e80941Smrgpanfrost_create_shader_state( 1684b8e80941Smrg struct pipe_context *pctx, 1685b8e80941Smrg const struct pipe_shader_state *cso) 1686b8e80941Smrg{ 1687b8e80941Smrg struct 
panfrost_shader_variants *so = CALLOC_STRUCT(panfrost_shader_variants); 1688b8e80941Smrg so->base = *cso; 1689b8e80941Smrg 1690b8e80941Smrg /* Token deep copy to prevent memory corruption */ 1691b8e80941Smrg 1692b8e80941Smrg if (cso->type == PIPE_SHADER_IR_TGSI) 1693b8e80941Smrg so->base.tokens = tgsi_dup_tokens(so->base.tokens); 1694b8e80941Smrg 1695b8e80941Smrg return so; 1696b8e80941Smrg} 1697b8e80941Smrg 1698b8e80941Smrgstatic void 1699b8e80941Smrgpanfrost_delete_shader_state( 1700b8e80941Smrg struct pipe_context *pctx, 1701b8e80941Smrg void *so) 1702b8e80941Smrg{ 1703b8e80941Smrg struct panfrost_shader_variants *cso = (struct panfrost_shader_variants *) so; 1704b8e80941Smrg 1705b8e80941Smrg if (cso->base.type == PIPE_SHADER_IR_TGSI) { 1706b8e80941Smrg DBG("Deleting TGSI shader leaks duplicated tokens\n"); 1707b8e80941Smrg } 1708b8e80941Smrg 1709b8e80941Smrg unsigned leak = cso->variant_count * sizeof(struct mali_shader_meta); 1710b8e80941Smrg DBG("Deleting shader state leaks descriptors (%d bytes), and shader bytecode\n", leak); 1711b8e80941Smrg 1712b8e80941Smrg free(so); 1713b8e80941Smrg} 1714b8e80941Smrg 1715b8e80941Smrgstatic void * 1716b8e80941Smrgpanfrost_create_sampler_state( 1717b8e80941Smrg struct pipe_context *pctx, 1718b8e80941Smrg const struct pipe_sampler_state *cso) 1719b8e80941Smrg{ 1720b8e80941Smrg struct panfrost_sampler_state *so = CALLOC_STRUCT(panfrost_sampler_state); 1721b8e80941Smrg so->base = *cso; 1722b8e80941Smrg 1723b8e80941Smrg /* sampler_state corresponds to mali_sampler_descriptor, which we can generate entirely here */ 1724b8e80941Smrg 1725b8e80941Smrg struct mali_sampler_descriptor sampler_descriptor = { 1726b8e80941Smrg .filter_mode = MALI_TEX_MIN(translate_tex_filter(cso->min_img_filter)) 1727b8e80941Smrg | MALI_TEX_MAG(translate_tex_filter(cso->mag_img_filter)) 1728b8e80941Smrg | translate_mip_filter(cso->min_mip_filter) 1729b8e80941Smrg | 0x20, 1730b8e80941Smrg 1731b8e80941Smrg .wrap_s = translate_tex_wrap(cso->wrap_s), 
1732b8e80941Smrg .wrap_t = translate_tex_wrap(cso->wrap_t), 1733b8e80941Smrg .wrap_r = translate_tex_wrap(cso->wrap_r), 1734b8e80941Smrg .compare_func = panfrost_translate_alt_compare_func(cso->compare_func), 1735b8e80941Smrg .border_color = { 1736b8e80941Smrg cso->border_color.f[0], 1737b8e80941Smrg cso->border_color.f[1], 1738b8e80941Smrg cso->border_color.f[2], 1739b8e80941Smrg cso->border_color.f[3] 1740b8e80941Smrg }, 1741b8e80941Smrg .min_lod = FIXED_16(cso->min_lod), 1742b8e80941Smrg .max_lod = FIXED_16(cso->max_lod), 1743b8e80941Smrg .unknown2 = 1, 1744b8e80941Smrg }; 1745b8e80941Smrg 1746b8e80941Smrg so->hw = sampler_descriptor; 1747b8e80941Smrg 1748b8e80941Smrg return so; 1749b8e80941Smrg} 1750b8e80941Smrg 1751b8e80941Smrgstatic void 1752b8e80941Smrgpanfrost_bind_sampler_states( 1753b8e80941Smrg struct pipe_context *pctx, 1754b8e80941Smrg enum pipe_shader_type shader, 1755b8e80941Smrg unsigned start_slot, unsigned num_sampler, 1756b8e80941Smrg void **sampler) 1757b8e80941Smrg{ 1758b8e80941Smrg assert(start_slot == 0); 1759b8e80941Smrg 1760b8e80941Smrg struct panfrost_context *ctx = pan_context(pctx); 1761b8e80941Smrg 1762b8e80941Smrg /* XXX: Should upload, not just copy? 
*/ 1763b8e80941Smrg ctx->sampler_count[shader] = num_sampler; 1764b8e80941Smrg memcpy(ctx->samplers[shader], sampler, num_sampler * sizeof (void *)); 1765b8e80941Smrg 1766b8e80941Smrg ctx->dirty |= PAN_DIRTY_SAMPLERS; 1767b8e80941Smrg} 1768b8e80941Smrg 1769b8e80941Smrgstatic bool 1770b8e80941Smrgpanfrost_variant_matches(struct panfrost_context *ctx, struct panfrost_shader_state *variant) 1771b8e80941Smrg{ 1772b8e80941Smrg struct pipe_alpha_state *alpha = &ctx->depth_stencil->alpha; 1773b8e80941Smrg 1774b8e80941Smrg if (alpha->enabled || variant->alpha_state.enabled) { 1775b8e80941Smrg /* Make sure enable state is at least the same */ 1776b8e80941Smrg if (alpha->enabled != variant->alpha_state.enabled) { 1777b8e80941Smrg return false; 1778b8e80941Smrg } 1779b8e80941Smrg 1780b8e80941Smrg /* Check that the contents of the test are the same */ 1781b8e80941Smrg bool same_func = alpha->func == variant->alpha_state.func; 1782b8e80941Smrg bool same_ref = alpha->ref_value == variant->alpha_state.ref_value; 1783b8e80941Smrg 1784b8e80941Smrg if (!(same_func && same_ref)) { 1785b8e80941Smrg return false; 1786b8e80941Smrg } 1787b8e80941Smrg } 1788b8e80941Smrg /* Otherwise, we're good to go */ 1789b8e80941Smrg return true; 1790b8e80941Smrg} 1791b8e80941Smrg 1792b8e80941Smrgstatic void 1793b8e80941Smrgpanfrost_bind_fs_state( 1794b8e80941Smrg struct pipe_context *pctx, 1795b8e80941Smrg void *hwcso) 1796b8e80941Smrg{ 1797b8e80941Smrg struct panfrost_context *ctx = pan_context(pctx); 1798b8e80941Smrg 1799b8e80941Smrg ctx->fs = hwcso; 1800b8e80941Smrg 1801b8e80941Smrg if (hwcso) { 1802b8e80941Smrg /* Match the appropriate variant */ 1803b8e80941Smrg 1804b8e80941Smrg signed variant = -1; 1805b8e80941Smrg 1806b8e80941Smrg struct panfrost_shader_variants *variants = (struct panfrost_shader_variants *) hwcso; 1807b8e80941Smrg 1808b8e80941Smrg for (unsigned i = 0; i < variants->variant_count; ++i) { 1809b8e80941Smrg if (panfrost_variant_matches(ctx, &variants->variants[i])) { 
1810b8e80941Smrg variant = i; 1811b8e80941Smrg break; 1812b8e80941Smrg } 1813b8e80941Smrg } 1814b8e80941Smrg 1815b8e80941Smrg if (variant == -1) { 1816b8e80941Smrg /* No variant matched, so create a new one */ 1817b8e80941Smrg variant = variants->variant_count++; 1818b8e80941Smrg assert(variants->variant_count < MAX_SHADER_VARIANTS); 1819b8e80941Smrg 1820b8e80941Smrg variants->variants[variant].base = hwcso; 1821b8e80941Smrg variants->variants[variant].alpha_state = ctx->depth_stencil->alpha; 1822b8e80941Smrg 1823b8e80941Smrg /* Allocate the mapped descriptor ahead-of-time. TODO: Use for FS as well as VS */ 1824b8e80941Smrg struct panfrost_context *ctx = pan_context(pctx); 1825b8e80941Smrg struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_shader_meta), HEAP_DESCRIPTOR); 1826b8e80941Smrg 1827b8e80941Smrg variants->variants[variant].tripipe = (struct mali_shader_meta *) transfer.cpu; 1828b8e80941Smrg variants->variants[variant].tripipe_gpu = transfer.gpu; 1829b8e80941Smrg 1830b8e80941Smrg } 1831b8e80941Smrg 1832b8e80941Smrg /* Select this variant */ 1833b8e80941Smrg variants->active_variant = variant; 1834b8e80941Smrg 1835b8e80941Smrg struct panfrost_shader_state *shader_state = &variants->variants[variant]; 1836b8e80941Smrg assert(panfrost_variant_matches(ctx, shader_state)); 1837b8e80941Smrg 1838b8e80941Smrg /* Now we have a variant selected, so compile and go */ 1839b8e80941Smrg 1840b8e80941Smrg if (!shader_state->compiled) { 1841b8e80941Smrg panfrost_shader_compile(ctx, shader_state->tripipe, NULL, JOB_TYPE_TILER, shader_state); 1842b8e80941Smrg shader_state->compiled = true; 1843b8e80941Smrg } 1844b8e80941Smrg } 1845b8e80941Smrg 1846b8e80941Smrg ctx->dirty |= PAN_DIRTY_FS; 1847b8e80941Smrg} 1848b8e80941Smrg 1849b8e80941Smrgstatic void 1850b8e80941Smrgpanfrost_bind_vs_state( 1851b8e80941Smrg struct pipe_context *pctx, 1852b8e80941Smrg void *hwcso) 1853b8e80941Smrg{ 1854b8e80941Smrg struct panfrost_context *ctx = 
pan_context(pctx); 1855b8e80941Smrg 1856b8e80941Smrg ctx->vs = hwcso; 1857b8e80941Smrg 1858b8e80941Smrg if (hwcso) { 1859b8e80941Smrg if (!ctx->vs->variants[0].compiled) { 1860b8e80941Smrg ctx->vs->variants[0].base = hwcso; 1861b8e80941Smrg 1862b8e80941Smrg /* TODO DRY from above */ 1863b8e80941Smrg struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_shader_meta), HEAP_DESCRIPTOR); 1864b8e80941Smrg ctx->vs->variants[0].tripipe = (struct mali_shader_meta *) transfer.cpu; 1865b8e80941Smrg ctx->vs->variants[0].tripipe_gpu = transfer.gpu; 1866b8e80941Smrg 1867b8e80941Smrg panfrost_shader_compile(ctx, ctx->vs->variants[0].tripipe, NULL, JOB_TYPE_VERTEX, &ctx->vs->variants[0]); 1868b8e80941Smrg ctx->vs->variants[0].compiled = true; 1869b8e80941Smrg } 1870b8e80941Smrg } 1871b8e80941Smrg 1872b8e80941Smrg ctx->dirty |= PAN_DIRTY_VS; 1873b8e80941Smrg} 1874b8e80941Smrg 1875b8e80941Smrgstatic void 1876b8e80941Smrgpanfrost_set_vertex_buffers( 1877b8e80941Smrg struct pipe_context *pctx, 1878b8e80941Smrg unsigned start_slot, 1879b8e80941Smrg unsigned num_buffers, 1880b8e80941Smrg const struct pipe_vertex_buffer *buffers) 1881b8e80941Smrg{ 1882b8e80941Smrg struct panfrost_context *ctx = pan_context(pctx); 1883b8e80941Smrg 1884b8e80941Smrg util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, start_slot, num_buffers); 1885b8e80941Smrg} 1886b8e80941Smrg 1887b8e80941Smrgstatic void 1888b8e80941Smrgpanfrost_set_constant_buffer( 1889b8e80941Smrg struct pipe_context *pctx, 1890b8e80941Smrg enum pipe_shader_type shader, uint index, 1891b8e80941Smrg const struct pipe_constant_buffer *buf) 1892b8e80941Smrg{ 1893b8e80941Smrg struct panfrost_context *ctx = pan_context(pctx); 1894b8e80941Smrg struct panfrost_constant_buffer *pbuf = &ctx->constant_buffer[shader]; 1895b8e80941Smrg 1896b8e80941Smrg size_t sz = buf ? 
buf->buffer_size : 0; 1897b8e80941Smrg 1898b8e80941Smrg /* Free previous buffer */ 1899b8e80941Smrg 1900b8e80941Smrg pbuf->dirty = true; 1901b8e80941Smrg pbuf->size = sz; 1902b8e80941Smrg 1903b8e80941Smrg if (pbuf->buffer) { 1904b8e80941Smrg free(pbuf->buffer); 1905b8e80941Smrg pbuf->buffer = NULL; 1906b8e80941Smrg } 1907b8e80941Smrg 1908b8e80941Smrg /* If unbinding, we're done */ 1909b8e80941Smrg 1910b8e80941Smrg if (!buf) 1911b8e80941Smrg return; 1912b8e80941Smrg 1913b8e80941Smrg /* Multiple constant buffers not yet supported */ 1914b8e80941Smrg assert(index == 0); 1915b8e80941Smrg 1916b8e80941Smrg const uint8_t *cpu; 1917b8e80941Smrg 1918b8e80941Smrg struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer); 1919b8e80941Smrg 1920b8e80941Smrg if (rsrc) { 1921b8e80941Smrg cpu = rsrc->bo->cpu; 1922b8e80941Smrg } else if (buf->user_buffer) { 1923b8e80941Smrg cpu = buf->user_buffer; 1924b8e80941Smrg } else { 1925b8e80941Smrg DBG("No constant buffer?\n"); 1926b8e80941Smrg return; 1927b8e80941Smrg } 1928b8e80941Smrg 1929b8e80941Smrg /* Copy the constant buffer into the driver context for later upload */ 1930b8e80941Smrg 1931b8e80941Smrg pbuf->buffer = malloc(sz); 1932b8e80941Smrg memcpy(pbuf->buffer, cpu + buf->buffer_offset, sz); 1933b8e80941Smrg} 1934b8e80941Smrg 1935b8e80941Smrgstatic void 1936b8e80941Smrgpanfrost_set_stencil_ref( 1937b8e80941Smrg struct pipe_context *pctx, 1938b8e80941Smrg const struct pipe_stencil_ref *ref) 1939b8e80941Smrg{ 1940b8e80941Smrg struct panfrost_context *ctx = pan_context(pctx); 1941b8e80941Smrg ctx->stencil_ref = *ref; 1942b8e80941Smrg 1943b8e80941Smrg /* Shader core dirty */ 1944b8e80941Smrg ctx->dirty |= PAN_DIRTY_FS; 1945b8e80941Smrg} 1946b8e80941Smrg 1947b8e80941Smrgstatic struct pipe_sampler_view * 1948b8e80941Smrgpanfrost_create_sampler_view( 1949b8e80941Smrg struct pipe_context *pctx, 1950b8e80941Smrg struct pipe_resource *texture, 1951b8e80941Smrg const struct pipe_sampler_view *template) 1952b8e80941Smrg{ 
1953b8e80941Smrg struct panfrost_sampler_view *so = CALLOC_STRUCT(panfrost_sampler_view); 1954b8e80941Smrg int bytes_per_pixel = util_format_get_blocksize(texture->format); 1955b8e80941Smrg 1956b8e80941Smrg pipe_reference(NULL, &texture->reference); 1957b8e80941Smrg 1958b8e80941Smrg struct panfrost_resource *prsrc = (struct panfrost_resource *) texture; 1959b8e80941Smrg 1960b8e80941Smrg so->base = *template; 1961b8e80941Smrg so->base.texture = texture; 1962b8e80941Smrg so->base.reference.count = 1; 1963b8e80941Smrg so->base.context = pctx; 1964b8e80941Smrg 1965b8e80941Smrg /* sampler_views correspond to texture descriptors, minus the texture 1966b8e80941Smrg * (data) itself. So, we serialise the descriptor here and cache it for 1967b8e80941Smrg * later. */ 1968b8e80941Smrg 1969b8e80941Smrg /* Make sure it's something with which we're familiar */ 1970b8e80941Smrg assert(bytes_per_pixel >= 1 && bytes_per_pixel <= 4); 1971b8e80941Smrg 1972b8e80941Smrg /* TODO: Detect from format better */ 1973b8e80941Smrg const struct util_format_description *desc = util_format_description(prsrc->base.format); 1974b8e80941Smrg 1975b8e80941Smrg unsigned char user_swizzle[4] = { 1976b8e80941Smrg template->swizzle_r, 1977b8e80941Smrg template->swizzle_g, 1978b8e80941Smrg template->swizzle_b, 1979b8e80941Smrg template->swizzle_a 1980b8e80941Smrg }; 1981b8e80941Smrg 1982b8e80941Smrg enum mali_format format = panfrost_find_format(desc); 1983b8e80941Smrg 1984b8e80941Smrg bool is_depth = desc->format == PIPE_FORMAT_Z32_UNORM; 1985b8e80941Smrg 1986b8e80941Smrg unsigned usage2_layout = 0x10; 1987b8e80941Smrg 1988b8e80941Smrg switch (prsrc->bo->layout) { 1989b8e80941Smrg case PAN_AFBC: 1990b8e80941Smrg usage2_layout |= 0x8 | 0x4; 1991b8e80941Smrg break; 1992b8e80941Smrg case PAN_TILED: 1993b8e80941Smrg usage2_layout |= 0x1; 1994b8e80941Smrg break; 1995b8e80941Smrg case PAN_LINEAR: 1996b8e80941Smrg usage2_layout |= is_depth ? 
0x1 : 0x2; 1997b8e80941Smrg break; 1998b8e80941Smrg default: 1999b8e80941Smrg assert(0); 2000b8e80941Smrg break; 2001b8e80941Smrg } 2002b8e80941Smrg 2003b8e80941Smrg struct mali_texture_descriptor texture_descriptor = { 2004b8e80941Smrg .width = MALI_POSITIVE(texture->width0), 2005b8e80941Smrg .height = MALI_POSITIVE(texture->height0), 2006b8e80941Smrg .depth = MALI_POSITIVE(texture->depth0), 2007b8e80941Smrg 2008b8e80941Smrg /* TODO: Decode */ 2009b8e80941Smrg .format = { 2010b8e80941Smrg .swizzle = panfrost_translate_swizzle_4(desc->swizzle), 2011b8e80941Smrg .format = format, 2012b8e80941Smrg 2013b8e80941Smrg .usage1 = 0x0, 2014b8e80941Smrg .is_not_cubemap = texture->target != PIPE_TEXTURE_CUBE, 2015b8e80941Smrg 2016b8e80941Smrg .usage2 = usage2_layout 2017b8e80941Smrg }, 2018b8e80941Smrg 2019b8e80941Smrg .swizzle = panfrost_translate_swizzle_4(user_swizzle) 2020b8e80941Smrg }; 2021b8e80941Smrg 2022b8e80941Smrg /* TODO: Other base levels require adjusting dimensions / level numbers / etc */ 2023b8e80941Smrg assert (template->u.tex.first_level == 0); 2024b8e80941Smrg 2025b8e80941Smrg /* Disable mipmapping for now to avoid regressions while automipmapping 2026b8e80941Smrg * is being implemented. 
TODO: Remove me once automipmaps work */ 2027b8e80941Smrg 2028b8e80941Smrg //texture_descriptor.nr_mipmap_levels = template->u.tex.last_level - template->u.tex.first_level; 2029b8e80941Smrg texture_descriptor.nr_mipmap_levels = 0; 2030b8e80941Smrg 2031b8e80941Smrg so->hw = texture_descriptor; 2032b8e80941Smrg 2033b8e80941Smrg return (struct pipe_sampler_view *) so; 2034b8e80941Smrg} 2035b8e80941Smrg 2036b8e80941Smrgstatic void 2037b8e80941Smrgpanfrost_set_sampler_views( 2038b8e80941Smrg struct pipe_context *pctx, 2039b8e80941Smrg enum pipe_shader_type shader, 2040b8e80941Smrg unsigned start_slot, unsigned num_views, 2041b8e80941Smrg struct pipe_sampler_view **views) 2042b8e80941Smrg{ 2043b8e80941Smrg struct panfrost_context *ctx = pan_context(pctx); 2044b8e80941Smrg 2045b8e80941Smrg assert(start_slot == 0); 2046b8e80941Smrg 2047b8e80941Smrg ctx->sampler_view_count[shader] = num_views; 2048b8e80941Smrg memcpy(ctx->sampler_views[shader], views, num_views * sizeof (void *)); 2049b8e80941Smrg 2050b8e80941Smrg ctx->dirty |= PAN_DIRTY_TEXTURES; 2051b8e80941Smrg} 2052b8e80941Smrg 2053b8e80941Smrgstatic void 2054b8e80941Smrgpanfrost_sampler_view_destroy( 2055b8e80941Smrg struct pipe_context *pctx, 2056b8e80941Smrg struct pipe_sampler_view *views) 2057b8e80941Smrg{ 2058b8e80941Smrg //struct panfrost_context *ctx = pan_context(pctx); 2059b8e80941Smrg 2060b8e80941Smrg /* TODO */ 2061b8e80941Smrg 2062b8e80941Smrg free(views); 2063b8e80941Smrg} 2064b8e80941Smrg 2065b8e80941Smrgstatic void 2066b8e80941Smrgpanfrost_set_framebuffer_state(struct pipe_context *pctx, 2067b8e80941Smrg const struct pipe_framebuffer_state *fb) 2068b8e80941Smrg{ 2069b8e80941Smrg struct panfrost_context *ctx = pan_context(pctx); 2070b8e80941Smrg 2071b8e80941Smrg /* Flush when switching away from an FBO */ 2072b8e80941Smrg 2073b8e80941Smrg if (!panfrost_is_scanout(ctx)) { 2074b8e80941Smrg panfrost_flush(pctx, NULL, 0); 2075b8e80941Smrg } 2076b8e80941Smrg 2077b8e80941Smrg ctx->pipe_framebuffer.nr_cbufs = 
fb->nr_cbufs; 2078b8e80941Smrg ctx->pipe_framebuffer.samples = fb->samples; 2079b8e80941Smrg ctx->pipe_framebuffer.layers = fb->layers; 2080b8e80941Smrg ctx->pipe_framebuffer.width = fb->width; 2081b8e80941Smrg ctx->pipe_framebuffer.height = fb->height; 2082b8e80941Smrg 2083b8e80941Smrg for (int i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { 2084b8e80941Smrg struct pipe_surface *cb = i < fb->nr_cbufs ? fb->cbufs[i] : NULL; 2085b8e80941Smrg 2086b8e80941Smrg /* check if changing cbuf */ 2087b8e80941Smrg if (ctx->pipe_framebuffer.cbufs[i] == cb) continue; 2088b8e80941Smrg 2089b8e80941Smrg if (cb && (i != 0)) { 2090b8e80941Smrg DBG("XXX: Multiple render targets not supported before t7xx!\n"); 2091b8e80941Smrg assert(0); 2092b8e80941Smrg } 2093b8e80941Smrg 2094b8e80941Smrg /* assign new */ 2095b8e80941Smrg pipe_surface_reference(&ctx->pipe_framebuffer.cbufs[i], cb); 2096b8e80941Smrg 2097b8e80941Smrg if (!cb) 2098b8e80941Smrg continue; 2099b8e80941Smrg 2100b8e80941Smrg if (ctx->require_sfbd) 2101b8e80941Smrg ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx); 2102b8e80941Smrg else 2103b8e80941Smrg ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx); 2104b8e80941Smrg 2105b8e80941Smrg panfrost_attach_vt_framebuffer(ctx); 2106b8e80941Smrg 2107b8e80941Smrg struct panfrost_resource *tex = ((struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[i]->texture); 2108b8e80941Smrg enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format; 2109b8e80941Smrg bool is_scanout = panfrost_is_scanout(ctx); 2110b8e80941Smrg 2111b8e80941Smrg if (!is_scanout && tex->bo->layout != PAN_AFBC && panfrost_can_afbc(format)) { 2112b8e80941Smrg /* The blob is aggressive about enabling AFBC. As such, 2113b8e80941Smrg * it's pretty much necessary to use it here, since we 2114b8e80941Smrg * have no traces of non-compressed FBO. 
*/ 2115b8e80941Smrg 2116b8e80941Smrg panfrost_enable_afbc(ctx, tex, false); 2117b8e80941Smrg } 2118b8e80941Smrg 2119b8e80941Smrg if (!is_scanout && !tex->bo->has_checksum) { 2120b8e80941Smrg /* Enable transaction elimination if we can */ 2121b8e80941Smrg panfrost_enable_checksum(ctx, tex); 2122b8e80941Smrg } 2123b8e80941Smrg } 2124b8e80941Smrg 2125b8e80941Smrg { 2126b8e80941Smrg struct pipe_surface *zb = fb->zsbuf; 2127b8e80941Smrg 2128b8e80941Smrg if (ctx->pipe_framebuffer.zsbuf != zb) { 2129b8e80941Smrg pipe_surface_reference(&ctx->pipe_framebuffer.zsbuf, zb); 2130b8e80941Smrg 2131b8e80941Smrg if (zb) { 2132b8e80941Smrg /* FBO has depth */ 2133b8e80941Smrg 2134b8e80941Smrg if (ctx->require_sfbd) 2135b8e80941Smrg ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx); 2136b8e80941Smrg else 2137b8e80941Smrg ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx); 2138b8e80941Smrg 2139b8e80941Smrg panfrost_attach_vt_framebuffer(ctx); 2140b8e80941Smrg 2141b8e80941Smrg /* Keep the depth FBO linear */ 2142b8e80941Smrg } 2143b8e80941Smrg } 2144b8e80941Smrg } 2145b8e80941Smrg} 2146b8e80941Smrg 2147b8e80941Smrgstatic void * 2148b8e80941Smrgpanfrost_create_blend_state(struct pipe_context *pipe, 2149b8e80941Smrg const struct pipe_blend_state *blend) 2150b8e80941Smrg{ 2151b8e80941Smrg struct panfrost_context *ctx = pan_context(pipe); 2152b8e80941Smrg struct panfrost_blend_state *so = CALLOC_STRUCT(panfrost_blend_state); 2153b8e80941Smrg so->base = *blend; 2154b8e80941Smrg 2155b8e80941Smrg /* TODO: The following features are not yet implemented */ 2156b8e80941Smrg assert(!blend->logicop_enable); 2157b8e80941Smrg assert(!blend->alpha_to_coverage); 2158b8e80941Smrg assert(!blend->alpha_to_one); 2159b8e80941Smrg 2160b8e80941Smrg /* Compile the blend state, first as fixed-function if we can */ 2161b8e80941Smrg 2162b8e80941Smrg if (panfrost_make_fixed_blend_mode(&blend->rt[0], &so->equation, blend->rt[0].colormask, &ctx->blend_color)) 2163b8e80941Smrg return so; 2164b8e80941Smrg 
2165b8e80941Smrg /* If we can't, compile a blend shader instead */ 2166b8e80941Smrg 2167b8e80941Smrg panfrost_make_blend_shader(ctx, so, &ctx->blend_color); 2168b8e80941Smrg 2169b8e80941Smrg return so; 2170b8e80941Smrg} 2171b8e80941Smrg 2172b8e80941Smrgstatic void 2173b8e80941Smrgpanfrost_bind_blend_state(struct pipe_context *pipe, 2174b8e80941Smrg void *cso) 2175b8e80941Smrg{ 2176b8e80941Smrg struct panfrost_context *ctx = pan_context(pipe); 2177b8e80941Smrg struct pipe_blend_state *blend = (struct pipe_blend_state *) cso; 2178b8e80941Smrg struct panfrost_blend_state *pblend = (struct panfrost_blend_state *) cso; 2179b8e80941Smrg ctx->blend = pblend; 2180b8e80941Smrg 2181b8e80941Smrg if (!blend) 2182b8e80941Smrg return; 2183b8e80941Smrg 2184b8e80941Smrg SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_DITHER, !blend->dither); 2185b8e80941Smrg 2186b8e80941Smrg /* TODO: Attach color */ 2187b8e80941Smrg 2188b8e80941Smrg /* Shader itself is not dirty, but the shader core is */ 2189b8e80941Smrg ctx->dirty |= PAN_DIRTY_FS; 2190b8e80941Smrg} 2191b8e80941Smrg 2192b8e80941Smrgstatic void 2193b8e80941Smrgpanfrost_delete_blend_state(struct pipe_context *pipe, 2194b8e80941Smrg void *blend) 2195b8e80941Smrg{ 2196b8e80941Smrg struct panfrost_blend_state *so = (struct panfrost_blend_state *) blend; 2197b8e80941Smrg 2198b8e80941Smrg if (so->has_blend_shader) { 2199b8e80941Smrg DBG("Deleting blend state leak blend shaders bytecode\n"); 2200b8e80941Smrg } 2201b8e80941Smrg 2202b8e80941Smrg free(blend); 2203b8e80941Smrg} 2204b8e80941Smrg 2205b8e80941Smrgstatic void 2206b8e80941Smrgpanfrost_set_blend_color(struct pipe_context *pipe, 2207b8e80941Smrg const struct pipe_blend_color *blend_color) 2208b8e80941Smrg{ 2209b8e80941Smrg struct panfrost_context *ctx = pan_context(pipe); 2210b8e80941Smrg 2211b8e80941Smrg /* If blend_color is we're unbinding, so ctx->blend_color is now undefined -> nothing to do */ 2212b8e80941Smrg 2213b8e80941Smrg if (blend_color) { 2214b8e80941Smrg 
ctx->blend_color = *blend_color; 2215b8e80941Smrg 2216b8e80941Smrg /* The blend mode depends on the blend constant color, due to the 2217b8e80941Smrg * fixed/programmable split. So, we're forced to regenerate the blend 2218b8e80941Smrg * equation */ 2219b8e80941Smrg 2220b8e80941Smrg /* TODO: Attach color */ 2221b8e80941Smrg } 2222b8e80941Smrg} 2223b8e80941Smrg 2224b8e80941Smrgstatic void * 2225b8e80941Smrgpanfrost_create_depth_stencil_state(struct pipe_context *pipe, 2226b8e80941Smrg const struct pipe_depth_stencil_alpha_state *depth_stencil) 2227b8e80941Smrg{ 2228b8e80941Smrg return mem_dup(depth_stencil, sizeof(*depth_stencil)); 2229b8e80941Smrg} 2230b8e80941Smrg 2231b8e80941Smrgstatic void 2232b8e80941Smrgpanfrost_bind_depth_stencil_state(struct pipe_context *pipe, 2233b8e80941Smrg void *cso) 2234b8e80941Smrg{ 2235b8e80941Smrg struct panfrost_context *ctx = pan_context(pipe); 2236b8e80941Smrg struct pipe_depth_stencil_alpha_state *depth_stencil = cso; 2237b8e80941Smrg ctx->depth_stencil = depth_stencil; 2238b8e80941Smrg 2239b8e80941Smrg if (!depth_stencil) 2240b8e80941Smrg return; 2241b8e80941Smrg 2242b8e80941Smrg /* Alpha does not exist in the hardware (it's not in ES3), so it's 2243b8e80941Smrg * emulated in the fragment shader */ 2244b8e80941Smrg 2245b8e80941Smrg if (depth_stencil->alpha.enabled) { 2246b8e80941Smrg /* We need to trigger a new shader (maybe) */ 2247b8e80941Smrg ctx->base.bind_fs_state(&ctx->base, ctx->fs); 2248b8e80941Smrg } 2249b8e80941Smrg 2250b8e80941Smrg /* Stencil state */ 2251b8e80941Smrg SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_STENCIL_TEST, depth_stencil->stencil[0].enabled); /* XXX: which one? 
*/ 2252b8e80941Smrg 2253b8e80941Smrg panfrost_make_stencil_state(&depth_stencil->stencil[0], &ctx->fragment_shader_core.stencil_front); 2254b8e80941Smrg ctx->fragment_shader_core.stencil_mask_front = depth_stencil->stencil[0].writemask; 2255b8e80941Smrg 2256b8e80941Smrg panfrost_make_stencil_state(&depth_stencil->stencil[1], &ctx->fragment_shader_core.stencil_back); 2257b8e80941Smrg ctx->fragment_shader_core.stencil_mask_back = depth_stencil->stencil[1].writemask; 2258b8e80941Smrg 2259b8e80941Smrg /* Depth state (TODO: Refactor) */ 2260b8e80941Smrg SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_DEPTH_TEST, depth_stencil->depth.enabled); 2261b8e80941Smrg 2262b8e80941Smrg int func = depth_stencil->depth.enabled ? depth_stencil->depth.func : PIPE_FUNC_ALWAYS; 2263b8e80941Smrg 2264b8e80941Smrg ctx->fragment_shader_core.unknown2_3 &= ~MALI_DEPTH_FUNC_MASK; 2265b8e80941Smrg ctx->fragment_shader_core.unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(func)); 2266b8e80941Smrg 2267b8e80941Smrg /* Bounds test not implemented */ 2268b8e80941Smrg assert(!depth_stencil->depth.bounds_test); 2269b8e80941Smrg 2270b8e80941Smrg ctx->dirty |= PAN_DIRTY_FS; 2271b8e80941Smrg} 2272b8e80941Smrg 2273b8e80941Smrgstatic void 2274b8e80941Smrgpanfrost_delete_depth_stencil_state(struct pipe_context *pipe, void *depth) 2275b8e80941Smrg{ 2276b8e80941Smrg free( depth ); 2277b8e80941Smrg} 2278b8e80941Smrg 2279b8e80941Smrgstatic void 2280b8e80941Smrgpanfrost_set_sample_mask(struct pipe_context *pipe, 2281b8e80941Smrg unsigned sample_mask) 2282b8e80941Smrg{ 2283b8e80941Smrg} 2284b8e80941Smrg 2285b8e80941Smrgstatic void 2286b8e80941Smrgpanfrost_set_clip_state(struct pipe_context *pipe, 2287b8e80941Smrg const struct pipe_clip_state *clip) 2288b8e80941Smrg{ 2289b8e80941Smrg //struct panfrost_context *panfrost = pan_context(pipe); 2290b8e80941Smrg} 2291b8e80941Smrg 2292b8e80941Smrgstatic void 2293b8e80941Smrgpanfrost_set_viewport_states(struct pipe_context *pipe, 2294b8e80941Smrg unsigned 
start_slot, 2295b8e80941Smrg unsigned num_viewports, 2296b8e80941Smrg const struct pipe_viewport_state *viewports) 2297b8e80941Smrg{ 2298b8e80941Smrg struct panfrost_context *ctx = pan_context(pipe); 2299b8e80941Smrg 2300b8e80941Smrg assert(start_slot == 0); 2301b8e80941Smrg assert(num_viewports == 1); 2302b8e80941Smrg 2303b8e80941Smrg ctx->pipe_viewport = *viewports; 2304b8e80941Smrg 2305b8e80941Smrg#if 0 2306b8e80941Smrg /* TODO: What if not centered? */ 2307b8e80941Smrg float w = abs(viewports->scale[0]) * 2.0; 2308b8e80941Smrg float h = abs(viewports->scale[1]) * 2.0; 2309b8e80941Smrg 2310b8e80941Smrg ctx->viewport.viewport1[0] = MALI_POSITIVE((int) w); 2311b8e80941Smrg ctx->viewport.viewport1[1] = MALI_POSITIVE((int) h); 2312b8e80941Smrg#endif 2313b8e80941Smrg} 2314b8e80941Smrg 2315b8e80941Smrgstatic void 2316b8e80941Smrgpanfrost_set_scissor_states(struct pipe_context *pipe, 2317b8e80941Smrg unsigned start_slot, 2318b8e80941Smrg unsigned num_scissors, 2319b8e80941Smrg const struct pipe_scissor_state *scissors) 2320b8e80941Smrg{ 2321b8e80941Smrg struct panfrost_context *ctx = pan_context(pipe); 2322b8e80941Smrg 2323b8e80941Smrg assert(start_slot == 0); 2324b8e80941Smrg assert(num_scissors == 1); 2325b8e80941Smrg 2326b8e80941Smrg ctx->scissor = *scissors; 2327b8e80941Smrg} 2328b8e80941Smrg 2329b8e80941Smrgstatic void 2330b8e80941Smrgpanfrost_set_polygon_stipple(struct pipe_context *pipe, 2331b8e80941Smrg const struct pipe_poly_stipple *stipple) 2332b8e80941Smrg{ 2333b8e80941Smrg //struct panfrost_context *panfrost = pan_context(pipe); 2334b8e80941Smrg} 2335b8e80941Smrg 2336b8e80941Smrgstatic void 2337b8e80941Smrgpanfrost_set_active_query_state(struct pipe_context *pipe, 2338b8e80941Smrg boolean enable) 2339b8e80941Smrg{ 2340b8e80941Smrg //struct panfrost_context *panfrost = pan_context(pipe); 2341b8e80941Smrg} 2342b8e80941Smrg 2343b8e80941Smrgstatic void 2344b8e80941Smrgpanfrost_destroy(struct pipe_context *pipe) 2345b8e80941Smrg{ 2346b8e80941Smrg struct 
panfrost_context *panfrost = pan_context(pipe); 2347b8e80941Smrg struct panfrost_screen *screen = pan_screen(pipe->screen); 2348b8e80941Smrg 2349b8e80941Smrg if (panfrost->blitter) 2350b8e80941Smrg util_blitter_destroy(panfrost->blitter); 2351b8e80941Smrg 2352b8e80941Smrg screen->driver->free_slab(screen, &panfrost->scratchpad); 2353b8e80941Smrg screen->driver->free_slab(screen, &panfrost->varying_mem); 2354b8e80941Smrg screen->driver->free_slab(screen, &panfrost->shaders); 2355b8e80941Smrg screen->driver->free_slab(screen, &panfrost->tiler_heap); 2356b8e80941Smrg screen->driver->free_slab(screen, &panfrost->misc_0); 2357b8e80941Smrg} 2358b8e80941Smrg 2359b8e80941Smrgstatic struct pipe_query * 2360b8e80941Smrgpanfrost_create_query(struct pipe_context *pipe, 2361b8e80941Smrg unsigned type, 2362b8e80941Smrg unsigned index) 2363b8e80941Smrg{ 2364b8e80941Smrg struct panfrost_query *q = CALLOC_STRUCT(panfrost_query); 2365b8e80941Smrg 2366b8e80941Smrg q->type = type; 2367b8e80941Smrg q->index = index; 2368b8e80941Smrg 2369b8e80941Smrg return (struct pipe_query *) q; 2370b8e80941Smrg} 2371b8e80941Smrg 2372b8e80941Smrgstatic void 2373b8e80941Smrgpanfrost_destroy_query(struct pipe_context *pipe, struct pipe_query *q) 2374b8e80941Smrg{ 2375b8e80941Smrg FREE(q); 2376b8e80941Smrg} 2377b8e80941Smrg 2378b8e80941Smrgstatic boolean 2379b8e80941Smrgpanfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q) 2380b8e80941Smrg{ 2381b8e80941Smrg struct panfrost_context *ctx = pan_context(pipe); 2382b8e80941Smrg struct panfrost_query *query = (struct panfrost_query *) q; 2383b8e80941Smrg 2384b8e80941Smrg switch (query->type) { 2385b8e80941Smrg case PIPE_QUERY_OCCLUSION_COUNTER: 2386b8e80941Smrg case PIPE_QUERY_OCCLUSION_PREDICATE: 2387b8e80941Smrg case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 2388b8e80941Smrg { 2389b8e80941Smrg /* Allocate a word for the query results to be stored */ 2390b8e80941Smrg query->transfer = panfrost_allocate_chunk(ctx, sizeof(unsigned), 
HEAP_DESCRIPTOR); 2391b8e80941Smrg 2392b8e80941Smrg ctx->occlusion_query = query; 2393b8e80941Smrg 2394b8e80941Smrg break; 2395b8e80941Smrg } 2396b8e80941Smrg 2397b8e80941Smrg default: 2398b8e80941Smrg DBG("Skipping query %d\n", query->type); 2399b8e80941Smrg break; 2400b8e80941Smrg } 2401b8e80941Smrg 2402b8e80941Smrg return true; 2403b8e80941Smrg} 2404b8e80941Smrg 2405b8e80941Smrgstatic bool 2406b8e80941Smrgpanfrost_end_query(struct pipe_context *pipe, struct pipe_query *q) 2407b8e80941Smrg{ 2408b8e80941Smrg struct panfrost_context *ctx = pan_context(pipe); 2409b8e80941Smrg ctx->occlusion_query = NULL; 2410b8e80941Smrg return true; 2411b8e80941Smrg} 2412b8e80941Smrg 2413b8e80941Smrgstatic boolean 2414b8e80941Smrgpanfrost_get_query_result(struct pipe_context *pipe, 2415b8e80941Smrg struct pipe_query *q, 2416b8e80941Smrg boolean wait, 2417b8e80941Smrg union pipe_query_result *vresult) 2418b8e80941Smrg{ 2419b8e80941Smrg /* STUB */ 2420b8e80941Smrg struct panfrost_query *query = (struct panfrost_query *) q; 2421b8e80941Smrg 2422b8e80941Smrg /* We need to flush out the jobs to actually run the counter, TODO 2423b8e80941Smrg * check wait, TODO wallpaper after if needed */ 2424b8e80941Smrg 2425b8e80941Smrg panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME); 2426b8e80941Smrg 2427b8e80941Smrg switch (query->type) { 2428b8e80941Smrg case PIPE_QUERY_OCCLUSION_COUNTER: 2429b8e80941Smrg case PIPE_QUERY_OCCLUSION_PREDICATE: 2430b8e80941Smrg case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: { 2431b8e80941Smrg /* Read back the query results */ 2432b8e80941Smrg unsigned *result = (unsigned *) query->transfer.cpu; 2433b8e80941Smrg unsigned passed = *result; 2434b8e80941Smrg 2435b8e80941Smrg if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) { 2436b8e80941Smrg vresult->u64 = passed; 2437b8e80941Smrg } else { 2438b8e80941Smrg vresult->b = !!passed; 2439b8e80941Smrg } 2440b8e80941Smrg 2441b8e80941Smrg break; 2442b8e80941Smrg } 2443b8e80941Smrg default: 2444b8e80941Smrg DBG("Skipped 
query get %d\n", query->type); 2445b8e80941Smrg break; 2446b8e80941Smrg } 2447b8e80941Smrg 2448b8e80941Smrg return true; 2449b8e80941Smrg} 2450b8e80941Smrg 2451b8e80941Smrgstatic struct pipe_stream_output_target * 2452b8e80941Smrgpanfrost_create_stream_output_target(struct pipe_context *pctx, 2453b8e80941Smrg struct pipe_resource *prsc, 2454b8e80941Smrg unsigned buffer_offset, 2455b8e80941Smrg unsigned buffer_size) 2456b8e80941Smrg{ 2457b8e80941Smrg struct pipe_stream_output_target *target; 2458b8e80941Smrg 2459b8e80941Smrg target = CALLOC_STRUCT(pipe_stream_output_target); 2460b8e80941Smrg 2461b8e80941Smrg if (!target) 2462b8e80941Smrg return NULL; 2463b8e80941Smrg 2464b8e80941Smrg pipe_reference_init(&target->reference, 1); 2465b8e80941Smrg pipe_resource_reference(&target->buffer, prsc); 2466b8e80941Smrg 2467b8e80941Smrg target->context = pctx; 2468b8e80941Smrg target->buffer_offset = buffer_offset; 2469b8e80941Smrg target->buffer_size = buffer_size; 2470b8e80941Smrg 2471b8e80941Smrg return target; 2472b8e80941Smrg} 2473b8e80941Smrg 2474b8e80941Smrgstatic void 2475b8e80941Smrgpanfrost_stream_output_target_destroy(struct pipe_context *pctx, 2476b8e80941Smrg struct pipe_stream_output_target *target) 2477b8e80941Smrg{ 2478b8e80941Smrg pipe_resource_reference(&target->buffer, NULL); 2479b8e80941Smrg free(target); 2480b8e80941Smrg} 2481b8e80941Smrg 2482b8e80941Smrgstatic void 2483b8e80941Smrgpanfrost_set_stream_output_targets(struct pipe_context *pctx, 2484b8e80941Smrg unsigned num_targets, 2485b8e80941Smrg struct pipe_stream_output_target **targets, 2486b8e80941Smrg const unsigned *offsets) 2487b8e80941Smrg{ 2488b8e80941Smrg /* STUB */ 2489b8e80941Smrg} 2490b8e80941Smrg 2491b8e80941Smrgstatic void 2492b8e80941Smrgpanfrost_setup_hardware(struct panfrost_context *ctx) 2493b8e80941Smrg{ 2494b8e80941Smrg struct pipe_context *gallium = (struct pipe_context *) ctx; 2495b8e80941Smrg struct panfrost_screen *screen = pan_screen(gallium->screen); 2496b8e80941Smrg 
2497b8e80941Smrg for (int i = 0; i < ARRAY_SIZE(ctx->transient_pools); ++i) { 2498b8e80941Smrg /* Allocate the beginning of the transient pool */ 2499b8e80941Smrg int entry_size = (1 << 22); /* 4MB */ 2500b8e80941Smrg 2501b8e80941Smrg ctx->transient_pools[i].entry_size = entry_size; 2502b8e80941Smrg ctx->transient_pools[i].entry_count = 1; 2503b8e80941Smrg 2504b8e80941Smrg ctx->transient_pools[i].entries[0] = (struct panfrost_memory_entry *) pb_slab_alloc(&screen->slabs, entry_size, HEAP_TRANSIENT); 2505b8e80941Smrg } 2506b8e80941Smrg 2507b8e80941Smrg screen->driver->allocate_slab(screen, &ctx->scratchpad, 64, false, 0, 0, 0); 2508b8e80941Smrg screen->driver->allocate_slab(screen, &ctx->varying_mem, 16384, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_COHERENT_LOCAL, 0, 0); 2509b8e80941Smrg screen->driver->allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0); 2510b8e80941Smrg screen->driver->allocate_slab(screen, &ctx->tiler_heap, 32768, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128); 2511b8e80941Smrg screen->driver->allocate_slab(screen, &ctx->misc_0, 128*128, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128); 2512b8e80941Smrg 2513b8e80941Smrg} 2514b8e80941Smrg 2515b8e80941Smrg/* New context creation, which also does hardware initialisation since I don't 2516b8e80941Smrg * know the better way to structure this :smirk: */ 2517b8e80941Smrg 2518b8e80941Smrgstruct pipe_context * 2519b8e80941Smrgpanfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) 2520b8e80941Smrg{ 2521b8e80941Smrg struct panfrost_context *ctx = CALLOC_STRUCT(panfrost_context); 2522b8e80941Smrg struct panfrost_screen *pscreen = pan_screen(screen); 2523b8e80941Smrg memset(ctx, 0, sizeof(*ctx)); 2524b8e80941Smrg struct pipe_context *gallium = (struct pipe_context *) ctx; 2525b8e80941Smrg unsigned gpu_id; 2526b8e80941Smrg 2527b8e80941Smrg gpu_id = pscreen->driver->query_gpu_version(pscreen); 2528b8e80941Smrg 2529b8e80941Smrg 
ctx->is_t6xx = gpu_id <= 0x0750; /* For now, this flag means T760 or less */ 2530b8e80941Smrg ctx->require_sfbd = gpu_id < 0x0750; /* T760 is the first to support MFBD */ 2531b8e80941Smrg 2532b8e80941Smrg gallium->screen = screen; 2533b8e80941Smrg 2534b8e80941Smrg gallium->destroy = panfrost_destroy; 2535b8e80941Smrg 2536b8e80941Smrg gallium->set_framebuffer_state = panfrost_set_framebuffer_state; 2537b8e80941Smrg 2538b8e80941Smrg gallium->flush = panfrost_flush; 2539b8e80941Smrg gallium->clear = panfrost_clear; 2540b8e80941Smrg gallium->draw_vbo = panfrost_draw_vbo; 2541b8e80941Smrg 2542b8e80941Smrg gallium->set_vertex_buffers = panfrost_set_vertex_buffers; 2543b8e80941Smrg gallium->set_constant_buffer = panfrost_set_constant_buffer; 2544b8e80941Smrg 2545b8e80941Smrg gallium->set_stencil_ref = panfrost_set_stencil_ref; 2546b8e80941Smrg 2547b8e80941Smrg gallium->create_sampler_view = panfrost_create_sampler_view; 2548b8e80941Smrg gallium->set_sampler_views = panfrost_set_sampler_views; 2549b8e80941Smrg gallium->sampler_view_destroy = panfrost_sampler_view_destroy; 2550b8e80941Smrg 2551b8e80941Smrg gallium->create_rasterizer_state = panfrost_create_rasterizer_state; 2552b8e80941Smrg gallium->bind_rasterizer_state = panfrost_bind_rasterizer_state; 2553b8e80941Smrg gallium->delete_rasterizer_state = panfrost_generic_cso_delete; 2554b8e80941Smrg 2555b8e80941Smrg gallium->create_vertex_elements_state = panfrost_create_vertex_elements_state; 2556b8e80941Smrg gallium->bind_vertex_elements_state = panfrost_bind_vertex_elements_state; 2557b8e80941Smrg gallium->delete_vertex_elements_state = panfrost_delete_vertex_elements_state; 2558b8e80941Smrg 2559b8e80941Smrg gallium->create_fs_state = panfrost_create_shader_state; 2560b8e80941Smrg gallium->delete_fs_state = panfrost_delete_shader_state; 2561b8e80941Smrg gallium->bind_fs_state = panfrost_bind_fs_state; 2562b8e80941Smrg 2563b8e80941Smrg gallium->create_vs_state = panfrost_create_shader_state; 2564b8e80941Smrg 
gallium->delete_vs_state = panfrost_delete_shader_state; 2565b8e80941Smrg gallium->bind_vs_state = panfrost_bind_vs_state; 2566b8e80941Smrg 2567b8e80941Smrg gallium->create_sampler_state = panfrost_create_sampler_state; 2568b8e80941Smrg gallium->delete_sampler_state = panfrost_generic_cso_delete; 2569b8e80941Smrg gallium->bind_sampler_states = panfrost_bind_sampler_states; 2570b8e80941Smrg 2571b8e80941Smrg gallium->create_blend_state = panfrost_create_blend_state; 2572b8e80941Smrg gallium->bind_blend_state = panfrost_bind_blend_state; 2573b8e80941Smrg gallium->delete_blend_state = panfrost_delete_blend_state; 2574b8e80941Smrg 2575b8e80941Smrg gallium->set_blend_color = panfrost_set_blend_color; 2576b8e80941Smrg 2577b8e80941Smrg gallium->create_depth_stencil_alpha_state = panfrost_create_depth_stencil_state; 2578b8e80941Smrg gallium->bind_depth_stencil_alpha_state = panfrost_bind_depth_stencil_state; 2579b8e80941Smrg gallium->delete_depth_stencil_alpha_state = panfrost_delete_depth_stencil_state; 2580b8e80941Smrg 2581b8e80941Smrg gallium->set_sample_mask = panfrost_set_sample_mask; 2582b8e80941Smrg 2583b8e80941Smrg gallium->set_clip_state = panfrost_set_clip_state; 2584b8e80941Smrg gallium->set_viewport_states = panfrost_set_viewport_states; 2585b8e80941Smrg gallium->set_scissor_states = panfrost_set_scissor_states; 2586b8e80941Smrg gallium->set_polygon_stipple = panfrost_set_polygon_stipple; 2587b8e80941Smrg gallium->set_active_query_state = panfrost_set_active_query_state; 2588b8e80941Smrg 2589b8e80941Smrg gallium->create_query = panfrost_create_query; 2590b8e80941Smrg gallium->destroy_query = panfrost_destroy_query; 2591b8e80941Smrg gallium->begin_query = panfrost_begin_query; 2592b8e80941Smrg gallium->end_query = panfrost_end_query; 2593b8e80941Smrg gallium->get_query_result = panfrost_get_query_result; 2594b8e80941Smrg 2595b8e80941Smrg gallium->create_stream_output_target = panfrost_create_stream_output_target; 2596b8e80941Smrg 
gallium->stream_output_target_destroy = panfrost_stream_output_target_destroy; 2597b8e80941Smrg gallium->set_stream_output_targets = panfrost_set_stream_output_targets; 2598b8e80941Smrg 2599b8e80941Smrg panfrost_resource_context_init(gallium); 2600b8e80941Smrg 2601b8e80941Smrg pscreen->driver->init_context(ctx); 2602b8e80941Smrg 2603b8e80941Smrg panfrost_setup_hardware(ctx); 2604b8e80941Smrg 2605b8e80941Smrg /* XXX: leaks */ 2606b8e80941Smrg gallium->stream_uploader = u_upload_create_default(gallium); 2607b8e80941Smrg gallium->const_uploader = gallium->stream_uploader; 2608b8e80941Smrg assert(gallium->stream_uploader); 2609b8e80941Smrg 2610b8e80941Smrg /* Midgard supports ES modes, plus QUADS/QUAD_STRIPS/POLYGON */ 2611b8e80941Smrg ctx->draw_modes = (1 << (PIPE_PRIM_POLYGON + 1)) - 1; 2612b8e80941Smrg 2613b8e80941Smrg ctx->primconvert = util_primconvert_create(gallium, ctx->draw_modes); 2614b8e80941Smrg 2615b8e80941Smrg ctx->blitter = util_blitter_create(gallium); 2616b8e80941Smrg assert(ctx->blitter); 2617b8e80941Smrg 2618b8e80941Smrg /* Prepare for render! */ 2619b8e80941Smrg 2620b8e80941Smrg panfrost_job_init(ctx); 2621b8e80941Smrg panfrost_emit_vertex_payload(ctx); 2622b8e80941Smrg panfrost_emit_tiler_payload(ctx); 2623b8e80941Smrg panfrost_invalidate_frame(ctx); 2624b8e80941Smrg panfrost_default_shader_backend(ctx); 2625b8e80941Smrg panfrost_generate_space_filler_indices(); 2626b8e80941Smrg 2627b8e80941Smrg return gallium; 2628b8e80941Smrg} 2629