1b8e80941Smrg/* 2b8e80941Smrg * Copyright 2018-2019 Alyssa Rosenzweig 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20b8e80941Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21b8e80941Smrg * SOFTWARE. 22b8e80941Smrg * 23b8e80941Smrg */ 24b8e80941Smrg 25b8e80941Smrg#include "pan_context.h" 26b8e80941Smrg#include "pan_util.h" 27b8e80941Smrg#include "pan_format.h" 28b8e80941Smrg 29b8e80941Smrg#include "util/u_format.h" 30b8e80941Smrg 31b8e80941Smrgstatic struct mali_rt_format 32b8e80941Smrgpanfrost_mfbd_format(struct pipe_surface *surf) 33b8e80941Smrg{ 34b8e80941Smrg /* Explode details on the format */ 35b8e80941Smrg 36b8e80941Smrg const struct util_format_description *desc = 37b8e80941Smrg util_format_description(surf->texture->format); 38b8e80941Smrg 39b8e80941Smrg /* Fill in accordingly, defaulting to RGBA8888 (UNORM) */ 40b8e80941Smrg 41b8e80941Smrg struct mali_rt_format fmt = { 42b8e80941Smrg .unk1 = 0x4000000, 43b8e80941Smrg .unk2 = 0x1, 44b8e80941Smrg .nr_channels = MALI_POSITIVE(desc->nr_channels), 45b8e80941Smrg .flags = 0x444, 46b8e80941Smrg .swizzle = panfrost_translate_swizzle_4(desc->swizzle), 47b8e80941Smrg .unk4 = 0x8 48b8e80941Smrg }; 49b8e80941Smrg 50b8e80941Smrg /* Set flags for alternative formats */ 51b8e80941Smrg 52b8e80941Smrg if (surf->texture->format == PIPE_FORMAT_B5G6R5_UNORM) { 53b8e80941Smrg fmt.unk1 = 0x14000000; 54b8e80941Smrg fmt.nr_channels = MALI_POSITIVE(2); 55b8e80941Smrg fmt.flags |= 0x1; 56b8e80941Smrg } 57b8e80941Smrg 58b8e80941Smrg return fmt; 59b8e80941Smrg} 60b8e80941Smrg 61b8e80941Smrg 62b8e80941Smrgstatic void 63b8e80941Smrgpanfrost_mfbd_clear( 64b8e80941Smrg struct panfrost_job *job, 65b8e80941Smrg struct bifrost_framebuffer *fb, 66b8e80941Smrg struct bifrost_fb_extra *fbx, 67b8e80941Smrg struct bifrost_render_target *rt) 68b8e80941Smrg{ 69b8e80941Smrg if (job->clear & PIPE_CLEAR_COLOR) { 70b8e80941Smrg rt->clear_color_1 = job->clear_color; 71b8e80941Smrg rt->clear_color_2 = job->clear_color; 72b8e80941Smrg rt->clear_color_3 = job->clear_color; 73b8e80941Smrg rt->clear_color_4 = job->clear_color; 74b8e80941Smrg } 75b8e80941Smrg 76b8e80941Smrg if (job->clear & PIPE_CLEAR_DEPTH) { 77b8e80941Smrg fb->clear_depth = job->clear_depth; 78b8e80941Smrg } 79b8e80941Smrg 80b8e80941Smrg if (job->clear & PIPE_CLEAR_STENCIL) { 81b8e80941Smrg fb->clear_stencil = job->clear_stencil; 82b8e80941Smrg } 83b8e80941Smrg} 84b8e80941Smrg 85b8e80941Smrgstatic void 86b8e80941Smrgpanfrost_mfbd_set_cbuf( 87b8e80941Smrg struct bifrost_render_target *rt, 88b8e80941Smrg struct pipe_surface *surf, 89b8e80941Smrg bool flip_y) 90b8e80941Smrg{ 91b8e80941Smrg struct panfrost_resource *rsrc = pan_resource(surf->texture); 92b8e80941Smrg int stride = rsrc->bo->slices[0].stride; 93b8e80941Smrg 94b8e80941Smrg rt->format = panfrost_mfbd_format(surf); 95b8e80941Smrg 96b8e80941Smrg /* Now, we set the layout specific pieces */ 97b8e80941Smrg 98b8e80941Smrg if (rsrc->bo->layout == PAN_LINEAR) { 99b8e80941Smrg mali_ptr framebuffer = rsrc->bo->gpu; 100b8e80941Smrg 101b8e80941Smrg if (flip_y) { 102b8e80941Smrg framebuffer += stride * (surf->texture->height0 - 1); 103b8e80941Smrg stride = -stride; 104b8e80941Smrg } 105b8e80941Smrg 106b8e80941Smrg rt->framebuffer = framebuffer; 107b8e80941Smrg rt->framebuffer_stride = stride / 16; 108b8e80941Smrg } else if (rsrc->bo->layout == PAN_AFBC) { 109b8e80941Smrg rt->afbc.metadata = rsrc->bo->afbc_slab.gpu; 110b8e80941Smrg rt->afbc.stride = 0; 111b8e80941Smrg rt->afbc.unk = 0x30009; 112b8e80941Smrg 113b8e80941Smrg rt->format.flags |= MALI_MFBD_FORMAT_AFBC; 114b8e80941Smrg 115b8e80941Smrg mali_ptr afbc_main = rsrc->bo->afbc_slab.gpu + rsrc->bo->afbc_metadata_size; 116b8e80941Smrg rt->framebuffer = afbc_main; 117b8e80941Smrg 118b8e80941Smrg /* TODO: Investigate shift */ 119b8e80941Smrg rt->framebuffer_stride = stride << 1; 120b8e80941Smrg } else { 121b8e80941Smrg fprintf(stderr, "Invalid render layout (cbuf)"); 122b8e80941Smrg assert(0); 123b8e80941Smrg } 124b8e80941Smrg} 125b8e80941Smrg 126b8e80941Smrgstatic void 127b8e80941Smrgpanfrost_mfbd_set_zsbuf( 128b8e80941Smrg struct bifrost_framebuffer *fb, 129b8e80941Smrg struct bifrost_fb_extra *fbx, 130b8e80941Smrg struct pipe_surface *surf) 131b8e80941Smrg{ 132b8e80941Smrg struct panfrost_resource *rsrc = pan_resource(surf->texture); 133b8e80941Smrg 134b8e80941Smrg if (rsrc->bo->layout == PAN_AFBC) { 135b8e80941Smrg fb->unk3 |= MALI_MFBD_EXTRA; 136b8e80941Smrg 137b8e80941Smrg fbx->flags = 138b8e80941Smrg MALI_EXTRA_PRESENT | 139b8e80941Smrg MALI_EXTRA_AFBC | 140b8e80941Smrg MALI_EXTRA_AFBC_ZS | 141b8e80941Smrg MALI_EXTRA_ZS | 142b8e80941Smrg 0x1; /* unknown */ 143b8e80941Smrg 144b8e80941Smrg fbx->ds_afbc.depth_stencil_afbc_metadata = rsrc->bo->afbc_slab.gpu; 145b8e80941Smrg fbx->ds_afbc.depth_stencil_afbc_stride = 0; 146b8e80941Smrg 147b8e80941Smrg fbx->ds_afbc.depth_stencil = rsrc->bo->afbc_slab.gpu + rsrc->bo->afbc_metadata_size; 148b8e80941Smrg 149b8e80941Smrg fbx->ds_afbc.zero1 = 0x10009; 150b8e80941Smrg fbx->ds_afbc.padding = 0x1000; 151b8e80941Smrg } else if (rsrc->bo->layout == PAN_LINEAR) { 152b8e80941Smrg fb->unk3 |= MALI_MFBD_EXTRA; 153b8e80941Smrg fbx->flags |= MALI_EXTRA_PRESENT | MALI_EXTRA_ZS | 0x1; 154b8e80941Smrg 155b8e80941Smrg fbx->ds_linear.depth = rsrc->bo->gpu; 156b8e80941Smrg fbx->ds_linear.depth_stride = rsrc->bo->slices[0].stride; 157b8e80941Smrg } else { 158b8e80941Smrg assert(0); 159b8e80941Smrg } 160b8e80941Smrg} 161b8e80941Smrg 162b8e80941Smrg/* Helper for sequential uploads used for MFBD */ 163b8e80941Smrg 164b8e80941Smrg#define UPLOAD(dest, offset, src, max) { \ 165b8e80941Smrg size_t sz = sizeof(*src); \ 166b8e80941Smrg memcpy(dest.cpu + offset, src, sz); \ 167b8e80941Smrg assert((offset + sz) <= max); \ 168b8e80941Smrg offset += sz; \ 169b8e80941Smrg} 170b8e80941Smrg 171b8e80941Smrgstatic mali_ptr 172b8e80941Smrgpanfrost_mfbd_upload( 173b8e80941Smrg struct panfrost_context *ctx, 174b8e80941Smrg struct bifrost_framebuffer *fb, 175b8e80941Smrg struct bifrost_fb_extra *fbx, 176b8e80941Smrg struct bifrost_render_target *rts, 177b8e80941Smrg unsigned cbufs) 178b8e80941Smrg{ 179b8e80941Smrg off_t offset = 0; 180b8e80941Smrg 181b8e80941Smrg /* There may be extra data stuck in the middle */ 182b8e80941Smrg bool has_extra = fb->unk3 & MALI_MFBD_EXTRA; 183b8e80941Smrg 184b8e80941Smrg /* Compute total size for transfer */ 185b8e80941Smrg 186b8e80941Smrg size_t total_sz = 187b8e80941Smrg sizeof(struct bifrost_framebuffer) + 188b8e80941Smrg (has_extra ? sizeof(struct bifrost_fb_extra) : 0) + 189b8e80941Smrg sizeof(struct bifrost_render_target) * cbufs; 190b8e80941Smrg 191b8e80941Smrg struct panfrost_transfer m_f_trans = 192b8e80941Smrg panfrost_allocate_transient(ctx, total_sz); 193b8e80941Smrg 194b8e80941Smrg /* Do the transfer */ 195b8e80941Smrg 196b8e80941Smrg UPLOAD(m_f_trans, offset, fb, total_sz); 197b8e80941Smrg 198b8e80941Smrg if (has_extra) 199b8e80941Smrg UPLOAD(m_f_trans, offset, fbx, total_sz); 200b8e80941Smrg 201b8e80941Smrg for (unsigned c = 0; c < cbufs; ++c) { 202b8e80941Smrg UPLOAD(m_f_trans, offset, &rts[c], total_sz); 203b8e80941Smrg } 204b8e80941Smrg 205b8e80941Smrg /* Return pointer suitable for the fragment section */ 206b8e80941Smrg return m_f_trans.gpu | MALI_MFBD | (has_extra ? 2 : 0); 207b8e80941Smrg} 208b8e80941Smrg 209b8e80941Smrg#undef UPLOAD 210b8e80941Smrg 211b8e80941Smrg/* Creates an MFBD for the FRAGMENT section of the bound framebuffer */ 212b8e80941Smrg 213b8e80941Smrgmali_ptr 214b8e80941Smrgpanfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y) 215b8e80941Smrg{ 216b8e80941Smrg struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); 217b8e80941Smrg 218b8e80941Smrg struct bifrost_framebuffer fb = panfrost_emit_mfbd(ctx); 219b8e80941Smrg struct bifrost_fb_extra fbx = {}; 220b8e80941Smrg struct bifrost_render_target rts[4] = {}; 221b8e80941Smrg 222b8e80941Smrg /* XXX: MRT case */ 223b8e80941Smrg fb.rt_count_2 = 1; 224b8e80941Smrg fb.unk3 = 0x100; 225b8e80941Smrg 226b8e80941Smrg /* TODO: MRT clear */ 227b8e80941Smrg panfrost_mfbd_clear(job, &fb, &fbx, &rts[0]); 228b8e80941Smrg 229b8e80941Smrg for (int cb = 0; cb < ctx->pipe_framebuffer.nr_cbufs; ++cb) { 230b8e80941Smrg struct pipe_surface *surf = ctx->pipe_framebuffer.cbufs[cb]; 231b8e80941Smrg panfrost_mfbd_set_cbuf(&rts[cb], surf, flip_y); 232b8e80941Smrg } 233b8e80941Smrg 234b8e80941Smrg if (ctx->pipe_framebuffer.zsbuf) { 235b8e80941Smrg panfrost_mfbd_set_zsbuf(&fb, &fbx, ctx->pipe_framebuffer.zsbuf); 236b8e80941Smrg } 237b8e80941Smrg 238b8e80941Smrg /* For the special case of a depth-only FBO, we need to attach a dummy render target */ 239b8e80941Smrg 240b8e80941Smrg if (ctx->pipe_framebuffer.nr_cbufs == 0) { 241b8e80941Smrg struct mali_rt_format null_rt = { 242b8e80941Smrg .unk1 = 0x4000000, 243b8e80941Smrg .unk4 = 0x8 244b8e80941Smrg }; 245b8e80941Smrg 246b8e80941Smrg rts[0].format = null_rt; 247b8e80941Smrg rts[0].framebuffer = 0; 248b8e80941Smrg rts[0].framebuffer_stride = 0; 249b8e80941Smrg } 250b8e80941Smrg 251b8e80941Smrg /* When scanning out, the depth buffer is immediately invalidated, so 252b8e80941Smrg * we don't need to waste bandwidth writing it out. This can improve 253b8e80941Smrg * performance substantially (Z32_UNORM 1080p @ 60fps is 475 MB/s of 254b8e80941Smrg * memory bandwidth!). 255b8e80941Smrg * 256b8e80941Smrg * The exception is ReadPixels, but this is not supported on GLES so we 257b8e80941Smrg * can safely ignore it. */ 258b8e80941Smrg 259b8e80941Smrg if (panfrost_is_scanout(ctx)) { 260b8e80941Smrg job->requirements &= ~PAN_REQ_DEPTH_WRITE; 261b8e80941Smrg } 262b8e80941Smrg 263b8e80941Smrg /* Actualize the requirements */ 264b8e80941Smrg 265b8e80941Smrg if (job->requirements & PAN_REQ_MSAA) { 266b8e80941Smrg rts[0].format.flags |= MALI_MFBD_FORMAT_MSAA; 267b8e80941Smrg 268b8e80941Smrg /* XXX */ 269b8e80941Smrg fb.unk1 |= (1 << 4) | (1 << 1); 270b8e80941Smrg fb.rt_count_2 = 4; 271b8e80941Smrg } 272b8e80941Smrg 273b8e80941Smrg if (job->requirements & PAN_REQ_DEPTH_WRITE) 274b8e80941Smrg fb.unk3 |= MALI_MFBD_DEPTH_WRITE; 275b8e80941Smrg 276b8e80941Smrg if (ctx->pipe_framebuffer.nr_cbufs == 1) { 277b8e80941Smrg struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture; 278b8e80941Smrg 279b8e80941Smrg if (rsrc->bo->has_checksum) { 280b8e80941Smrg fb.unk3 |= MALI_MFBD_EXTRA; 281b8e80941Smrg fbx.flags |= MALI_EXTRA_PRESENT; 282b8e80941Smrg fbx.checksum_stride = rsrc->bo->checksum_stride; 283b8e80941Smrg fbx.checksum = rsrc->bo->gpu + rsrc->bo->slices[0].stride * rsrc->base.height0; 284b8e80941Smrg } 285b8e80941Smrg } 286b8e80941Smrg 287b8e80941Smrg /* We always upload at least one (dummy) cbuf */ 288b8e80941Smrg unsigned cbufs = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1); 289b8e80941Smrg 290b8e80941Smrg return panfrost_mfbd_upload(ctx, &fb, &fbx, rts, cbufs); 291b8e80941Smrg} 292