1/* 2 * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Rob Clark <robclark@freedesktop.org> 25 */ 26 27#include "pipe/p_state.h" 28#include "util/u_string.h" 29#include "util/u_memory.h" 30#include "util/u_inlines.h" 31#include "util/u_format.h" 32 33#include "freedreno_draw.h" 34#include "freedreno_state.h" 35#include "freedreno_resource.h" 36 37#include "fd3_gmem.h" 38#include "fd3_context.h" 39#include "fd3_emit.h" 40#include "fd3_program.h" 41#include "fd3_format.h" 42#include "fd3_zsa.h" 43 44static void 45emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, 46 struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w, 47 bool decode_srgb) 48{ 49 enum a3xx_tile_mode tile_mode; 50 unsigned i; 51 52 if (bin_w) { 53 tile_mode = TILE_32X32; 54 } else { 55 tile_mode = LINEAR; 56 } 57 58 for (i = 0; i < A3XX_MAX_RENDER_TARGETS; i++) { 59 enum pipe_format pformat = 0; 60 enum a3xx_color_fmt format = 0; 61 enum a3xx_color_swap swap = WZYX; 62 bool srgb = false; 63 struct fd_resource *rsc = NULL; 64 struct fd_resource_slice *slice = NULL; 65 uint32_t stride = 0; 66 uint32_t base = 0; 67 uint32_t offset = 0; 68 69 if ((i < nr_bufs) && bufs[i]) { 70 struct pipe_surface *psurf = bufs[i]; 71 72 rsc = fd_resource(psurf->texture); 73 pformat = psurf->format; 74 /* In case we're drawing to Z32F_S8, the "color" actually goes to 75 * the stencil 76 */ 77 if (rsc->stencil) { 78 rsc = rsc->stencil; 79 pformat = rsc->base.format; 80 if (bases) 81 bases++; 82 } 83 slice = fd_resource_slice(rsc, psurf->u.tex.level); 84 format = fd3_pipe2color(pformat); 85 swap = fd3_pipe2swap(pformat); 86 if (decode_srgb) 87 srgb = util_format_is_srgb(pformat); 88 else 89 pformat = util_format_linear(pformat); 90 91 debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); 92 93 offset = fd_resource_offset(rsc, psurf->u.tex.level, 94 psurf->u.tex.first_layer); 95 96 if (bin_w) { 97 stride = bin_w * rsc->cpp; 98 99 if (bases) { 100 base = bases[i]; 101 } 102 } else { 103 stride = slice->pitch * rsc->cpp; 104 } 105 } else if (i < nr_bufs && bases) { 106 base = bases[i]; 107 } 108 109 OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2); 110 OUT_RING(ring, A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) | 111 A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) | 112 A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) | 113 A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) | 114 COND(srgb, A3XX_RB_MRT_BUF_INFO_COLOR_SRGB)); 115 if (bin_w || (i >= nr_bufs) || !bufs[i]) { 116 OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base)); 117 } else { 118 OUT_RELOCW(ring, rsc->bo, offset, 0, -1); 119 } 120 121 OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1); 122 OUT_RING(ring, COND((i < nr_bufs) && bufs[i], 123 A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT( 124 fd3_fs_output_format(pformat)))); 125 } 126} 127 128static bool 129use_hw_binning(struct fd_batch *batch) 130{ 131 struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; 132 133 /* workaround: combining scissor optimization and hw binning 134 * seems problematic. Seems like we end up with a mismatch 135 * between binning pass and rendering pass, wrt. where the hw 136 * thinks the vertices belong. And the blob driver doesn't 137 * seem to implement anything like scissor optimization, so 138 * not entirely sure what I might be missing. 139 * 140 * But scissor optimization is mainly for window managers, 141 * which don't have many vertices (and therefore doesn't 142 * benefit much from binning pass). 143 * 144 * So for now just disable binning if scissor optimization is 145 * used. 146 */ 147 if (gmem->minx || gmem->miny) 148 return false; 149 150 if ((gmem->maxpw * gmem->maxph) > 32) 151 return false; 152 153 if ((gmem->maxpw > 15) || (gmem->maxph > 15)) 154 return false; 155 156 return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2); 157} 158 159/* workaround for (hlsq?) lockup with hw binning on a3xx patchlevel 0 */ 160static void update_vsc_pipe(struct fd_batch *batch); 161static void 162emit_binning_workaround(struct fd_batch *batch) 163{ 164 struct fd_context *ctx = batch->ctx; 165 struct fd_gmem_stateobj *gmem = &ctx->gmem; 166 struct fd_ringbuffer *ring = batch->gmem; 167 struct fd3_emit emit = { 168 .debug = &ctx->debug, 169 .vtx = &ctx->solid_vbuf_state, 170 .prog = &ctx->solid_prog, 171 .key = { 172 .half_precision = true, 173 }, 174 }; 175 176 OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2); 177 OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | 178 A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | 179 A3XX_RB_MODE_CONTROL_MRT(0)); 180 OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) | 181 A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | 182 A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER)); 183 184 OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4); 185 OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) | 186 A3XX_RB_COPY_CONTROL_MODE(0) | 187 A3XX_RB_COPY_CONTROL_GMEM_BASE(0)); 188 OUT_RELOCW(ring, fd_resource(ctx->solid_vbuf)->bo, 0x20, 0, -1); /* RB_COPY_DEST_BASE */ 189 OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(128)); 190 OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) | 191 A3XX_RB_COPY_DEST_INFO_FORMAT(RB_R8G8B8A8_UNORM) | 192 A3XX_RB_COPY_DEST_INFO_SWAP(WZYX) | 193 A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) | 194 A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE)); 195 196 OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); 197 OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | 198 A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | 199 A3XX_GRAS_SC_CONTROL_RASTER_MODE(1)); 200 201 fd3_program_emit(ring, &emit, 0, NULL); 202 fd3_emit_vertex_bufs(ring, &emit); 203 204 OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4); 205 OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) | 206 A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE | 207 A3XX_HLSQ_CONTROL_0_REG_RESERVED2 | 208 A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); 209 OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | 210 A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE); 211 OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31)); 212 OUT_RING(ring, 0); /* HLSQ_CONTROL_3_REG */ 213 214 OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1); 215 OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0x20) | 216 A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0x20)); 217 218 OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1); 219 OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE | 220 A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) | 221 A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff)); 222 223 OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); 224 OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER)); 225 226 OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1); 227 OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) | 228 A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | 229 A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) | 230 A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | 231 A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) | 232 A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | 233 A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | 234 A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); 235 236 OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1); 237 OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0.0)); 238 239 OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4); 240 OUT_RING(ring, 0); /* VFD_INDEX_MIN */ 241 OUT_RING(ring, 2); /* VFD_INDEX_MAX */ 242 OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ 243 OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ 244 245 OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); 246 OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) | 247 A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) | 248 A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) | 249 A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST); 250 251 OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); 252 OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) | 253 A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(1)); 254 OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(0) | 255 A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(1)); 256 257 OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); 258 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) | 259 A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0)); 260 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(31) | 261 A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(0)); 262 263 fd_wfi(batch, ring); 264 OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); 265 OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(0.0)); 266 OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(1.0)); 267 OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(0.0)); 268 OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(1.0)); 269 OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0)); 270 OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0)); 271 272 OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); 273 OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE | 274 A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE | 275 A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE | 276 A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE | 277 A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE); 278 279 OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1); 280 OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) | 281 A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0)); 282 283 OUT_PKT3(ring, CP_DRAW_INDX_2, 5); 284 OUT_RING(ring, 0x00000000); /* viz query info. */ 285 OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_IMMEDIATE, 286 INDEX_SIZE_32_BIT, IGNORE_VISIBILITY, 0)); 287 OUT_RING(ring, 2); /* NumIndices */ 288 OUT_RING(ring, 2); 289 OUT_RING(ring, 1); 290 fd_reset_wfi(batch); 291 292 OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 1); 293 OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS)); 294 295 OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); 296 OUT_RING(ring, 0x00000000); 297 298 fd_wfi(batch, ring); 299 OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1); 300 OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) | 301 A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); 302 303 OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); 304 OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 305 A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | 306 A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); 307 308 OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); 309 OUT_RING(ring, 0x00000000); 310} 311 312/* transfer from gmem to system memory (ie. normal RAM) */ 313 314static void 315emit_gmem2mem_surf(struct fd_batch *batch, 316 enum adreno_rb_copy_control_mode mode, 317 bool stencil, 318 uint32_t base, struct pipe_surface *psurf) 319{ 320 struct fd_ringbuffer *ring = batch->gmem; 321 struct fd_resource *rsc = fd_resource(psurf->texture); 322 enum pipe_format format = psurf->format; 323 324 if (!rsc->valid) 325 return; 326 327 if (stencil) { 328 rsc = rsc->stencil; 329 format = rsc->base.format; 330 } 331 332 struct fd_resource_slice *slice = fd_resource_slice(rsc, psurf->u.tex.level); 333 uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level, 334 psurf->u.tex.first_layer); 335 336 debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); 337 338 OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4); 339 OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) | 340 A3XX_RB_COPY_CONTROL_MODE(mode) | 341 A3XX_RB_COPY_CONTROL_GMEM_BASE(base) | 342 COND(format == PIPE_FORMAT_Z32_FLOAT || 343 format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, 344 A3XX_RB_COPY_CONTROL_DEPTH32_RESOLVE)); 345 346 OUT_RELOCW(ring, rsc->bo, offset, 0, -1); /* RB_COPY_DEST_BASE */ 347 OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp)); 348 OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) | 349 A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(format)) | 350 A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) | 351 A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) | 352 A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(format))); 353 354 fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, 355 DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL); 356} 357 358static void 359fd3_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) 360{ 361 struct fd_context *ctx = batch->ctx; 362 struct fd_ringbuffer *ring = batch->gmem; 363 struct pipe_framebuffer_state *pfb = &batch->framebuffer; 364 struct fd3_emit emit = { 365 .debug = &ctx->debug, 366 .vtx = &ctx->solid_vbuf_state, 367 .prog = &ctx->solid_prog, 368 .key = { 369 .half_precision = true, 370 }, 371 }; 372 int i; 373 374 OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); 375 OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER)); 376 377 OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1); 378 OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) | 379 A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | 380 A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) | 381 A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | 382 A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) | 383 A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | 384 A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | 385 A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); 386 387 OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2); 388 OUT_RING(ring, 0xff000000 | 389 A3XX_RB_STENCILREFMASK_STENCILREF(0) | 390 A3XX_RB_STENCILREFMASK_STENCILMASK(0) | 391 A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); 392 OUT_RING(ring, 0xff000000 | 393 A3XX_RB_STENCILREFMASK_STENCILREF(0) | 394 A3XX_RB_STENCILREFMASK_STENCILMASK(0) | 395 A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); 396 397 OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1); 398 OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0)); 399 400 OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); 401 OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */ 402 403 fd_wfi(batch, ring); 404 OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); 405 OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width/2.0 - 0.5)); 406 OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width/2.0)); 407 OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)pfb->height/2.0 - 0.5)); 408 OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)pfb->height/2.0)); 409 OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0)); 410 OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0)); 411 412 OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); 413 OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | 414 A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | 415 A3XX_RB_MODE_CONTROL_MRT(0)); 416 417 OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); 418 OUT_RING(ring, A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | 419 A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | 420 A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | 421 A3XX_RB_RENDER_CONTROL_BIN_WIDTH(ctx->gmem.bin_w)); 422 423 OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); 424 OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | 425 A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | 426 A3XX_GRAS_SC_CONTROL_RASTER_MODE(1)); 427 428 OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); 429 OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) | 430 A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) | 431 A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) | 432 A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST); 433 434 OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); 435 OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) | 436 A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0)); 437 OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) | 438 A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1)); 439 440 OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4); 441 OUT_RING(ring, 0); /* VFD_INDEX_MIN */ 442 OUT_RING(ring, 2); /* VFD_INDEX_MAX */ 443 OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ 444 OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ 445 446 fd3_program_emit(ring, &emit, 0, NULL); 447 fd3_emit_vertex_bufs(ring, &emit); 448 449 if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { 450 struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); 451 if (!rsc->stencil || batch->resolve & FD_BUFFER_DEPTH) 452 emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, false, 453 ctx->gmem.zsbuf_base[0], pfb->zsbuf); 454 if (rsc->stencil && batch->resolve & FD_BUFFER_STENCIL) 455 emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, true, 456 ctx->gmem.zsbuf_base[1], pfb->zsbuf); 457 } 458 459 if (batch->resolve & FD_BUFFER_COLOR) { 460 for (i = 0; i < pfb->nr_cbufs; i++) { 461 if (!pfb->cbufs[i]) 462 continue; 463 if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i))) 464 continue; 465 emit_gmem2mem_surf(batch, RB_COPY_RESOLVE, false, 466 ctx->gmem.cbuf_base[i], pfb->cbufs[i]); 467 } 468 } 469 470 OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); 471 OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 472 A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | 473 A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); 474 475 OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); 476 OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 477 A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | 478 A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); 479} 480 481/* transfer from system memory to gmem */ 482 483static void 484emit_mem2gmem_surf(struct fd_batch *batch, uint32_t bases[], 485 struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w) 486{ 487 struct fd_ringbuffer *ring = batch->gmem; 488 struct pipe_surface *zsbufs[2]; 489 490 assert(bufs > 0); 491 492 OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); 493 OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 494 A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | 495 A3XX_RB_MODE_CONTROL_MRT(bufs - 1)); 496 497 emit_mrt(ring, bufs, psurf, bases, bin_w, false); 498 499 if (psurf[0] && (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT || 500 psurf[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) { 501 /* Depth is stored as unorm in gmem, so we have to write it in using a 502 * special blit shader which writes depth. 503 */ 504 OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); 505 OUT_RING(ring, (A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z | 506 A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE | 507 A3XX_RB_DEPTH_CONTROL_Z_ENABLE | 508 A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE | 509 A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS))); 510 511 OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); 512 OUT_RING(ring, A3XX_RB_DEPTH_INFO_DEPTH_BASE(bases[0]) | 513 A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_32)); 514 OUT_RING(ring, A3XX_RB_DEPTH_PITCH(4 * batch->ctx->gmem.bin_w)); 515 516 if (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) { 517 OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1); 518 OUT_RING(ring, 0); 519 } else { 520 /* The gmem_restore_tex logic will put the first buffer's stencil 521 * as color. Supply it with the proper information to make that 522 * happen. 523 */ 524 zsbufs[0] = zsbufs[1] = psurf[0]; 525 psurf = zsbufs; 526 bufs = 2; 527 } 528 } else { 529 OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); 530 OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1)); 531 } 532 533 fd3_emit_gmem_restore_tex(ring, psurf, bufs); 534 535 fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, 536 DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL); 537} 538 539static void 540fd3_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile) 541{ 542 struct fd_context *ctx = batch->ctx; 543 struct fd_gmem_stateobj *gmem = &ctx->gmem; 544 struct fd_ringbuffer *ring = batch->gmem; 545 struct pipe_framebuffer_state *pfb = &batch->framebuffer; 546 struct fd3_emit emit = { 547 .debug = &ctx->debug, 548 .vtx = &ctx->blit_vbuf_state, 549 .sprite_coord_enable = 1, 550 /* NOTE: They all use the same VP, this is for vtx bufs. */ 551 .prog = &ctx->blit_prog[0], 552 .key = { 553 .half_precision = fd_half_precision(pfb), 554 }, 555 }; 556 float x0, y0, x1, y1; 557 unsigned bin_w = tile->bin_w; 558 unsigned bin_h = tile->bin_h; 559 unsigned i; 560 561 /* write texture coordinates to vertexbuf: */ 562 x0 = ((float)tile->xoff) / ((float)pfb->width); 563 x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width); 564 y0 = ((float)tile->yoff) / ((float)pfb->height); 565 y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height); 566 567 OUT_PKT3(ring, CP_MEM_WRITE, 5); 568 OUT_RELOCW(ring, fd_resource(ctx->blit_texcoord_vbuf)->bo, 0, 0, 0); 569 OUT_RING(ring, fui(x0)); 570 OUT_RING(ring, fui(y0)); 571 OUT_RING(ring, fui(x1)); 572 OUT_RING(ring, fui(y1)); 573 574 fd3_emit_cache_flush(batch, ring); 575 576 for (i = 0; i < 4; i++) { 577 OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1); 578 OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) | 579 A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) | 580 A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf)); 581 582 OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1); 583 OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) | 584 A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) | 585 A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) | 586 A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) | 587 A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) | 588 A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO)); 589 } 590 591 OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); 592 OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS) | 593 A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w)); 594 595 fd_wfi(batch, ring); 596 OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); 597 OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS)); 598 599 OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); 600 OUT_RING(ring, 0); 601 OUT_RING(ring, 0); 602 603 OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); 604 OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER); /* GRAS_CL_CLIP_CNTL */ 605 606 fd_wfi(batch, ring); 607 OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); 608 OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)bin_w/2.0 - 0.5)); 609 OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)bin_w/2.0)); 610 OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)bin_h/2.0 - 0.5)); 611 OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)bin_h/2.0)); 612 OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0)); 613 OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0)); 614 615 OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); 616 OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) | 617 A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0)); 618 OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) | 619 A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1)); 620 621 OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); 622 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) | 623 A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0)); 624 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) | 625 A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1)); 626 627 OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1); 628 OUT_RING(ring, 0x2 | 629 A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) | 630 A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | 631 A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) | 632 A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | 633 A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) | 634 A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | 635 A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | 636 A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); 637 638 OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2); 639 OUT_RING(ring, 0); /* RB_STENCIL_INFO */ 640 OUT_RING(ring, 0); /* RB_STENCIL_PITCH */ 641 642 OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); 643 OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 644 A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | 645 A3XX_GRAS_SC_CONTROL_RASTER_MODE(1)); 646 647 OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); 648 OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(2) | 649 A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) | 650 A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) | 651 A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST); 652 653 OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4); 654 OUT_RING(ring, 0); /* VFD_INDEX_MIN */ 655 OUT_RING(ring, 2); /* VFD_INDEX_MAX */ 656 OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ 657 OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ 658 659 fd3_emit_vertex_bufs(ring, &emit); 660 661 /* for gmem pitch/base calculations, we need to use the non- 662 * truncated tile sizes: 663 */ 664 bin_w = gmem->bin_w; 665 bin_h = gmem->bin_h; 666 667 if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) { 668 emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1]; 669 emit.fp = NULL; /* frag shader changed so clear cache */ 670 fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs); 671 emit_mem2gmem_surf(batch, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w); 672 } 673 674 if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { 675 if (pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && 676 pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT) { 677 /* Non-float can use a regular color write. It's split over 8-bit 678 * components, so half precision is always sufficient. 679 */ 680 emit.prog = &ctx->blit_prog[0]; 681 emit.key.half_precision = true; 682 } else { 683 /* Float depth needs special blit shader that writes depth */ 684 if (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT) 685 emit.prog = &ctx->blit_z; 686 else 687 emit.prog = &ctx->blit_zs; 688 emit.key.half_precision = false; 689 } 690 emit.fp = NULL; /* frag shader changed so clear cache */ 691 fd3_program_emit(ring, &emit, 1, &pfb->zsbuf); 692 emit_mem2gmem_surf(batch, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); 693 } 694 695 OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); 696 OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 697 A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | 698 A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); 699 700 OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); 701 OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 702 A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | 703 A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); 704} 705 706static void 707patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode) 708{ 709 unsigned i; 710 for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) { 711 struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i); 712 *patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0); 713 } 714 util_dynarray_resize(&batch->draw_patches, 0); 715} 716 717static void 718patch_rbrc(struct fd_batch *batch, uint32_t val) 719{ 720 unsigned i; 721 for (i = 0; i < fd_patch_num_elements(&batch->rbrc_patches); i++) { 722 struct fd_cs_patch *patch = fd_patch_element(&batch->rbrc_patches, i); 723 *patch->cs = patch->val | val; 724 } 725 util_dynarray_resize(&batch->rbrc_patches, 0); 726} 727 728/* for rendering directly to system memory: */ 729static void 730fd3_emit_sysmem_prep(struct fd_batch *batch) 731{ 732 struct pipe_framebuffer_state *pfb = &batch->framebuffer; 733 struct fd_ringbuffer *ring = batch->gmem; 734 uint32_t i, pitch = 0; 735 736 for (i = 0; i < pfb->nr_cbufs; i++) { 737 struct pipe_surface *psurf = pfb->cbufs[i]; 738 if (!psurf) 739 continue; 740 pitch = fd_resource(psurf->texture)->slices[psurf->u.tex.level].pitch; 741 } 742 743 fd3_emit_restore(batch, ring); 744 745 OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1); 746 OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) | 747 A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height)); 748 749 emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0, true); 750 751 /* setup scissor/offset for current tile: */ 752 OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1); 753 OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) | 754 A3XX_RB_WINDOW_OFFSET_Y(0)); 755 756 OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); 757 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) | 758 A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0)); 759 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) | 760 A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1)); 761 762 OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); 763 OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 764 A3XX_RB_MODE_CONTROL_GMEM_BYPASS | 765 A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | 766 A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); 767 768 patch_draws(batch, IGNORE_VISIBILITY); 769 patch_rbrc(batch, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch)); 770} 771 772static void 773update_vsc_pipe(struct fd_batch *batch) 774{ 775 struct fd_context *ctx = batch->ctx; 776 struct fd3_context *fd3_ctx = fd3_context(ctx); 777 struct fd_ringbuffer *ring = batch->gmem; 778 int i; 779 780 OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1); 781 OUT_RELOCW(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */ 782 783 for (i = 0; i < 8; i++) { 784 struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; 785 786 if (!pipe->bo) { 787 pipe->bo = fd_bo_new(ctx->dev, 0x40000, 788 DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i); 789 } 790 791 OUT_PKT0(ring, REG_A3XX_VSC_PIPE(i), 3); 792 OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(pipe->x) | 793 A3XX_VSC_PIPE_CONFIG_Y(pipe->y) | 794 A3XX_VSC_PIPE_CONFIG_W(pipe->w) | 795 A3XX_VSC_PIPE_CONFIG_H(pipe->h)); 796 OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE[i].DATA_ADDRESS */ 797 OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE[i].DATA_LENGTH */ 798 } 799} 800 801static void 802emit_binning_pass(struct fd_batch *batch) 803{ 804 struct fd_context *ctx = batch->ctx; 805 struct fd_gmem_stateobj *gmem = &ctx->gmem; 806 struct pipe_framebuffer_state *pfb = &batch->framebuffer; 807 struct fd_ringbuffer *ring = batch->gmem; 808 int i; 809 810 uint32_t x1 = gmem->minx; 811 uint32_t y1 = gmem->miny; 812 uint32_t x2 = gmem->minx + gmem->width - 1; 813 uint32_t y2 = gmem->miny + gmem->height - 1; 814 815 if (ctx->screen->gpu_id == 320) { 816 emit_binning_workaround(batch); 817 fd_wfi(batch, ring); 818 OUT_PKT3(ring, CP_INVALIDATE_STATE, 1); 819 OUT_RING(ring, 0x00007fff); 820 } 821 822 OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1); 823 OUT_RING(ring, A3XX_VSC_BIN_CONTROL_BINNING_ENABLE); 824 825 OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); 826 OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_TILING_PASS) | 827 A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | 828 A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); 829 830 OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1); 831 OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) | 832 A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height)); 833 834 OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); 835 OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | 836 A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | 837 A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w)); 838 839 /* setup scissor/offset for whole screen: */ 840 OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1); 841 OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(x1) | 842 A3XX_RB_WINDOW_OFFSET_Y(y1)); 843 844 OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1); 845 OUT_RING(ring, A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE); 846 847 OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); 848 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) | 849 A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1)); 850 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) | 851 A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2)); 852 853 OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); 854 OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) | 855 A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | 856 A3XX_RB_MODE_CONTROL_MRT(0)); 857 858 for (i = 0; i < 4; i++) { 859 OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1); 860 OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_CLEAR) | 861 A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) | 862 A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0)); 863 } 864 865 OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1); 866 OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(1) | 867 A3XX_PC_VSTREAM_CONTROL_N(0)); 868 869 /* emit IB to binning drawcmds: */ 870 ctx->emit_ib(ring, batch->binning); 871 fd_reset_wfi(batch); 872 873 fd_wfi(batch, ring); 874 875 /* and then put stuff back the way it was: */ 876 877 OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1); 878 OUT_RING(ring, 0x00000000); 879 880 OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1); 881 OUT_RING(ring, A3XX_SP_SP_CTRL_REG_RESOLVE | 882 A3XX_SP_SP_CTRL_REG_CONSTMODE(1) | 883 A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) | 884 A3XX_SP_SP_CTRL_REG_L0MODE(0)); 885 886 OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1); 887 OUT_RING(ring, 0x00000000); 888 889 OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); 890 OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 891 A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | 892 A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); 893 894 OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2); 895 OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 896 A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | 897 A3XX_RB_MODE_CONTROL_MRT(pfb->nr_cbufs - 1)); 898 OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | 899 A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | 900 A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w)); 901 902 fd_event_write(batch, ring, CACHE_FLUSH); 903 fd_wfi(batch, ring); 904 905 if (ctx->screen->gpu_id == 320) { 906 /* dummy-draw workaround: */ 907 OUT_PKT3(ring, CP_DRAW_INDX, 3); 908 OUT_RING(ring, 0x00000000); 909 OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX, 910 INDEX_SIZE_IGN, IGNORE_VISIBILITY, 0)); 911 OUT_RING(ring, 0); /* NumIndices */ 912 fd_reset_wfi(batch); 913 } 914 915 OUT_PKT3(ring, CP_NOP, 4); 916 OUT_RING(ring, 0x00000000); 917 OUT_RING(ring, 0x00000000); 918 OUT_RING(ring, 0x00000000); 919 OUT_RING(ring, 0x00000000); 920 921 fd_wfi(batch, ring); 922 923 if (ctx->screen->gpu_id == 320) { 924 emit_binning_workaround(batch); 925 } 926} 927 928/* before first tile */ 929static void 930fd3_emit_tile_init(struct fd_batch *batch) 931{ 932 struct fd_ringbuffer *ring = batch->gmem; 933 struct pipe_framebuffer_state *pfb = &batch->framebuffer; 934 struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; 935 uint32_t rb_render_control; 936 937 fd3_emit_restore(batch, ring); 938 939 /* note: use gmem->bin_w/h, the bin_w/h parameters may be truncated 940 * at the right and bottom edge tiles 941 */ 942 OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1); 943 OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) | 944 A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); 945 946 update_vsc_pipe(batch); 947 948 fd_wfi(batch, ring); 949 OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1); 950 OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) | 951 A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height)); 952 953 if (use_hw_binning(batch)) { 954 /* emit hw binning pass: */ 955 emit_binning_pass(batch); 956 957 patch_draws(batch, USE_VISIBILITY); 958 } else { 959 patch_draws(batch, IGNORE_VISIBILITY); 960 } 961 962 rb_render_control = A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | 963 A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w); 964 965 patch_rbrc(batch, rb_render_control); 966} 967 968/* before mem2gmem */ 969static void 970fd3_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) 971{ 972 struct fd_ringbuffer *ring = batch->gmem; 973 struct pipe_framebuffer_state *pfb = &batch->framebuffer; 974 975 OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); 976 OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 977 A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | 978 A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); 979} 980 981/* before IB to rendering cmds: */ 982static void 983fd3_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile) 984{ 985 struct fd_context *ctx = batch->ctx; 986 struct fd3_context *fd3_ctx = fd3_context(ctx); 987 struct fd_ringbuffer *ring = batch->gmem; 988 struct fd_gmem_stateobj *gmem = &ctx->gmem; 989 struct pipe_framebuffer_state *pfb = &batch->framebuffer; 990 991 uint32_t x1 = tile->xoff; 992 uint32_t y1 = tile->yoff; 993 uint32_t x2 = tile->xoff + tile->bin_w - 1; 994 uint32_t y2 = tile->yoff + tile->bin_h - 1; 995 996 uint32_t reg; 997 998 OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); 999 reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]); 1000 if (pfb->zsbuf) { 1001 reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format)); 1002 } 1003 OUT_RING(ring, reg); 1004 if (pfb->zsbuf) { 1005 struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); 1006 OUT_RING(ring, A3XX_RB_DEPTH_PITCH(rsc->cpp * gmem->bin_w)); 1007 if (rsc->stencil) { 1008 OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2); 1009 OUT_RING(ring, A3XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1])); 1010 OUT_RING(ring, A3XX_RB_STENCIL_PITCH(rsc->stencil->cpp * gmem->bin_w)); 1011 } 1012 } else { 1013 OUT_RING(ring, 0x00000000); 1014 } 1015 1016 if (use_hw_binning(batch)) { 1017 struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[tile->p]; 1018 1019 assert(pipe->w * pipe->h); 1020 1021 fd_event_write(batch, ring, HLSQ_FLUSH); 1022 fd_wfi(batch, ring); 1023 1024 OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1); 1025 OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) | 1026 A3XX_PC_VSTREAM_CONTROL_N(tile->n)); 1027 1028 1029 OUT_PKT3(ring, CP_SET_BIN_DATA, 2); 1030 OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */ 1031 OUT_RELOCW(ring, fd3_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <- VSC_SIZE_ADDRESS + (p * 4) */ 1032 (tile->p * 4), 0, 0); 1033 } else { 1034 OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1); 1035 OUT_RING(ring, 0x00000000); 1036 } 1037 1038 OUT_PKT3(ring, CP_SET_BIN, 3); 1039 OUT_RING(ring, 0x00000000); 1040 OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1)); 1041 OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2)); 1042 1043 emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w, true); 1044 1045 /* setup scissor/offset for current tile: */ 1046 OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1); 1047 OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(tile->xoff) | 1048 A3XX_RB_WINDOW_OFFSET_Y(tile->yoff)); 1049 1050 OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); 1051 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) | 1052 A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1)); 1053 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) | 1054 A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2)); 1055} 1056 1057void 1058fd3_gmem_init(struct pipe_context *pctx) 1059{ 1060 struct fd_context *ctx = fd_context(pctx); 1061 1062 ctx->emit_sysmem_prep = fd3_emit_sysmem_prep; 1063 ctx->emit_tile_init = fd3_emit_tile_init; 1064 ctx->emit_tile_prep = fd3_emit_tile_prep; 1065 ctx->emit_tile_mem2gmem = fd3_emit_tile_mem2gmem; 1066 ctx->emit_tile_renderprep = fd3_emit_tile_renderprep; 1067 ctx->emit_tile_gmem2mem = fd3_emit_tile_gmem2mem; 1068} 1069