1/* 2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> 3 * Copyright © 2018 Google, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 * Authors: 25 * Rob Clark <robclark@freedesktop.org> 26 */ 27 28#include "pipe/p_state.h" 29#include "util/u_string.h" 30#include "util/u_memory.h" 31#include "util/u_prim.h" 32 33#include "freedreno_state.h" 34#include "freedreno_resource.h" 35 36#include "fd6_draw.h" 37#include "fd6_context.h" 38#include "fd6_emit.h" 39#include "fd6_program.h" 40#include "fd6_format.h" 41#include "fd6_zsa.h" 42 43/* some bits in common w/ a4xx: */ 44#include "a4xx/fd4_draw.h" 45 46static void 47draw_emit_indirect(struct fd_batch *batch, struct fd_ringbuffer *ring, 48 enum pc_di_primtype primtype, 49 const struct pipe_draw_info *info, 50 unsigned index_offset) 51{ 52 struct fd_resource *ind = fd_resource(info->indirect->buffer); 53 54 if (info->index_size) { 55 struct pipe_resource *idx = info->index.resource; 56 unsigned max_indicies = idx->width0 / info->index_size; 57 58 OUT_PKT7(ring, CP_DRAW_INDX_INDIRECT, 6); 59 OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_DMA, 60 fd4_size2indextype(info->index_size), 0), 61 &batch->draw_patches); 62 OUT_RELOC(ring, fd_resource(idx)->bo, 63 index_offset, 0, 0); 64 // XXX: Check A5xx vs A6xx 65 OUT_RING(ring, A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES(max_indicies)); 66 OUT_RELOC(ring, ind->bo, info->indirect->offset, 0, 0); 67 } else { 68 OUT_PKT7(ring, CP_DRAW_INDIRECT, 3); 69 OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_AUTO_INDEX, 0, 0), 70 &batch->draw_patches); 71 OUT_RELOC(ring, ind->bo, info->indirect->offset, 0, 0); 72 } 73} 74 75static void 76draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring, 77 enum pc_di_primtype primtype, 78 const struct pipe_draw_info *info, 79 unsigned index_offset) 80{ 81 if (info->index_size) { 82 assert(!info->has_user_indices); 83 84 struct pipe_resource *idx_buffer = info->index.resource; 85 uint32_t idx_size = info->index_size * info->count; 86 uint32_t idx_offset = index_offset + info->start * info->index_size; 87 88 /* leave vis mode blank for now, it will be patched up when 89 * we know if we are binning or not 90 */ 91 uint32_t draw = CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) | 92 CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_DMA) | 93 CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(fd4_size2indextype(info->index_size)) | 94 0x2000; 95 96 OUT_PKT7(ring, CP_DRAW_INDX_OFFSET, 7); 97 OUT_RINGP(ring, draw, &batch->draw_patches); 98 OUT_RING(ring, info->instance_count); /* NumInstances */ 99 OUT_RING(ring, info->count); /* NumIndices */ 100 OUT_RING(ring, 0x0); /* XXX */ 101 OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0); 102 OUT_RING (ring, idx_size); 103 } else { 104 /* leave vis mode blank for now, it will be patched up when 105 * we know if we are binning or not 106 */ 107 uint32_t draw = CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) | 108 CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) | 109 0x2000; 110 111 OUT_PKT7(ring, CP_DRAW_INDX_OFFSET, 3); 112 OUT_RINGP(ring, draw, &batch->draw_patches); 113 OUT_RING(ring, info->instance_count); /* NumInstances */ 114 OUT_RING(ring, info->count); /* NumIndices */ 115 } 116} 117 118/* fixup dirty shader state in case some "unrelated" (from the state- 119 * tracker's perspective) state change causes us to switch to a 120 * different variant. 121 */ 122static void 123fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key) 124{ 125 struct fd6_context *fd6_ctx = fd6_context(ctx); 126 struct ir3_shader_key *last_key = &fd6_ctx->last_key; 127 128 if (!ir3_shader_key_equal(last_key, key)) { 129 if (ir3_shader_key_changes_fs(last_key, key)) { 130 ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= FD_DIRTY_SHADER_PROG; 131 ctx->dirty |= FD_DIRTY_PROG; 132 } 133 134 if (ir3_shader_key_changes_vs(last_key, key)) { 135 ctx->dirty_shader[PIPE_SHADER_VERTEX] |= FD_DIRTY_SHADER_PROG; 136 ctx->dirty |= FD_DIRTY_PROG; 137 } 138 139 fd6_ctx->last_key = *key; 140 } 141} 142 143static bool 144fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, 145 unsigned index_offset) 146{ 147 struct fd6_context *fd6_ctx = fd6_context(ctx); 148 struct fd6_emit emit = { 149 .ctx = ctx, 150 .vtx = &ctx->vtx, 151 .info = info, 152 .key = { 153 .vs = ctx->prog.vp, 154 .fs = ctx->prog.fp, 155 .key = { 156 .color_two_side = ctx->rasterizer->light_twoside, 157 .vclamp_color = ctx->rasterizer->clamp_vertex_color, 158 .fclamp_color = ctx->rasterizer->clamp_fragment_color, 159 .rasterflat = ctx->rasterizer->flatshade, 160 .ucp_enables = ctx->rasterizer->clip_plane_enable, 161 .has_per_samp = (fd6_ctx->fsaturate || fd6_ctx->vsaturate), 162 .vsaturate_s = fd6_ctx->vsaturate_s, 163 .vsaturate_t = fd6_ctx->vsaturate_t, 164 .vsaturate_r = fd6_ctx->vsaturate_r, 165 .fsaturate_s = fd6_ctx->fsaturate_s, 166 .fsaturate_t = fd6_ctx->fsaturate_t, 167 .fsaturate_r = fd6_ctx->fsaturate_r, 168 .vsamples = ctx->tex[PIPE_SHADER_VERTEX].samples, 169 .fsamples = ctx->tex[PIPE_SHADER_FRAGMENT].samples, 170 .sample_shading = (ctx->min_samples > 1), 171 .msaa = (ctx->framebuffer.samples > 1), 172 }, 173 }, 174 .rasterflat = ctx->rasterizer->flatshade, 175 .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable, 176 .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode, 177 }; 178 179 fixup_shader_state(ctx, &emit.key.key); 180 181 if (!(ctx->dirty & FD_DIRTY_PROG)) { 182 emit.prog = fd6_ctx->prog; 183 } else { 184 fd6_ctx->prog = fd6_emit_get_prog(&emit); 185 } 186 187 /* bail if compile failed: */ 188 if (!fd6_ctx->prog) 189 return NULL; 190 191 emit.dirty = ctx->dirty; /* *after* fixup_shader_state() */ 192 emit.bs = fd6_emit_get_prog(&emit)->bs; 193 emit.vs = fd6_emit_get_prog(&emit)->vs; 194 emit.fs = fd6_emit_get_prog(&emit)->fs; 195 196 const struct ir3_shader_variant *vp = emit.vs; 197 const struct ir3_shader_variant *fp = emit.fs; 198 199 ctx->stats.vs_regs += ir3_shader_halfregs(vp); 200 ctx->stats.fs_regs += ir3_shader_halfregs(fp); 201 202 /* figure out whether we need to disable LRZ write for binning 203 * pass using draw pass's fp: 204 */ 205 emit.no_lrz_write = fp->writes_pos || fp->no_earlyz; 206 207 struct fd_ringbuffer *ring = ctx->batch->draw; 208 enum pc_di_primtype primtype = ctx->primtypes[info->mode]; 209 210 fd6_emit_state(ring, &emit); 211 212 OUT_PKT4(ring, REG_A6XX_VFD_INDEX_OFFSET, 2); 213 OUT_RING(ring, info->index_size ? info->index_bias : info->start); /* VFD_INDEX_OFFSET */ 214 OUT_RING(ring, info->start_instance); /* VFD_INSTANCE_START_OFFSET */ 215 216 OUT_PKT4(ring, REG_A6XX_PC_RESTART_INDEX, 1); 217 OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */ 218 info->restart_index : 0xffffffff); 219 220 /* for debug after a lock up, write a unique counter value 221 * to scratch7 for each draw, to make it easier to match up 222 * register dumps to cmdstream. The combination of IB 223 * (scratch6) and DRAW is enough to "triangulate" the 224 * particular draw that caused lockup. 225 */ 226 emit_marker6(ring, 7); 227 228 if (info->indirect) { 229 draw_emit_indirect(ctx->batch, ring, primtype, 230 info, index_offset); 231 } else { 232 draw_emit(ctx->batch, ring, primtype, 233 info, index_offset); 234 } 235 236 emit_marker6(ring, 7); 237 fd_reset_wfi(ctx->batch); 238 239 if (emit.streamout_mask) { 240 struct fd_ringbuffer *ring = ctx->batch->draw; 241 242 for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { 243 if (emit.streamout_mask & (1 << i)) { 244 fd6_event_write(ctx->batch, ring, FLUSH_SO_0 + i, false); 245 } 246 } 247 } 248 249 fd_context_all_clean(ctx); 250 251 return true; 252} 253 254static void 255fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) 256{ 257 struct fd_ringbuffer *ring; 258 259 // TODO mid-frame clears (ie. app doing crazy stuff)?? Maybe worth 260 // splitting both clear and lrz clear out into their own rb's. And 261 // just throw away any draws prior to clear. (Anything not fullscreen 262 // clear, just fallback to generic path that treats it as a normal 263 // draw 264 265 if (!batch->lrz_clear) { 266 batch->lrz_clear = fd_submit_new_ringbuffer(batch->submit, 0x1000, 0); 267 } 268 269 ring = batch->lrz_clear; 270 271 emit_marker6(ring, 7); 272 OUT_PKT7(ring, CP_SET_MARKER, 1); 273 OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_BYPASS)); 274 emit_marker6(ring, 7); 275 276 OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1); 277 OUT_RING(ring, 0x10000000); 278 279 OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); 280 OUT_RING(ring, 0x7ffff); 281 282 emit_marker6(ring, 7); 283 OUT_PKT7(ring, CP_SET_MARKER, 1); 284 OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0xc)); 285 emit_marker6(ring, 7); 286 287 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8C01, 1); 288 OUT_RING(ring, 0x0); 289 290 OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 13); 291 OUT_RING(ring, 0x00000000); 292 OUT_RING(ring, 0x00000000); 293 OUT_RING(ring, 0x00000000); 294 OUT_RING(ring, 0x00000000); 295 OUT_RING(ring, 0x00000000); 296 OUT_RING(ring, 0x00000000); 297 OUT_RING(ring, 0x00000000); 298 OUT_RING(ring, 0x00000000); 299 OUT_RING(ring, 0x00000000); 300 OUT_RING(ring, 0x00000000); 301 OUT_RING(ring, 0x00000000); 302 OUT_RING(ring, 0x00000000); 303 OUT_RING(ring, 0x00000000); 304 305 OUT_PKT4(ring, REG_A6XX_SP_2D_SRC_FORMAT, 1); 306 OUT_RING(ring, 0x0000f410); 307 308 OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); 309 OUT_RING(ring, A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT(RB6_R16_UNORM) | 310 0x4f00080); 311 312 OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1); 313 OUT_RING(ring, A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(RB6_R16_UNORM) | 314 0x4f00080); 315 316 fd6_event_write(batch, ring, UNK_1D, true); 317 fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false); 318 319 OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4); 320 OUT_RING(ring, fui(depth)); 321 OUT_RING(ring, 0x00000000); 322 OUT_RING(ring, 0x00000000); 323 OUT_RING(ring, 0x00000000); 324 325 OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9); 326 OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(RB6_R16_UNORM) | 327 A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) | 328 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX)); 329 OUT_RELOCW(ring, zsbuf->lrz, 0, 0, 0); 330 OUT_RING(ring, A6XX_RB_2D_DST_SIZE_PITCH(zsbuf->lrz_pitch * 2)); 331 OUT_RING(ring, 0x00000000); 332 OUT_RING(ring, 0x00000000); 333 OUT_RING(ring, 0x00000000); 334 OUT_RING(ring, 0x00000000); 335 OUT_RING(ring, 0x00000000); 336 337 OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4); 338 OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X_X(0)); 339 OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X_X(0)); 340 OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y_Y(0)); 341 OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y_Y(0)); 342 343 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2); 344 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | 345 A6XX_GRAS_2D_DST_TL_Y(0)); 346 OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(zsbuf->lrz_width - 1) | 347 A6XX_GRAS_2D_DST_BR_Y(zsbuf->lrz_height - 1)); 348 349 fd6_event_write(batch, ring, 0x3f, false); 350 351 OUT_WFI5(ring); 352 353 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1); 354 OUT_RING(ring, 0x1000000); 355 356 OUT_PKT7(ring, CP_BLIT, 1); 357 OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE)); 358 359 OUT_WFI5(ring); 360 361 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1); 362 OUT_RING(ring, 0x0); 363 364 fd6_event_write(batch, ring, UNK_1D, true); 365 fd6_event_write(batch, ring, FACENESS_FLUSH, true); 366 fd6_event_write(batch, ring, CACHE_FLUSH_TS, true); 367 368 fd6_cache_inv(batch, ring); 369} 370 371static bool is_z32(enum pipe_format format) 372{ 373 switch (format) { 374 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 375 case PIPE_FORMAT_Z32_UNORM: 376 case PIPE_FORMAT_Z32_FLOAT: 377 return true; 378 default: 379 return false; 380 } 381} 382 383static bool 384fd6_clear(struct fd_context *ctx, unsigned buffers, 385 const union pipe_color_union *color, double depth, unsigned stencil) 386{ 387 struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; 388 const bool has_depth = pfb->zsbuf; 389 unsigned color_buffers = buffers >> 2; 390 unsigned i; 391 392 /* If we're clearing after draws, fallback to 3D pipe clears. We could 393 * use blitter clears in the draw batch but then we'd have to patch up the 394 * gmem offsets. This doesn't seem like a useful thing to optimize for 395 * however.*/ 396 if (ctx->batch->num_draws > 0) 397 return false; 398 399 foreach_bit(i, color_buffers) 400 ctx->batch->clear_color[i] = *color; 401 if (buffers & PIPE_CLEAR_DEPTH) 402 ctx->batch->clear_depth = depth; 403 if (buffers & PIPE_CLEAR_STENCIL) 404 ctx->batch->clear_stencil = stencil; 405 406 ctx->batch->fast_cleared |= buffers; 407 408 if (has_depth && (buffers & PIPE_CLEAR_DEPTH)) { 409 struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture); 410 if (zsbuf->lrz && !is_z32(pfb->zsbuf->format)) { 411 zsbuf->lrz_valid = true; 412 fd6_clear_lrz(ctx->batch, zsbuf, depth); 413 } 414 } 415 416 return true; 417} 418 419void 420fd6_draw_init(struct pipe_context *pctx) 421{ 422 struct fd_context *ctx = fd_context(pctx); 423 ctx->draw_vbo = fd6_draw_vbo; 424 ctx->clear = fd6_clear; 425} 426