1/* 2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> 3 * Copyright © 2018 Google, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25#include "fd6_const.h" 26#include "fd6_pack.h" 27 28#define emit_const_user fd6_emit_const_user 29#define emit_const_bo fd6_emit_const_bo 30#include "ir3_const.h" 31 32/* regid: base const register 33 * prsc or dwords: buffer containing constant values 34 * sizedwords: size of const value buffer 35 */ 36void 37fd6_emit_const_user(struct fd_ringbuffer *ring, 38 const struct ir3_shader_variant *v, uint32_t regid, 39 uint32_t sizedwords, const uint32_t *dwords) 40{ 41 emit_const_asserts(ring, v, regid, sizedwords); 42 43 /* NOTE we cheat a bit here, since we know mesa is aligning 44 * the size of the user buffer to 16 bytes. And we want to 45 * cut cycles in a hot path. 46 */ 47 uint32_t align_sz = align(sizedwords, 4); 48 49 if (fd6_geom_stage(v->type)) { 50 OUT_PKTBUF( 51 ring, CP_LOAD_STATE6_GEOM, dwords, align_sz, 52 CP_LOAD_STATE6_0(.dst_off = regid / 4, .state_type = ST6_CONSTANTS, 53 .state_src = SS6_DIRECT, 54 .state_block = fd6_stage2shadersb(v->type), 55 .num_unit = DIV_ROUND_UP(sizedwords, 4)), 56 CP_LOAD_STATE6_1(), CP_LOAD_STATE6_2()); 57 } else { 58 OUT_PKTBUF( 59 ring, CP_LOAD_STATE6_FRAG, dwords, align_sz, 60 CP_LOAD_STATE6_0(.dst_off = regid / 4, .state_type = ST6_CONSTANTS, 61 .state_src = SS6_DIRECT, 62 .state_block = fd6_stage2shadersb(v->type), 63 .num_unit = DIV_ROUND_UP(sizedwords, 4)), 64 CP_LOAD_STATE6_1(), CP_LOAD_STATE6_2()); 65 } 66} 67void 68fd6_emit_const_bo(struct fd_ringbuffer *ring, 69 const struct ir3_shader_variant *v, uint32_t regid, 70 uint32_t offset, uint32_t sizedwords, struct fd_bo *bo) 71{ 72 uint32_t dst_off = regid / 4; 73 assert(dst_off % 4 == 0); 74 uint32_t num_unit = DIV_ROUND_UP(sizedwords, 4); 75 assert(num_unit % 4 == 0); 76 77 emit_const_asserts(ring, v, regid, sizedwords); 78 79 if (fd6_geom_stage(v->type)) { 80 OUT_PKT(ring, CP_LOAD_STATE6_GEOM, 81 CP_LOAD_STATE6_0(.dst_off = dst_off, .state_type = ST6_CONSTANTS, 82 .state_src = SS6_INDIRECT, 83 .state_block = fd6_stage2shadersb(v->type), 84 .num_unit = num_unit, ), 85 CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset)); 86 } else { 87 OUT_PKT(ring, CP_LOAD_STATE6_FRAG, 88 CP_LOAD_STATE6_0(.dst_off = dst_off, .state_type = ST6_CONSTANTS, 89 .state_src = SS6_INDIRECT, 90 .state_block = fd6_stage2shadersb(v->type), 91 .num_unit = num_unit, ), 92 CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset)); 93 } 94} 95 96static bool 97is_stateobj(struct fd_ringbuffer *ring) 98{ 99 return true; 100} 101 102static void 103emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v, 104 uint32_t dst_offset, uint32_t num, struct fd_bo **bos, 105 uint32_t *offsets) 106{ 107 unreachable("shouldn't be called on a6xx"); 108} 109 110static void 111emit_tess_bos(struct fd_ringbuffer *ring, struct fd6_emit *emit, 112 struct ir3_shader_variant *s) assert_dt 113{ 114 struct fd_context *ctx = emit->ctx; 115 const struct ir3_const_state *const_state = ir3_const_state(s); 116 const unsigned regid = const_state->offsets.primitive_param * 4 + 4; 117 uint32_t dwords = 16; 118 119 OUT_PKT7(ring, fd6_stage2opcode(s->type), 3); 120 OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid / 4) | 121 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | 122 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | 123 CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(s->type)) | 124 CP_LOAD_STATE6_0_NUM_UNIT(dwords / 4)); 125 OUT_RB(ring, ctx->batch->tess_addrs_constobj); 126} 127 128static void 129emit_stage_tess_consts(struct fd_ringbuffer *ring, struct ir3_shader_variant *v, 130 uint32_t *params, int num_params) 131{ 132 const struct ir3_const_state *const_state = ir3_const_state(v); 133 const unsigned regid = const_state->offsets.primitive_param; 134 int size = MIN2(1 + regid, v->constlen) - regid; 135 if (size > 0) 136 fd6_emit_const_user(ring, v, regid * 4, num_params, params); 137} 138 139struct fd_ringbuffer * 140fd6_build_tess_consts(struct fd6_emit *emit) 141{ 142 struct fd_context *ctx = emit->ctx; 143 144 struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer( 145 ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING); 146 147 /* VS sizes are in bytes since that's what STLW/LDLW use, while the HS 148 * size is dwords, since that's what LDG/STG use. 149 */ 150 unsigned num_vertices = emit->hs 151 ? emit->patch_vertices 152 : emit->gs->shader->nir->info.gs.vertices_in; 153 154 uint32_t vs_params[4] = { 155 emit->vs->output_size * num_vertices * 4, /* vs primitive stride */ 156 emit->vs->output_size * 4, /* vs vertex stride */ 157 0, 0}; 158 159 emit_stage_tess_consts(constobj, emit->vs, vs_params, ARRAY_SIZE(vs_params)); 160 161 if (emit->hs) { 162 uint32_t hs_params[4] = { 163 emit->vs->output_size * num_vertices * 4, /* vs primitive stride */ 164 emit->vs->output_size * 4, /* vs vertex stride */ 165 emit->hs->output_size, emit->patch_vertices}; 166 167 emit_stage_tess_consts(constobj, emit->hs, hs_params, 168 ARRAY_SIZE(hs_params)); 169 emit_tess_bos(constobj, emit, emit->hs); 170 171 if (emit->gs) 172 num_vertices = emit->gs->shader->nir->info.gs.vertices_in; 173 174 uint32_t ds_params[4] = { 175 emit->ds->output_size * num_vertices * 4, /* ds primitive stride */ 176 emit->ds->output_size * 4, /* ds vertex stride */ 177 emit->hs->output_size, /* hs vertex stride (dwords) */ 178 emit->hs->shader->nir->info.tess.tcs_vertices_out}; 179 180 emit_stage_tess_consts(constobj, emit->ds, ds_params, 181 ARRAY_SIZE(ds_params)); 182 emit_tess_bos(constobj, emit, emit->ds); 183 } 184 185 if (emit->gs) { 186 struct ir3_shader_variant *prev; 187 if (emit->ds) 188 prev = emit->ds; 189 else 190 prev = emit->vs; 191 192 uint32_t gs_params[4] = { 193 prev->output_size * num_vertices * 4, /* ds primitive stride */ 194 prev->output_size * 4, /* ds vertex stride */ 195 0, 196 0, 197 }; 198 199 num_vertices = emit->gs->shader->nir->info.gs.vertices_in; 200 emit_stage_tess_consts(constobj, emit->gs, gs_params, 201 ARRAY_SIZE(gs_params)); 202 } 203 204 return constobj; 205} 206 207static void 208fd6_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v, 209 struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf) 210{ 211 const struct ir3_const_state *const_state = ir3_const_state(v); 212 int num_ubos = const_state->num_ubos; 213 214 if (!num_ubos) 215 return; 216 217 OUT_PKT7(ring, fd6_stage2opcode(v->type), 3 + (2 * num_ubos)); 218 OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | 219 CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) | 220 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | 221 CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(v->type)) | 222 CP_LOAD_STATE6_0_NUM_UNIT(num_ubos)); 223 OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); 224 OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); 225 226 for (int i = 0; i < num_ubos; i++) { 227 /* NIR constant data is packed into the end of the shader. */ 228 if (i == const_state->constant_data_ubo) { 229 int size_vec4s = DIV_ROUND_UP(v->constant_data_size, 16); 230 OUT_RELOC(ring, v->bo, v->info.constant_data_offset, 231 (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32, 0); 232 continue; 233 } 234 235 struct pipe_constant_buffer *cb = &constbuf->cb[i]; 236 237 /* If we have user pointers (constbuf 0, aka GL uniforms), upload them 238 * to a buffer now, and save it in the constbuf so that we don't have 239 * to reupload until they get changed. 240 */ 241 if (cb->user_buffer) { 242 struct pipe_context *pctx = &ctx->base; 243 u_upload_data(pctx->stream_uploader, 0, cb->buffer_size, 64, 244 cb->user_buffer, &cb->buffer_offset, &cb->buffer); 245 cb->user_buffer = NULL; 246 } 247 248 if (cb->buffer) { 249 int size_vec4s = DIV_ROUND_UP(cb->buffer_size, 16); 250 OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 251 (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32, 0); 252 } else { 253 OUT_RING(ring, 0xbad00000 | (i << 16)); 254 OUT_RING(ring, A6XX_UBO_1_SIZE(0)); 255 } 256 } 257} 258 259static unsigned 260user_consts_cmdstream_size(struct ir3_shader_variant *v) 261{ 262 struct ir3_const_state *const_state = ir3_const_state(v); 263 struct ir3_ubo_analysis_state *ubo_state = &const_state->ubo_state; 264 265 if (unlikely(!ubo_state->cmdstream_size)) { 266 unsigned packets, size; 267 268 /* pre-calculate size required for userconst stateobj: */ 269 ir3_user_consts_size(ubo_state, &packets, &size); 270 271 /* also account for UBO addresses: */ 272 packets += 1; 273 size += 2 * const_state->num_ubos; 274 275 unsigned sizedwords = (4 * packets) + size; 276 ubo_state->cmdstream_size = sizedwords * 4; 277 } 278 279 return ubo_state->cmdstream_size; 280} 281 282struct fd_ringbuffer * 283fd6_build_user_consts(struct fd6_emit *emit) 284{ 285 static const enum pipe_shader_type types[] = { 286 PIPE_SHADER_VERTEX, PIPE_SHADER_TESS_CTRL, PIPE_SHADER_TESS_EVAL, 287 PIPE_SHADER_GEOMETRY, PIPE_SHADER_FRAGMENT, 288 }; 289 struct ir3_shader_variant *variants[] = { 290 emit->vs, emit->hs, emit->ds, emit->gs, emit->fs, 291 }; 292 struct fd_context *ctx = emit->ctx; 293 unsigned sz = 0; 294 295 for (unsigned i = 0; i < ARRAY_SIZE(types); i++) { 296 if (!variants[i]) 297 continue; 298 sz += user_consts_cmdstream_size(variants[i]); 299 } 300 301 struct fd_ringbuffer *constobj = 302 fd_submit_new_ringbuffer(ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING); 303 304 for (unsigned i = 0; i < ARRAY_SIZE(types); i++) { 305 if (!variants[i]) 306 continue; 307 ir3_emit_user_consts(ctx->screen, variants[i], constobj, 308 &ctx->constbuf[types[i]]); 309 fd6_emit_ubos(ctx, variants[i], constobj, &ctx->constbuf[types[i]]); 310 } 311 312 return constobj; 313} 314 315struct fd_ringbuffer * 316fd6_build_vs_driver_params(struct fd6_emit *emit) 317{ 318 struct fd_context *ctx = emit->ctx; 319 struct fd6_context *fd6_ctx = fd6_context(ctx); 320 const struct ir3_shader_variant *vs = emit->vs; 321 322 if (vs->need_driver_params) { 323 struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer( 324 ctx->batch->submit, IR3_DP_VS_COUNT * 4, FD_RINGBUFFER_STREAMING); 325 ir3_emit_vs_driver_params(vs, dpconstobj, ctx, emit->info, emit->indirect, 326 emit->draw); 327 fd6_ctx->has_dp_state = true; 328 return dpconstobj; 329 } 330 331 fd6_ctx->has_dp_state = false; 332 return NULL; 333} 334 335void 336fd6_emit_cs_consts(const struct ir3_shader_variant *v, 337 struct fd_ringbuffer *ring, struct fd_context *ctx, 338 const struct pipe_grid_info *info) 339{ 340 ir3_emit_cs_consts(v, ring, ctx, info); 341 fd6_emit_ubos(ctx, v, ring, &ctx->constbuf[PIPE_SHADER_COMPUTE]); 342} 343 344void 345fd6_emit_immediates(struct fd_screen *screen, 346 const struct ir3_shader_variant *v, 347 struct fd_ringbuffer *ring) 348{ 349 ir3_emit_immediates(screen, v, ring); 350} 351 352void 353fd6_emit_link_map(struct fd_screen *screen, 354 const struct ir3_shader_variant *producer, 355 const struct ir3_shader_variant *v, 356 struct fd_ringbuffer *ring) 357{ 358 ir3_emit_link_map(screen, producer, v, ring); 359} 360