1/* 2 * Copyright 2010 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23#include "pipe/p_context.h" 24#include "pipe/p_defines.h" 25#include "pipe/p_state.h" 26#include "util/u_inlines.h" 27 28#include "nvc0/nvc0_context.h" 29#include "nvc0/nvc0_query_hw.h" 30 31#include "nvc0/nvc0_compute.xml.h" 32 33static inline void 34nvc0_program_update_context_state(struct nvc0_context *nvc0, 35 struct nvc0_program *prog, int stage) 36{ 37 if (prog && prog->need_tls) { 38 const uint32_t flags = NV_VRAM_DOMAIN(&nvc0->screen->base) | NOUVEAU_BO_RDWR; 39 if (!nvc0->state.tls_required) 40 BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TLS, flags, nvc0->screen->tls); 41 nvc0->state.tls_required |= 1 << stage; 42 } else { 43 if (nvc0->state.tls_required == (1 << stage)) 44 nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TLS); 45 nvc0->state.tls_required &= ~(1 << stage); 46 } 47} 48 49static inline bool 50nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) 51{ 52 if (prog->mem) 53 return true; 54 55 if (!prog->translated) { 56 prog->translated = nvc0_program_translate( 57 prog, nvc0->screen->base.device->chipset, 58 nvc0->screen->base.disk_shader_cache, &nvc0->base.debug); 59 if (!prog->translated) 60 return false; 61 } 62 63 if (likely(prog->code_size)) 64 return nvc0_program_upload(nvc0, prog); 65 return true; /* stream output info only */ 66} 67 68void 69nvc0_program_sp_start_id(struct nvc0_context *nvc0, int stage, 70 struct nvc0_program *prog) 71{ 72 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 73 74 if (nvc0->screen->eng3d->oclass < GV100_3D_CLASS) { 75 BEGIN_NVC0(push, NVC0_3D(SP_START_ID(stage)), 1); 76 PUSH_DATA (push, prog->code_base); 77 } else { 78 BEGIN_NVC0(push, SUBC_3D(GV100_3D_SP_ADDRESS_HIGH(stage)), 2); 79 PUSH_DATAh(push, nvc0->screen->text->offset + prog->code_base); 80 PUSH_DATA (push, nvc0->screen->text->offset + prog->code_base); 81 } 82} 83 84void 85nvc0_vertprog_validate(struct nvc0_context *nvc0) 86{ 87 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 88 struct nvc0_program *vp = nvc0->vertprog; 89 90 if (!nvc0_program_validate(nvc0, vp)) 91 return; 92 nvc0_program_update_context_state(nvc0, vp, 0); 93 94 BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 1); 95 PUSH_DATA (push, 0x11); 96 nvc0_program_sp_start_id(nvc0, 1, vp); 97 BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1); 98 PUSH_DATA (push, vp->num_gprs); 99 100 // BEGIN_NVC0(push, NVC0_3D_(0x163c), 1); 101 // PUSH_DATA (push, 0); 102} 103 104void 105nvc0_fragprog_validate(struct nvc0_context *nvc0) 106{ 107 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 108 struct nvc0_program *fp = nvc0->fragprog; 109 struct pipe_rasterizer_state *rast = &nvc0->rast->pipe; 110 111 if (fp->fp.force_persample_interp != rast->force_persample_interp) { 112 /* Force the program to be reuploaded, which will trigger interp fixups 113 * to get applied 114 */ 115 if (fp->mem) 116 nouveau_heap_free(&fp->mem); 117 118 fp->fp.force_persample_interp = rast->force_persample_interp; 119 } 120 121 if (fp->fp.msaa != rast->multisample) { 122 /* Force the program to be reuploaded, which will trigger interp fixups 123 * to get applied 124 */ 125 if (fp->mem) 126 nouveau_heap_free(&fp->mem); 127 128 fp->fp.msaa = rast->multisample; 129 } 130 131 /* Shade model works well enough when both colors follow it. However if one 132 * (or both) is explicitly set, then we have to go the patching route. 133 */ 134 bool has_explicit_color = fp->fp.colors && 135 (((fp->fp.colors & 1) && !fp->fp.color_interp[0]) || 136 ((fp->fp.colors & 2) && !fp->fp.color_interp[1])); 137 bool hwflatshade = false; 138 if (has_explicit_color && fp->fp.flatshade != rast->flatshade) { 139 /* Force re-upload */ 140 if (fp->mem) 141 nouveau_heap_free(&fp->mem); 142 143 fp->fp.flatshade = rast->flatshade; 144 145 /* Always smooth-shade in this mode, the shader will decide on its own 146 * when to flat-shade. 147 */ 148 } else if (!has_explicit_color) { 149 hwflatshade = rast->flatshade; 150 151 /* No need to binary-patch the shader each time, make sure that it's set 152 * up for the default behaviour. 153 */ 154 fp->fp.flatshade = 0; 155 } 156 157 if (hwflatshade != nvc0->state.flatshade) { 158 nvc0->state.flatshade = hwflatshade; 159 BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1); 160 PUSH_DATA (push, hwflatshade ? NVC0_3D_SHADE_MODEL_FLAT : 161 NVC0_3D_SHADE_MODEL_SMOOTH); 162 } 163 164 if (fp->mem && !(nvc0->dirty_3d & NVC0_NEW_3D_FRAGPROG)) { 165 return; 166 } 167 168 if (!nvc0_program_validate(nvc0, fp)) 169 return; 170 nvc0_program_update_context_state(nvc0, fp, 4); 171 172 if (fp->fp.early_z != nvc0->state.early_z_forced) { 173 nvc0->state.early_z_forced = fp->fp.early_z; 174 IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp->fp.early_z); 175 } 176 if (fp->fp.post_depth_coverage != nvc0->state.post_depth_coverage) { 177 nvc0->state.post_depth_coverage = fp->fp.post_depth_coverage; 178 IMMED_NVC0(push, NVC0_3D(POST_DEPTH_COVERAGE), 179 fp->fp.post_depth_coverage); 180 } 181 182 BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 1); 183 PUSH_DATA (push, 0x51); 184 nvc0_program_sp_start_id(nvc0, 5, fp); 185 BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1); 186 PUSH_DATA (push, fp->num_gprs); 187 188 BEGIN_NVC0(push, SUBC_3D(0x0360), 2); 189 PUSH_DATA (push, 0x20164010); 190 PUSH_DATA (push, 0x20); 191 BEGIN_NVC0(push, NVC0_3D(ZCULL_TEST_MASK), 1); 192 PUSH_DATA (push, fp->flags[0]); 193} 194 195void 196nvc0_tctlprog_validate(struct nvc0_context *nvc0) 197{ 198 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 199 struct nvc0_program *tp = nvc0->tctlprog; 200 201 if (tp && nvc0_program_validate(nvc0, tp)) { 202 if (tp->tp.tess_mode != ~0) { 203 BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1); 204 PUSH_DATA (push, tp->tp.tess_mode); 205 } 206 BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1); 207 PUSH_DATA (push, 0x21); 208 nvc0_program_sp_start_id(nvc0, 2, tp); 209 BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1); 210 PUSH_DATA (push, tp->num_gprs); 211 } else { 212 tp = nvc0->tcp_empty; 213 /* not a whole lot we can do to handle this failure */ 214 if (!nvc0_program_validate(nvc0, tp)) 215 assert(!"unable to validate empty tcp"); 216 BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1); 217 PUSH_DATA (push, 0x20); 218 nvc0_program_sp_start_id(nvc0, 2, tp); 219 } 220 nvc0_program_update_context_state(nvc0, tp, 1); 221} 222 223void 224nvc0_tevlprog_validate(struct nvc0_context *nvc0) 225{ 226 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 227 struct nvc0_program *tp = nvc0->tevlprog; 228 229 if (tp && nvc0_program_validate(nvc0, tp)) { 230 if (tp->tp.tess_mode != ~0) { 231 BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1); 232 PUSH_DATA (push, tp->tp.tess_mode); 233 } 234 BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1); 235 PUSH_DATA (push, 0x31); 236 nvc0_program_sp_start_id(nvc0, 3, tp); 237 BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1); 238 PUSH_DATA (push, tp->num_gprs); 239 } else { 240 BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1); 241 PUSH_DATA (push, 0x30); 242 } 243 nvc0_program_update_context_state(nvc0, tp, 2); 244} 245 246void 247nvc0_gmtyprog_validate(struct nvc0_context *nvc0) 248{ 249 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 250 struct nvc0_program *gp = nvc0->gmtyprog; 251 252 /* we allow GPs with no code for specifying stream output state only */ 253 if (gp && nvc0_program_validate(nvc0, gp) && gp->code_size) { 254 BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1); 255 PUSH_DATA (push, 0x41); 256 nvc0_program_sp_start_id(nvc0, 4, gp); 257 BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1); 258 PUSH_DATA (push, gp->num_gprs); 259 } else { 260 BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1); 261 PUSH_DATA (push, 0x40); 262 } 263 nvc0_program_update_context_state(nvc0, gp, 3); 264} 265 266void 267nvc0_compprog_validate(struct nvc0_context *nvc0) 268{ 269 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 270 struct nvc0_program *cp = nvc0->compprog; 271 272 if (cp && !nvc0_program_validate(nvc0, cp)) 273 return; 274 275 BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); 276 PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE); 277} 278 279void 280nvc0_layer_validate(struct nvc0_context *nvc0) 281{ 282 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 283 struct nvc0_program *last; 284 bool prog_selects_layer = false; 285 bool layer_viewport_relative = false; 286 287 if (nvc0->gmtyprog) 288 last = nvc0->gmtyprog; 289 else if (nvc0->tevlprog) 290 last = nvc0->tevlprog; 291 else 292 last = nvc0->vertprog; 293 294 if (last) { 295 prog_selects_layer = !!(last->hdr[13] & (1 << 9)); 296 layer_viewport_relative = last->vp.layer_viewport_relative; 297 } 298 299 BEGIN_NVC0(push, NVC0_3D(LAYER), 1); 300 PUSH_DATA (push, prog_selects_layer ? NVC0_3D_LAYER_USE_GP : 0); 301 if (nvc0->screen->eng3d->oclass >= GM200_3D_CLASS) { 302 IMMED_NVC0(push, NVC0_3D(LAYER_VIEWPORT_RELATIVE), 303 layer_viewport_relative); 304 } 305} 306 307void 308nvc0_tfb_validate(struct nvc0_context *nvc0) 309{ 310 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 311 struct nvc0_transform_feedback_state *tfb; 312 unsigned b; 313 314 if (nvc0->gmtyprog) tfb = nvc0->gmtyprog->tfb; 315 else 316 if (nvc0->tevlprog) tfb = nvc0->tevlprog->tfb; 317 else 318 tfb = nvc0->vertprog->tfb; 319 320 IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), (tfb && nvc0->num_tfbbufs) ? 1 : 0); 321 322 if (tfb && tfb != nvc0->state.tfb) { 323 for (b = 0; b < 4; ++b) { 324 if (tfb->varying_count[b]) { 325 unsigned n = (tfb->varying_count[b] + 3) / 4; 326 327 BEGIN_NVC0(push, NVC0_3D(TFB_STREAM(b)), 3); 328 PUSH_DATA (push, tfb->stream[b]); 329 PUSH_DATA (push, tfb->varying_count[b]); 330 PUSH_DATA (push, tfb->stride[b]); 331 BEGIN_NVC0(push, NVC0_3D(TFB_VARYING_LOCS(b, 0)), n); 332 PUSH_DATAp(push, tfb->varying_index[b], n); 333 334 if (nvc0->tfbbuf[b]) 335 nvc0_so_target(nvc0->tfbbuf[b])->stride = tfb->stride[b]; 336 } else { 337 IMMED_NVC0(push, NVC0_3D(TFB_VARYING_COUNT(b)), 0); 338 } 339 } 340 } 341 nvc0->state.tfb = tfb; 342 343 if (!(nvc0->dirty_3d & NVC0_NEW_3D_TFB_TARGETS)) 344 return; 345 346 for (b = 0; b < nvc0->num_tfbbufs; ++b) { 347 struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]); 348 struct nv04_resource *buf; 349 350 if (targ && tfb) 351 targ->stride = tfb->stride[b]; 352 353 if (!targ || !targ->stride) { 354 IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0); 355 continue; 356 } 357 358 buf = nv04_resource(targ->pipe.buffer); 359 360 BCTX_REFN(nvc0->bufctx_3d, 3D_TFB, buf, WR); 361 362 if (!(nvc0->tfbbuf_dirty & (1 << b))) 363 continue; 364 365 if (!targ->clean) 366 nvc0_hw_query_fifo_wait(nvc0, nvc0_query(targ->pq)); 367 nouveau_pushbuf_space(push, 0, 0, 1); 368 BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5); 369 PUSH_DATA (push, 1); 370 PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset); 371 PUSH_DATA (push, buf->address + targ->pipe.buffer_offset); 372 PUSH_DATA (push, targ->pipe.buffer_size); 373 if (!targ->clean) { 374 nvc0_hw_query_pushbuf_submit(push, nvc0_query(targ->pq), 0x4); 375 } else { 376 PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */ 377 targ->clean = false; 378 } 379 } 380 for (; b < 4; ++b) 381 IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0); 382} 383