1#include "util/u_format.h" 2#include "util/u_framebuffer.h" 3#include "util/u_math.h" 4#include "util/u_viewport.h" 5 6#include "nvc0/nvc0_context.h" 7 8#if 0 9static void 10nvc0_validate_zcull(struct nvc0_context *nvc0) 11{ 12 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 13 struct pipe_framebuffer_state *fb = &nvc0->framebuffer; 14 struct nv50_surface *sf = nv50_surface(fb->zsbuf); 15 struct nv50_miptree *mt = nv50_miptree(sf->base.texture); 16 struct nouveau_bo *bo = mt->base.bo; 17 uint32_t size; 18 uint32_t offset = align(mt->total_size, 1 << 17); 19 unsigned width, height; 20 21 assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2); 22 23 size = mt->total_size * 2; 24 25 height = align(fb->height, 32); 26 width = fb->width % 224; 27 if (width) 28 width = fb->width + (224 - width); 29 else 30 width = fb->width; 31 32 BEGIN_NVC0(push, NVC0_3D(ZCULL_REGION), 1); 33 PUSH_DATA (push, 0); 34 BEGIN_NVC0(push, NVC0_3D(ZCULL_ADDRESS_HIGH), 2); 35 PUSH_DATAh(push, bo->offset + offset); 36 PUSH_DATA (push, bo->offset + offset); 37 offset += 1 << 17; 38 BEGIN_NVC0(push, NVC0_3D(ZCULL_LIMIT_HIGH), 2); 39 PUSH_DATAh(push, bo->offset + offset); 40 PUSH_DATA (push, bo->offset + offset); 41 BEGIN_NVC0(push, SUBC_3D(0x07e0), 2); 42 PUSH_DATA (push, size); 43 PUSH_DATA (push, size >> 16); 44 BEGIN_NVC0(push, SUBC_3D(0x15c8), 1); /* bits 0x3 */ 45 PUSH_DATA (push, 2); 46 BEGIN_NVC0(push, NVC0_3D(ZCULL_WIDTH), 4); 47 PUSH_DATA (push, width); 48 PUSH_DATA (push, height); 49 PUSH_DATA (push, 1); 50 PUSH_DATA (push, 0); 51 BEGIN_NVC0(push, NVC0_3D(ZCULL_WINDOW_OFFSET_X), 2); 52 PUSH_DATA (push, 0); 53 PUSH_DATA (push, 0); 54 BEGIN_NVC0(push, NVC0_3D(ZCULL_INVALIDATE), 1); 55 PUSH_DATA (push, 0); 56} 57#endif 58 59static inline void 60nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers) 61{ 62 BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9); 63 PUSH_DATA (push, 0); 64 PUSH_DATA (push, 0); 65 PUSH_DATA (push, 64); // width 66 PUSH_DATA (push, 0); // height 67 PUSH_DATA (push, 0); // format 68 PUSH_DATA (push, 0); // tile mode 69 PUSH_DATA (push, layers); // layers 70 PUSH_DATA (push, 0); // layer stride 71 PUSH_DATA (push, 0); // base layer 72} 73 74static uint32_t 75gm200_encode_cb_sample_location(uint8_t x, uint8_t y) 76{ 77 static const uint8_t lut[] = { 78 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 79 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7}; 80 uint32_t result = 0; 81 /* S0.12 representation for TGSI_OPCODE_INTERP_SAMPLE */ 82 result |= lut[x] << 8 | lut[y] << 24; 83 /* fill in gaps with data in a representation for SV_SAMPLE_POS */ 84 result |= x << 12 | y << 28; 85 return result; 86} 87 88static void 89gm200_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms) 90{ 91 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 92 struct nvc0_screen *screen = nvc0->screen; 93 unsigned grid_width, grid_height, hw_grid_width; 94 uint8_t sample_locations[16][2]; 95 unsigned cb[64]; 96 unsigned i, pixel, pixel_y, pixel_x, sample; 97 uint32_t packed_locations[4] = {}; 98 99 screen->base.base.get_sample_pixel_grid( 100 &screen->base.base, ms, &grid_width, &grid_height); 101 102 hw_grid_width = grid_width; 103 if (ms == 1) /* get_sample_pixel_grid() exposes 2x4 for 1x msaa */ 104 hw_grid_width = 4; 105 106 if (nvc0->sample_locations_enabled) { 107 uint8_t locations[2 * 4 * 8]; 108 memcpy(locations, nvc0->sample_locations, sizeof(locations)); 109 util_sample_locations_flip_y( 110 &screen->base.base, nvc0->framebuffer.height, ms, locations); 111 112 for (pixel = 0; pixel < hw_grid_width*grid_height; pixel++) { 113 for (sample = 0; sample < ms; sample++) { 114 unsigned pixel_x = pixel % hw_grid_width; 115 unsigned pixel_y = pixel / hw_grid_width; 116 unsigned wi = pixel * ms + sample; 117 unsigned ri = (pixel_y * grid_width + pixel_x % grid_width); 118 ri = ri * ms + sample; 119 sample_locations[wi][0] = locations[ri] & 0xf; 120 sample_locations[wi][1] = 16 - (locations[ri] >> 4); 121 } 122 } 123 } else { 124 const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms); 125 for (i = 0; i < 16; i++) { 126 sample_locations[i][0] = ptr[i % ms][0]; 127 sample_locations[i][1] = ptr[i % ms][1]; 128 } 129 } 130 131 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); 132 PUSH_DATA (push, NVC0_CB_AUX_SIZE); 133 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); 134 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); 135 BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 64); 136 PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO); 137 for (pixel_y = 0; pixel_y < 4; pixel_y++) { 138 for (pixel_x = 0; pixel_x < 2; pixel_x++) { 139 for (sample = 0; sample < ms; sample++) { 140 unsigned write_index = (pixel_y * 2 + pixel_x) * 8 + sample; 141 unsigned read_index = pixel_y % grid_height * hw_grid_width; 142 uint8_t x, y; 143 read_index += pixel_x % grid_width; 144 read_index = read_index * ms + sample; 145 x = sample_locations[read_index][0]; 146 y = sample_locations[read_index][1]; 147 cb[write_index] = gm200_encode_cb_sample_location(x, y); 148 } 149 } 150 } 151 PUSH_DATAp(push, cb, 64); 152 153 for (i = 0; i < 16; i++) { 154 packed_locations[i / 4] |= sample_locations[i][0] << ((i % 4) * 8); 155 packed_locations[i / 4] |= sample_locations[i][1] << ((i % 4) * 8 + 4); 156 } 157 158 BEGIN_NVC0(push, SUBC_3D(0x11e0), 4); 159 PUSH_DATAp(push, packed_locations, 4); 160} 161 162static void 163nvc0_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms) 164{ 165 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 166 struct nvc0_screen *screen = nvc0->screen; 167 unsigned i; 168 169 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); 170 PUSH_DATA (push, NVC0_CB_AUX_SIZE); 171 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); 172 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); 173 BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms); 174 PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO); 175 for (i = 0; i < ms; i++) { 176 float xy[2]; 177 nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy); 178 PUSH_DATAf(push, xy[0]); 179 PUSH_DATAf(push, xy[1]); 180 } 181} 182 183static void 184validate_sample_locations(struct nvc0_context *nvc0) 185{ 186 unsigned ms = util_framebuffer_get_num_samples(&nvc0->framebuffer); 187 188 if (nvc0->screen->base.class_3d >= GM200_3D_CLASS) 189 gm200_validate_sample_locations(nvc0, ms); 190 else 191 nvc0_validate_sample_locations(nvc0, ms); 192} 193 194static void 195nvc0_validate_fb(struct nvc0_context *nvc0) 196{ 197 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 198 struct pipe_framebuffer_state *fb = &nvc0->framebuffer; 199 unsigned i; 200 unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1; 201 unsigned nr_cbufs = fb->nr_cbufs; 202 bool serialize = false; 203 204 nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB); 205 206 BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2); 207 PUSH_DATA (push, fb->width << 16); 208 PUSH_DATA (push, fb->height << 16); 209 210 for (i = 0; i < fb->nr_cbufs; ++i) { 211 struct nv50_surface *sf; 212 struct nv04_resource *res; 213 struct nouveau_bo *bo; 214 215 if (!fb->cbufs[i]) { 216 nvc0_fb_set_null_rt(push, i, 0); 217 continue; 218 } 219 220 sf = nv50_surface(fb->cbufs[i]); 221 res = nv04_resource(sf->base.texture); 222 bo = res->bo; 223 224 BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9); 225 PUSH_DATAh(push, res->address + sf->offset); 226 PUSH_DATA (push, res->address + sf->offset); 227 if (likely(nouveau_bo_memtype(bo))) { 228 struct nv50_miptree *mt = nv50_miptree(sf->base.texture); 229 230 assert(sf->base.texture->target != PIPE_BUFFER); 231 232 PUSH_DATA(push, sf->width); 233 PUSH_DATA(push, sf->height); 234 PUSH_DATA(push, nvc0_format_table[sf->base.format].rt); 235 PUSH_DATA(push, (mt->layout_3d << 16) | 236 mt->level[sf->base.u.tex.level].tile_mode); 237 PUSH_DATA(push, sf->base.u.tex.first_layer + sf->depth); 238 PUSH_DATA(push, mt->layer_stride >> 2); 239 PUSH_DATA(push, sf->base.u.tex.first_layer); 240 241 ms_mode = mt->ms_mode; 242 } else { 243 if (res->base.target == PIPE_BUFFER) { 244 PUSH_DATA(push, 262144); 245 PUSH_DATA(push, 1); 246 } else { 247 PUSH_DATA(push, nv50_miptree(sf->base.texture)->level[0].pitch); 248 PUSH_DATA(push, sf->height); 249 } 250 PUSH_DATA(push, nvc0_format_table[sf->base.format].rt); 251 PUSH_DATA(push, 1 << 12); 252 PUSH_DATA(push, 1); 253 PUSH_DATA(push, 0); 254 PUSH_DATA(push, 0); 255 256 nvc0_resource_fence(res, NOUVEAU_BO_WR); 257 258 assert(!fb->zsbuf); 259 } 260 261 if (res->status & NOUVEAU_BUFFER_STATUS_GPU_READING) 262 serialize = true; 263 res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; 264 res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; 265 266 /* only register for writing, otherwise we'd always serialize here */ 267 BCTX_REFN(nvc0->bufctx_3d, 3D_FB, res, WR); 268 } 269 270 if (fb->zsbuf) { 271 struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture); 272 struct nv50_surface *sf = nv50_surface(fb->zsbuf); 273 int unk = mt->base.base.target == PIPE_TEXTURE_2D; 274 275 BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5); 276 PUSH_DATAh(push, mt->base.address + sf->offset); 277 PUSH_DATA (push, mt->base.address + sf->offset); 278 PUSH_DATA (push, nvc0_format_table[fb->zsbuf->format].rt); 279 PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode); 280 PUSH_DATA (push, mt->layer_stride >> 2); 281 BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1); 282 PUSH_DATA (push, 1); 283 BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3); 284 PUSH_DATA (push, sf->width); 285 PUSH_DATA (push, sf->height); 286 PUSH_DATA (push, (unk << 16) | 287 (sf->base.u.tex.first_layer + sf->depth)); 288 BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1); 289 PUSH_DATA (push, sf->base.u.tex.first_layer); 290 291 ms_mode = mt->ms_mode; 292 293 if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING) 294 serialize = true; 295 mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; 296 mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; 297 298 BCTX_REFN(nvc0->bufctx_3d, 3D_FB, &mt->base, WR); 299 } else { 300 BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1); 301 PUSH_DATA (push, 0); 302 } 303 304 if (nr_cbufs == 0 && !fb->zsbuf) { 305 assert(util_is_power_of_two_or_zero(fb->samples)); 306 assert(fb->samples <= 8); 307 308 nvc0_fb_set_null_rt(push, 0, fb->layers); 309 310 if (fb->samples > 1) 311 ms_mode = ffs(fb->samples) - 1; 312 nr_cbufs = 1; 313 } 314 315 BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1); 316 PUSH_DATA (push, (076543210 << 4) | nr_cbufs); 317 IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode); 318 319 if (serialize) 320 IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); 321 322 NOUVEAU_DRV_STAT(&nvc0->screen->base, gpu_serialize_count, serialize); 323} 324 325static void 326nvc0_validate_blend_colour(struct nvc0_context *nvc0) 327{ 328 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 329 330 BEGIN_NVC0(push, NVC0_3D(BLEND_COLOR(0)), 4); 331 PUSH_DATAf(push, nvc0->blend_colour.color[0]); 332 PUSH_DATAf(push, nvc0->blend_colour.color[1]); 333 PUSH_DATAf(push, nvc0->blend_colour.color[2]); 334 PUSH_DATAf(push, nvc0->blend_colour.color[3]); 335} 336 337static void 338nvc0_validate_stencil_ref(struct nvc0_context *nvc0) 339{ 340 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 341 const ubyte *ref = &nvc0->stencil_ref.ref_value[0]; 342 343 IMMED_NVC0(push, NVC0_3D(STENCIL_FRONT_FUNC_REF), ref[0]); 344 IMMED_NVC0(push, NVC0_3D(STENCIL_BACK_FUNC_REF), ref[1]); 345} 346 347static void 348nvc0_validate_stipple(struct nvc0_context *nvc0) 349{ 350 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 351 unsigned i; 352 353 BEGIN_NVC0(push, NVC0_3D(POLYGON_STIPPLE_PATTERN(0)), 32); 354 for (i = 0; i < 32; ++i) 355 PUSH_DATA(push, util_bswap32(nvc0->stipple.stipple[i])); 356} 357 358static void 359nvc0_validate_scissor(struct nvc0_context *nvc0) 360{ 361 int i; 362 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 363 364 if (!(nvc0->dirty_3d & NVC0_NEW_3D_SCISSOR) && 365 nvc0->rast->pipe.scissor == nvc0->state.scissor) 366 return; 367 368 if (nvc0->state.scissor != nvc0->rast->pipe.scissor) 369 nvc0->scissors_dirty = (1 << NVC0_MAX_VIEWPORTS) - 1; 370 371 nvc0->state.scissor = nvc0->rast->pipe.scissor; 372 373 for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) { 374 struct pipe_scissor_state *s = &nvc0->scissors[i]; 375 if (!(nvc0->scissors_dirty & (1 << i))) 376 continue; 377 378 BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(i)), 2); 379 if (nvc0->rast->pipe.scissor) { 380 PUSH_DATA(push, (s->maxx << 16) | s->minx); 381 PUSH_DATA(push, (s->maxy << 16) | s->miny); 382 } else { 383 PUSH_DATA(push, (0xffff << 16) | 0); 384 PUSH_DATA(push, (0xffff << 16) | 0); 385 } 386 } 387 nvc0->scissors_dirty = 0; 388} 389 390static void 391nvc0_validate_viewport(struct nvc0_context *nvc0) 392{ 393 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 394 int x, y, w, h, i; 395 float zmin, zmax; 396 397 for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) { 398 struct pipe_viewport_state *vp = &nvc0->viewports[i]; 399 400 if (!(nvc0->viewports_dirty & (1 << i))) 401 continue; 402 403 BEGIN_NVC0(push, NVC0_3D(VIEWPORT_TRANSLATE_X(i)), 3); 404 PUSH_DATAf(push, vp->translate[0]); 405 PUSH_DATAf(push, vp->translate[1]); 406 PUSH_DATAf(push, vp->translate[2]); 407 408 BEGIN_NVC0(push, NVC0_3D(VIEWPORT_SCALE_X(i)), 3); 409 PUSH_DATAf(push, vp->scale[0]); 410 PUSH_DATAf(push, vp->scale[1]); 411 PUSH_DATAf(push, vp->scale[2]); 412 413 /* now set the viewport rectangle to viewport dimensions for clipping */ 414 415 x = util_iround(MAX2(0.0f, vp->translate[0] - fabsf(vp->scale[0]))); 416 y = util_iround(MAX2(0.0f, vp->translate[1] - fabsf(vp->scale[1]))); 417 w = util_iround(vp->translate[0] + fabsf(vp->scale[0])) - x; 418 h = util_iround(vp->translate[1] + fabsf(vp->scale[1])) - y; 419 420 BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(i)), 2); 421 PUSH_DATA (push, (w << 16) | x); 422 PUSH_DATA (push, (h << 16) | y); 423 424 /* If the halfz setting ever changes, the viewports will also get 425 * updated. The rast will get updated before the validate function has a 426 * chance to hit, so we can just use it directly without an atom 427 * dependency. 428 */ 429 util_viewport_zmin_zmax(vp, nvc0->rast->pipe.clip_halfz, &zmin, &zmax); 430 431 BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(i)), 2); 432 PUSH_DATAf(push, zmin); 433 PUSH_DATAf(push, zmax); 434 } 435 nvc0->viewports_dirty = 0; 436} 437 438static void 439nvc0_validate_window_rects(struct nvc0_context *nvc0) 440{ 441 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 442 bool enable = nvc0->window_rect.rects > 0 || nvc0->window_rect.inclusive; 443 int i; 444 445 IMMED_NVC0(push, NVC0_3D(CLIP_RECTS_EN), enable); 446 if (!enable) 447 return; 448 449 IMMED_NVC0(push, NVC0_3D(CLIP_RECTS_MODE), !nvc0->window_rect.inclusive); 450 BEGIN_NVC0(push, NVC0_3D(CLIP_RECT_HORIZ(0)), NVC0_MAX_WINDOW_RECTANGLES * 2); 451 for (i = 0; i < nvc0->window_rect.rects; i++) { 452 struct pipe_scissor_state *s = &nvc0->window_rect.rect[i]; 453 PUSH_DATA(push, (s->maxx << 16) | s->minx); 454 PUSH_DATA(push, (s->maxy << 16) | s->miny); 455 } 456 for (; i < NVC0_MAX_WINDOW_RECTANGLES; i++) { 457 PUSH_DATA(push, 0); 458 PUSH_DATA(push, 0); 459 } 460} 461 462static inline void 463nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s) 464{ 465 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 466 struct nvc0_screen *screen = nvc0->screen; 467 468 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); 469 PUSH_DATA (push, NVC0_CB_AUX_SIZE); 470 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); 471 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); 472 BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1); 473 PUSH_DATA (push, NVC0_CB_AUX_UCP_INFO); 474 PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4); 475} 476 477static inline void 478nvc0_check_program_ucps(struct nvc0_context *nvc0, 479 struct nvc0_program *vp, uint8_t mask) 480{ 481 const unsigned n = util_logbase2(mask) + 1; 482 483 if (vp->vp.num_ucps >= n) 484 return; 485 nvc0_program_destroy(nvc0, vp); 486 487 vp->vp.num_ucps = n; 488 if (likely(vp == nvc0->vertprog)) 489 nvc0_vertprog_validate(nvc0); 490 else 491 if (likely(vp == nvc0->gmtyprog)) 492 nvc0_gmtyprog_validate(nvc0); 493 else 494 nvc0_tevlprog_validate(nvc0); 495} 496 497static void 498nvc0_validate_clip(struct nvc0_context *nvc0) 499{ 500 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 501 struct nvc0_program *vp; 502 unsigned stage; 503 uint8_t clip_enable = nvc0->rast->pipe.clip_plane_enable; 504 505 if (nvc0->gmtyprog) { 506 stage = 3; 507 vp = nvc0->gmtyprog; 508 } else 509 if (nvc0->tevlprog) { 510 stage = 2; 511 vp = nvc0->tevlprog; 512 } else { 513 stage = 0; 514 vp = nvc0->vertprog; 515 } 516 517 if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES) 518 nvc0_check_program_ucps(nvc0, vp, clip_enable); 519 520 if (nvc0->dirty_3d & (NVC0_NEW_3D_CLIP | (NVC0_NEW_3D_VERTPROG << stage))) 521 if (vp->vp.num_ucps > 0 && vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES) 522 nvc0_upload_uclip_planes(nvc0, stage); 523 524 clip_enable &= vp->vp.clip_enable; 525 clip_enable |= vp->vp.cull_enable; 526 527 if (nvc0->state.clip_enable != clip_enable) { 528 nvc0->state.clip_enable = clip_enable; 529 IMMED_NVC0(push, NVC0_3D(CLIP_DISTANCE_ENABLE), clip_enable); 530 } 531 if (nvc0->state.clip_mode != vp->vp.clip_mode) { 532 nvc0->state.clip_mode = vp->vp.clip_mode; 533 BEGIN_NVC0(push, NVC0_3D(CLIP_DISTANCE_MODE), 1); 534 PUSH_DATA (push, vp->vp.clip_mode); 535 } 536} 537 538static void 539nvc0_validate_blend(struct nvc0_context *nvc0) 540{ 541 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 542 543 PUSH_SPACE(push, nvc0->blend->size); 544 PUSH_DATAp(push, nvc0->blend->state, nvc0->blend->size); 545} 546 547static void 548nvc0_validate_zsa(struct nvc0_context *nvc0) 549{ 550 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 551 552 PUSH_SPACE(push, nvc0->zsa->size); 553 PUSH_DATAp(push, nvc0->zsa->state, nvc0->zsa->size); 554} 555 556static void 557nvc0_validate_rasterizer(struct nvc0_context *nvc0) 558{ 559 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 560 561 PUSH_SPACE(push, nvc0->rast->size); 562 PUSH_DATAp(push, nvc0->rast->state, nvc0->rast->size); 563} 564 565static void 566nvc0_constbufs_validate(struct nvc0_context *nvc0) 567{ 568 unsigned s; 569 570 bool can_serialize = true; 571 572 for (s = 0; s < 5; ++s) { 573 while (nvc0->constbuf_dirty[s]) { 574 int i = ffs(nvc0->constbuf_dirty[s]) - 1; 575 nvc0->constbuf_dirty[s] &= ~(1 << i); 576 577 if (nvc0->constbuf[s][i].user) { 578 struct nouveau_bo *bo = nvc0->screen->uniform_bo; 579 const unsigned base = NVC0_CB_USR_INFO(s); 580 const unsigned size = nvc0->constbuf[s][0].size; 581 assert(i == 0); /* we really only want OpenGL uniforms here */ 582 assert(nvc0->constbuf[s][0].u.data); 583 584 if (!nvc0->state.uniform_buffer_bound[s]) { 585 nvc0->state.uniform_buffer_bound[s] = true; 586 587 nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i, 588 NVC0_MAX_CONSTBUF_SIZE, bo->offset + base); 589 } 590 nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base), 591 base, NVC0_MAX_CONSTBUF_SIZE, 592 0, (size + 3) / 4, 593 nvc0->constbuf[s][0].u.data); 594 } else { 595 struct nv04_resource *res = 596 nv04_resource(nvc0->constbuf[s][i].u.buf); 597 if (res) { 598 nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i, 599 nvc0->constbuf[s][i].size, 600 res->address + nvc0->constbuf[s][i].offset); 601 602 BCTX_REFN(nvc0->bufctx_3d, 3D_CB(s, i), res, RD); 603 604 nvc0->cb_dirty = 1; /* Force cache flush for UBO. */ 605 res->cb_bindings[s] |= 1 << i; 606 607 if (i == 0) 608 nvc0->state.uniform_buffer_bound[s] = false; 609 } else if (i != 0) { 610 nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i, -1, 0); 611 } 612 } 613 } 614 } 615 616 if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) { 617 /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */ 618 nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF; 619 nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[5]; 620 nvc0->state.uniform_buffer_bound[5] = false; 621 } 622} 623 624static void 625nvc0_validate_buffers(struct nvc0_context *nvc0) 626{ 627 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 628 struct nvc0_screen *screen = nvc0->screen; 629 int i, s; 630 631 for (s = 0; s < 5; s++) { 632 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); 633 PUSH_DATA (push, NVC0_CB_AUX_SIZE); 634 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); 635 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); 636 BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS); 637 PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0)); 638 for (i = 0; i < NVC0_MAX_BUFFERS; i++) { 639 if (nvc0->buffers[s][i].buffer) { 640 struct nv04_resource *res = 641 nv04_resource(nvc0->buffers[s][i].buffer); 642 PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset); 643 PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset); 644 PUSH_DATA (push, nvc0->buffers[s][i].buffer_size); 645 PUSH_DATA (push, 0); 646 BCTX_REFN(nvc0->bufctx_3d, 3D_BUF, res, RDWR); 647 util_range_add(&res->valid_buffer_range, 648 nvc0->buffers[s][i].buffer_offset, 649 nvc0->buffers[s][i].buffer_offset + 650 nvc0->buffers[s][i].buffer_size); 651 } else { 652 PUSH_DATA (push, 0); 653 PUSH_DATA (push, 0); 654 PUSH_DATA (push, 0); 655 PUSH_DATA (push, 0); 656 } 657 } 658 } 659 660} 661 662static void 663nvc0_validate_sample_mask(struct nvc0_context *nvc0) 664{ 665 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 666 667 unsigned mask[4] = 668 { 669 nvc0->sample_mask & 0xffff, 670 nvc0->sample_mask & 0xffff, 671 nvc0->sample_mask & 0xffff, 672 nvc0->sample_mask & 0xffff 673 }; 674 675 BEGIN_NVC0(push, NVC0_3D(MSAA_MASK(0)), 4); 676 PUSH_DATA (push, mask[0]); 677 PUSH_DATA (push, mask[1]); 678 PUSH_DATA (push, mask[2]); 679 PUSH_DATA (push, mask[3]); 680} 681 682static void 683nvc0_validate_min_samples(struct nvc0_context *nvc0) 684{ 685 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 686 int samples; 687 688 samples = util_next_power_of_two(nvc0->min_samples); 689 if (samples > 1) { 690 // If we're using the incoming sample mask and doing sample shading, we 691 // have to do sample shading "to the max", otherwise there's no way to 692 // tell which sets of samples are covered by the current invocation. 693 // Similarly for reading the framebuffer. 694 if (nvc0->fragprog && ( 695 nvc0->fragprog->fp.sample_mask_in || 696 nvc0->fragprog->fp.reads_framebuffer)) 697 samples = util_framebuffer_get_num_samples(&nvc0->framebuffer); 698 samples |= NVC0_3D_SAMPLE_SHADING_ENABLE; 699 } 700 701 IMMED_NVC0(push, NVC0_3D(SAMPLE_SHADING), samples); 702} 703 704static void 705nvc0_validate_driverconst(struct nvc0_context *nvc0) 706{ 707 struct nvc0_screen *screen = nvc0->screen; 708 int i; 709 710 for (i = 0; i < 5; ++i) 711 nvc0_screen_bind_cb_3d(screen, NULL, i, 15, NVC0_CB_AUX_SIZE, 712 screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); 713 714 nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST; 715} 716 717static void 718nvc0_validate_fp_zsa_rast(struct nvc0_context *nvc0) 719{ 720 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 721 bool rasterizer_discard; 722 723 if (nvc0->rast && nvc0->rast->pipe.rasterizer_discard) { 724 rasterizer_discard = true; 725 } else { 726 bool zs = nvc0->zsa && 727 (nvc0->zsa->pipe.depth.enabled || nvc0->zsa->pipe.stencil[0].enabled); 728 rasterizer_discard = !zs && 729 (!nvc0->fragprog || !nvc0->fragprog->hdr[18]); 730 } 731 732 if (rasterizer_discard != nvc0->state.rasterizer_discard) { 733 nvc0->state.rasterizer_discard = rasterizer_discard; 734 IMMED_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), !rasterizer_discard); 735 } 736} 737 738/* alpha test is disabled if there are no color RTs, so make sure we have at 739 * least one if alpha test is enabled. Note that this must run after 740 * nvc0_validate_fb, otherwise that will override the RT count setting. 741 */ 742static void 743nvc0_validate_zsa_fb(struct nvc0_context *nvc0) 744{ 745 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 746 747 if (nvc0->zsa && nvc0->zsa->pipe.alpha.enabled && 748 nvc0->framebuffer.zsbuf && 749 nvc0->framebuffer.nr_cbufs == 0) { 750 nvc0_fb_set_null_rt(push, 0, 0); 751 BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1); 752 PUSH_DATA (push, (076543210 << 4) | 1); 753 } 754} 755 756static void 757nvc0_validate_rast_fb(struct nvc0_context *nvc0) 758{ 759 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 760 struct pipe_framebuffer_state *fb = &nvc0->framebuffer; 761 struct pipe_rasterizer_state *rast = &nvc0->rast->pipe; 762 763 if (!rast) 764 return; 765 766 if (rast->offset_units_unscaled) { 767 BEGIN_NVC0(push, NVC0_3D(POLYGON_OFFSET_UNITS), 1); 768 if (fb->zsbuf && fb->zsbuf->format == PIPE_FORMAT_Z16_UNORM) 769 PUSH_DATAf(push, rast->offset_units * (1 << 16)); 770 else 771 PUSH_DATAf(push, rast->offset_units * (1 << 24)); 772 } 773} 774 775 776static void 777nvc0_validate_tess_state(struct nvc0_context *nvc0) 778{ 779 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 780 781 BEGIN_NVC0(push, NVC0_3D(TESS_LEVEL_OUTER(0)), 6); 782 PUSH_DATAp(push, nvc0->default_tess_outer, 4); 783 PUSH_DATAp(push, nvc0->default_tess_inner, 2); 784} 785 786/* If we have a frag shader bound which tries to read from the framebuffer, we 787 * have to make sure that the fb is bound as a texture in the expected 788 * location. For Fermi, that's in the special driver slot 16, while for Kepler 789 * it's a regular binding stored in the driver constbuf. 790 */ 791static void 792nvc0_validate_fbread(struct nvc0_context *nvc0) 793{ 794 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 795 struct nvc0_screen *screen = nvc0->screen; 796 struct pipe_context *pipe = &nvc0->base.pipe; 797 struct pipe_sampler_view *old_view = nvc0->fbtexture; 798 struct pipe_sampler_view *new_view = NULL; 799 800 if (nvc0->fragprog && 801 nvc0->fragprog->fp.reads_framebuffer && 802 nvc0->framebuffer.nr_cbufs && 803 nvc0->framebuffer.cbufs[0]) { 804 struct pipe_sampler_view tmpl; 805 struct pipe_surface *sf = nvc0->framebuffer.cbufs[0]; 806 807 tmpl.target = PIPE_TEXTURE_2D_ARRAY; 808 tmpl.format = sf->format; 809 tmpl.u.tex.first_level = tmpl.u.tex.last_level = sf->u.tex.level; 810 tmpl.u.tex.first_layer = sf->u.tex.first_layer; 811 tmpl.u.tex.last_layer = sf->u.tex.last_layer; 812 tmpl.swizzle_r = PIPE_SWIZZLE_X; 813 tmpl.swizzle_g = PIPE_SWIZZLE_Y; 814 tmpl.swizzle_b = PIPE_SWIZZLE_Z; 815 tmpl.swizzle_a = PIPE_SWIZZLE_W; 816 817 /* Bail if it's the same parameters */ 818 if (old_view && old_view->texture == sf->texture && 819 old_view->format == sf->format && 820 old_view->u.tex.first_level == sf->u.tex.level && 821 old_view->u.tex.first_layer == sf->u.tex.first_layer && 822 old_view->u.tex.last_layer == sf->u.tex.last_layer) 823 return; 824 825 new_view = pipe->create_sampler_view(pipe, sf->texture, &tmpl); 826 } else if (old_view == NULL) { 827 return; 828 } 829 830 if (old_view) 831 pipe_sampler_view_reference(&nvc0->fbtexture, NULL); 832 nvc0->fbtexture = new_view; 833 834 if (new_view) { 835 struct nv50_tic_entry *tic = nv50_tic_entry(new_view); 836 assert(tic->id < 0); 837 tic->id = nvc0_screen_tic_alloc(screen, tic); 838 nvc0->base.push_data(&nvc0->base, screen->txc, tic->id * 32, 839 NV_VRAM_DOMAIN(&screen->base), 32, tic->tic); 840 screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); 841 842 if (screen->base.class_3d >= NVE4_3D_CLASS) { 843 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); 844 PUSH_DATA (push, NVC0_CB_AUX_SIZE); 845 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); 846 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); 847 BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 1); 848 PUSH_DATA (push, NVC0_CB_AUX_FB_TEX_INFO); 849 PUSH_DATA (push, (0 << 20) | tic->id); 850 } else { 851 BEGIN_NVC0(push, NVC0_3D(BIND_TIC2(0)), 1); 852 PUSH_DATA (push, (tic->id << 9) | 1); 853 } 854 855 IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0); 856 } 857} 858 859static void 860nvc0_switch_pipe_context(struct nvc0_context *ctx_to) 861{ 862 struct nvc0_context *ctx_from = ctx_to->screen->cur_ctx; 863 unsigned s; 864 865 if (ctx_from) 866 ctx_to->state = ctx_from->state; 867 else 868 ctx_to->state = ctx_to->screen->save_state; 869 870 ctx_to->dirty_3d = ~0; 871 ctx_to->dirty_cp = ~0; 872 ctx_to->viewports_dirty = ~0; 873 ctx_to->scissors_dirty = ~0; 874 875 for (s = 0; s < 6; ++s) { 876 ctx_to->samplers_dirty[s] = ~0; 877 ctx_to->textures_dirty[s] = ~0; 878 ctx_to->constbuf_dirty[s] = (1 << NVC0_MAX_PIPE_CONSTBUFS) - 1; 879 ctx_to->buffers_dirty[s] = ~0; 880 ctx_to->images_dirty[s] = ~0; 881 } 882 883 /* Reset tfb as the shader that owns it may have been deleted. */ 884 ctx_to->state.tfb = NULL; 885 886 if (!ctx_to->vertex) 887 ctx_to->dirty_3d &= ~(NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS); 888 889 if (!ctx_to->vertprog) 890 ctx_to->dirty_3d &= ~NVC0_NEW_3D_VERTPROG; 891 if (!ctx_to->fragprog) 892 ctx_to->dirty_3d &= ~NVC0_NEW_3D_FRAGPROG; 893 894 if (!ctx_to->blend) 895 ctx_to->dirty_3d &= ~NVC0_NEW_3D_BLEND; 896 if (!ctx_to->rast) 897 ctx_to->dirty_3d &= ~(NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_SCISSOR); 898 if (!ctx_to->zsa) 899 ctx_to->dirty_3d &= ~NVC0_NEW_3D_ZSA; 900 901 ctx_to->screen->cur_ctx = ctx_to; 902} 903 904static struct nvc0_state_validate 905validate_list_3d[] = { 906 { nvc0_validate_fb, NVC0_NEW_3D_FRAMEBUFFER }, 907 { nvc0_validate_blend, NVC0_NEW_3D_BLEND }, 908 { nvc0_validate_zsa, NVC0_NEW_3D_ZSA }, 909 { nvc0_validate_sample_mask, NVC0_NEW_3D_SAMPLE_MASK }, 910 { nvc0_validate_rasterizer, NVC0_NEW_3D_RASTERIZER }, 911 { nvc0_validate_blend_colour, NVC0_NEW_3D_BLEND_COLOUR }, 912 { nvc0_validate_stencil_ref, NVC0_NEW_3D_STENCIL_REF }, 913 { nvc0_validate_stipple, NVC0_NEW_3D_STIPPLE }, 914 { nvc0_validate_scissor, NVC0_NEW_3D_SCISSOR | NVC0_NEW_3D_RASTERIZER }, 915 { nvc0_validate_viewport, NVC0_NEW_3D_VIEWPORT }, 916 { nvc0_validate_window_rects, NVC0_NEW_3D_WINDOW_RECTS }, 917 { nvc0_vertprog_validate, NVC0_NEW_3D_VERTPROG }, 918 { nvc0_tctlprog_validate, NVC0_NEW_3D_TCTLPROG }, 919 { nvc0_tevlprog_validate, NVC0_NEW_3D_TEVLPROG }, 920 { nvc0_validate_tess_state, NVC0_NEW_3D_TESSFACTOR }, 921 { nvc0_gmtyprog_validate, NVC0_NEW_3D_GMTYPROG }, 922 { nvc0_validate_min_samples, NVC0_NEW_3D_MIN_SAMPLES | 923 NVC0_NEW_3D_FRAGPROG | 924 NVC0_NEW_3D_FRAMEBUFFER }, 925 { nvc0_fragprog_validate, NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_RASTERIZER }, 926 { nvc0_validate_fp_zsa_rast, NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_ZSA | 927 NVC0_NEW_3D_RASTERIZER }, 928 { nvc0_validate_zsa_fb, NVC0_NEW_3D_ZSA | NVC0_NEW_3D_FRAMEBUFFER }, 929 { nvc0_validate_rast_fb, NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_FRAMEBUFFER }, 930 { nvc0_validate_clip, NVC0_NEW_3D_CLIP | NVC0_NEW_3D_RASTERIZER | 931 NVC0_NEW_3D_VERTPROG | 932 NVC0_NEW_3D_TEVLPROG | 933 NVC0_NEW_3D_GMTYPROG }, 934 { nvc0_constbufs_validate, NVC0_NEW_3D_CONSTBUF }, 935 { nvc0_validate_textures, NVC0_NEW_3D_TEXTURES }, 936 { nvc0_validate_samplers, NVC0_NEW_3D_SAMPLERS }, 937 { nve4_set_tex_handles, NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS }, 938 { nvc0_validate_fbread, NVC0_NEW_3D_FRAGPROG | 939 NVC0_NEW_3D_FRAMEBUFFER }, 940 { nvc0_vertex_arrays_validate, NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS }, 941 { nvc0_validate_surfaces, NVC0_NEW_3D_SURFACES }, 942 { nvc0_validate_buffers, NVC0_NEW_3D_BUFFERS }, 943 { nvc0_tfb_validate, NVC0_NEW_3D_TFB_TARGETS | NVC0_NEW_3D_GMTYPROG }, 944 { nvc0_layer_validate, NVC0_NEW_3D_VERTPROG | 945 NVC0_NEW_3D_TEVLPROG | 946 NVC0_NEW_3D_GMTYPROG }, 947 { nvc0_validate_driverconst, NVC0_NEW_3D_DRIVERCONST }, 948 { validate_sample_locations, NVC0_NEW_3D_SAMPLE_LOCATIONS | 949 NVC0_NEW_3D_FRAMEBUFFER}, 950}; 951 952bool 953nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask, 954 struct nvc0_state_validate *validate_list, int size, 955 uint32_t *dirty, struct nouveau_bufctx *bufctx) 956{ 957 uint32_t state_mask; 958 int ret; 959 unsigned i; 960 961 if (nvc0->screen->cur_ctx != nvc0) 962 nvc0_switch_pipe_context(nvc0); 963 964 state_mask = *dirty & mask; 965 966 if (state_mask) { 967 for (i = 0; i < size; ++i) { 968 struct nvc0_state_validate *validate = &validate_list[i]; 969 970 if (state_mask & validate->states) 971 validate->func(nvc0); 972 } 973 *dirty &= ~state_mask; 974 975 nvc0_bufctx_fence(nvc0, bufctx, false); 976 } 977 978 nouveau_pushbuf_bufctx(nvc0->base.pushbuf, bufctx); 979 ret = nouveau_pushbuf_validate(nvc0->base.pushbuf); 980 981 return !ret; 982} 983 984bool 985nvc0_state_validate_3d(struct nvc0_context *nvc0, uint32_t mask) 986{ 987 bool ret; 988 989 ret = nvc0_state_validate(nvc0, mask, validate_list_3d, 990 ARRAY_SIZE(validate_list_3d), &nvc0->dirty_3d, 991 nvc0->bufctx_3d); 992 993 if (unlikely(nvc0->state.flushed)) { 994 nvc0->state.flushed = false; 995 nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, true); 996 } 997 return ret; 998} 999