1/************************************************************************** 2 * 3 * Copyright 2007 VMware, Inc. 4 * Copyright 2010 VMware, Inc. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * \brief Quad depth / stencil testing 31 */ 32 33#include "pipe/p_defines.h" 34#include "util/format/u_format.h" 35#include "util/u_math.h" 36#include "util/u_memory.h" 37#include "tgsi/tgsi_scan.h" 38#include "sp_context.h" 39#include "sp_quad.h" 40#include "sp_quad_pipe.h" 41#include "sp_tile_cache.h" 42#include "sp_state.h" /* for sp_fragment_shader */ 43 44 45struct depth_data { 46 struct pipe_surface *ps; 47 enum pipe_format format; 48 unsigned bzzzz[TGSI_QUAD_SIZE]; /**< Z values fetched from depth buffer */ 49 unsigned qzzzz[TGSI_QUAD_SIZE]; /**< Z values from the quad */ 50 ubyte stencilVals[TGSI_QUAD_SIZE]; 51 boolean use_shader_stencil_refs; 52 ubyte shader_stencil_refs[TGSI_QUAD_SIZE]; 53 struct softpipe_cached_tile *tile; 54 float minval, maxval; 55 bool clamp; 56}; 57 58 59 60static void 61get_depth_stencil_values( struct depth_data *data, 62 const struct quad_header *quad ) 63{ 64 unsigned j; 65 const struct softpipe_cached_tile *tile = data->tile; 66 67 switch (data->format) { 68 case PIPE_FORMAT_Z16_UNORM: 69 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 70 int x = quad->input.x0 % TILE_SIZE + (j & 1); 71 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 72 data->bzzzz[j] = tile->data.depth16[y][x]; 73 } 74 break; 75 case PIPE_FORMAT_Z32_UNORM: 76 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 77 int x = quad->input.x0 % TILE_SIZE + (j & 1); 78 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 79 data->bzzzz[j] = tile->data.depth32[y][x]; 80 } 81 break; 82 case PIPE_FORMAT_Z24X8_UNORM: 83 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 84 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 85 int x = quad->input.x0 % TILE_SIZE + (j & 1); 86 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 87 data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff; 88 data->stencilVals[j] = tile->data.depth32[y][x] >> 24; 89 } 90 break; 91 case PIPE_FORMAT_X8Z24_UNORM: 92 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 93 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 94 int x = quad->input.x0 % TILE_SIZE + (j & 1); 95 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 96 data->bzzzz[j] = tile->data.depth32[y][x] >> 8; 97 data->stencilVals[j] = tile->data.depth32[y][x] & 0xff; 98 } 99 break; 100 case PIPE_FORMAT_S8_UINT: 101 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 102 int x = quad->input.x0 % TILE_SIZE + (j & 1); 103 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 104 data->bzzzz[j] = 0; 105 data->stencilVals[j] = tile->data.stencil8[y][x]; 106 } 107 break; 108 case PIPE_FORMAT_Z32_FLOAT: 109 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 110 int x = quad->input.x0 % TILE_SIZE + (j & 1); 111 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 112 data->bzzzz[j] = tile->data.depth32[y][x]; 113 } 114 break; 115 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 116 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 117 int x = quad->input.x0 % TILE_SIZE + (j & 1); 118 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 119 data->bzzzz[j] = tile->data.depth64[y][x] & 0xffffffff; 120 data->stencilVals[j] = (tile->data.depth64[y][x] >> 32) & 0xff; 121 } 122 break; 123 default: 124 assert(0); 125 } 126} 127 128 129/** 130 * If the shader has not been run, interpolate the depth values 131 * ourselves. 132 */ 133static void 134interpolate_quad_depth( struct quad_header *quad ) 135{ 136 const float fx = (float) quad->input.x0; 137 const float fy = (float) quad->input.y0; 138 const float dzdx = quad->posCoef->dadx[2]; 139 const float dzdy = quad->posCoef->dady[2]; 140 const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; 141 142 quad->output.depth[0] = z0; 143 quad->output.depth[1] = z0 + dzdx; 144 quad->output.depth[2] = z0 + dzdy; 145 quad->output.depth[3] = z0 + dzdx + dzdy; 146} 147 148 149/** 150 * Compute the depth_data::qzzzz[] values from the float fragment Z values. 151 */ 152static void 153convert_quad_depth( struct depth_data *data, 154 const struct quad_header *quad ) 155{ 156 unsigned j; 157 float dvals[TGSI_QUAD_SIZE]; 158 159 /* Convert quad's float depth values to int depth values (qzzzz). 160 * If the Z buffer stores integer values, we _have_ to do the depth 161 * compares with integers (not floats). Otherwise, the float->int->float 162 * conversion of Z values (which isn't an identity function) will cause 163 * Z-fighting errors. 164 */ 165 if (data->clamp) { 166 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 167 dvals[j] = CLAMP(quad->output.depth[j], data->minval, data->maxval); 168 } 169 } else { 170 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 171 dvals[j] = quad->output.depth[j]; 172 } 173 } 174 175 switch (data->format) { 176 case PIPE_FORMAT_Z16_UNORM: 177 { 178 float scale = 65535.0; 179 180 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 181 data->qzzzz[j] = (unsigned) (dvals[j] * scale); 182 } 183 } 184 break; 185 case PIPE_FORMAT_Z32_UNORM: 186 { 187 double scale = (double) (uint) ~0UL; 188 189 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 190 data->qzzzz[j] = (unsigned) (dvals[j] * scale); 191 } 192 } 193 break; 194 case PIPE_FORMAT_Z24X8_UNORM: 195 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 196 { 197 float scale = (float) ((1 << 24) - 1); 198 199 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 200 data->qzzzz[j] = (unsigned) (dvals[j] * scale); 201 } 202 } 203 break; 204 case PIPE_FORMAT_X8Z24_UNORM: 205 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 206 { 207 float scale = (float) ((1 << 24) - 1); 208 209 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 210 data->qzzzz[j] = (unsigned) (dvals[j] * scale); 211 } 212 } 213 break; 214 case PIPE_FORMAT_Z32_FLOAT: 215 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 216 { 217 union fi fui; 218 219 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 220 fui.f = dvals[j]; 221 data->qzzzz[j] = fui.ui; 222 } 223 } 224 break; 225 default: 226 assert(0); 227 } 228} 229 230 231/** 232 * Compute the depth_data::shader_stencil_refs[] values from the float 233 * fragment stencil values. 234 */ 235static void 236convert_quad_stencil( struct depth_data *data, 237 const struct quad_header *quad ) 238{ 239 unsigned j; 240 241 data->use_shader_stencil_refs = TRUE; 242 /* Copy quads stencil values 243 */ 244 switch (data->format) { 245 case PIPE_FORMAT_Z24X8_UNORM: 246 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 247 case PIPE_FORMAT_X8Z24_UNORM: 248 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 249 case PIPE_FORMAT_S8_UINT: 250 case PIPE_FORMAT_Z32_FLOAT: 251 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 252 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 253 data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j])); 254 } 255 break; 256 default: 257 assert(0); 258 } 259} 260 261 262/** 263 * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer. 264 */ 265static void 266write_depth_stencil_values( struct depth_data *data, 267 struct quad_header *quad ) 268{ 269 struct softpipe_cached_tile *tile = data->tile; 270 unsigned j; 271 272 /* put updated Z values back into cached tile */ 273 switch (data->format) { 274 case PIPE_FORMAT_Z16_UNORM: 275 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 276 int x = quad->input.x0 % TILE_SIZE + (j & 1); 277 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 278 tile->data.depth16[y][x] = (ushort) data->bzzzz[j]; 279 } 280 break; 281 case PIPE_FORMAT_Z24X8_UNORM: 282 case PIPE_FORMAT_Z32_UNORM: 283 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 284 int x = quad->input.x0 % TILE_SIZE + (j & 1); 285 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 286 tile->data.depth32[y][x] = data->bzzzz[j]; 287 } 288 break; 289 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 290 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 291 int x = quad->input.x0 % TILE_SIZE + (j & 1); 292 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 293 tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j]; 294 } 295 break; 296 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 297 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 298 int x = quad->input.x0 % TILE_SIZE + (j & 1); 299 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 300 tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j]; 301 } 302 break; 303 case PIPE_FORMAT_X8Z24_UNORM: 304 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 305 int x = quad->input.x0 % TILE_SIZE + (j & 1); 306 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 307 tile->data.depth32[y][x] = data->bzzzz[j] << 8; 308 } 309 break; 310 case PIPE_FORMAT_S8_UINT: 311 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 312 int x = quad->input.x0 % TILE_SIZE + (j & 1); 313 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 314 tile->data.stencil8[y][x] = data->stencilVals[j]; 315 } 316 break; 317 case PIPE_FORMAT_Z32_FLOAT: 318 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 319 int x = quad->input.x0 % TILE_SIZE + (j & 1); 320 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 321 tile->data.depth32[y][x] = data->bzzzz[j]; 322 } 323 break; 324 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 325 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 326 int x = quad->input.x0 % TILE_SIZE + (j & 1); 327 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 328 tile->data.depth64[y][x] = (uint64_t)data->bzzzz[j] | ((uint64_t)data->stencilVals[j] << 32); 329 } 330 break; 331 default: 332 assert(0); 333 } 334} 335 336 337 338/** Only 8-bit stencil supported */ 339#define STENCIL_MAX 0xff 340 341 342/** 343 * Do the basic stencil test (compare stencil buffer values against the 344 * reference value. 345 * 346 * \param data->stencilVals the stencil values from the stencil buffer 347 * \param func the stencil func (PIPE_FUNC_x) 348 * \param ref the stencil reference value 349 * \param valMask the stencil value mask indicating which bits of the stencil 350 * values and ref value are to be used. 351 * \return mask indicating which pixels passed the stencil test 352 */ 353static unsigned 354do_stencil_test(struct depth_data *data, 355 unsigned func, 356 unsigned ref, unsigned valMask) 357{ 358 unsigned passMask = 0x0; 359 unsigned j; 360 ubyte refs[TGSI_QUAD_SIZE]; 361 362 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 363 if (data->use_shader_stencil_refs) 364 refs[j] = data->shader_stencil_refs[j] & valMask; 365 else 366 refs[j] = ref & valMask; 367 } 368 369 switch (func) { 370 case PIPE_FUNC_NEVER: 371 /* passMask = 0x0 */ 372 break; 373 case PIPE_FUNC_LESS: 374 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 375 if (refs[j] < (data->stencilVals[j] & valMask)) { 376 passMask |= (1 << j); 377 } 378 } 379 break; 380 case PIPE_FUNC_EQUAL: 381 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 382 if (refs[j] == (data->stencilVals[j] & valMask)) { 383 passMask |= (1 << j); 384 } 385 } 386 break; 387 case PIPE_FUNC_LEQUAL: 388 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 389 if (refs[j] <= (data->stencilVals[j] & valMask)) { 390 passMask |= (1 << j); 391 } 392 } 393 break; 394 case PIPE_FUNC_GREATER: 395 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 396 if (refs[j] > (data->stencilVals[j] & valMask)) { 397 passMask |= (1 << j); 398 } 399 } 400 break; 401 case PIPE_FUNC_NOTEQUAL: 402 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 403 if (refs[j] != (data->stencilVals[j] & valMask)) { 404 passMask |= (1 << j); 405 } 406 } 407 break; 408 case PIPE_FUNC_GEQUAL: 409 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 410 if (refs[j] >= (data->stencilVals[j] & valMask)) { 411 passMask |= (1 << j); 412 } 413 } 414 break; 415 case PIPE_FUNC_ALWAYS: 416 passMask = MASK_ALL; 417 break; 418 default: 419 assert(0); 420 } 421 422 return passMask; 423} 424 425 426/** 427 * Apply the stencil operator to stencil values. 428 * 429 * \param data->stencilVals the stencil buffer values (read and written) 430 * \param mask indicates which pixels to update 431 * \param op the stencil operator (PIPE_STENCIL_OP_x) 432 * \param ref the stencil reference value 433 * \param wrtMask writemask controlling which bits are changed in the 434 * stencil values 435 */ 436static void 437apply_stencil_op(struct depth_data *data, 438 unsigned mask, unsigned op, ubyte ref, ubyte wrtMask) 439{ 440 unsigned j; 441 ubyte newstencil[TGSI_QUAD_SIZE]; 442 ubyte refs[TGSI_QUAD_SIZE]; 443 444 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 445 newstencil[j] = data->stencilVals[j]; 446 if (data->use_shader_stencil_refs) 447 refs[j] = data->shader_stencil_refs[j]; 448 else 449 refs[j] = ref; 450 } 451 452 switch (op) { 453 case PIPE_STENCIL_OP_KEEP: 454 /* no-op */ 455 break; 456 case PIPE_STENCIL_OP_ZERO: 457 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 458 if (mask & (1 << j)) { 459 newstencil[j] = 0; 460 } 461 } 462 break; 463 case PIPE_STENCIL_OP_REPLACE: 464 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 465 if (mask & (1 << j)) { 466 newstencil[j] = refs[j]; 467 } 468 } 469 break; 470 case PIPE_STENCIL_OP_INCR: 471 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 472 if (mask & (1 << j)) { 473 if (data->stencilVals[j] < STENCIL_MAX) { 474 newstencil[j] = data->stencilVals[j] + 1; 475 } 476 } 477 } 478 break; 479 case PIPE_STENCIL_OP_DECR: 480 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 481 if (mask & (1 << j)) { 482 if (data->stencilVals[j] > 0) { 483 newstencil[j] = data->stencilVals[j] - 1; 484 } 485 } 486 } 487 break; 488 case PIPE_STENCIL_OP_INCR_WRAP: 489 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 490 if (mask & (1 << j)) { 491 newstencil[j] = data->stencilVals[j] + 1; 492 } 493 } 494 break; 495 case PIPE_STENCIL_OP_DECR_WRAP: 496 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 497 if (mask & (1 << j)) { 498 newstencil[j] = data->stencilVals[j] - 1; 499 } 500 } 501 break; 502 case PIPE_STENCIL_OP_INVERT: 503 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 504 if (mask & (1 << j)) { 505 newstencil[j] = ~data->stencilVals[j]; 506 } 507 } 508 break; 509 default: 510 assert(0); 511 } 512 513 /* 514 * update the stencil values 515 */ 516 if (wrtMask != STENCIL_MAX) { 517 /* apply bit-wise stencil buffer writemask */ 518 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 519 data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]); 520 } 521 } 522 else { 523 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 524 data->stencilVals[j] = newstencil[j]; 525 } 526 } 527} 528 529 530 531/** 532 * To increase efficiency, we should probably have multiple versions 533 * of this function that are specifically for Z16, Z32 and FP Z buffers. 534 * Try to effectively do that with codegen... 535 */ 536static boolean 537depth_test_quad(struct quad_stage *qs, 538 struct depth_data *data, 539 struct quad_header *quad) 540{ 541 struct softpipe_context *softpipe = qs->softpipe; 542 unsigned zmask = 0; 543 unsigned j; 544 545#define DEPTHTEST(l, op, r) do { \ 546 if (data->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || \ 547 data->format == PIPE_FORMAT_Z32_FLOAT) { \ 548 for (j = 0; j < TGSI_QUAD_SIZE; j++) { \ 549 if (((float *)l)[j] op ((float *)r)[j]) \ 550 zmask |= (1 << j); \ 551 } \ 552 } else { \ 553 for (j = 0; j < TGSI_QUAD_SIZE; j++) { \ 554 if (l[j] op r[j]) \ 555 zmask |= (1 << j); \ 556 } \ 557 } \ 558 } while (0) 559 560 switch (softpipe->depth_stencil->depth_func) { 561 case PIPE_FUNC_NEVER: 562 /* zmask = 0 */ 563 break; 564 case PIPE_FUNC_LESS: 565 /* Note this is pretty much a single sse or cell instruction. 566 * Like this: quad->mask &= (quad->outputs.depth < zzzz); 567 */ 568 DEPTHTEST(data->qzzzz, <, data->bzzzz); 569 break; 570 case PIPE_FUNC_EQUAL: 571 DEPTHTEST(data->qzzzz, ==, data->bzzzz); 572 break; 573 case PIPE_FUNC_LEQUAL: 574 DEPTHTEST(data->qzzzz, <=, data->bzzzz); 575 break; 576 case PIPE_FUNC_GREATER: 577 DEPTHTEST(data->qzzzz, >, data->bzzzz); 578 break; 579 case PIPE_FUNC_NOTEQUAL: 580 DEPTHTEST(data->qzzzz, !=, data->bzzzz); 581 break; 582 case PIPE_FUNC_GEQUAL: 583 DEPTHTEST(data->qzzzz, >=, data->bzzzz); 584 break; 585 case PIPE_FUNC_ALWAYS: 586 zmask = MASK_ALL; 587 break; 588 default: 589 assert(0); 590 } 591 592 quad->inout.mask &= zmask; 593 if (quad->inout.mask == 0) 594 return FALSE; 595 596 /* Update our internal copy only if writemask set. Even if 597 * depth.writemask is FALSE, may still need to write out buffer 598 * data due to stencil changes. 599 */ 600 if (softpipe->depth_stencil->depth_writemask) { 601 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 602 if (quad->inout.mask & (1 << j)) { 603 data->bzzzz[j] = data->qzzzz[j]; 604 } 605 } 606 } 607 608 return TRUE; 609} 610 611 612 613/** 614 * Do stencil (and depth) testing. Stenciling depends on the outcome of 615 * depth testing. 616 */ 617static void 618depth_stencil_test_quad(struct quad_stage *qs, 619 struct depth_data *data, 620 struct quad_header *quad) 621{ 622 struct softpipe_context *softpipe = qs->softpipe; 623 unsigned func, zFailOp, zPassOp, failOp; 624 ubyte ref, wrtMask, valMask; 625 uint face = quad->input.facing; 626 627 if (!softpipe->depth_stencil->stencil[1].enabled) { 628 /* single-sided stencil test, use front (face=0) state */ 629 face = 0; 630 } 631 632 /* 0 = front-face, 1 = back-face */ 633 assert(face == 0 || face == 1); 634 635 /* choose front or back face function, operator, etc */ 636 /* XXX we could do these initializations once per primitive */ 637 func = softpipe->depth_stencil->stencil[face].func; 638 failOp = softpipe->depth_stencil->stencil[face].fail_op; 639 zFailOp = softpipe->depth_stencil->stencil[face].zfail_op; 640 zPassOp = softpipe->depth_stencil->stencil[face].zpass_op; 641 ref = softpipe->stencil_ref.ref_value[face]; 642 wrtMask = softpipe->depth_stencil->stencil[face].writemask; 643 valMask = softpipe->depth_stencil->stencil[face].valuemask; 644 645 /* do the stencil test first */ 646 { 647 unsigned passMask, failMask; 648 passMask = do_stencil_test(data, func, ref, valMask); 649 failMask = quad->inout.mask & ~passMask; 650 quad->inout.mask &= passMask; 651 652 if (failOp != PIPE_STENCIL_OP_KEEP) { 653 apply_stencil_op(data, failMask, failOp, ref, wrtMask); 654 } 655 } 656 657 if (quad->inout.mask) { 658 /* now the pixels that passed the stencil test are depth tested */ 659 if (softpipe->depth_stencil->depth_enabled) { 660 const unsigned origMask = quad->inout.mask; 661 662 depth_test_quad(qs, data, quad); /* quad->mask is updated */ 663 664 /* update stencil buffer values according to z pass/fail result */ 665 if (zFailOp != PIPE_STENCIL_OP_KEEP) { 666 const unsigned zFailMask = origMask & ~quad->inout.mask; 667 apply_stencil_op(data, zFailMask, zFailOp, ref, wrtMask); 668 } 669 670 if (zPassOp != PIPE_STENCIL_OP_KEEP) { 671 const unsigned zPassMask = origMask & quad->inout.mask; 672 apply_stencil_op(data, zPassMask, zPassOp, ref, wrtMask); 673 } 674 } 675 else { 676 /* no depth test, apply Zpass operator to stencil buffer values */ 677 apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask); 678 } 679 } 680} 681 682 683#define ALPHATEST( FUNC, COMP ) \ 684 static unsigned \ 685 alpha_test_quads_##FUNC( struct quad_stage *qs, \ 686 struct quad_header *quads[], \ 687 unsigned nr ) \ 688 { \ 689 const float ref = qs->softpipe->depth_stencil->alpha_ref_value; \ 690 const uint cbuf = 0; /* only output[0].alpha is tested */ \ 691 unsigned pass_nr = 0; \ 692 unsigned i; \ 693 \ 694 for (i = 0; i < nr; i++) { \ 695 const float *aaaa = quads[i]->output.color[cbuf][3]; \ 696 unsigned passMask = 0; \ 697 \ 698 if (aaaa[0] COMP ref) passMask |= (1 << 0); \ 699 if (aaaa[1] COMP ref) passMask |= (1 << 1); \ 700 if (aaaa[2] COMP ref) passMask |= (1 << 2); \ 701 if (aaaa[3] COMP ref) passMask |= (1 << 3); \ 702 \ 703 quads[i]->inout.mask &= passMask; \ 704 \ 705 if (quads[i]->inout.mask) \ 706 quads[pass_nr++] = quads[i]; \ 707 } \ 708 \ 709 return pass_nr; \ 710 } 711 712 713ALPHATEST( LESS, < ) 714ALPHATEST( EQUAL, == ) 715ALPHATEST( LEQUAL, <= ) 716ALPHATEST( GREATER, > ) 717ALPHATEST( NOTEQUAL, != ) 718ALPHATEST( GEQUAL, >= ) 719 720 721/* XXX: Incorporate into shader using KILL_IF. 722 */ 723static unsigned 724alpha_test_quads(struct quad_stage *qs, 725 struct quad_header *quads[], 726 unsigned nr) 727{ 728 switch (qs->softpipe->depth_stencil->alpha_func) { 729 case PIPE_FUNC_LESS: 730 return alpha_test_quads_LESS( qs, quads, nr ); 731 case PIPE_FUNC_EQUAL: 732 return alpha_test_quads_EQUAL( qs, quads, nr ); 733 case PIPE_FUNC_LEQUAL: 734 return alpha_test_quads_LEQUAL( qs, quads, nr ); 735 case PIPE_FUNC_GREATER: 736 return alpha_test_quads_GREATER( qs, quads, nr ); 737 case PIPE_FUNC_NOTEQUAL: 738 return alpha_test_quads_NOTEQUAL( qs, quads, nr ); 739 case PIPE_FUNC_GEQUAL: 740 return alpha_test_quads_GEQUAL( qs, quads, nr ); 741 case PIPE_FUNC_ALWAYS: 742 return nr; 743 case PIPE_FUNC_NEVER: 744 default: 745 return 0; 746 } 747} 748 749 750/** 751 * EXT_depth_bounds_test has some careful language about precision: 752 * 753 * At what precision is the depth bounds test carried out? 754 * 755 * RESOLUTION: For the purposes of the test, the bounds are converted 756 * to fixed-point as though they were to be written to the depth buffer, 757 * and the comparison uses those quantized bounds. 758 * 759 * We choose the obvious interpretation that Z32F needs no such conversion. 760 */ 761static unsigned 762depth_bounds_test_quads(struct quad_stage *qs, 763 struct quad_header *quads[], 764 unsigned nr, 765 struct depth_data *data) 766{ 767 struct pipe_depth_stencil_alpha_state *dsa = qs->softpipe->depth_stencil; 768 unsigned i = 0, pass_nr = 0; 769 enum pipe_format format = util_format_get_depth_only(data->format); 770 double min = dsa->depth_bounds_min; 771 double max = dsa->depth_bounds_max; 772 773 for (i = 0; i < nr; i++) { 774 unsigned j = 0, passMask = 0; 775 776 get_depth_stencil_values(data, quads[i]); 777 778 if (format == PIPE_FORMAT_Z32_FLOAT) { 779 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 780 double z = uif(data->bzzzz[j]); 781 782 if (z >= min && z <= max) 783 passMask |= (1 << j); 784 } 785 } else { 786 unsigned imin, imax; 787 788 if (format == PIPE_FORMAT_Z16_UNORM) { 789 imin = ((unsigned) (min * 65535.0)) & 0xffff; 790 imax = ((unsigned) (max * 65535.0)) & 0xffff; 791 } else if (format == PIPE_FORMAT_Z32_UNORM) { 792 imin = (unsigned) (min * 4294967295.0); 793 imax = (unsigned) (max * 4294967295.0); 794 } else if (format == PIPE_FORMAT_Z24X8_UNORM || 795 format == PIPE_FORMAT_X8Z24_UNORM) { 796 imin = ((unsigned) (min * 16777215.0)) & 0xffffff; 797 imax = ((unsigned) (max * 16777215.0)) & 0xffffff; 798 } else { 799 unreachable("Unknown depth buffer format"); 800 } 801 802 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 803 unsigned iz = data->bzzzz[j]; 804 805 if (iz >= imin && iz <= imax) 806 passMask |= (1 << j); 807 } 808 } 809 810 quads[i]->inout.mask &= passMask; 811 812 if (quads[i]->inout.mask) 813 quads[pass_nr++] = quads[i]; 814 } 815 816 return pass_nr; 817} 818 819 820static unsigned mask_count[16] = 821{ 822 0, /* 0x0 */ 823 1, /* 0x1 */ 824 1, /* 0x2 */ 825 2, /* 0x3 */ 826 1, /* 0x4 */ 827 2, /* 0x5 */ 828 2, /* 0x6 */ 829 3, /* 0x7 */ 830 1, /* 0x8 */ 831 2, /* 0x9 */ 832 2, /* 0xa */ 833 3, /* 0xb */ 834 2, /* 0xc */ 835 3, /* 0xd */ 836 3, /* 0xe */ 837 4, /* 0xf */ 838}; 839 840 841 842/** 843 * General depth/stencil test function. Used when there's no fast-path. 844 */ 845static void 846depth_test_quads_fallback(struct quad_stage *qs, 847 struct quad_header *quads[], 848 unsigned nr) 849{ 850 unsigned i, pass = 0; 851 const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info; 852 boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth; 853 boolean shader_stencil_ref = fsInfo->writes_stencil; 854 boolean have_zs = !!qs->softpipe->framebuffer.zsbuf; 855 struct depth_data data; 856 unsigned vp_idx = quads[0]->input.viewport_index; 857 858 data.use_shader_stencil_refs = FALSE; 859 860 if (have_zs && (qs->softpipe->depth_stencil->depth_enabled || 861 qs->softpipe->depth_stencil->stencil[0].enabled || 862 qs->softpipe->depth_stencil->depth_bounds_test)) { 863 float near_val, far_val; 864 865 data.ps = qs->softpipe->framebuffer.zsbuf; 866 data.format = data.ps->format; 867 data.tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, 868 quads[0]->input.x0, 869 quads[0]->input.y0, quads[0]->input.layer); 870 data.clamp = !qs->softpipe->rasterizer->depth_clip_near; 871 872 near_val = qs->softpipe->viewports[vp_idx].translate[2] - qs->softpipe->viewports[vp_idx].scale[2]; 873 far_val = near_val + (qs->softpipe->viewports[vp_idx].scale[2] * 2.0); 874 data.minval = MIN2(near_val, far_val); 875 data.maxval = MAX2(near_val, far_val); 876 } 877 878 /* EXT_depth_bounds_test says: 879 * 880 * Where should the depth bounds test take place in the OpenGL fragment 881 * processing pipeline? 882 * 883 * RESOLUTION: After scissor test, before alpha test. In practice, 884 * this is a logical placement of the test. An implementation is 885 * free to perform the test in a manner that is consistent with the 886 * specified ordering. 887 */ 888 889 if (have_zs && qs->softpipe->depth_stencil->depth_bounds_test) { 890 nr = depth_bounds_test_quads(qs, quads, nr, &data); 891 } 892 893 if (qs->softpipe->depth_stencil->alpha_enabled) { 894 nr = alpha_test_quads(qs, quads, nr); 895 } 896 897 if (have_zs && (qs->softpipe->depth_stencil->depth_enabled || 898 qs->softpipe->depth_stencil->stencil[0].enabled)) { 899 for (i = 0; i < nr; i++) { 900 get_depth_stencil_values(&data, quads[i]); 901 902 if (qs->softpipe->depth_stencil->depth_enabled) { 903 if (interp_depth) 904 interpolate_quad_depth(quads[i]); 905 906 convert_quad_depth(&data, quads[i]); 907 } 908 909 if (qs->softpipe->depth_stencil->stencil[0].enabled) { 910 if (shader_stencil_ref) 911 convert_quad_stencil(&data, quads[i]); 912 913 depth_stencil_test_quad(qs, &data, quads[i]); 914 write_depth_stencil_values(&data, quads[i]); 915 } 916 else { 917 if (!depth_test_quad(qs, &data, quads[i])) 918 continue; 919 920 if (qs->softpipe->depth_stencil->depth_writemask) 921 write_depth_stencil_values(&data, quads[i]); 922 } 923 924 quads[pass++] = quads[i]; 925 } 926 927 nr = pass; 928 } 929 930 if (qs->softpipe->active_query_count) { 931 for (i = 0; i < nr; i++) 932 qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask]; 933 } 934 935 if (nr) 936 qs->next->run(qs->next, quads, nr); 937} 938 939 940/** 941 * Special-case Z testing for 16-bit Zbuffer and Z buffer writes enabled. 942 */ 943 944#define NAME depth_interp_z16_less_write 945#define OPERATOR < 946#include "sp_quad_depth_test_tmp.h" 947 948#define NAME depth_interp_z16_equal_write 949#define OPERATOR == 950#include "sp_quad_depth_test_tmp.h" 951 952#define NAME depth_interp_z16_lequal_write 953#define OPERATOR <= 954#include "sp_quad_depth_test_tmp.h" 955 956#define NAME depth_interp_z16_greater_write 957#define OPERATOR > 958#include "sp_quad_depth_test_tmp.h" 959 960#define NAME depth_interp_z16_notequal_write 961#define OPERATOR != 962#include "sp_quad_depth_test_tmp.h" 963 964#define NAME depth_interp_z16_gequal_write 965#define OPERATOR >= 966#include "sp_quad_depth_test_tmp.h" 967 968#define NAME depth_interp_z16_always_write 969#define ALWAYS 1 970#include "sp_quad_depth_test_tmp.h" 971 972 973 974static void 975depth_noop(struct quad_stage *qs, 976 struct quad_header *quads[], 977 unsigned nr) 978{ 979 qs->next->run(qs->next, quads, nr); 980} 981 982 983 984static void 985choose_depth_test(struct quad_stage *qs, 986 struct quad_header *quads[], 987 unsigned nr) 988{ 989 const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info; 990 991 boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth; 992 993 boolean alpha = qs->softpipe->depth_stencil->alpha_enabled; 994 995 boolean depth = qs->softpipe->depth_stencil->depth_enabled; 996 997 unsigned depthfunc = qs->softpipe->depth_stencil->depth_func; 998 999 boolean stencil = qs->softpipe->depth_stencil->stencil[0].enabled; 1000 1001 boolean depthwrite = qs->softpipe->depth_stencil->depth_writemask; 1002 1003 boolean occlusion = qs->softpipe->active_query_count; 1004 1005 boolean clipped = !qs->softpipe->rasterizer->depth_clip_near; 1006 1007 boolean depth_bounds = qs->softpipe->depth_stencil->depth_bounds_test; 1008 1009 if(!qs->softpipe->framebuffer.zsbuf) 1010 depth = depthwrite = stencil = FALSE; 1011 1012 /* default */ 1013 qs->run = depth_test_quads_fallback; 1014 1015 /* look for special cases */ 1016 if (!alpha && 1017 !depth && 1018 !occlusion && 1019 !clipped && 1020 !stencil && 1021 !depth_bounds) { 1022 qs->run = depth_noop; 1023 } 1024 else if (!alpha && 1025 interp_depth && 1026 depth && 1027 depthwrite && 1028 !occlusion && 1029 !clipped && 1030 !stencil && 1031 !depth_bounds) 1032 { 1033 if (qs->softpipe->framebuffer.zsbuf->format == PIPE_FORMAT_Z16_UNORM) { 1034 switch (depthfunc) { 1035 case PIPE_FUNC_NEVER: 1036 qs->run = depth_test_quads_fallback; 1037 break; 1038 case PIPE_FUNC_LESS: 1039 qs->run = depth_interp_z16_less_write; 1040 break; 1041 case PIPE_FUNC_EQUAL: 1042 qs->run = depth_interp_z16_equal_write; 1043 break; 1044 case PIPE_FUNC_LEQUAL: 1045 qs->run = depth_interp_z16_lequal_write; 1046 break; 1047 case PIPE_FUNC_GREATER: 1048 qs->run = depth_interp_z16_greater_write; 1049 break; 1050 case PIPE_FUNC_NOTEQUAL: 1051 qs->run = depth_interp_z16_notequal_write; 1052 break; 1053 case PIPE_FUNC_GEQUAL: 1054 qs->run = depth_interp_z16_gequal_write; 1055 break; 1056 case PIPE_FUNC_ALWAYS: 1057 qs->run = depth_interp_z16_always_write; 1058 break; 1059 default: 1060 qs->run = depth_test_quads_fallback; 1061 break; 1062 } 1063 } 1064 } 1065 1066 /* next quad/fragment stage */ 1067 qs->run( qs, quads, nr ); 1068} 1069 1070 1071 1072static void 1073depth_test_begin(struct quad_stage *qs) 1074{ 1075 qs->run = choose_depth_test; 1076 qs->next->begin(qs->next); 1077} 1078 1079 1080static void 1081depth_test_destroy(struct quad_stage *qs) 1082{ 1083 FREE( qs ); 1084} 1085 1086 1087struct quad_stage * 1088sp_quad_depth_test_stage(struct softpipe_context *softpipe) 1089{ 1090 struct quad_stage *stage = CALLOC_STRUCT(quad_stage); 1091 1092 stage->softpipe = softpipe; 1093 stage->begin = depth_test_begin; 1094 stage->run = choose_depth_test; 1095 stage->destroy = depth_test_destroy; 1096 1097 return stage; 1098} 1099