1/**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * Copyright 2010 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * \brief  Quad depth / stencil testing
31 */
32
33#include "pipe/p_defines.h"
34#include "util/format/u_format.h"
35#include "util/u_math.h"
36#include "util/u_memory.h"
37#include "tgsi/tgsi_scan.h"
38#include "sp_context.h"
39#include "sp_quad.h"
40#include "sp_quad_pipe.h"
41#include "sp_tile_cache.h"
42#include "sp_state.h"           /* for sp_fragment_shader */
43
44
45struct depth_data {
46   struct pipe_surface *ps;
47   enum pipe_format format;
48   unsigned bzzzz[TGSI_QUAD_SIZE];  /**< Z values fetched from depth buffer */
49   unsigned qzzzz[TGSI_QUAD_SIZE];  /**< Z values from the quad */
50   ubyte stencilVals[TGSI_QUAD_SIZE];
51   boolean use_shader_stencil_refs;
52   ubyte shader_stencil_refs[TGSI_QUAD_SIZE];
53   struct softpipe_cached_tile *tile;
54   float minval, maxval;
55   bool clamp;
56};
57
58
59
60static void
61get_depth_stencil_values( struct depth_data *data,
62                          const struct quad_header *quad )
63{
64   unsigned j;
65   const struct softpipe_cached_tile *tile = data->tile;
66
67   switch (data->format) {
68   case PIPE_FORMAT_Z16_UNORM:
69      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
70         int x = quad->input.x0 % TILE_SIZE + (j & 1);
71         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
72         data->bzzzz[j] = tile->data.depth16[y][x];
73      }
74      break;
75   case PIPE_FORMAT_Z32_UNORM:
76      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
77         int x = quad->input.x0 % TILE_SIZE + (j & 1);
78         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
79         data->bzzzz[j] = tile->data.depth32[y][x];
80      }
81      break;
82   case PIPE_FORMAT_Z24X8_UNORM:
83   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
84      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
85         int x = quad->input.x0 % TILE_SIZE + (j & 1);
86         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
87         data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;
88         data->stencilVals[j] = tile->data.depth32[y][x] >> 24;
89      }
90      break;
91   case PIPE_FORMAT_X8Z24_UNORM:
92   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
93      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
94         int x = quad->input.x0 % TILE_SIZE + (j & 1);
95         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
96         data->bzzzz[j] = tile->data.depth32[y][x] >> 8;
97         data->stencilVals[j] = tile->data.depth32[y][x] & 0xff;
98      }
99      break;
100   case PIPE_FORMAT_S8_UINT:
101      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
102         int x = quad->input.x0 % TILE_SIZE + (j & 1);
103         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
104         data->bzzzz[j] = 0;
105         data->stencilVals[j] = tile->data.stencil8[y][x];
106      }
107      break;
108   case PIPE_FORMAT_Z32_FLOAT:
109      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
110         int x = quad->input.x0 % TILE_SIZE + (j & 1);
111         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
112         data->bzzzz[j] = tile->data.depth32[y][x];
113      }
114      break;
115   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
116      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
117         int x = quad->input.x0 % TILE_SIZE + (j & 1);
118         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
119         data->bzzzz[j] = tile->data.depth64[y][x] & 0xffffffff;
120         data->stencilVals[j] = (tile->data.depth64[y][x] >> 32) & 0xff;
121      }
122      break;
123   default:
124      assert(0);
125   }
126}
127
128
129/**
130 * If the shader has not been run, interpolate the depth values
131 * ourselves.
132 */
133static void
134interpolate_quad_depth( struct quad_header *quad )
135{
136   const float fx = (float) quad->input.x0;
137   const float fy = (float) quad->input.y0;
138   const float dzdx = quad->posCoef->dadx[2];
139   const float dzdy = quad->posCoef->dady[2];
140   const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
141
142   quad->output.depth[0] = z0;
143   quad->output.depth[1] = z0 + dzdx;
144   quad->output.depth[2] = z0 + dzdy;
145   quad->output.depth[3] = z0 + dzdx + dzdy;
146}
147
148
149/**
150 * Compute the depth_data::qzzzz[] values from the float fragment Z values.
151 */
152static void
153convert_quad_depth( struct depth_data *data,
154                    const struct quad_header *quad )
155{
156   unsigned j;
157   float dvals[TGSI_QUAD_SIZE];
158
159   /* Convert quad's float depth values to int depth values (qzzzz).
160    * If the Z buffer stores integer values, we _have_ to do the depth
161    * compares with integers (not floats).  Otherwise, the float->int->float
162    * conversion of Z values (which isn't an identity function) will cause
163    * Z-fighting errors.
164    */
165   if (data->clamp) {
166      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
167         dvals[j] = CLAMP(quad->output.depth[j], data->minval, data->maxval);
168      }
169   } else {
170      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
171         dvals[j] = quad->output.depth[j];
172      }
173   }
174
175   switch (data->format) {
176   case PIPE_FORMAT_Z16_UNORM:
177      {
178         float scale = 65535.0;
179
180         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
181            data->qzzzz[j] = (unsigned) (dvals[j] * scale);
182         }
183      }
184      break;
185   case PIPE_FORMAT_Z32_UNORM:
186      {
187         double scale = (double) (uint) ~0UL;
188
189         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
190            data->qzzzz[j] = (unsigned) (dvals[j] * scale);
191         }
192      }
193      break;
194   case PIPE_FORMAT_Z24X8_UNORM:
195   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
196      {
197         float scale = (float) ((1 << 24) - 1);
198
199         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
200            data->qzzzz[j] = (unsigned) (dvals[j] * scale);
201         }
202      }
203      break;
204   case PIPE_FORMAT_X8Z24_UNORM:
205   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
206      {
207         float scale = (float) ((1 << 24) - 1);
208
209         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
210            data->qzzzz[j] = (unsigned) (dvals[j] * scale);
211         }
212      }
213      break;
214   case PIPE_FORMAT_Z32_FLOAT:
215   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
216      {
217         union fi fui;
218
219         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
220            fui.f = dvals[j];
221            data->qzzzz[j] = fui.ui;
222         }
223      }
224      break;
225   default:
226      assert(0);
227   }
228}
229
230
231/**
232 * Compute the depth_data::shader_stencil_refs[] values from the float
233 * fragment stencil values.
234 */
235static void
236convert_quad_stencil( struct depth_data *data,
237                      const struct quad_header *quad )
238{
239   unsigned j;
240
241   data->use_shader_stencil_refs = TRUE;
242   /* Copy quads stencil values
243    */
244   switch (data->format) {
245   case PIPE_FORMAT_Z24X8_UNORM:
246   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
247   case PIPE_FORMAT_X8Z24_UNORM:
248   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
249   case PIPE_FORMAT_S8_UINT:
250   case PIPE_FORMAT_Z32_FLOAT:
251   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
252      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
253         data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j]));
254      }
255      break;
256   default:
257      assert(0);
258   }
259}
260
261
262/**
263 * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer.
264 */
265static void
266write_depth_stencil_values( struct depth_data *data,
267                            struct quad_header *quad )
268{
269   struct softpipe_cached_tile *tile = data->tile;
270   unsigned j;
271
272   /* put updated Z values back into cached tile */
273   switch (data->format) {
274   case PIPE_FORMAT_Z16_UNORM:
275      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
276         int x = quad->input.x0 % TILE_SIZE + (j & 1);
277         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
278         tile->data.depth16[y][x] = (ushort) data->bzzzz[j];
279      }
280      break;
281   case PIPE_FORMAT_Z24X8_UNORM:
282   case PIPE_FORMAT_Z32_UNORM:
283      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
284         int x = quad->input.x0 % TILE_SIZE + (j & 1);
285         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
286         tile->data.depth32[y][x] = data->bzzzz[j];
287      }
288      break;
289   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
290      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
291         int x = quad->input.x0 % TILE_SIZE + (j & 1);
292         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
293         tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j];
294      }
295      break;
296   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
297      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
298         int x = quad->input.x0 % TILE_SIZE + (j & 1);
299         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
300         tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j];
301      }
302      break;
303   case PIPE_FORMAT_X8Z24_UNORM:
304      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
305         int x = quad->input.x0 % TILE_SIZE + (j & 1);
306         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
307         tile->data.depth32[y][x] = data->bzzzz[j] << 8;
308      }
309      break;
310   case PIPE_FORMAT_S8_UINT:
311      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
312         int x = quad->input.x0 % TILE_SIZE + (j & 1);
313         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
314         tile->data.stencil8[y][x] = data->stencilVals[j];
315      }
316      break;
317   case PIPE_FORMAT_Z32_FLOAT:
318      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
319         int x = quad->input.x0 % TILE_SIZE + (j & 1);
320         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
321         tile->data.depth32[y][x] = data->bzzzz[j];
322      }
323      break;
324   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
325      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
326         int x = quad->input.x0 % TILE_SIZE + (j & 1);
327         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
328         tile->data.depth64[y][x] = (uint64_t)data->bzzzz[j] | ((uint64_t)data->stencilVals[j] << 32);
329      }
330      break;
331   default:
332      assert(0);
333   }
334}
335
336
337
/**
 * Largest stencil value; only 8-bit stencil is supported.
 *
 * Used as the saturation limit for PIPE_STENCIL_OP_INCR and compared
 * against the stencil writemask to detect the "all bits writable" case.
 */
#define STENCIL_MAX 0xff
340
341
342/**
343 * Do the basic stencil test (compare stencil buffer values against the
344 * reference value.
345 *
346 * \param data->stencilVals  the stencil values from the stencil buffer
347 * \param func  the stencil func (PIPE_FUNC_x)
348 * \param ref  the stencil reference value
349 * \param valMask  the stencil value mask indicating which bits of the stencil
350 *                 values and ref value are to be used.
351 * \return mask indicating which pixels passed the stencil test
352 */
353static unsigned
354do_stencil_test(struct depth_data *data,
355                unsigned func,
356                unsigned ref, unsigned valMask)
357{
358   unsigned passMask = 0x0;
359   unsigned j;
360   ubyte refs[TGSI_QUAD_SIZE];
361
362   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
363      if (data->use_shader_stencil_refs)
364         refs[j] = data->shader_stencil_refs[j] & valMask;
365      else
366         refs[j] = ref & valMask;
367   }
368
369   switch (func) {
370   case PIPE_FUNC_NEVER:
371      /* passMask = 0x0 */
372      break;
373   case PIPE_FUNC_LESS:
374      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
375         if (refs[j] < (data->stencilVals[j] & valMask)) {
376            passMask |= (1 << j);
377         }
378      }
379      break;
380   case PIPE_FUNC_EQUAL:
381      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
382         if (refs[j] == (data->stencilVals[j] & valMask)) {
383            passMask |= (1 << j);
384         }
385      }
386      break;
387   case PIPE_FUNC_LEQUAL:
388      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
389         if (refs[j] <= (data->stencilVals[j] & valMask)) {
390            passMask |= (1 << j);
391         }
392      }
393      break;
394   case PIPE_FUNC_GREATER:
395      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
396         if (refs[j] > (data->stencilVals[j] & valMask)) {
397            passMask |= (1 << j);
398         }
399      }
400      break;
401   case PIPE_FUNC_NOTEQUAL:
402      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
403         if (refs[j] != (data->stencilVals[j] & valMask)) {
404            passMask |= (1 << j);
405         }
406      }
407      break;
408   case PIPE_FUNC_GEQUAL:
409      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
410         if (refs[j] >= (data->stencilVals[j] & valMask)) {
411            passMask |= (1 << j);
412         }
413      }
414      break;
415   case PIPE_FUNC_ALWAYS:
416      passMask = MASK_ALL;
417      break;
418   default:
419      assert(0);
420   }
421
422   return passMask;
423}
424
425
426/**
427 * Apply the stencil operator to stencil values.
428 *
429 * \param data->stencilVals  the stencil buffer values (read and written)
430 * \param mask  indicates which pixels to update
431 * \param op  the stencil operator (PIPE_STENCIL_OP_x)
432 * \param ref  the stencil reference value
433 * \param wrtMask  writemask controlling which bits are changed in the
434 *                 stencil values
435 */
436static void
437apply_stencil_op(struct depth_data *data,
438                 unsigned mask, unsigned op, ubyte ref, ubyte wrtMask)
439{
440   unsigned j;
441   ubyte newstencil[TGSI_QUAD_SIZE];
442   ubyte refs[TGSI_QUAD_SIZE];
443
444   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
445      newstencil[j] = data->stencilVals[j];
446      if (data->use_shader_stencil_refs)
447         refs[j] = data->shader_stencil_refs[j];
448      else
449         refs[j] = ref;
450   }
451
452   switch (op) {
453   case PIPE_STENCIL_OP_KEEP:
454      /* no-op */
455      break;
456   case PIPE_STENCIL_OP_ZERO:
457      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
458         if (mask & (1 << j)) {
459            newstencil[j] = 0;
460         }
461      }
462      break;
463   case PIPE_STENCIL_OP_REPLACE:
464      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
465         if (mask & (1 << j)) {
466            newstencil[j] = refs[j];
467         }
468      }
469      break;
470   case PIPE_STENCIL_OP_INCR:
471      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
472         if (mask & (1 << j)) {
473            if (data->stencilVals[j] < STENCIL_MAX) {
474               newstencil[j] = data->stencilVals[j] + 1;
475            }
476         }
477      }
478      break;
479   case PIPE_STENCIL_OP_DECR:
480      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
481         if (mask & (1 << j)) {
482            if (data->stencilVals[j] > 0) {
483               newstencil[j] = data->stencilVals[j] - 1;
484            }
485         }
486      }
487      break;
488   case PIPE_STENCIL_OP_INCR_WRAP:
489      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
490         if (mask & (1 << j)) {
491            newstencil[j] = data->stencilVals[j] + 1;
492         }
493      }
494      break;
495   case PIPE_STENCIL_OP_DECR_WRAP:
496      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
497         if (mask & (1 << j)) {
498            newstencil[j] = data->stencilVals[j] - 1;
499         }
500      }
501      break;
502   case PIPE_STENCIL_OP_INVERT:
503      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
504         if (mask & (1 << j)) {
505            newstencil[j] = ~data->stencilVals[j];
506         }
507      }
508      break;
509   default:
510      assert(0);
511   }
512
513   /*
514    * update the stencil values
515    */
516   if (wrtMask != STENCIL_MAX) {
517      /* apply bit-wise stencil buffer writemask */
518      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
519         data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]);
520      }
521   }
522   else {
523      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
524         data->stencilVals[j] = newstencil[j];
525      }
526   }
527}
528
529
530
531/**
532 * To increase efficiency, we should probably have multiple versions
533 * of this function that are specifically for Z16, Z32 and FP Z buffers.
534 * Try to effectively do that with codegen...
535 */
536static boolean
537depth_test_quad(struct quad_stage *qs,
538                struct depth_data *data,
539                struct quad_header *quad)
540{
541   struct softpipe_context *softpipe = qs->softpipe;
542   unsigned zmask = 0;
543   unsigned j;
544
545#define DEPTHTEST(l, op, r) do { \
546      if (data->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || \
547          data->format == PIPE_FORMAT_Z32_FLOAT) { \
548         for (j = 0; j < TGSI_QUAD_SIZE; j++) { \
549            if (((float *)l)[j] op ((float *)r)[j]) \
550               zmask |= (1 << j); \
551         } \
552      } else { \
553         for (j = 0; j < TGSI_QUAD_SIZE; j++) { \
554            if (l[j] op r[j]) \
555               zmask |= (1 << j); \
556         } \
557      } \
558   } while (0)
559
560   switch (softpipe->depth_stencil->depth_func) {
561   case PIPE_FUNC_NEVER:
562      /* zmask = 0 */
563      break;
564   case PIPE_FUNC_LESS:
565      /* Note this is pretty much a single sse or cell instruction.
566       * Like this:  quad->mask &= (quad->outputs.depth < zzzz);
567       */
568      DEPTHTEST(data->qzzzz,  <, data->bzzzz);
569      break;
570   case PIPE_FUNC_EQUAL:
571      DEPTHTEST(data->qzzzz, ==, data->bzzzz);
572      break;
573   case PIPE_FUNC_LEQUAL:
574      DEPTHTEST(data->qzzzz, <=, data->bzzzz);
575      break;
576   case PIPE_FUNC_GREATER:
577      DEPTHTEST(data->qzzzz,  >, data->bzzzz);
578      break;
579   case PIPE_FUNC_NOTEQUAL:
580      DEPTHTEST(data->qzzzz, !=, data->bzzzz);
581      break;
582   case PIPE_FUNC_GEQUAL:
583      DEPTHTEST(data->qzzzz, >=, data->bzzzz);
584      break;
585   case PIPE_FUNC_ALWAYS:
586      zmask = MASK_ALL;
587      break;
588   default:
589      assert(0);
590   }
591
592   quad->inout.mask &= zmask;
593   if (quad->inout.mask == 0)
594      return FALSE;
595
596   /* Update our internal copy only if writemask set.  Even if
597    * depth.writemask is FALSE, may still need to write out buffer
598    * data due to stencil changes.
599    */
600   if (softpipe->depth_stencil->depth_writemask) {
601      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
602         if (quad->inout.mask & (1 << j)) {
603            data->bzzzz[j] = data->qzzzz[j];
604         }
605      }
606   }
607
608   return TRUE;
609}
610
611
612
613/**
614 * Do stencil (and depth) testing.  Stenciling depends on the outcome of
615 * depth testing.
616 */
617static void
618depth_stencil_test_quad(struct quad_stage *qs,
619                        struct depth_data *data,
620                        struct quad_header *quad)
621{
622   struct softpipe_context *softpipe = qs->softpipe;
623   unsigned func, zFailOp, zPassOp, failOp;
624   ubyte ref, wrtMask, valMask;
625   uint face = quad->input.facing;
626
627   if (!softpipe->depth_stencil->stencil[1].enabled) {
628      /* single-sided stencil test, use front (face=0) state */
629      face = 0;
630   }
631
632   /* 0 = front-face, 1 = back-face */
633   assert(face == 0 || face == 1);
634
635   /* choose front or back face function, operator, etc */
636   /* XXX we could do these initializations once per primitive */
637   func    = softpipe->depth_stencil->stencil[face].func;
638   failOp  = softpipe->depth_stencil->stencil[face].fail_op;
639   zFailOp = softpipe->depth_stencil->stencil[face].zfail_op;
640   zPassOp = softpipe->depth_stencil->stencil[face].zpass_op;
641   ref     = softpipe->stencil_ref.ref_value[face];
642   wrtMask = softpipe->depth_stencil->stencil[face].writemask;
643   valMask = softpipe->depth_stencil->stencil[face].valuemask;
644
645   /* do the stencil test first */
646   {
647      unsigned passMask, failMask;
648      passMask = do_stencil_test(data, func, ref, valMask);
649      failMask = quad->inout.mask & ~passMask;
650      quad->inout.mask &= passMask;
651
652      if (failOp != PIPE_STENCIL_OP_KEEP) {
653         apply_stencil_op(data, failMask, failOp, ref, wrtMask);
654      }
655   }
656
657   if (quad->inout.mask) {
658      /* now the pixels that passed the stencil test are depth tested */
659      if (softpipe->depth_stencil->depth_enabled) {
660         const unsigned origMask = quad->inout.mask;
661
662         depth_test_quad(qs, data, quad);  /* quad->mask is updated */
663
664         /* update stencil buffer values according to z pass/fail result */
665         if (zFailOp != PIPE_STENCIL_OP_KEEP) {
666            const unsigned zFailMask = origMask & ~quad->inout.mask;
667            apply_stencil_op(data, zFailMask, zFailOp, ref, wrtMask);
668         }
669
670         if (zPassOp != PIPE_STENCIL_OP_KEEP) {
671            const unsigned zPassMask = origMask & quad->inout.mask;
672            apply_stencil_op(data, zPassMask, zPassOp, ref, wrtMask);
673         }
674      }
675      else {
676         /* no depth test, apply Zpass operator to stencil buffer values */
677         apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask);
678      }
679   }
680}
681
682
683#define ALPHATEST( FUNC, COMP )                                         \
684   static unsigned                                                      \
685   alpha_test_quads_##FUNC( struct quad_stage *qs,                      \
686                           struct quad_header *quads[],                 \
687                           unsigned nr )                                \
688   {                                                                    \
689      const float ref = qs->softpipe->depth_stencil->alpha_ref_value;   \
690      const uint cbuf = 0; /* only output[0].alpha is tested */         \
691      unsigned pass_nr = 0;                                             \
692      unsigned i;                                                       \
693                                                                        \
694      for (i = 0; i < nr; i++) {                                        \
695         const float *aaaa = quads[i]->output.color[cbuf][3];           \
696         unsigned passMask = 0;                                         \
697                                                                        \
698         if (aaaa[0] COMP ref) passMask |= (1 << 0);                    \
699         if (aaaa[1] COMP ref) passMask |= (1 << 1);                    \
700         if (aaaa[2] COMP ref) passMask |= (1 << 2);                    \
701         if (aaaa[3] COMP ref) passMask |= (1 << 3);                    \
702                                                                        \
703         quads[i]->inout.mask &= passMask;                              \
704                                                                        \
705         if (quads[i]->inout.mask)                                      \
706            quads[pass_nr++] = quads[i];                                \
707      }                                                                 \
708                                                                        \
709      return pass_nr;                                                   \
710   }
711
712
713ALPHATEST( LESS,     < )
714ALPHATEST( EQUAL,    == )
715ALPHATEST( LEQUAL,   <= )
716ALPHATEST( GREATER,  > )
717ALPHATEST( NOTEQUAL, != )
718ALPHATEST( GEQUAL,   >= )
719
720
721/* XXX: Incorporate into shader using KILL_IF.
722 */
723static unsigned
724alpha_test_quads(struct quad_stage *qs,
725                 struct quad_header *quads[],
726                 unsigned nr)
727{
728   switch (qs->softpipe->depth_stencil->alpha_func) {
729   case PIPE_FUNC_LESS:
730      return alpha_test_quads_LESS( qs, quads, nr );
731   case PIPE_FUNC_EQUAL:
732      return alpha_test_quads_EQUAL( qs, quads, nr );
733   case PIPE_FUNC_LEQUAL:
734      return alpha_test_quads_LEQUAL( qs, quads, nr );
735   case PIPE_FUNC_GREATER:
736      return alpha_test_quads_GREATER( qs, quads, nr );
737   case PIPE_FUNC_NOTEQUAL:
738      return alpha_test_quads_NOTEQUAL( qs, quads, nr );
739   case PIPE_FUNC_GEQUAL:
740      return alpha_test_quads_GEQUAL( qs, quads, nr );
741   case PIPE_FUNC_ALWAYS:
742      return nr;
743   case PIPE_FUNC_NEVER:
744   default:
745      return 0;
746   }
747}
748
749
750/**
751 * EXT_depth_bounds_test has some careful language about precision:
752 *
753 *     At what precision is the depth bounds test carried out?
754 *
755 *       RESOLUTION:  For the purposes of the test, the bounds are converted
756 *       to fixed-point as though they were to be written to the depth buffer,
757 *       and the comparison uses those quantized bounds.
758 *
759 * We choose the obvious interpretation that Z32F needs no such conversion.
760 */
761static unsigned
762depth_bounds_test_quads(struct quad_stage *qs,
763                        struct quad_header *quads[],
764                        unsigned nr,
765                        struct depth_data *data)
766{
767   struct pipe_depth_stencil_alpha_state *dsa = qs->softpipe->depth_stencil;
768   unsigned i = 0, pass_nr = 0;
769   enum pipe_format format = util_format_get_depth_only(data->format);
770   double min = dsa->depth_bounds_min;
771   double max = dsa->depth_bounds_max;
772
773   for (i = 0; i < nr; i++) {
774      unsigned j = 0, passMask = 0;
775
776      get_depth_stencil_values(data, quads[i]);
777
778      if (format == PIPE_FORMAT_Z32_FLOAT) {
779         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
780            double z = uif(data->bzzzz[j]);
781
782            if (z >= min && z <= max)
783               passMask |= (1 << j);
784         }
785      } else {
786         unsigned imin, imax;
787
788         if (format == PIPE_FORMAT_Z16_UNORM) {
789            imin = ((unsigned) (min * 65535.0)) & 0xffff;
790            imax = ((unsigned) (max * 65535.0)) & 0xffff;
791         } else if (format == PIPE_FORMAT_Z32_UNORM) {
792            imin = (unsigned) (min * 4294967295.0);
793            imax = (unsigned) (max * 4294967295.0);
794         } else if (format == PIPE_FORMAT_Z24X8_UNORM ||
795                    format == PIPE_FORMAT_X8Z24_UNORM) {
796            imin = ((unsigned) (min * 16777215.0)) & 0xffffff;
797            imax = ((unsigned) (max * 16777215.0)) & 0xffffff;
798         } else {
799            unreachable("Unknown depth buffer format");
800         }
801
802         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
803            unsigned iz = data->bzzzz[j];
804
805            if (iz >= imin && iz <= imax)
806               passMask |= (1 << j);
807         }
808      }
809
810      quads[i]->inout.mask &= passMask;
811
812      if (quads[i]->inout.mask)
813         quads[pass_nr++] = quads[i];
814   }
815
816   return pass_nr;
817}
818
819
/**
 * Number of set bits in a 4-bit quad coverage mask, indexed by the mask
 * value.  Used to add a quad's live pixels to the occlusion counter.
 * Read-only, hence const.
 */
static const unsigned mask_count[16] =
{
   0, 1, 1, 2,                  /* 0x0 - 0x3 */
   1, 2, 2, 3,                  /* 0x4 - 0x7 */
   1, 2, 2, 3,                  /* 0x8 - 0xb */
   2, 3, 3, 4,                  /* 0xc - 0xf */
};
839
840
841
842/**
843 * General depth/stencil test function.  Used when there's no fast-path.
844 */
845static void
846depth_test_quads_fallback(struct quad_stage *qs,
847                          struct quad_header *quads[],
848                          unsigned nr)
849{
850   unsigned i, pass = 0;
851   const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
852   boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth;
853   boolean shader_stencil_ref = fsInfo->writes_stencil;
854   boolean have_zs = !!qs->softpipe->framebuffer.zsbuf;
855   struct depth_data data;
856   unsigned vp_idx = quads[0]->input.viewport_index;
857
858   data.use_shader_stencil_refs = FALSE;
859
860   if (have_zs && (qs->softpipe->depth_stencil->depth_enabled ||
861                   qs->softpipe->depth_stencil->stencil[0].enabled ||
862                   qs->softpipe->depth_stencil->depth_bounds_test)) {
863      float near_val, far_val;
864
865      data.ps = qs->softpipe->framebuffer.zsbuf;
866      data.format = data.ps->format;
867      data.tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache,
868                                     quads[0]->input.x0,
869                                     quads[0]->input.y0, quads[0]->input.layer);
870      data.clamp = !qs->softpipe->rasterizer->depth_clip_near;
871
872      near_val = qs->softpipe->viewports[vp_idx].translate[2] - qs->softpipe->viewports[vp_idx].scale[2];
873      far_val = near_val + (qs->softpipe->viewports[vp_idx].scale[2] * 2.0);
874      data.minval = MIN2(near_val, far_val);
875      data.maxval = MAX2(near_val, far_val);
876   }
877
878   /* EXT_depth_bounds_test says:
879    *
880    *     Where should the depth bounds test take place in the OpenGL fragment
881    *     processing pipeline?
882    *
883    *       RESOLUTION:  After scissor test, before alpha test. In practice,
884    *       this is a logical placement of the test.  An implementation is
885    *       free to perform the test in a manner that is consistent with the
886    *       specified ordering.
887    */
888
889   if (have_zs && qs->softpipe->depth_stencil->depth_bounds_test) {
890      nr = depth_bounds_test_quads(qs, quads, nr, &data);
891   }
892
893   if (qs->softpipe->depth_stencil->alpha_enabled) {
894      nr = alpha_test_quads(qs, quads, nr);
895   }
896
897   if (have_zs && (qs->softpipe->depth_stencil->depth_enabled ||
898                   qs->softpipe->depth_stencil->stencil[0].enabled)) {
899      for (i = 0; i < nr; i++) {
900         get_depth_stencil_values(&data, quads[i]);
901
902         if (qs->softpipe->depth_stencil->depth_enabled) {
903            if (interp_depth)
904               interpolate_quad_depth(quads[i]);
905
906            convert_quad_depth(&data, quads[i]);
907         }
908
909         if (qs->softpipe->depth_stencil->stencil[0].enabled) {
910            if (shader_stencil_ref)
911               convert_quad_stencil(&data, quads[i]);
912
913            depth_stencil_test_quad(qs, &data, quads[i]);
914            write_depth_stencil_values(&data, quads[i]);
915         }
916         else {
917            if (!depth_test_quad(qs, &data, quads[i]))
918               continue;
919
920            if (qs->softpipe->depth_stencil->depth_writemask)
921               write_depth_stencil_values(&data, quads[i]);
922         }
923
924         quads[pass++] = quads[i];
925      }
926
927      nr = pass;
928   }
929
930   if (qs->softpipe->active_query_count) {
931      for (i = 0; i < nr; i++)
932         qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask];
933   }
934
935   if (nr)
936      qs->next->run(qs->next, quads, nr);
937}
938
939
/**
 * Special-case Z testing for 16-bit Zbuffer and Z buffer writes enabled.
 *
 * Each group below defines NAME (plus either OPERATOR or ALWAYS) and then
 * includes sp_quad_depth_test_tmp.h once.  The NAME values are the routines
 * that choose_depth_test() installs for PIPE_FORMAT_Z16_UNORM, one per
 * depth compare function.
 *
 * NOTE(review): the template header is not visible from here.  It
 * presumably emits a function called NAME that compares depths using
 * OPERATOR (or skips the comparison when ALWAYS is defined) and #undefs
 * these macros afterwards -- confirm against sp_quad_depth_test_tmp.h.
 */

/* PIPE_FUNC_LESS */
#define NAME depth_interp_z16_less_write
#define OPERATOR <
#include "sp_quad_depth_test_tmp.h"

/* PIPE_FUNC_EQUAL */
#define NAME depth_interp_z16_equal_write
#define OPERATOR ==
#include "sp_quad_depth_test_tmp.h"

/* PIPE_FUNC_LEQUAL */
#define NAME depth_interp_z16_lequal_write
#define OPERATOR <=
#include "sp_quad_depth_test_tmp.h"

/* PIPE_FUNC_GREATER */
#define NAME depth_interp_z16_greater_write
#define OPERATOR >
#include "sp_quad_depth_test_tmp.h"

/* PIPE_FUNC_NOTEQUAL */
#define NAME depth_interp_z16_notequal_write
#define OPERATOR !=
#include "sp_quad_depth_test_tmp.h"

/* PIPE_FUNC_GEQUAL */
#define NAME depth_interp_z16_gequal_write
#define OPERATOR >=
#include "sp_quad_depth_test_tmp.h"

/* PIPE_FUNC_ALWAYS: no OPERATOR is supplied, only the ALWAYS flag. */
#define NAME depth_interp_z16_always_write
#define ALWAYS 1
#include "sp_quad_depth_test_tmp.h"
971
972
973
974static void
975depth_noop(struct quad_stage *qs,
976           struct quad_header *quads[],
977           unsigned nr)
978{
979   qs->next->run(qs->next, quads, nr);
980}
981
982
983
984static void
985choose_depth_test(struct quad_stage *qs,
986                  struct quad_header *quads[],
987                  unsigned nr)
988{
989   const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
990
991   boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth;
992
993   boolean alpha = qs->softpipe->depth_stencil->alpha_enabled;
994
995   boolean depth = qs->softpipe->depth_stencil->depth_enabled;
996
997   unsigned depthfunc = qs->softpipe->depth_stencil->depth_func;
998
999   boolean stencil = qs->softpipe->depth_stencil->stencil[0].enabled;
1000
1001   boolean depthwrite = qs->softpipe->depth_stencil->depth_writemask;
1002
1003   boolean occlusion = qs->softpipe->active_query_count;
1004
1005   boolean clipped = !qs->softpipe->rasterizer->depth_clip_near;
1006
1007   boolean depth_bounds = qs->softpipe->depth_stencil->depth_bounds_test;
1008
1009   if(!qs->softpipe->framebuffer.zsbuf)
1010      depth = depthwrite = stencil = FALSE;
1011
1012   /* default */
1013   qs->run = depth_test_quads_fallback;
1014
1015   /* look for special cases */
1016   if (!alpha &&
1017       !depth &&
1018       !occlusion &&
1019       !clipped &&
1020       !stencil &&
1021       !depth_bounds) {
1022      qs->run = depth_noop;
1023   }
1024   else if (!alpha &&
1025            interp_depth &&
1026            depth &&
1027            depthwrite &&
1028            !occlusion &&
1029            !clipped &&
1030            !stencil &&
1031            !depth_bounds)
1032   {
1033      if (qs->softpipe->framebuffer.zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
1034         switch (depthfunc) {
1035         case PIPE_FUNC_NEVER:
1036            qs->run = depth_test_quads_fallback;
1037            break;
1038         case PIPE_FUNC_LESS:
1039            qs->run = depth_interp_z16_less_write;
1040            break;
1041         case PIPE_FUNC_EQUAL:
1042            qs->run = depth_interp_z16_equal_write;
1043            break;
1044         case PIPE_FUNC_LEQUAL:
1045            qs->run = depth_interp_z16_lequal_write;
1046            break;
1047         case PIPE_FUNC_GREATER:
1048            qs->run = depth_interp_z16_greater_write;
1049            break;
1050         case PIPE_FUNC_NOTEQUAL:
1051            qs->run = depth_interp_z16_notequal_write;
1052            break;
1053         case PIPE_FUNC_GEQUAL:
1054            qs->run = depth_interp_z16_gequal_write;
1055            break;
1056         case PIPE_FUNC_ALWAYS:
1057            qs->run = depth_interp_z16_always_write;
1058            break;
1059         default:
1060            qs->run = depth_test_quads_fallback;
1061            break;
1062         }
1063      }
1064   }
1065
1066   /* next quad/fragment stage */
1067   qs->run( qs, quads, nr );
1068}
1069
1070
1071
1072static void
1073depth_test_begin(struct quad_stage *qs)
1074{
1075   qs->run = choose_depth_test;
1076   qs->next->begin(qs->next);
1077}
1078
1079
/**
 * Destroy callback for the depth-test stage: releases the stage object.
 *
 * \param qs  the stage to free
 */
static void
depth_test_destroy(struct quad_stage *qs)
{
   FREE(qs);
}
1085
1086
1087struct quad_stage *
1088sp_quad_depth_test_stage(struct softpipe_context *softpipe)
1089{
1090   struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
1091
1092   stage->softpipe = softpipe;
1093   stage->begin = depth_test_begin;
1094   stage->run = choose_depth_test;
1095   stage->destroy = depth_test_destroy;
1096
1097   return stage;
1098}
1099