17ec681f3Smrg/*
27ec681f3Smrg * Copyright 2019 Advanced Micro Devices, Inc.
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the
67ec681f3Smrg * "Software"), to deal in the Software without restriction, including
77ec681f3Smrg * without limitation the rights to use, copy, modify, merge, publish,
87ec681f3Smrg * distribute, sub license, and/or sell copies of the Software, and to
97ec681f3Smrg * permit persons to whom the Software is furnished to do so, subject to
107ec681f3Smrg * the following conditions:
117ec681f3Smrg *
127ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
137ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
147ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
157ec681f3Smrg * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
167ec681f3Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
177ec681f3Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
187ec681f3Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE.
197ec681f3Smrg *
207ec681f3Smrg * The above copyright notice and this permission notice (including the
217ec681f3Smrg * next paragraph) shall be included in all copies or substantial portions
227ec681f3Smrg * of the Software.
237ec681f3Smrg *
247ec681f3Smrg */
257ec681f3Smrg
267ec681f3Smrg#include "ac_llvm_cull.h"
277ec681f3Smrg
287ec681f3Smrg#include <llvm-c/Core.h>
297ec681f3Smrg
307ec681f3Smrgstruct ac_position_w_info {
317ec681f3Smrg   /* If a primitive intersects the W=0 plane, it causes a reflection
327ec681f3Smrg    * of the determinant used for face culling. Every vertex behind
337ec681f3Smrg    * the W=0 plane negates the determinant, so having 2 vertices behind
347ec681f3Smrg    * the plane has no effect. This is i1 true if the determinant should be
357ec681f3Smrg    * negated.
367ec681f3Smrg    */
377ec681f3Smrg   LLVMValueRef w_reflection;
387ec681f3Smrg
397ec681f3Smrg   /* If we simplify the "-w <= p <= w" view culling equation, we get
407ec681f3Smrg    * "-w <= w", which can't be satisfied when w is negative.
417ec681f3Smrg    * In perspective projection, a negative W means that the primitive
427ec681f3Smrg    * is behind the viewer, but the equation is independent of the type
437ec681f3Smrg    * of projection.
447ec681f3Smrg    *
457ec681f3Smrg    * w_accepted is false when all W are negative and therefore
467ec681f3Smrg    * the primitive is invisible.
477ec681f3Smrg    */
487ec681f3Smrg   LLVMValueRef w_accepted;
497ec681f3Smrg
507ec681f3Smrg   /* The bounding box culling doesn't work and should be skipped when this is true. */
517ec681f3Smrg   LLVMValueRef any_w_negative;
527ec681f3Smrg};
537ec681f3Smrg
547ec681f3Smrgstatic void ac_analyze_position_w(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
557ec681f3Smrg                                  struct ac_position_w_info *w, unsigned num_vertices)
567ec681f3Smrg{
577ec681f3Smrg   LLVMBuilderRef builder = ctx->builder;
587ec681f3Smrg   LLVMValueRef all_w_negative = ctx->i1true;
597ec681f3Smrg
607ec681f3Smrg   w->w_reflection = ctx->i1false;
617ec681f3Smrg   w->any_w_negative = ctx->i1false;
627ec681f3Smrg
637ec681f3Smrg   for (unsigned i = 0; i < num_vertices; i++) {
647ec681f3Smrg      LLVMValueRef neg_w;
657ec681f3Smrg
667ec681f3Smrg      neg_w = LLVMBuildFCmp(builder, LLVMRealOLT, pos[i][3], ctx->f32_0, "");
677ec681f3Smrg      /* If neg_w is true, negate w_reflection. */
687ec681f3Smrg      w->w_reflection = LLVMBuildXor(builder, w->w_reflection, neg_w, "");
697ec681f3Smrg      w->any_w_negative = LLVMBuildOr(builder, w->any_w_negative, neg_w, "");
707ec681f3Smrg      all_w_negative = LLVMBuildAnd(builder, all_w_negative, neg_w, "");
717ec681f3Smrg   }
727ec681f3Smrg   w->w_accepted = LLVMBuildNot(builder, all_w_negative, "");
737ec681f3Smrg}
747ec681f3Smrg
757ec681f3Smrg/* Perform front/back face culling and return true if the primitive is accepted. */
767ec681f3Smrgstatic LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
777ec681f3Smrg                                 struct ac_position_w_info *w, bool cull_front, bool cull_back,
787ec681f3Smrg                                 bool cull_zero_area)
797ec681f3Smrg{
807ec681f3Smrg   LLVMBuilderRef builder = ctx->builder;
817ec681f3Smrg
827ec681f3Smrg   if (cull_front && cull_back)
837ec681f3Smrg      return ctx->i1false;
847ec681f3Smrg
857ec681f3Smrg   if (!cull_front && !cull_back && !cull_zero_area)
867ec681f3Smrg      return ctx->i1true;
877ec681f3Smrg
887ec681f3Smrg   /* Front/back face culling. Also if the determinant == 0, the triangle
897ec681f3Smrg    * area is 0.
907ec681f3Smrg    */
917ec681f3Smrg   LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], "");
927ec681f3Smrg   LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], "");
937ec681f3Smrg   LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], "");
947ec681f3Smrg   LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], "");
957ec681f3Smrg   LLVMValueRef det_p0 = LLVMBuildFMul(builder, det_t0, det_t1, "");
967ec681f3Smrg   LLVMValueRef det_p1 = LLVMBuildFMul(builder, det_t2, det_t3, "");
977ec681f3Smrg   LLVMValueRef det = LLVMBuildFSub(builder, det_p0, det_p1, "");
987ec681f3Smrg
997ec681f3Smrg   /* Negative W negates the determinant. */
1007ec681f3Smrg   det = LLVMBuildSelect(builder, w->w_reflection, LLVMBuildFNeg(builder, det, ""), det, "");
1017ec681f3Smrg
1027ec681f3Smrg   LLVMValueRef accepted = NULL;
1037ec681f3Smrg   if (cull_front) {
1047ec681f3Smrg      LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE;
1057ec681f3Smrg      accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
1067ec681f3Smrg   } else if (cull_back) {
1077ec681f3Smrg      LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE;
1087ec681f3Smrg      accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
1097ec681f3Smrg   } else if (cull_zero_area) {
1107ec681f3Smrg      accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, "");
1117ec681f3Smrg   }
1127ec681f3Smrg   return accepted;
1137ec681f3Smrg}
1147ec681f3Smrg
1157ec681f3Smrg/* Perform view culling and small primitive elimination and return true
1167ec681f3Smrg * if the primitive is accepted and initially_accepted == true. */
1177ec681f3Smrgstatic void cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
1187ec681f3Smrg                      LLVMValueRef initially_accepted, struct ac_position_w_info *w,
1197ec681f3Smrg                      LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2],
1207ec681f3Smrg                      LLVMValueRef small_prim_precision, struct ac_cull_options *options,
1217ec681f3Smrg                      ac_cull_accept_func accept_func, void *userdata)
1227ec681f3Smrg{
1237ec681f3Smrg   LLVMBuilderRef builder = ctx->builder;
1247ec681f3Smrg
1257ec681f3Smrg   if (!options->cull_view_xy && !options->cull_view_near_z && !options->cull_view_far_z &&
1267ec681f3Smrg       !options->cull_small_prims) {
1277ec681f3Smrg      if (accept_func)
1287ec681f3Smrg         accept_func(ctx, initially_accepted, userdata);
1297ec681f3Smrg      return;
1307ec681f3Smrg   }
1317ec681f3Smrg
1327ec681f3Smrg   ac_build_ifcc(ctx, initially_accepted, 10000000);
1337ec681f3Smrg   {
1347ec681f3Smrg      LLVMValueRef bbox_min[3], bbox_max[3];
1357ec681f3Smrg      LLVMValueRef accepted = ctx->i1true;
1367ec681f3Smrg
1377ec681f3Smrg      /* Compute the primitive bounding box for easy culling. */
1387ec681f3Smrg      for (unsigned chan = 0; chan < (options->cull_view_near_z ||
1397ec681f3Smrg                                      options->cull_view_far_z ? 3 : 2); chan++) {
1407ec681f3Smrg         assert(options->num_vertices >= 2);
1417ec681f3Smrg         bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]);
1427ec681f3Smrg         bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]);
1437ec681f3Smrg
1447ec681f3Smrg         if (options->num_vertices == 3) {
1457ec681f3Smrg            bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);
1467ec681f3Smrg            bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
1477ec681f3Smrg         }
1487ec681f3Smrg      }
1497ec681f3Smrg
1507ec681f3Smrg      /* View culling. */
1517ec681f3Smrg      if (options->cull_view_xy || options->cull_view_near_z || options->cull_view_far_z) {
1527ec681f3Smrg         for (unsigned chan = 0; chan < 3; chan++) {
1537ec681f3Smrg            LLVMValueRef visible;
1547ec681f3Smrg
1557ec681f3Smrg            if ((options->cull_view_xy && chan <= 1) || (options->cull_view_near_z && chan == 2)) {
1567ec681f3Smrg               float t = chan == 2 && options->use_halfz_clip_space ? 0 : -1;
1577ec681f3Smrg               visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan],
1587ec681f3Smrg                                       LLVMConstReal(ctx->f32, t), "");
1597ec681f3Smrg               accepted = LLVMBuildAnd(builder, accepted, visible, "");
1607ec681f3Smrg            }
1617ec681f3Smrg
1627ec681f3Smrg            if ((options->cull_view_xy && chan <= 1) || (options->cull_view_far_z && chan == 2)) {
1637ec681f3Smrg               visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan], ctx->f32_1, "");
1647ec681f3Smrg               accepted = LLVMBuildAnd(builder, accepted, visible, "");
1657ec681f3Smrg            }
1667ec681f3Smrg         }
1677ec681f3Smrg      }
1687ec681f3Smrg
1697ec681f3Smrg      /* Small primitive elimination. */
1707ec681f3Smrg      if (options->cull_small_prims) {
1717ec681f3Smrg         /* Assuming a sample position at (0.5, 0.5), if we round
1727ec681f3Smrg          * the bounding box min/max extents and the results of
1737ec681f3Smrg          * the rounding are equal in either the X or Y direction,
1747ec681f3Smrg          * the bounding box does not intersect the sample.
1757ec681f3Smrg          *
1767ec681f3Smrg          * See these GDC slides for pictures:
1777ec681f3Smrg          * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf
1787ec681f3Smrg          */
1797ec681f3Smrg         LLVMValueRef min, max, not_equal[2], visible;
1807ec681f3Smrg
1817ec681f3Smrg         for (unsigned chan = 0; chan < 2; chan++) {
1827ec681f3Smrg            /* Convert the position to screen-space coordinates. */
1837ec681f3Smrg            min = ac_build_fmad(ctx, bbox_min[chan], vp_scale[chan], vp_translate[chan]);
1847ec681f3Smrg            max = ac_build_fmad(ctx, bbox_max[chan], vp_scale[chan], vp_translate[chan]);
1857ec681f3Smrg            /* Scale the bounding box according to the precision of
1867ec681f3Smrg             * the rasterizer and the number of MSAA samples. */
1877ec681f3Smrg            min = LLVMBuildFSub(builder, min, small_prim_precision, "");
1887ec681f3Smrg            max = LLVMBuildFAdd(builder, max, small_prim_precision, "");
1897ec681f3Smrg
1907ec681f3Smrg            /* Determine if the bbox intersects the sample point.
1917ec681f3Smrg             * It also works for MSAA, but vp_scale, vp_translate,
1927ec681f3Smrg             * and small_prim_precision are computed differently.
1937ec681f3Smrg             */
1947ec681f3Smrg            min = ac_build_round(ctx, min);
1957ec681f3Smrg            max = ac_build_round(ctx, max);
1967ec681f3Smrg            not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, "");
1977ec681f3Smrg         }
1987ec681f3Smrg         visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], "");
1997ec681f3Smrg         accepted = LLVMBuildAnd(builder, accepted, visible, "");
2007ec681f3Smrg      }
2017ec681f3Smrg
2027ec681f3Smrg      /* Disregard the bounding box culling if any W is negative because the code
2037ec681f3Smrg       * doesn't work with that.
2047ec681f3Smrg       */
2057ec681f3Smrg      accepted = LLVMBuildOr(builder, accepted, w->any_w_negative, "");
2067ec681f3Smrg
2077ec681f3Smrg      if (accept_func)
2087ec681f3Smrg         accept_func(ctx, accepted, userdata);
2097ec681f3Smrg   }
2107ec681f3Smrg   ac_build_endif(ctx, 10000000);
2117ec681f3Smrg}
2127ec681f3Smrg
2137ec681f3Smrg/**
2147ec681f3Smrg * Return i1 true if the primitive is accepted (not culled).
2157ec681f3Smrg *
2167ec681f3Smrg * \param pos                   Vertex positions 3x vec4
2177ec681f3Smrg * \param initially_accepted    AND'ed with the result. Some computations can be
2187ec681f3Smrg *                              skipped if this is false.
2197ec681f3Smrg * \param vp_scale              Viewport scale XY.
2207ec681f3Smrg *                              For MSAA, multiply them by the number of samples.
2217ec681f3Smrg * \param vp_translate          Viewport translation XY.
2227ec681f3Smrg *                              For MSAA, multiply them by the number of samples.
2237ec681f3Smrg * \param small_prim_precision  Precision of small primitive culling. This should
2247ec681f3Smrg *                              be the same as or greater than the precision of
2257ec681f3Smrg *                              the rasterizer. Set to num_samples / 2^subpixel_bits.
2267ec681f3Smrg *                              subpixel_bits are defined by the quantization mode.
2277ec681f3Smrg * \param options               See ac_cull_options.
2287ec681f3Smrg * \param accept_func           Callback invoked in the inner-most branch where the primitive is accepted.
2297ec681f3Smrg */
2307ec681f3Smrgvoid ac_cull_primitive(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
2317ec681f3Smrg                       LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2],
2327ec681f3Smrg                       LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision,
2337ec681f3Smrg                       struct ac_cull_options *options, ac_cull_accept_func accept_func,
2347ec681f3Smrg                       void *userdata)
2357ec681f3Smrg{
2367ec681f3Smrg   struct ac_position_w_info w;
2377ec681f3Smrg   ac_analyze_position_w(ctx, pos, &w, options->num_vertices);
2387ec681f3Smrg
2397ec681f3Smrg   /* W culling. */
2407ec681f3Smrg   LLVMValueRef accepted = options->cull_w ? w.w_accepted : ctx->i1true;
2417ec681f3Smrg   accepted = LLVMBuildAnd(ctx->builder, accepted, initially_accepted, "");
2427ec681f3Smrg
2437ec681f3Smrg   /* Face culling. */
2447ec681f3Smrg   accepted = LLVMBuildAnd(
2457ec681f3Smrg      ctx->builder, accepted,
2467ec681f3Smrg      ac_cull_face(ctx, pos, &w, options->cull_front, options->cull_back, options->cull_zero_area),
2477ec681f3Smrg      "");
2487ec681f3Smrg
2497ec681f3Smrg   /* View culling and small primitive elimination. */
2507ec681f3Smrg   cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, small_prim_precision, options,
2517ec681f3Smrg             accept_func, userdata);
2527ec681f3Smrg}
253