1/* 2 * Copyright 2019 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sub license, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 18 * USE OR OTHER DEALINGS IN THE SOFTWARE. 19 * 20 * The above copyright notice and this permission notice (including the 21 * next paragraph) shall be included in all copies or substantial portions 22 * of the Software. 23 * 24 */ 25 26#include "ac_llvm_cull.h" 27 28#include <llvm-c/Core.h> 29 30struct ac_position_w_info { 31 /* If a primitive intersects the W=0 plane, it causes a reflection 32 * of the determinant used for face culling. Every vertex behind 33 * the W=0 plane negates the determinant, so having 2 vertices behind 34 * the plane has no effect. This is i1 true if the determinant should be 35 * negated. 36 */ 37 LLVMValueRef w_reflection; 38 39 /* If we simplify the "-w <= p <= w" view culling equation, we get 40 * "-w <= w", which can't be satisfied when w is negative. 41 * In perspective projection, a negative W means that the primitive 42 * is behind the viewer, but the equation is independent of the type 43 * of projection. 44 * 45 * w_accepted is false when all W are negative and therefore 46 * the primitive is invisible. 47 */ 48 LLVMValueRef w_accepted; 49 50 /* The bounding box culling doesn't work and should be skipped when this is true. */ 51 LLVMValueRef any_w_negative; 52}; 53 54static void ac_analyze_position_w(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], 55 struct ac_position_w_info *w, unsigned num_vertices) 56{ 57 LLVMBuilderRef builder = ctx->builder; 58 LLVMValueRef all_w_negative = ctx->i1true; 59 60 w->w_reflection = ctx->i1false; 61 w->any_w_negative = ctx->i1false; 62 63 for (unsigned i = 0; i < num_vertices; i++) { 64 LLVMValueRef neg_w; 65 66 neg_w = LLVMBuildFCmp(builder, LLVMRealOLT, pos[i][3], ctx->f32_0, ""); 67 /* If neg_w is true, negate w_reflection. */ 68 w->w_reflection = LLVMBuildXor(builder, w->w_reflection, neg_w, ""); 69 w->any_w_negative = LLVMBuildOr(builder, w->any_w_negative, neg_w, ""); 70 all_w_negative = LLVMBuildAnd(builder, all_w_negative, neg_w, ""); 71 } 72 w->w_accepted = LLVMBuildNot(builder, all_w_negative, ""); 73} 74 75/* Perform front/back face culling and return true if the primitive is accepted. */ 76static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], 77 struct ac_position_w_info *w, bool cull_front, bool cull_back, 78 bool cull_zero_area) 79{ 80 LLVMBuilderRef builder = ctx->builder; 81 82 if (cull_front && cull_back) 83 return ctx->i1false; 84 85 if (!cull_front && !cull_back && !cull_zero_area) 86 return ctx->i1true; 87 88 /* Front/back face culling. Also if the determinant == 0, the triangle 89 * area is 0. 90 */ 91 LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], ""); 92 LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], ""); 93 LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], ""); 94 LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], ""); 95 LLVMValueRef det_p0 = LLVMBuildFMul(builder, det_t0, det_t1, ""); 96 LLVMValueRef det_p1 = LLVMBuildFMul(builder, det_t2, det_t3, ""); 97 LLVMValueRef det = LLVMBuildFSub(builder, det_p0, det_p1, ""); 98 99 /* Negative W negates the determinant. */ 100 det = LLVMBuildSelect(builder, w->w_reflection, LLVMBuildFNeg(builder, det, ""), det, ""); 101 102 LLVMValueRef accepted = NULL; 103 if (cull_front) { 104 LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE; 105 accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, ""); 106 } else if (cull_back) { 107 LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE; 108 accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, ""); 109 } else if (cull_zero_area) { 110 accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, ""); 111 } 112 return accepted; 113} 114 115/* Perform view culling and small primitive elimination and return true 116 * if the primitive is accepted and initially_accepted == true. */ 117static void cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], 118 LLVMValueRef initially_accepted, struct ac_position_w_info *w, 119 LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2], 120 LLVMValueRef small_prim_precision, struct ac_cull_options *options, 121 ac_cull_accept_func accept_func, void *userdata) 122{ 123 LLVMBuilderRef builder = ctx->builder; 124 125 if (!options->cull_view_xy && !options->cull_view_near_z && !options->cull_view_far_z && 126 !options->cull_small_prims) { 127 if (accept_func) 128 accept_func(ctx, initially_accepted, userdata); 129 return; 130 } 131 132 ac_build_ifcc(ctx, initially_accepted, 10000000); 133 { 134 LLVMValueRef bbox_min[3], bbox_max[3]; 135 LLVMValueRef accepted = ctx->i1true; 136 137 /* Compute the primitive bounding box for easy culling. */ 138 for (unsigned chan = 0; chan < (options->cull_view_near_z || 139 options->cull_view_far_z ? 3 : 2); chan++) { 140 assert(options->num_vertices >= 2); 141 bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]); 142 bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]); 143 144 if (options->num_vertices == 3) { 145 bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]); 146 bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]); 147 } 148 } 149 150 /* View culling. */ 151 if (options->cull_view_xy || options->cull_view_near_z || options->cull_view_far_z) { 152 for (unsigned chan = 0; chan < 3; chan++) { 153 LLVMValueRef visible; 154 155 if ((options->cull_view_xy && chan <= 1) || (options->cull_view_near_z && chan == 2)) { 156 float t = chan == 2 && options->use_halfz_clip_space ? 0 : -1; 157 visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan], 158 LLVMConstReal(ctx->f32, t), ""); 159 accepted = LLVMBuildAnd(builder, accepted, visible, ""); 160 } 161 162 if ((options->cull_view_xy && chan <= 1) || (options->cull_view_far_z && chan == 2)) { 163 visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan], ctx->f32_1, ""); 164 accepted = LLVMBuildAnd(builder, accepted, visible, ""); 165 } 166 } 167 } 168 169 /* Small primitive elimination. */ 170 if (options->cull_small_prims) { 171 /* Assuming a sample position at (0.5, 0.5), if we round 172 * the bounding box min/max extents and the results of 173 * the rounding are equal in either the X or Y direction, 174 * the bounding box does not intersect the sample. 175 * 176 * See these GDC slides for pictures: 177 * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf 178 */ 179 LLVMValueRef min, max, not_equal[2], visible; 180 181 for (unsigned chan = 0; chan < 2; chan++) { 182 /* Convert the position to screen-space coordinates. */ 183 min = ac_build_fmad(ctx, bbox_min[chan], vp_scale[chan], vp_translate[chan]); 184 max = ac_build_fmad(ctx, bbox_max[chan], vp_scale[chan], vp_translate[chan]); 185 /* Scale the bounding box according to the precision of 186 * the rasterizer and the number of MSAA samples. */ 187 min = LLVMBuildFSub(builder, min, small_prim_precision, ""); 188 max = LLVMBuildFAdd(builder, max, small_prim_precision, ""); 189 190 /* Determine if the bbox intersects the sample point. 191 * It also works for MSAA, but vp_scale, vp_translate, 192 * and small_prim_precision are computed differently. 193 */ 194 min = ac_build_round(ctx, min); 195 max = ac_build_round(ctx, max); 196 not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, ""); 197 } 198 visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], ""); 199 accepted = LLVMBuildAnd(builder, accepted, visible, ""); 200 } 201 202 /* Disregard the bounding box culling if any W is negative because the code 203 * doesn't work with that. 204 */ 205 accepted = LLVMBuildOr(builder, accepted, w->any_w_negative, ""); 206 207 if (accept_func) 208 accept_func(ctx, accepted, userdata); 209 } 210 ac_build_endif(ctx, 10000000); 211} 212 213/** 214 * Return i1 true if the primitive is accepted (not culled). 215 * 216 * \param pos Vertex positions 3x vec4 217 * \param initially_accepted AND'ed with the result. Some computations can be 218 * skipped if this is false. 219 * \param vp_scale Viewport scale XY. 220 * For MSAA, multiply them by the number of samples. 221 * \param vp_translate Viewport translation XY. 222 * For MSAA, multiply them by the number of samples. 223 * \param small_prim_precision Precision of small primitive culling. This should 224 * be the same as or greater than the precision of 225 * the rasterizer. Set to num_samples / 2^subpixel_bits. 226 * subpixel_bits are defined by the quantization mode. 227 * \param options See ac_cull_options. 228 * \param accept_func Callback invoked in the inner-most branch where the primitive is accepted. 229 */ 230void ac_cull_primitive(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], 231 LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2], 232 LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision, 233 struct ac_cull_options *options, ac_cull_accept_func accept_func, 234 void *userdata) 235{ 236 struct ac_position_w_info w; 237 ac_analyze_position_w(ctx, pos, &w, options->num_vertices); 238 239 /* W culling. */ 240 LLVMValueRef accepted = options->cull_w ? w.w_accepted : ctx->i1true; 241 accepted = LLVMBuildAnd(ctx->builder, accepted, initially_accepted, ""); 242 243 /* Face culling. */ 244 accepted = LLVMBuildAnd( 245 ctx->builder, accepted, 246 ac_cull_face(ctx, pos, &w, options->cull_front, options->cull_back, options->cull_zero_area), 247 ""); 248 249 /* View culling and small primitive elimination. */ 250 cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, small_prim_precision, options, 251 accept_func, userdata); 252} 253