17ec681f3Smrg/* 27ec681f3Smrg * Copyright 2019 Advanced Micro Devices, Inc. 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the 67ec681f3Smrg * "Software"), to deal in the Software without restriction, including 77ec681f3Smrg * without limitation the rights to use, copy, modify, merge, publish, 87ec681f3Smrg * distribute, sub license, and/or sell copies of the Software, and to 97ec681f3Smrg * permit persons to whom the Software is furnished to do so, subject to 107ec681f3Smrg * the following conditions: 117ec681f3Smrg * 127ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 137ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 147ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 157ec681f3Smrg * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 167ec681f3Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 177ec681f3Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 187ec681f3Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 197ec681f3Smrg * 207ec681f3Smrg * The above copyright notice and this permission notice (including the 217ec681f3Smrg * next paragraph) shall be included in all copies or substantial portions 227ec681f3Smrg * of the Software. 237ec681f3Smrg * 247ec681f3Smrg */ 257ec681f3Smrg 267ec681f3Smrg#include "ac_llvm_cull.h" 277ec681f3Smrg 287ec681f3Smrg#include <llvm-c/Core.h> 297ec681f3Smrg 307ec681f3Smrgstruct ac_position_w_info { 317ec681f3Smrg /* If a primitive intersects the W=0 plane, it causes a reflection 327ec681f3Smrg * of the determinant used for face culling. Every vertex behind 337ec681f3Smrg * the W=0 plane negates the determinant, so having 2 vertices behind 347ec681f3Smrg * the plane has no effect. This is i1 true if the determinant should be 357ec681f3Smrg * negated. 367ec681f3Smrg */ 377ec681f3Smrg LLVMValueRef w_reflection; 387ec681f3Smrg 397ec681f3Smrg /* If we simplify the "-w <= p <= w" view culling equation, we get 407ec681f3Smrg * "-w <= w", which can't be satisfied when w is negative. 417ec681f3Smrg * In perspective projection, a negative W means that the primitive 427ec681f3Smrg * is behind the viewer, but the equation is independent of the type 437ec681f3Smrg * of projection. 447ec681f3Smrg * 457ec681f3Smrg * w_accepted is false when all W are negative and therefore 467ec681f3Smrg * the primitive is invisible. 477ec681f3Smrg */ 487ec681f3Smrg LLVMValueRef w_accepted; 497ec681f3Smrg 507ec681f3Smrg /* The bounding box culling doesn't work and should be skipped when this is true. */ 517ec681f3Smrg LLVMValueRef any_w_negative; 527ec681f3Smrg}; 537ec681f3Smrg 547ec681f3Smrgstatic void ac_analyze_position_w(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], 557ec681f3Smrg struct ac_position_w_info *w, unsigned num_vertices) 567ec681f3Smrg{ 577ec681f3Smrg LLVMBuilderRef builder = ctx->builder; 587ec681f3Smrg LLVMValueRef all_w_negative = ctx->i1true; 597ec681f3Smrg 607ec681f3Smrg w->w_reflection = ctx->i1false; 617ec681f3Smrg w->any_w_negative = ctx->i1false; 627ec681f3Smrg 637ec681f3Smrg for (unsigned i = 0; i < num_vertices; i++) { 647ec681f3Smrg LLVMValueRef neg_w; 657ec681f3Smrg 667ec681f3Smrg neg_w = LLVMBuildFCmp(builder, LLVMRealOLT, pos[i][3], ctx->f32_0, ""); 677ec681f3Smrg /* If neg_w is true, negate w_reflection. */ 687ec681f3Smrg w->w_reflection = LLVMBuildXor(builder, w->w_reflection, neg_w, ""); 697ec681f3Smrg w->any_w_negative = LLVMBuildOr(builder, w->any_w_negative, neg_w, ""); 707ec681f3Smrg all_w_negative = LLVMBuildAnd(builder, all_w_negative, neg_w, ""); 717ec681f3Smrg } 727ec681f3Smrg w->w_accepted = LLVMBuildNot(builder, all_w_negative, ""); 737ec681f3Smrg} 747ec681f3Smrg 757ec681f3Smrg/* Perform front/back face culling and return true if the primitive is accepted. */ 767ec681f3Smrgstatic LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], 777ec681f3Smrg struct ac_position_w_info *w, bool cull_front, bool cull_back, 787ec681f3Smrg bool cull_zero_area) 797ec681f3Smrg{ 807ec681f3Smrg LLVMBuilderRef builder = ctx->builder; 817ec681f3Smrg 827ec681f3Smrg if (cull_front && cull_back) 837ec681f3Smrg return ctx->i1false; 847ec681f3Smrg 857ec681f3Smrg if (!cull_front && !cull_back && !cull_zero_area) 867ec681f3Smrg return ctx->i1true; 877ec681f3Smrg 887ec681f3Smrg /* Front/back face culling. Also if the determinant == 0, the triangle 897ec681f3Smrg * area is 0. 907ec681f3Smrg */ 917ec681f3Smrg LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], ""); 927ec681f3Smrg LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], ""); 937ec681f3Smrg LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], ""); 947ec681f3Smrg LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], ""); 957ec681f3Smrg LLVMValueRef det_p0 = LLVMBuildFMul(builder, det_t0, det_t1, ""); 967ec681f3Smrg LLVMValueRef det_p1 = LLVMBuildFMul(builder, det_t2, det_t3, ""); 977ec681f3Smrg LLVMValueRef det = LLVMBuildFSub(builder, det_p0, det_p1, ""); 987ec681f3Smrg 997ec681f3Smrg /* Negative W negates the determinant. */ 1007ec681f3Smrg det = LLVMBuildSelect(builder, w->w_reflection, LLVMBuildFNeg(builder, det, ""), det, ""); 1017ec681f3Smrg 1027ec681f3Smrg LLVMValueRef accepted = NULL; 1037ec681f3Smrg if (cull_front) { 1047ec681f3Smrg LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE; 1057ec681f3Smrg accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, ""); 1067ec681f3Smrg } else if (cull_back) { 1077ec681f3Smrg LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE; 1087ec681f3Smrg accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, ""); 1097ec681f3Smrg } else if (cull_zero_area) { 1107ec681f3Smrg accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, ""); 1117ec681f3Smrg } 1127ec681f3Smrg return accepted; 1137ec681f3Smrg} 1147ec681f3Smrg 1157ec681f3Smrg/* Perform view culling and small primitive elimination and return true 1167ec681f3Smrg * if the primitive is accepted and initially_accepted == true. */ 1177ec681f3Smrgstatic void cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], 1187ec681f3Smrg LLVMValueRef initially_accepted, struct ac_position_w_info *w, 1197ec681f3Smrg LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2], 1207ec681f3Smrg LLVMValueRef small_prim_precision, struct ac_cull_options *options, 1217ec681f3Smrg ac_cull_accept_func accept_func, void *userdata) 1227ec681f3Smrg{ 1237ec681f3Smrg LLVMBuilderRef builder = ctx->builder; 1247ec681f3Smrg 1257ec681f3Smrg if (!options->cull_view_xy && !options->cull_view_near_z && !options->cull_view_far_z && 1267ec681f3Smrg !options->cull_small_prims) { 1277ec681f3Smrg if (accept_func) 1287ec681f3Smrg accept_func(ctx, initially_accepted, userdata); 1297ec681f3Smrg return; 1307ec681f3Smrg } 1317ec681f3Smrg 1327ec681f3Smrg ac_build_ifcc(ctx, initially_accepted, 10000000); 1337ec681f3Smrg { 1347ec681f3Smrg LLVMValueRef bbox_min[3], bbox_max[3]; 1357ec681f3Smrg LLVMValueRef accepted = ctx->i1true; 1367ec681f3Smrg 1377ec681f3Smrg /* Compute the primitive bounding box for easy culling. */ 1387ec681f3Smrg for (unsigned chan = 0; chan < (options->cull_view_near_z || 1397ec681f3Smrg options->cull_view_far_z ? 3 : 2); chan++) { 1407ec681f3Smrg assert(options->num_vertices >= 2); 1417ec681f3Smrg bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]); 1427ec681f3Smrg bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]); 1437ec681f3Smrg 1447ec681f3Smrg if (options->num_vertices == 3) { 1457ec681f3Smrg bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]); 1467ec681f3Smrg bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]); 1477ec681f3Smrg } 1487ec681f3Smrg } 1497ec681f3Smrg 1507ec681f3Smrg /* View culling. */ 1517ec681f3Smrg if (options->cull_view_xy || options->cull_view_near_z || options->cull_view_far_z) { 1527ec681f3Smrg for (unsigned chan = 0; chan < 3; chan++) { 1537ec681f3Smrg LLVMValueRef visible; 1547ec681f3Smrg 1557ec681f3Smrg if ((options->cull_view_xy && chan <= 1) || (options->cull_view_near_z && chan == 2)) { 1567ec681f3Smrg float t = chan == 2 && options->use_halfz_clip_space ? 0 : -1; 1577ec681f3Smrg visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan], 1587ec681f3Smrg LLVMConstReal(ctx->f32, t), ""); 1597ec681f3Smrg accepted = LLVMBuildAnd(builder, accepted, visible, ""); 1607ec681f3Smrg } 1617ec681f3Smrg 1627ec681f3Smrg if ((options->cull_view_xy && chan <= 1) || (options->cull_view_far_z && chan == 2)) { 1637ec681f3Smrg visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan], ctx->f32_1, ""); 1647ec681f3Smrg accepted = LLVMBuildAnd(builder, accepted, visible, ""); 1657ec681f3Smrg } 1667ec681f3Smrg } 1677ec681f3Smrg } 1687ec681f3Smrg 1697ec681f3Smrg /* Small primitive elimination. */ 1707ec681f3Smrg if (options->cull_small_prims) { 1717ec681f3Smrg /* Assuming a sample position at (0.5, 0.5), if we round 1727ec681f3Smrg * the bounding box min/max extents and the results of 1737ec681f3Smrg * the rounding are equal in either the X or Y direction, 1747ec681f3Smrg * the bounding box does not intersect the sample. 1757ec681f3Smrg * 1767ec681f3Smrg * See these GDC slides for pictures: 1777ec681f3Smrg * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf 1787ec681f3Smrg */ 1797ec681f3Smrg LLVMValueRef min, max, not_equal[2], visible; 1807ec681f3Smrg 1817ec681f3Smrg for (unsigned chan = 0; chan < 2; chan++) { 1827ec681f3Smrg /* Convert the position to screen-space coordinates. */ 1837ec681f3Smrg min = ac_build_fmad(ctx, bbox_min[chan], vp_scale[chan], vp_translate[chan]); 1847ec681f3Smrg max = ac_build_fmad(ctx, bbox_max[chan], vp_scale[chan], vp_translate[chan]); 1857ec681f3Smrg /* Scale the bounding box according to the precision of 1867ec681f3Smrg * the rasterizer and the number of MSAA samples. */ 1877ec681f3Smrg min = LLVMBuildFSub(builder, min, small_prim_precision, ""); 1887ec681f3Smrg max = LLVMBuildFAdd(builder, max, small_prim_precision, ""); 1897ec681f3Smrg 1907ec681f3Smrg /* Determine if the bbox intersects the sample point. 1917ec681f3Smrg * It also works for MSAA, but vp_scale, vp_translate, 1927ec681f3Smrg * and small_prim_precision are computed differently. 1937ec681f3Smrg */ 1947ec681f3Smrg min = ac_build_round(ctx, min); 1957ec681f3Smrg max = ac_build_round(ctx, max); 1967ec681f3Smrg not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, ""); 1977ec681f3Smrg } 1987ec681f3Smrg visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], ""); 1997ec681f3Smrg accepted = LLVMBuildAnd(builder, accepted, visible, ""); 2007ec681f3Smrg } 2017ec681f3Smrg 2027ec681f3Smrg /* Disregard the bounding box culling if any W is negative because the code 2037ec681f3Smrg * doesn't work with that. 2047ec681f3Smrg */ 2057ec681f3Smrg accepted = LLVMBuildOr(builder, accepted, w->any_w_negative, ""); 2067ec681f3Smrg 2077ec681f3Smrg if (accept_func) 2087ec681f3Smrg accept_func(ctx, accepted, userdata); 2097ec681f3Smrg } 2107ec681f3Smrg ac_build_endif(ctx, 10000000); 2117ec681f3Smrg} 2127ec681f3Smrg 2137ec681f3Smrg/** 2147ec681f3Smrg * Return i1 true if the primitive is accepted (not culled). 2157ec681f3Smrg * 2167ec681f3Smrg * \param pos Vertex positions 3x vec4 2177ec681f3Smrg * \param initially_accepted AND'ed with the result. Some computations can be 2187ec681f3Smrg * skipped if this is false. 2197ec681f3Smrg * \param vp_scale Viewport scale XY. 2207ec681f3Smrg * For MSAA, multiply them by the number of samples. 2217ec681f3Smrg * \param vp_translate Viewport translation XY. 2227ec681f3Smrg * For MSAA, multiply them by the number of samples. 2237ec681f3Smrg * \param small_prim_precision Precision of small primitive culling. This should 2247ec681f3Smrg * be the same as or greater than the precision of 2257ec681f3Smrg * the rasterizer. Set to num_samples / 2^subpixel_bits. 2267ec681f3Smrg * subpixel_bits are defined by the quantization mode. 2277ec681f3Smrg * \param options See ac_cull_options. 2287ec681f3Smrg * \param accept_func Callback invoked in the inner-most branch where the primitive is accepted. 2297ec681f3Smrg */ 2307ec681f3Smrgvoid ac_cull_primitive(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], 2317ec681f3Smrg LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2], 2327ec681f3Smrg LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision, 2337ec681f3Smrg struct ac_cull_options *options, ac_cull_accept_func accept_func, 2347ec681f3Smrg void *userdata) 2357ec681f3Smrg{ 2367ec681f3Smrg struct ac_position_w_info w; 2377ec681f3Smrg ac_analyze_position_w(ctx, pos, &w, options->num_vertices); 2387ec681f3Smrg 2397ec681f3Smrg /* W culling. */ 2407ec681f3Smrg LLVMValueRef accepted = options->cull_w ? w.w_accepted : ctx->i1true; 2417ec681f3Smrg accepted = LLVMBuildAnd(ctx->builder, accepted, initially_accepted, ""); 2427ec681f3Smrg 2437ec681f3Smrg /* Face culling. */ 2447ec681f3Smrg accepted = LLVMBuildAnd( 2457ec681f3Smrg ctx->builder, accepted, 2467ec681f3Smrg ac_cull_face(ctx, pos, &w, options->cull_front, options->cull_back, options->cull_zero_area), 2477ec681f3Smrg ""); 2487ec681f3Smrg 2497ec681f3Smrg /* View culling and small primitive elimination. */ 2507ec681f3Smrg cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, small_prim_precision, options, 2517ec681f3Smrg accept_func, userdata); 2527ec681f3Smrg} 253