17ec681f3Smrg/* 27ec681f3Smrg * Copyright 2019 Advanced Micro Devices, Inc. 37ec681f3Smrg * Copyright 2021 Valve Corporation 47ec681f3Smrg * 57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 67ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 77ec681f3Smrg * to deal in the Software without restriction, including without limitation 87ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 97ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 107ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 117ec681f3Smrg * 127ec681f3Smrg * The above copyright notice and this permission notice (including the next 137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 147ec681f3Smrg * Software. 157ec681f3Smrg * 167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 197ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 207ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 217ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 227ec681f3Smrg * IN THE SOFTWARE. 237ec681f3Smrg * 247ec681f3Smrg */ 257ec681f3Smrg 267ec681f3Smrg#include "ac_nir.h" 277ec681f3Smrg#include "nir_builder.h" 287ec681f3Smrg 297ec681f3Smrg/* This code is adapted from ac_llvm_cull.c, hence the copyright to AMD. */ 307ec681f3Smrg 317ec681f3Smrgtypedef struct 327ec681f3Smrg{ 337ec681f3Smrg nir_ssa_def *w_reflection; 347ec681f3Smrg nir_ssa_def *w_accepted; 357ec681f3Smrg nir_ssa_def *all_w_positive; 367ec681f3Smrg nir_ssa_def *any_w_negative; 377ec681f3Smrg} position_w_info; 387ec681f3Smrg 397ec681f3Smrgstatic void 407ec681f3Smrganalyze_position_w(nir_builder *b, nir_ssa_def *pos[3][4], position_w_info *w_info) 417ec681f3Smrg{ 427ec681f3Smrg nir_ssa_def *all_w_negative = nir_imm_bool(b, true); 437ec681f3Smrg 447ec681f3Smrg w_info->w_reflection = nir_imm_bool(b, false); 457ec681f3Smrg w_info->any_w_negative = nir_imm_bool(b, false); 467ec681f3Smrg 477ec681f3Smrg for (unsigned i = 0; i < 3; ++i) { 487ec681f3Smrg nir_ssa_def *neg_w = nir_flt(b, pos[i][3], nir_imm_float(b, 0.0f)); 497ec681f3Smrg w_info->w_reflection = nir_ixor(b, neg_w, w_info->w_reflection); 507ec681f3Smrg w_info->any_w_negative = nir_ior(b, neg_w, w_info->any_w_negative); 517ec681f3Smrg all_w_negative = nir_iand(b, neg_w, all_w_negative); 527ec681f3Smrg } 537ec681f3Smrg 547ec681f3Smrg w_info->all_w_positive = nir_inot(b, w_info->any_w_negative); 557ec681f3Smrg w_info->w_accepted = nir_inot(b, all_w_negative); 567ec681f3Smrg} 577ec681f3Smrg 587ec681f3Smrgstatic nir_ssa_def * 597ec681f3Smrgcull_face(nir_builder *b, nir_ssa_def *pos[3][4], const position_w_info *w_info) 607ec681f3Smrg{ 617ec681f3Smrg nir_ssa_def *det_t0 = nir_fsub(b, pos[2][0], pos[0][0]); 627ec681f3Smrg nir_ssa_def *det_t1 = nir_fsub(b, pos[1][1], pos[0][1]); 637ec681f3Smrg nir_ssa_def *det_t2 = nir_fsub(b, pos[0][0], pos[1][0]); 647ec681f3Smrg nir_ssa_def *det_t3 = nir_fsub(b, pos[0][1], pos[2][1]); 657ec681f3Smrg nir_ssa_def *det_p0 = nir_fmul(b, det_t0, det_t1); 667ec681f3Smrg nir_ssa_def *det_p1 = nir_fmul(b, det_t2, det_t3); 677ec681f3Smrg nir_ssa_def *det = nir_fsub(b, det_p0, det_p1); 687ec681f3Smrg 697ec681f3Smrg det = nir_bcsel(b, w_info->w_reflection, nir_fneg(b, det), det); 707ec681f3Smrg 717ec681f3Smrg nir_ssa_def *front_facing_cw = nir_flt(b, det, nir_imm_float(b, 0.0f)); 727ec681f3Smrg nir_ssa_def *front_facing_ccw = nir_flt(b, nir_imm_float(b, 0.0f), det); 737ec681f3Smrg nir_ssa_def *ccw = nir_build_load_cull_ccw_amd(b); 747ec681f3Smrg nir_ssa_def *front_facing = nir_bcsel(b, ccw, front_facing_ccw, front_facing_cw); 757ec681f3Smrg nir_ssa_def *cull_front = nir_build_load_cull_front_face_enabled_amd(b); 767ec681f3Smrg nir_ssa_def *cull_back = nir_build_load_cull_back_face_enabled_amd(b); 777ec681f3Smrg 787ec681f3Smrg nir_ssa_def *face_culled = nir_bcsel(b, front_facing, cull_front, cull_back); 797ec681f3Smrg 807ec681f3Smrg /* Don't reject NaN and +/-infinity, these are tricky. 817ec681f3Smrg * Just trust fixed-function HW to handle these cases correctly. 827ec681f3Smrg */ 837ec681f3Smrg face_culled = nir_iand(b, face_culled, nir_fisfinite(b, det)); 847ec681f3Smrg 857ec681f3Smrg return nir_inot(b, face_culled); 867ec681f3Smrg} 877ec681f3Smrg 887ec681f3Smrgstatic nir_ssa_def * 897ec681f3Smrgcull_bbox(nir_builder *b, nir_ssa_def *pos[3][4], nir_ssa_def *accepted, const position_w_info *w_info) 907ec681f3Smrg{ 917ec681f3Smrg nir_ssa_def *bbox_accepted = NULL; 927ec681f3Smrg nir_ssa_def *try_cull_bbox = nir_iand(b, accepted, w_info->all_w_positive); 937ec681f3Smrg 947ec681f3Smrg nir_if *if_cull_bbox = nir_push_if(b, try_cull_bbox); 957ec681f3Smrg { 967ec681f3Smrg nir_ssa_def *bbox_min[3] = {0}, *bbox_max[3] = {0}; 977ec681f3Smrg 987ec681f3Smrg for (unsigned chan = 0; chan < 2; ++chan) { 997ec681f3Smrg bbox_min[chan] = nir_fmin(b, pos[0][chan], nir_fmin(b, pos[1][chan], pos[2][chan])); 1007ec681f3Smrg bbox_max[chan] = nir_fmax(b, pos[0][chan], nir_fmax(b, pos[1][chan], pos[2][chan])); 1017ec681f3Smrg } 1027ec681f3Smrg 1037ec681f3Smrg nir_ssa_def *vp_scale[2] = { nir_build_load_viewport_x_scale(b), nir_build_load_viewport_y_scale(b), }; 1047ec681f3Smrg nir_ssa_def *vp_translate[2] = { nir_build_load_viewport_x_offset(b), nir_build_load_viewport_y_offset(b), }; 1057ec681f3Smrg nir_ssa_def *prim_outside_view = nir_imm_false(b); 1067ec681f3Smrg 1077ec681f3Smrg /* Frustrum culling - eliminate triangles that are fully outside the view. */ 1087ec681f3Smrg for (unsigned chan = 0; chan < 2; ++chan) { 1097ec681f3Smrg prim_outside_view = nir_ior(b, prim_outside_view, nir_flt(b, bbox_max[chan], nir_imm_float(b, -1.0f))); 1107ec681f3Smrg prim_outside_view = nir_ior(b, prim_outside_view, nir_flt(b, nir_imm_float(b, 1.0f), bbox_min[chan])); 1117ec681f3Smrg } 1127ec681f3Smrg 1137ec681f3Smrg nir_ssa_def *prim_is_small = NULL; 1147ec681f3Smrg nir_ssa_def *prim_is_small_else = nir_imm_false(b); 1157ec681f3Smrg 1167ec681f3Smrg /* Small primitive filter - eliminate triangles that are too small to affect a sample. */ 1177ec681f3Smrg nir_if *if_cull_small_prims = nir_push_if(b, nir_build_load_cull_small_primitives_enabled_amd(b)); 1187ec681f3Smrg { 1197ec681f3Smrg nir_ssa_def *small_prim_precision = nir_build_load_cull_small_prim_precision_amd(b); 1207ec681f3Smrg prim_is_small = nir_imm_false(b); 1217ec681f3Smrg 1227ec681f3Smrg for (unsigned chan = 0; chan < 2; ++chan) { 1237ec681f3Smrg /* Convert the position to screen-space coordinates. */ 1247ec681f3Smrg nir_ssa_def *min = nir_ffma(b, bbox_min[chan], vp_scale[chan], vp_translate[chan]); 1257ec681f3Smrg nir_ssa_def *max = nir_ffma(b, bbox_max[chan], vp_scale[chan], vp_translate[chan]); 1267ec681f3Smrg 1277ec681f3Smrg /* Scale the bounding box according to precision. */ 1287ec681f3Smrg min = nir_fsub(b, min, small_prim_precision); 1297ec681f3Smrg max = nir_fadd(b, max, small_prim_precision); 1307ec681f3Smrg 1317ec681f3Smrg /* Determine if the bbox intersects the sample point, by checking if the min and max round to the same int. */ 1327ec681f3Smrg min = nir_fround_even(b, min); 1337ec681f3Smrg max = nir_fround_even(b, max); 1347ec681f3Smrg 1357ec681f3Smrg nir_ssa_def *rounded_to_eq = nir_feq(b, min, max); 1367ec681f3Smrg prim_is_small = nir_ior(b, prim_is_small, rounded_to_eq); 1377ec681f3Smrg } 1387ec681f3Smrg } 1397ec681f3Smrg nir_pop_if(b, if_cull_small_prims); 1407ec681f3Smrg 1417ec681f3Smrg prim_is_small = nir_if_phi(b, prim_is_small, prim_is_small_else); 1427ec681f3Smrg nir_ssa_def *prim_invisible = nir_ior(b, prim_outside_view, prim_is_small); 1437ec681f3Smrg 1447ec681f3Smrg bbox_accepted = nir_inot(b, prim_invisible); 1457ec681f3Smrg } 1467ec681f3Smrg nir_pop_if(b, if_cull_bbox); 1477ec681f3Smrg return nir_if_phi(b, bbox_accepted, accepted); 1487ec681f3Smrg} 1497ec681f3Smrg 1507ec681f3Smrgnir_ssa_def * 1517ec681f3Smrgac_nir_cull_triangle(nir_builder *b, 1527ec681f3Smrg nir_ssa_def *initially_accepted, 1537ec681f3Smrg nir_ssa_def *pos[3][4]) 1547ec681f3Smrg{ 1557ec681f3Smrg position_w_info w_info = {0}; 1567ec681f3Smrg analyze_position_w(b, pos, &w_info); 1577ec681f3Smrg 1587ec681f3Smrg nir_ssa_def *accepted = initially_accepted; 1597ec681f3Smrg accepted = nir_iand(b, accepted, w_info.w_accepted); 1607ec681f3Smrg accepted = nir_iand(b, accepted, cull_face(b, pos, &w_info)); 1617ec681f3Smrg accepted = nir_iand(b, accepted, cull_bbox(b, pos, accepted, &w_info)); 1627ec681f3Smrg 1637ec681f3Smrg return accepted; 1647ec681f3Smrg} 165