1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2015 Red Hat 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg * 23b8e80941Smrg * Authors: 24b8e80941Smrg * Rob Clark <robclark@freedesktop.org> 25b8e80941Smrg */ 26b8e80941Smrg 27b8e80941Smrg#include "nir.h" 28b8e80941Smrg#include "nir_builder.h" 29b8e80941Smrg 30b8e80941Smrg#define MAX_CLIP_PLANES 8 31b8e80941Smrg 32b8e80941Smrg/* Generates the lowering code for user-clip-planes, generating CLIPDIST 33b8e80941Smrg * from UCP[n] + CLIPVERTEX or POSITION. Additionally, an optional pass 34b8e80941Smrg * for fragment shaders to insert conditional kills based on the inter- 35b8e80941Smrg * polated CLIPDIST 36b8e80941Smrg * 37b8e80941Smrg * NOTE: should be run after nir_lower_outputs_to_temporaries() (or at 38b8e80941Smrg * least in scenarios where you can count on each output written once 39b8e80941Smrg * and only once). 40b8e80941Smrg */ 41b8e80941Smrg 42b8e80941Smrg 43b8e80941Smrgstatic nir_variable * 44b8e80941Smrgcreate_clipdist_var(nir_shader *shader, unsigned drvloc, 45b8e80941Smrg bool output, gl_varying_slot slot) 46b8e80941Smrg{ 47b8e80941Smrg nir_variable *var = rzalloc(shader, nir_variable); 48b8e80941Smrg 49b8e80941Smrg var->data.driver_location = drvloc; 50b8e80941Smrg var->type = glsl_vec4_type(); 51b8e80941Smrg var->data.mode = output ? nir_var_shader_out : nir_var_shader_in; 52b8e80941Smrg var->name = ralloc_asprintf(var, "clipdist_%d", drvloc); 53b8e80941Smrg var->data.index = 0; 54b8e80941Smrg var->data.location = slot; 55b8e80941Smrg 56b8e80941Smrg if (output) { 57b8e80941Smrg exec_list_push_tail(&shader->outputs, &var->node); 58b8e80941Smrg shader->num_outputs++; /* TODO use type_size() */ 59b8e80941Smrg } 60b8e80941Smrg else { 61b8e80941Smrg exec_list_push_tail(&shader->inputs, &var->node); 62b8e80941Smrg shader->num_inputs++; /* TODO use type_size() */ 63b8e80941Smrg } 64b8e80941Smrg return var; 65b8e80941Smrg} 66b8e80941Smrg 67b8e80941Smrgstatic void 68b8e80941Smrgstore_clipdist_output(nir_builder *b, nir_variable *out, nir_ssa_def **val) 69b8e80941Smrg{ 70b8e80941Smrg nir_intrinsic_instr *store; 71b8e80941Smrg 72b8e80941Smrg store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output); 73b8e80941Smrg store->num_components = 4; 74b8e80941Smrg nir_intrinsic_set_base(store, out->data.driver_location); 75b8e80941Smrg nir_intrinsic_set_write_mask(store, 0xf); 76b8e80941Smrg store->src[0].ssa = nir_vec4(b, val[0], val[1], val[2], val[3]); 77b8e80941Smrg store->src[0].is_ssa = true; 78b8e80941Smrg store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); 79b8e80941Smrg nir_builder_instr_insert(b, &store->instr); 80b8e80941Smrg} 81b8e80941Smrg 82b8e80941Smrgstatic void 83b8e80941Smrgload_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val) 84b8e80941Smrg{ 85b8e80941Smrg nir_intrinsic_instr *load; 86b8e80941Smrg 87b8e80941Smrg load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input); 88b8e80941Smrg load->num_components = 4; 89b8e80941Smrg nir_intrinsic_set_base(load, in->data.driver_location); 90b8e80941Smrg load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); 91b8e80941Smrg nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL); 92b8e80941Smrg nir_builder_instr_insert(b, &load->instr); 93b8e80941Smrg 94b8e80941Smrg val[0] = nir_channel(b, &load->dest.ssa, 0); 95b8e80941Smrg val[1] = nir_channel(b, &load->dest.ssa, 1); 96b8e80941Smrg val[2] = nir_channel(b, &load->dest.ssa, 2); 97b8e80941Smrg val[3] = nir_channel(b, &load->dest.ssa, 3); 98b8e80941Smrg} 99b8e80941Smrg 100b8e80941Smrgstatic nir_ssa_def * 101b8e80941Smrgfind_output_in_block(nir_block *block, unsigned drvloc) 102b8e80941Smrg{ 103b8e80941Smrg nir_foreach_instr(instr, block) { 104b8e80941Smrg 105b8e80941Smrg if (instr->type == nir_instr_type_intrinsic) { 106b8e80941Smrg nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 107b8e80941Smrg if ((intr->intrinsic == nir_intrinsic_store_output) && 108b8e80941Smrg nir_intrinsic_base(intr) == drvloc) { 109b8e80941Smrg assert(intr->src[0].is_ssa); 110b8e80941Smrg assert(nir_src_is_const(intr->src[1])); 111b8e80941Smrg return intr->src[0].ssa; 112b8e80941Smrg } 113b8e80941Smrg } 114b8e80941Smrg } 115b8e80941Smrg 116b8e80941Smrg return NULL; 117b8e80941Smrg} 118b8e80941Smrg 119b8e80941Smrg/* TODO: maybe this would be a useful helper? 120b8e80941Smrg * NOTE: assumes each output is written exactly once (and unconditionally) 121b8e80941Smrg * so if needed nir_lower_outputs_to_temporaries() 122b8e80941Smrg */ 123b8e80941Smrgstatic nir_ssa_def * 124b8e80941Smrgfind_output(nir_shader *shader, unsigned drvloc) 125b8e80941Smrg{ 126b8e80941Smrg nir_ssa_def *def = NULL; 127b8e80941Smrg nir_foreach_function(function, shader) { 128b8e80941Smrg if (function->impl) { 129b8e80941Smrg nir_foreach_block_reverse(block, function->impl) { 130b8e80941Smrg nir_ssa_def *new_def = find_output_in_block(block, drvloc); 131b8e80941Smrg assert(!(new_def && def)); 132b8e80941Smrg def = new_def; 133b8e80941Smrg#if !defined(DEBUG) 134b8e80941Smrg /* for debug builds, scan entire shader to assert 135b8e80941Smrg * if output is written multiple times. For release 136b8e80941Smrg * builds just assume all is well and bail when we 137b8e80941Smrg * find first: 138b8e80941Smrg */ 139b8e80941Smrg if (def) 140b8e80941Smrg break; 141b8e80941Smrg#endif 142b8e80941Smrg } 143b8e80941Smrg } 144b8e80941Smrg } 145b8e80941Smrg 146b8e80941Smrg return def; 147b8e80941Smrg} 148b8e80941Smrg 149b8e80941Smrg/* 150b8e80941Smrg * VS lowering 151b8e80941Smrg */ 152b8e80941Smrg 153b8e80941Smrg/* ucp_enables is bitmask of enabled ucps. Actual ucp values are 154b8e80941Smrg * passed in to shader via user_clip_plane system-values 155b8e80941Smrg * 156b8e80941Smrg * If use_vars is true, the pass will use variable loads and stores instead 157b8e80941Smrg * of working with store_output intrinsics. 158b8e80941Smrg */ 159b8e80941Smrgbool 160b8e80941Smrgnir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, bool use_vars) 161b8e80941Smrg{ 162b8e80941Smrg nir_function_impl *impl = nir_shader_get_entrypoint(shader); 163b8e80941Smrg nir_ssa_def *clipdist[MAX_CLIP_PLANES]; 164b8e80941Smrg nir_builder b; 165b8e80941Smrg int maxloc = -1; 166b8e80941Smrg nir_variable *position = NULL; 167b8e80941Smrg nir_variable *clipvertex = NULL; 168b8e80941Smrg nir_ssa_def *cv; 169b8e80941Smrg nir_variable *out[2] = { NULL }; 170b8e80941Smrg 171b8e80941Smrg if (!ucp_enables) 172b8e80941Smrg return false; 173b8e80941Smrg 174b8e80941Smrg nir_builder_init(&b, impl); 175b8e80941Smrg 176b8e80941Smrg /* NIR should ensure that, even in case of loops/if-else, there 177b8e80941Smrg * should be only a single predecessor block to end_block, which 178b8e80941Smrg * makes the perfect place to insert the clipdist calculations. 179b8e80941Smrg * 180b8e80941Smrg * NOTE: in case of early returns, these would have to be lowered 181b8e80941Smrg * to jumps to end_block predecessor in a previous pass. Not sure 182b8e80941Smrg * if there is a good way to sanity check this, but for now the 183b8e80941Smrg * users of this pass don't support sub-routines. 184b8e80941Smrg */ 185b8e80941Smrg assert(impl->end_block->predecessors->entries == 1); 186b8e80941Smrg b.cursor = nir_after_cf_list(&impl->body); 187b8e80941Smrg 188b8e80941Smrg /* find clipvertex/position outputs: */ 189b8e80941Smrg nir_foreach_variable(var, &shader->outputs) { 190b8e80941Smrg switch (var->data.location) { 191b8e80941Smrg case VARYING_SLOT_POS: 192b8e80941Smrg position = var; 193b8e80941Smrg break; 194b8e80941Smrg case VARYING_SLOT_CLIP_VERTEX: 195b8e80941Smrg clipvertex = var; 196b8e80941Smrg break; 197b8e80941Smrg case VARYING_SLOT_CLIP_DIST0: 198b8e80941Smrg case VARYING_SLOT_CLIP_DIST1: 199b8e80941Smrg /* if shader is already writing CLIPDIST, then 200b8e80941Smrg * there should be no user-clip-planes to deal 201b8e80941Smrg * with. 202b8e80941Smrg * 203b8e80941Smrg * We assume nir_remove_dead_variables has removed the clipdist 204b8e80941Smrg * variables if they're not written. 205b8e80941Smrg */ 206b8e80941Smrg return false; 207b8e80941Smrg } 208b8e80941Smrg } 209b8e80941Smrg 210b8e80941Smrg if (use_vars) { 211b8e80941Smrg cv = nir_load_var(&b, clipvertex ? clipvertex : position); 212b8e80941Smrg 213b8e80941Smrg if (clipvertex) { 214b8e80941Smrg exec_node_remove(&clipvertex->node); 215b8e80941Smrg clipvertex->data.mode = nir_var_shader_temp; 216b8e80941Smrg exec_list_push_tail(&shader->globals, &clipvertex->node); 217b8e80941Smrg } 218b8e80941Smrg } else { 219b8e80941Smrg if (clipvertex) 220b8e80941Smrg cv = find_output(shader, clipvertex->data.driver_location); 221b8e80941Smrg else if (position) 222b8e80941Smrg cv = find_output(shader, position->data.driver_location); 223b8e80941Smrg else 224b8e80941Smrg return false; 225b8e80941Smrg } 226b8e80941Smrg 227b8e80941Smrg /* insert CLIPDIST outputs: */ 228b8e80941Smrg if (ucp_enables & 0x0f) 229b8e80941Smrg out[0] = 230b8e80941Smrg create_clipdist_var(shader, ++maxloc, true, VARYING_SLOT_CLIP_DIST0); 231b8e80941Smrg if (ucp_enables & 0xf0) 232b8e80941Smrg out[1] = 233b8e80941Smrg create_clipdist_var(shader, ++maxloc, true, VARYING_SLOT_CLIP_DIST1); 234b8e80941Smrg 235b8e80941Smrg for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) { 236b8e80941Smrg if (ucp_enables & (1 << plane)) { 237b8e80941Smrg nir_ssa_def *ucp = nir_load_user_clip_plane(&b, plane); 238b8e80941Smrg 239b8e80941Smrg /* calculate clipdist[plane] - dot(ucp, cv): */ 240b8e80941Smrg clipdist[plane] = nir_fdot4(&b, ucp, cv); 241b8e80941Smrg } else { 242b8e80941Smrg /* 0.0 == don't-clip == disabled: */ 243b8e80941Smrg clipdist[plane] = nir_imm_float(&b, 0.0); 244b8e80941Smrg } 245b8e80941Smrg } 246b8e80941Smrg 247b8e80941Smrg if (use_vars) { 248b8e80941Smrg if (ucp_enables & 0x0f) 249b8e80941Smrg nir_store_var(&b, out[0], nir_vec(&b, clipdist, 4), 0xf); 250b8e80941Smrg if (ucp_enables & 0xf0) 251b8e80941Smrg nir_store_var(&b, out[1], nir_vec(&b, &clipdist[4], 4), 0xf); 252b8e80941Smrg } else { 253b8e80941Smrg if (ucp_enables & 0x0f) 254b8e80941Smrg store_clipdist_output(&b, out[0], &clipdist[0]); 255b8e80941Smrg if (ucp_enables & 0xf0) 256b8e80941Smrg store_clipdist_output(&b, out[1], &clipdist[4]); 257b8e80941Smrg } 258b8e80941Smrg 259b8e80941Smrg nir_metadata_preserve(impl, nir_metadata_dominance); 260b8e80941Smrg 261b8e80941Smrg return true; 262b8e80941Smrg} 263b8e80941Smrg 264b8e80941Smrg/* 265b8e80941Smrg * FS lowering 266b8e80941Smrg */ 267b8e80941Smrg 268b8e80941Smrgstatic void 269b8e80941Smrglower_clip_fs(nir_function_impl *impl, unsigned ucp_enables, 270b8e80941Smrg nir_variable **in) 271b8e80941Smrg{ 272b8e80941Smrg nir_ssa_def *clipdist[MAX_CLIP_PLANES]; 273b8e80941Smrg nir_builder b; 274b8e80941Smrg 275b8e80941Smrg nir_builder_init(&b, impl); 276b8e80941Smrg b.cursor = nir_before_cf_list(&impl->body); 277b8e80941Smrg 278b8e80941Smrg if (ucp_enables & 0x0f) 279b8e80941Smrg load_clipdist_input(&b, in[0], &clipdist[0]); 280b8e80941Smrg if (ucp_enables & 0xf0) 281b8e80941Smrg load_clipdist_input(&b, in[1], &clipdist[4]); 282b8e80941Smrg 283b8e80941Smrg for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) { 284b8e80941Smrg if (ucp_enables & (1 << plane)) { 285b8e80941Smrg nir_intrinsic_instr *discard; 286b8e80941Smrg nir_ssa_def *cond; 287b8e80941Smrg 288b8e80941Smrg cond = nir_flt(&b, clipdist[plane], nir_imm_float(&b, 0.0)); 289b8e80941Smrg 290b8e80941Smrg discard = nir_intrinsic_instr_create(b.shader, 291b8e80941Smrg nir_intrinsic_discard_if); 292b8e80941Smrg discard->src[0] = nir_src_for_ssa(cond); 293b8e80941Smrg nir_builder_instr_insert(&b, &discard->instr); 294b8e80941Smrg 295b8e80941Smrg b.shader->info.fs.uses_discard = true; 296b8e80941Smrg } 297b8e80941Smrg } 298b8e80941Smrg 299b8e80941Smrg nir_metadata_preserve(impl, nir_metadata_dominance); 300b8e80941Smrg} 301b8e80941Smrg 302b8e80941Smrg/* insert conditional kill based on interpolated CLIPDIST 303b8e80941Smrg */ 304b8e80941Smrgbool 305b8e80941Smrgnir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables) 306b8e80941Smrg{ 307b8e80941Smrg nir_variable *in[2]; 308b8e80941Smrg int maxloc = -1; 309b8e80941Smrg 310b8e80941Smrg if (!ucp_enables) 311b8e80941Smrg return false; 312b8e80941Smrg 313b8e80941Smrg nir_foreach_variable(var, &shader->inputs) { 314b8e80941Smrg int loc = var->data.driver_location; 315b8e80941Smrg 316b8e80941Smrg /* keep track of last used driver-location.. we'll be 317b8e80941Smrg * appending CLIP_DIST0/CLIP_DIST1 after last existing 318b8e80941Smrg * input: 319b8e80941Smrg */ 320b8e80941Smrg maxloc = MAX2(maxloc, loc); 321b8e80941Smrg } 322b8e80941Smrg 323b8e80941Smrg /* The shader won't normally have CLIPDIST inputs, so we 324b8e80941Smrg * must add our own: 325b8e80941Smrg */ 326b8e80941Smrg /* insert CLIPDIST outputs: */ 327b8e80941Smrg if (ucp_enables & 0x0f) 328b8e80941Smrg in[0] = 329b8e80941Smrg create_clipdist_var(shader, ++maxloc, false, 330b8e80941Smrg VARYING_SLOT_CLIP_DIST0); 331b8e80941Smrg if (ucp_enables & 0xf0) 332b8e80941Smrg in[1] = 333b8e80941Smrg create_clipdist_var(shader, ++maxloc, false, 334b8e80941Smrg VARYING_SLOT_CLIP_DIST1); 335b8e80941Smrg 336b8e80941Smrg nir_foreach_function(function, shader) { 337b8e80941Smrg if (!strcmp(function->name, "main")) 338b8e80941Smrg lower_clip_fs(function->impl, ucp_enables, in); 339b8e80941Smrg } 340b8e80941Smrg 341b8e80941Smrg return true; 342b8e80941Smrg} 343