1/* 2 * Copyright © 2020 Valve Corporation 3 * SPDX-License-Identifier: MIT 4 */ 5 6#include "tu_private.h" 7#include "nir_builder.h" 8 9/* Some a6xx variants cannot support a non-contiguous multiview mask. Instead, 10 * inside the shader something like this needs to be inserted: 11 * 12 * gl_Position = ((1ull << gl_ViewIndex) & view_mask) ? gl_Position : vec4(0.); 13 * 14 * Scan backwards until we find the gl_Position write (there should only be 15 * one). 16 */ 17static bool 18lower_multiview_mask(nir_shader *nir, uint32_t *mask) 19{ 20 nir_function_impl *impl = nir_shader_get_entrypoint(nir); 21 22 if (util_is_power_of_two_or_zero(*mask + 1)) { 23 nir_metadata_preserve(impl, nir_metadata_all); 24 return false; 25 } 26 27 nir_builder b; 28 nir_builder_init(&b, impl); 29 30 uint32_t old_mask = *mask; 31 *mask = BIT(util_logbase2(old_mask) + 1) - 1; 32 33 nir_foreach_block_reverse(block, impl) { 34 nir_foreach_instr_reverse(instr, block) { 35 if (instr->type != nir_instr_type_intrinsic) 36 continue; 37 38 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 39 if (intrin->intrinsic != nir_intrinsic_store_deref) 40 continue; 41 42 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 43 if (!nir_deref_mode_is(deref, nir_var_shader_out)) 44 continue; 45 46 nir_variable *var = nir_deref_instr_get_variable(deref); 47 if (var->data.location != VARYING_SLOT_POS) 48 continue; 49 50 assert(intrin->src[1].is_ssa); 51 nir_ssa_def *orig_src = intrin->src[1].ssa; 52 b.cursor = nir_before_instr(instr); 53 54 /* ((1ull << gl_ViewIndex) & mask) != 0 */ 55 nir_ssa_def *cmp = 56 nir_i2b(&b, nir_iand(&b, nir_imm_int(&b, old_mask), 57 nir_ishl(&b, nir_imm_int(&b, 1), 58 nir_load_view_index(&b)))); 59 60 nir_ssa_def *src = nir_bcsel(&b, cmp, orig_src, nir_imm_float(&b, 0.)); 61 nir_instr_rewrite_src(instr, &intrin->src[1], nir_src_for_ssa(src)); 62 63 nir_metadata_preserve(impl, nir_metadata_block_index | 64 nir_metadata_dominance); 65 return true; 66 } 67 } 68 69 nir_metadata_preserve(impl, nir_metadata_all); 70 return false; 71} 72 73bool 74tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, bool *multi_pos_output, 75 struct tu_device *dev) 76{ 77 *multi_pos_output = false; 78 79 bool progress = false; 80 81 if (!dev->physical_device->info->a6xx.supports_multiview_mask) 82 NIR_PASS(progress, nir, lower_multiview_mask, &mask); 83 84 unsigned num_views = util_logbase2(mask) + 1; 85 86 /* Blob doesn't apply multipos optimization starting from 11 views 87 * even on a650, however in practice, with the limit of 16 views, 88 * tests pass on a640/a650 and fail on a630. 89 */ 90 unsigned max_views_for_multipos = 91 dev->physical_device->info->a6xx.supports_multiview_mask ? 16 : 10; 92 93 /* Speculatively assign output locations so that we know num_outputs. We 94 * will assign output locations for real after this pass. 95 */ 96 unsigned num_outputs; 97 nir_assign_io_var_locations(nir, nir_var_shader_out, &num_outputs, MESA_SHADER_VERTEX); 98 99 /* In addition to the generic checks done by NIR, check that we don't 100 * overflow VPC with the extra copies of gl_Position. 101 */ 102 if (likely(!(dev->physical_device->instance->debug_flags & TU_DEBUG_NOMULTIPOS)) && 103 num_views <= max_views_for_multipos && num_outputs + (num_views - 1) <= 32 && 104 nir_can_lower_multiview(nir)) { 105 *multi_pos_output = true; 106 107 /* It appears that the multiview mask is ignored when multi-position 108 * output is enabled, so we have to write 0 to inactive views ourselves. 109 */ 110 NIR_PASS(progress, nir, lower_multiview_mask, &mask); 111 112 NIR_PASS_V(nir, nir_lower_multiview, mask); 113 progress = true; 114 } 115 116 return progress; 117} 118 119