1/*
2 * Copyright © 2020 Valve Corporation
3 * SPDX-License-Identifier: MIT
4 */
5
6#include "tu_private.h"
7#include "nir_builder.h"
8
9/* Some a6xx variants cannot support a non-contiguous multiview mask. Instead,
10 * inside the shader something like this needs to be inserted:
11 *
12 * gl_Position = ((1ull << gl_ViewIndex) & view_mask) ? gl_Position : vec4(0.);
13 *
14 * Scan backwards until we find the gl_Position write (there should only be
15 * one).
16 */
17static bool
18lower_multiview_mask(nir_shader *nir, uint32_t *mask)
19{
20   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
21
22   if (util_is_power_of_two_or_zero(*mask + 1)) {
23      nir_metadata_preserve(impl, nir_metadata_all);
24      return false;
25   }
26
27   nir_builder b;
28   nir_builder_init(&b, impl);
29
30   uint32_t old_mask = *mask;
31   *mask = BIT(util_logbase2(old_mask) + 1) - 1;
32
33   nir_foreach_block_reverse(block, impl) {
34      nir_foreach_instr_reverse(instr, block) {
35         if (instr->type != nir_instr_type_intrinsic)
36            continue;
37
38         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
39         if (intrin->intrinsic != nir_intrinsic_store_deref)
40            continue;
41
42         nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
43         if (!nir_deref_mode_is(deref, nir_var_shader_out))
44            continue;
45
46         nir_variable *var = nir_deref_instr_get_variable(deref);
47         if (var->data.location != VARYING_SLOT_POS)
48            continue;
49
50         assert(intrin->src[1].is_ssa);
51         nir_ssa_def *orig_src = intrin->src[1].ssa;
52         b.cursor = nir_before_instr(instr);
53
54         /* ((1ull << gl_ViewIndex) & mask) != 0 */
55         nir_ssa_def *cmp =
56            nir_i2b(&b, nir_iand(&b, nir_imm_int(&b, old_mask),
57                                  nir_ishl(&b, nir_imm_int(&b, 1),
58                                           nir_load_view_index(&b))));
59
60         nir_ssa_def *src = nir_bcsel(&b, cmp, orig_src, nir_imm_float(&b, 0.));
61         nir_instr_rewrite_src(instr, &intrin->src[1], nir_src_for_ssa(src));
62
63         nir_metadata_preserve(impl, nir_metadata_block_index |
64                                     nir_metadata_dominance);
65         return true;
66      }
67   }
68
69   nir_metadata_preserve(impl, nir_metadata_all);
70   return false;
71}
72
73bool
74tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, bool *multi_pos_output,
75                       struct tu_device *dev)
76{
77   *multi_pos_output = false;
78
79   bool progress = false;
80
81   if (!dev->physical_device->info->a6xx.supports_multiview_mask)
82      NIR_PASS(progress, nir, lower_multiview_mask, &mask);
83
84   unsigned num_views = util_logbase2(mask) + 1;
85
86   /* Blob doesn't apply multipos optimization starting from 11 views
87    * even on a650, however in practice, with the limit of 16 views,
88    * tests pass on a640/a650 and fail on a630.
89    */
90   unsigned max_views_for_multipos =
91      dev->physical_device->info->a6xx.supports_multiview_mask ? 16 : 10;
92
93   /* Speculatively assign output locations so that we know num_outputs. We
94    * will assign output locations for real after this pass.
95    */
96   unsigned num_outputs;
97   nir_assign_io_var_locations(nir, nir_var_shader_out, &num_outputs, MESA_SHADER_VERTEX);
98
99   /* In addition to the generic checks done by NIR, check that we don't
100    * overflow VPC with the extra copies of gl_Position.
101    */
102   if (likely(!(dev->physical_device->instance->debug_flags & TU_DEBUG_NOMULTIPOS)) &&
103       num_views <= max_views_for_multipos && num_outputs + (num_views - 1) <= 32 &&
104       nir_can_lower_multiview(nir)) {
105      *multi_pos_output = true;
106
107      /* It appears that the multiview mask is ignored when multi-position
108       * output is enabled, so we have to write 0 to inactive views ourselves.
109       */
110      NIR_PASS(progress, nir, lower_multiview_mask, &mask);
111
112      NIR_PASS_V(nir, nir_lower_multiview, mask);
113      progress = true;
114   }
115
116   return progress;
117}
118
119