17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2016 Intel Corporation
37ec681f3Smrg * Copyright © 2020 Valve Corporation
47ec681f3Smrg *
57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
67ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
77ec681f3Smrg * to deal in the Software without restriction, including without limitation
87ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
97ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
107ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
117ec681f3Smrg *
127ec681f3Smrg * The above copyright notice and this permission notice (including the next
137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
147ec681f3Smrg * Software.
157ec681f3Smrg *
167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
197ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
207ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
217ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
227ec681f3Smrg * IN THE SOFTWARE.
237ec681f3Smrg */
247ec681f3Smrg
257ec681f3Smrg#include "nir_control_flow.h"
267ec681f3Smrg#include "nir_builder.h"
277ec681f3Smrg
287ec681f3Smrg/**
 * This file implements an optimization for multiview. Some GPUs have a
307ec681f3Smrg * special mode which allows the vertex shader (or last stage in the geometry
317ec681f3Smrg * pipeline) to create multiple primitives in different layers of the
327ec681f3Smrg * framebuffer at once by writing multiple copies of gl_Position. The
337ec681f3Smrg * assumption is that in most uses of multiview, the only use of gl_ViewIndex
347ec681f3Smrg * is to change the position to implement the parallax effect, and other
357ec681f3Smrg * varyings will be the same between the different views. We put the body of
367ec681f3Smrg * the original vertex shader in a loop, writing to a different copy of
377ec681f3Smrg * gl_Position each loop iteration, and then let other optimizations clean up
387ec681f3Smrg * the mess.
397ec681f3Smrg */
407ec681f3Smrg
417ec681f3Smrgstatic bool
427ec681f3Smrgshader_writes_to_memory(nir_shader *shader)
437ec681f3Smrg{
447ec681f3Smrg   /* With multiview, we would need to ensure that memory writes happen either
457ec681f3Smrg    * once or once per view. Since combination of multiview and memory writes
467ec681f3Smrg    * is not expected, we'll just skip this optimization in this case.
477ec681f3Smrg    */
487ec681f3Smrg
497ec681f3Smrg   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);
507ec681f3Smrg
517ec681f3Smrg   nir_foreach_block(block, entrypoint) {
527ec681f3Smrg      nir_foreach_instr(instr, block) {
537ec681f3Smrg         if (instr->type != nir_instr_type_intrinsic)
547ec681f3Smrg            continue;
557ec681f3Smrg         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
567ec681f3Smrg
577ec681f3Smrg         switch (intrin->intrinsic) {
587ec681f3Smrg         case nir_intrinsic_deref_atomic_add:
597ec681f3Smrg         case nir_intrinsic_deref_atomic_imin:
607ec681f3Smrg         case nir_intrinsic_deref_atomic_umin:
617ec681f3Smrg         case nir_intrinsic_deref_atomic_imax:
627ec681f3Smrg         case nir_intrinsic_deref_atomic_umax:
637ec681f3Smrg         case nir_intrinsic_deref_atomic_and:
647ec681f3Smrg         case nir_intrinsic_deref_atomic_or:
657ec681f3Smrg         case nir_intrinsic_deref_atomic_xor:
667ec681f3Smrg         case nir_intrinsic_deref_atomic_exchange:
677ec681f3Smrg         case nir_intrinsic_deref_atomic_comp_swap:
687ec681f3Smrg         case nir_intrinsic_store_ssbo:
697ec681f3Smrg         case nir_intrinsic_ssbo_atomic_add:
707ec681f3Smrg         case nir_intrinsic_ssbo_atomic_imin:
717ec681f3Smrg         case nir_intrinsic_ssbo_atomic_umin:
727ec681f3Smrg         case nir_intrinsic_ssbo_atomic_imax:
737ec681f3Smrg         case nir_intrinsic_ssbo_atomic_umax:
747ec681f3Smrg         case nir_intrinsic_ssbo_atomic_and:
757ec681f3Smrg         case nir_intrinsic_ssbo_atomic_or:
767ec681f3Smrg         case nir_intrinsic_ssbo_atomic_xor:
777ec681f3Smrg         case nir_intrinsic_ssbo_atomic_exchange:
787ec681f3Smrg         case nir_intrinsic_ssbo_atomic_comp_swap:
797ec681f3Smrg         case nir_intrinsic_store_shared:
807ec681f3Smrg         case nir_intrinsic_shared_atomic_add:
817ec681f3Smrg         case nir_intrinsic_shared_atomic_imin:
827ec681f3Smrg         case nir_intrinsic_shared_atomic_umin:
837ec681f3Smrg         case nir_intrinsic_shared_atomic_imax:
847ec681f3Smrg         case nir_intrinsic_shared_atomic_umax:
857ec681f3Smrg         case nir_intrinsic_shared_atomic_and:
867ec681f3Smrg         case nir_intrinsic_shared_atomic_or:
877ec681f3Smrg         case nir_intrinsic_shared_atomic_xor:
887ec681f3Smrg         case nir_intrinsic_shared_atomic_exchange:
897ec681f3Smrg         case nir_intrinsic_shared_atomic_comp_swap:
907ec681f3Smrg         case nir_intrinsic_image_deref_store:
917ec681f3Smrg         case nir_intrinsic_image_deref_atomic_add:
927ec681f3Smrg         case nir_intrinsic_image_deref_atomic_fadd:
937ec681f3Smrg         case nir_intrinsic_image_deref_atomic_umin:
947ec681f3Smrg         case nir_intrinsic_image_deref_atomic_umax:
957ec681f3Smrg         case nir_intrinsic_image_deref_atomic_imin:
967ec681f3Smrg         case nir_intrinsic_image_deref_atomic_imax:
977ec681f3Smrg         case nir_intrinsic_image_deref_atomic_fmin:
987ec681f3Smrg         case nir_intrinsic_image_deref_atomic_fmax:
997ec681f3Smrg         case nir_intrinsic_image_deref_atomic_and:
1007ec681f3Smrg         case nir_intrinsic_image_deref_atomic_or:
1017ec681f3Smrg         case nir_intrinsic_image_deref_atomic_xor:
1027ec681f3Smrg         case nir_intrinsic_image_deref_atomic_exchange:
1037ec681f3Smrg         case nir_intrinsic_image_deref_atomic_comp_swap:
1047ec681f3Smrg            return true;
1057ec681f3Smrg
1067ec681f3Smrg         default:
1077ec681f3Smrg            /* Keep walking. */
1087ec681f3Smrg            break;
1097ec681f3Smrg         }
1107ec681f3Smrg      }
1117ec681f3Smrg   }
1127ec681f3Smrg
1137ec681f3Smrg   return false;
1147ec681f3Smrg}
1157ec681f3Smrg
1167ec681f3Smrgbool
1177ec681f3Smrgnir_shader_uses_view_index(nir_shader *shader)
1187ec681f3Smrg{
1197ec681f3Smrg   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);
1207ec681f3Smrg
1217ec681f3Smrg   nir_foreach_block(block, entrypoint) {
1227ec681f3Smrg      nir_foreach_instr(instr, block) {
1237ec681f3Smrg         if (instr->type != nir_instr_type_intrinsic)
1247ec681f3Smrg            continue;
1257ec681f3Smrg
1267ec681f3Smrg         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1277ec681f3Smrg         if (intrin->intrinsic == nir_intrinsic_load_view_index)
1287ec681f3Smrg            return true;
1297ec681f3Smrg      }
1307ec681f3Smrg   }
1317ec681f3Smrg
1327ec681f3Smrg   return false;
1337ec681f3Smrg}
1347ec681f3Smrg
1357ec681f3Smrgstatic bool
1367ec681f3Smrgshader_only_position_uses_view_index(nir_shader *shader)
1377ec681f3Smrg{
1387ec681f3Smrg   nir_shader *shader_no_position = nir_shader_clone(NULL, shader);
1397ec681f3Smrg   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader_no_position);
1407ec681f3Smrg
1417ec681f3Smrg   /* Remove the store position from a cloned shader. */
1427ec681f3Smrg   nir_foreach_block(block, entrypoint) {
1437ec681f3Smrg      nir_foreach_instr_safe(instr, block) {
1447ec681f3Smrg         if (instr->type != nir_instr_type_intrinsic)
1457ec681f3Smrg            continue;
1467ec681f3Smrg
1477ec681f3Smrg         nir_intrinsic_instr *store = nir_instr_as_intrinsic(instr);
1487ec681f3Smrg         if (store->intrinsic != nir_intrinsic_store_deref)
1497ec681f3Smrg            continue;
1507ec681f3Smrg
1517ec681f3Smrg         nir_variable *var = nir_intrinsic_get_var(store, 0);
1527ec681f3Smrg         if (var->data.location != VARYING_SLOT_POS)
1537ec681f3Smrg            continue;
1547ec681f3Smrg
1557ec681f3Smrg         nir_instr_remove(&store->instr);
1567ec681f3Smrg      }
1577ec681f3Smrg   }
1587ec681f3Smrg
1597ec681f3Smrg   /* Clean up shader so unused load_view_index intrinsics are removed. */
1607ec681f3Smrg   bool progress;
1617ec681f3Smrg   do {
1627ec681f3Smrg      progress = false;
1637ec681f3Smrg      progress |= nir_opt_dead_cf(shader_no_position);
1647ec681f3Smrg
1657ec681f3Smrg      /* Peephole select will drop if-blocks that have then and else empty,
1667ec681f3Smrg       * which will remove the usage of an SSA in the condition.
1677ec681f3Smrg       */
1687ec681f3Smrg      progress |= nir_opt_peephole_select(shader_no_position, 0, false, false);
1697ec681f3Smrg
1707ec681f3Smrg      progress |= nir_opt_dce(shader_no_position);
1717ec681f3Smrg   } while (progress);
1727ec681f3Smrg
1737ec681f3Smrg   bool uses_view_index = nir_shader_uses_view_index(shader_no_position);
1747ec681f3Smrg
1757ec681f3Smrg   ralloc_free(shader_no_position);
1767ec681f3Smrg   return !uses_view_index;
1777ec681f3Smrg}
1787ec681f3Smrg
1797ec681f3Smrg/* Return true if it's safe to call nir_lower_multiview() on this vertex
1807ec681f3Smrg * shader. Note that this only handles driver-agnostic checks, i.e. things
1817ec681f3Smrg * which would make nir_lower_multiview() incorrect. Any driver-specific
1827ec681f3Smrg * checks, e.g. for sufficient varying space or performance considerations,
1837ec681f3Smrg * should be handled in the driver.
1847ec681f3Smrg *
1857ec681f3Smrg * Note that we don't handle the more complex checks needed for lowering
1867ec681f3Smrg * pipelines with geometry or tessellation shaders.
1877ec681f3Smrg */
1887ec681f3Smrg
1897ec681f3Smrgbool
1907ec681f3Smrgnir_can_lower_multiview(nir_shader *shader)
1917ec681f3Smrg{
1927ec681f3Smrg   bool writes_position = false;
1937ec681f3Smrg   nir_foreach_shader_out_variable(var, shader) {
1947ec681f3Smrg      if (var->data.location == VARYING_SLOT_POS) {
1957ec681f3Smrg         writes_position = true;
1967ec681f3Smrg         break;
1977ec681f3Smrg      }
1987ec681f3Smrg   }
1997ec681f3Smrg
2007ec681f3Smrg   /* Don't bother handling this edge case. */
2017ec681f3Smrg   if (!writes_position)
2027ec681f3Smrg      return false;
2037ec681f3Smrg
2047ec681f3Smrg   return !shader_writes_to_memory(shader) &&
2057ec681f3Smrg          shader_only_position_uses_view_index(shader);
2067ec681f3Smrg}
2077ec681f3Smrg
/**
 * The lowering. Call with the last active geometry stage.
 *
 * Wraps the original shader body in a loop that runs once per view set in
 * \p view_mask: gl_Position is turned into a per-view array written one
 * element per iteration, and every load_view_index is rewritten to the
 * current iteration's view index.  Always returns true — the shader is
 * unconditionally transformed.
 */

bool
nir_lower_multiview(nir_shader *shader, uint32_t view_mask)
{
   assert(shader->info.stage != MESA_SHADER_FRAGMENT);
   int view_count = util_bitcount(view_mask);

   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);

   /* Update position to refer to an array: vec4 -> vec4[view_count],
    * marked per_view so drivers know each element targets one view.
    */
   nir_variable *pos_var = NULL;
   nir_foreach_shader_out_variable(var, shader) {
      if (var->data.location == VARYING_SLOT_POS) {
         assert(var->type == glsl_vec4_type());
         var->type = glsl_array_type(glsl_vec4_type(), view_count, 0);
         var->data.per_view = true;
         pos_var = var;
         break;
      }
   }

   /* nir_can_lower_multiview() guarantees a position output exists. */
   assert(pos_var);

   /* Pull the entire original body out of the entrypoint; it is
    * re-inserted inside the per-view loop built below.
    */
   nir_cf_list body;
   nir_cf_list_extract(&body, &entrypoint->body);

   nir_builder b;
   nir_builder_init(&b, entrypoint);
   b.cursor = nir_after_cf_list(&entrypoint->body);

   /* Loop index takes the values 0 .. view_count - 1 (the loop breaks
    * once it reaches view_count).
    */
   nir_variable *loop_index_var =
      nir_local_variable_create(entrypoint, glsl_uint_type(), "loop_index");
   nir_deref_instr *loop_index_deref = nir_build_deref_var(&b, loop_index_var);
   nir_store_deref(&b, loop_index_deref, nir_imm_int(&b, 0), 1);

   /* Array of view index values that are active in the loop.  Note that the
    * loop index only matches the view index if there are no gaps in the
    * view_mask.
    */
   nir_variable *view_index_var = nir_local_variable_create(
      entrypoint, glsl_array_type(glsl_uint_type(), view_count, 0), "view_index");
   nir_deref_instr *view_index_deref = nir_build_deref_var(&b, view_index_var);
   {
      /* Fill the array with the positions of the set bits of view_mask,
       * in ascending order.
       */
      int array_position = 0;
      uint32_t view_mask_temp = view_mask;
      while (view_mask_temp) {
         uint32_t view_index = u_bit_scan(&view_mask_temp);
         nir_store_deref(&b, nir_build_deref_array_imm(&b, view_index_deref, array_position),
                         nir_imm_int(&b, view_index), 1);
         array_position++;
      }
   }

   /* Create the equivalent of
    *
    *    while (true):
    *       if (loop_index >= view_count):
    *          break
    *
    *       view_index = active_indices[loop_index]
    *       pos_deref = &pos[loop_index]
    *
    *       # Placeholder for the body to be reinserted.
    *
    *       loop_index += 1
    *
    * Later both `view_index` and `pos_deref` will be used to rewrite the
    * original shader body.
    */

   nir_loop* loop = nir_push_loop(&b);

   nir_ssa_def *loop_index = nir_load_deref(&b, loop_index_deref);
   nir_ssa_def *cmp = nir_ige(&b, loop_index, nir_imm_int(&b, view_count));
   nir_if *loop_check = nir_push_if(&b, cmp);
   nir_jump(&b, nir_jump_break);
   nir_pop_if(&b, loop_check);

   nir_ssa_def *view_index =
      nir_load_deref(&b, nir_build_deref_array(&b, view_index_deref, loop_index));
   nir_deref_instr *pos_deref =
      nir_build_deref_array(&b, nir_build_deref_var(&b, pos_var), loop_index);

   nir_store_deref(&b, loop_index_deref, nir_iadd_imm(&b, loop_index, 1), 1);
   nir_pop_loop(&b, loop);

   /* Reinsert the body between pos_deref and the loop_index increment, so
    * the rewrites below can reference view_index and pos_deref (both
    * defined earlier in the same loop).
    */
   b.cursor = nir_after_instr(&pos_deref->instr);
   nir_cf_reinsert(&body, b.cursor);

   /* Rewrite the reinserted body: view-index loads become the current
    * iteration's view index, and position stores target pos[loop_index].
    */
   nir_foreach_block(block, entrypoint) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_view_index: {
            assert(intrin->dest.is_ssa);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa, view_index);
            break;
         }

         case nir_intrinsic_store_deref: {
            nir_variable *var = nir_intrinsic_get_var(intrin, 0);
            if (var == pos_var) {
               nir_deref_instr *old_deref = nir_src_as_deref(intrin->src[0]);

               /* Redirect the store to the current view's array element. */
               nir_instr_rewrite_src(instr, &intrin->src[0],
                                     nir_src_for_ssa(&pos_deref->dest.ssa));

               /* Remove old deref since it has the wrong type. */
               nir_deref_instr_remove_if_unused(old_deref);
            }
            break;
         }

         case nir_intrinsic_load_deref:
            if (nir_intrinsic_get_var(intrin, 0) == pos_var) {
               unreachable("Should have lowered I/O to temporaries "
                           "so no load_deref on position output is expected.");
            }
            break;

         case nir_intrinsic_copy_deref:
            unreachable("Should have lowered copy_derefs at this point");
            break;

         default:
            /* Do nothing. */
            break;
         }
      }
   }

   /* The control flow was rebuilt wholesale; no analysis results survive. */
   nir_metadata_preserve(entrypoint, nir_metadata_none);
   return true;
}
3517ec681f3Smrg
352