101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2016 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg#include "vtn_private.h"
2501e04c3fSmrg
267ec681f3Smrgstatic struct vtn_ssa_value *
2701e04c3fSmrgvtn_build_subgroup_instr(struct vtn_builder *b,
2801e04c3fSmrg                         nir_intrinsic_op nir_op,
2901e04c3fSmrg                         struct vtn_ssa_value *src0,
3001e04c3fSmrg                         nir_ssa_def *index,
3101e04c3fSmrg                         unsigned const_idx0,
3201e04c3fSmrg                         unsigned const_idx1)
3301e04c3fSmrg{
3401e04c3fSmrg   /* Some of the subgroup operations take an index.  SPIR-V allows this to be
3501e04c3fSmrg    * any integer type.  To make things simpler for drivers, we only support
3601e04c3fSmrg    * 32-bit indices.
3701e04c3fSmrg    */
3801e04c3fSmrg   if (index && index->bit_size != 32)
3901e04c3fSmrg      index = nir_u2u32(&b->nb, index);
4001e04c3fSmrg
417ec681f3Smrg   struct vtn_ssa_value *dst = vtn_create_ssa_value(b, src0->type);
427ec681f3Smrg
4301e04c3fSmrg   vtn_assert(dst->type == src0->type);
4401e04c3fSmrg   if (!glsl_type_is_vector_or_scalar(dst->type)) {
4501e04c3fSmrg      for (unsigned i = 0; i < glsl_get_length(dst->type); i++) {
467ec681f3Smrg         dst->elems[0] =
477ec681f3Smrg            vtn_build_subgroup_instr(b, nir_op, src0->elems[i], index,
487ec681f3Smrg                                     const_idx0, const_idx1);
4901e04c3fSmrg      }
507ec681f3Smrg      return dst;
5101e04c3fSmrg   }
5201e04c3fSmrg
5301e04c3fSmrg   nir_intrinsic_instr *intrin =
5401e04c3fSmrg      nir_intrinsic_instr_create(b->nb.shader, nir_op);
5501e04c3fSmrg   nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest,
5601e04c3fSmrg                              dst->type, NULL);
5701e04c3fSmrg   intrin->num_components = intrin->dest.ssa.num_components;
5801e04c3fSmrg
5901e04c3fSmrg   intrin->src[0] = nir_src_for_ssa(src0->def);
6001e04c3fSmrg   if (index)
6101e04c3fSmrg      intrin->src[1] = nir_src_for_ssa(index);
6201e04c3fSmrg
6301e04c3fSmrg   intrin->const_index[0] = const_idx0;
6401e04c3fSmrg   intrin->const_index[1] = const_idx1;
6501e04c3fSmrg
6601e04c3fSmrg   nir_builder_instr_insert(&b->nb, &intrin->instr);
6701e04c3fSmrg
6801e04c3fSmrg   dst->def = &intrin->dest.ssa;
697ec681f3Smrg
707ec681f3Smrg   return dst;
7101e04c3fSmrg}
7201e04c3fSmrg
7301e04c3fSmrgvoid
7401e04c3fSmrgvtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,
7501e04c3fSmrg                    const uint32_t *w, unsigned count)
7601e04c3fSmrg{
777ec681f3Smrg   struct vtn_type *dest_type = vtn_get_type(b, w[1]);
7801e04c3fSmrg
7901e04c3fSmrg   switch (opcode) {
8001e04c3fSmrg   case SpvOpGroupNonUniformElect: {
817ec681f3Smrg      vtn_fail_if(dest_type->type != glsl_bool_type(),
8201e04c3fSmrg                  "OpGroupNonUniformElect must return a Bool");
8301e04c3fSmrg      nir_intrinsic_instr *elect =
8401e04c3fSmrg         nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_elect);
8501e04c3fSmrg      nir_ssa_dest_init_for_type(&elect->instr, &elect->dest,
867ec681f3Smrg                                 dest_type->type, NULL);
8701e04c3fSmrg      nir_builder_instr_insert(&b->nb, &elect->instr);
887ec681f3Smrg      vtn_push_nir_ssa(b, w[2], &elect->dest.ssa);
8901e04c3fSmrg      break;
9001e04c3fSmrg   }
9101e04c3fSmrg
927ec681f3Smrg   case SpvOpGroupNonUniformBallot:
937ec681f3Smrg   case SpvOpSubgroupBallotKHR: {
947ec681f3Smrg      bool has_scope = (opcode != SpvOpSubgroupBallotKHR);
957ec681f3Smrg      vtn_fail_if(dest_type->type != glsl_vector_type(GLSL_TYPE_UINT, 4),
9601e04c3fSmrg                  "OpGroupNonUniformBallot must return a uvec4");
9701e04c3fSmrg      nir_intrinsic_instr *ballot =
9801e04c3fSmrg         nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_ballot);
997ec681f3Smrg      ballot->src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[3 + has_scope]));
10001e04c3fSmrg      nir_ssa_dest_init(&ballot->instr, &ballot->dest, 4, 32, NULL);
10101e04c3fSmrg      ballot->num_components = 4;
10201e04c3fSmrg      nir_builder_instr_insert(&b->nb, &ballot->instr);
1037ec681f3Smrg      vtn_push_nir_ssa(b, w[2], &ballot->dest.ssa);
10401e04c3fSmrg      break;
10501e04c3fSmrg   }
10601e04c3fSmrg
10701e04c3fSmrg   case SpvOpGroupNonUniformInverseBallot: {
10801e04c3fSmrg      /* This one is just a BallotBitfieldExtract with subgroup invocation.
10901e04c3fSmrg       * We could add a NIR intrinsic but it's easier to just lower it on the
11001e04c3fSmrg       * spot.
11101e04c3fSmrg       */
11201e04c3fSmrg      nir_intrinsic_instr *intrin =
11301e04c3fSmrg         nir_intrinsic_instr_create(b->nb.shader,
11401e04c3fSmrg                                    nir_intrinsic_ballot_bitfield_extract);
11501e04c3fSmrg
1167ec681f3Smrg      intrin->src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[4]));
11701e04c3fSmrg      intrin->src[1] = nir_src_for_ssa(nir_load_subgroup_invocation(&b->nb));
11801e04c3fSmrg
11901e04c3fSmrg      nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest,
1207ec681f3Smrg                                 dest_type->type, NULL);
12101e04c3fSmrg      nir_builder_instr_insert(&b->nb, &intrin->instr);
12201e04c3fSmrg
1237ec681f3Smrg      vtn_push_nir_ssa(b, w[2], &intrin->dest.ssa);
12401e04c3fSmrg      break;
12501e04c3fSmrg   }
12601e04c3fSmrg
12701e04c3fSmrg   case SpvOpGroupNonUniformBallotBitExtract:
12801e04c3fSmrg   case SpvOpGroupNonUniformBallotBitCount:
12901e04c3fSmrg   case SpvOpGroupNonUniformBallotFindLSB:
13001e04c3fSmrg   case SpvOpGroupNonUniformBallotFindMSB: {
13101e04c3fSmrg      nir_ssa_def *src0, *src1 = NULL;
13201e04c3fSmrg      nir_intrinsic_op op;
13301e04c3fSmrg      switch (opcode) {
13401e04c3fSmrg      case SpvOpGroupNonUniformBallotBitExtract:
13501e04c3fSmrg         op = nir_intrinsic_ballot_bitfield_extract;
1367ec681f3Smrg         src0 = vtn_get_nir_ssa(b, w[4]);
1377ec681f3Smrg         src1 = vtn_get_nir_ssa(b, w[5]);
13801e04c3fSmrg         break;
13901e04c3fSmrg      case SpvOpGroupNonUniformBallotBitCount:
14001e04c3fSmrg         switch ((SpvGroupOperation)w[4]) {
14101e04c3fSmrg         case SpvGroupOperationReduce:
14201e04c3fSmrg            op = nir_intrinsic_ballot_bit_count_reduce;
14301e04c3fSmrg            break;
14401e04c3fSmrg         case SpvGroupOperationInclusiveScan:
14501e04c3fSmrg            op = nir_intrinsic_ballot_bit_count_inclusive;
14601e04c3fSmrg            break;
14701e04c3fSmrg         case SpvGroupOperationExclusiveScan:
14801e04c3fSmrg            op = nir_intrinsic_ballot_bit_count_exclusive;
14901e04c3fSmrg            break;
15001e04c3fSmrg         default:
15101e04c3fSmrg            unreachable("Invalid group operation");
15201e04c3fSmrg         }
1537ec681f3Smrg         src0 = vtn_get_nir_ssa(b, w[5]);
15401e04c3fSmrg         break;
15501e04c3fSmrg      case SpvOpGroupNonUniformBallotFindLSB:
15601e04c3fSmrg         op = nir_intrinsic_ballot_find_lsb;
1577ec681f3Smrg         src0 = vtn_get_nir_ssa(b, w[4]);
15801e04c3fSmrg         break;
15901e04c3fSmrg      case SpvOpGroupNonUniformBallotFindMSB:
16001e04c3fSmrg         op = nir_intrinsic_ballot_find_msb;
1617ec681f3Smrg         src0 = vtn_get_nir_ssa(b, w[4]);
16201e04c3fSmrg         break;
16301e04c3fSmrg      default:
16401e04c3fSmrg         unreachable("Unhandled opcode");
16501e04c3fSmrg      }
16601e04c3fSmrg
16701e04c3fSmrg      nir_intrinsic_instr *intrin =
16801e04c3fSmrg         nir_intrinsic_instr_create(b->nb.shader, op);
16901e04c3fSmrg
17001e04c3fSmrg      intrin->src[0] = nir_src_for_ssa(src0);
17101e04c3fSmrg      if (src1)
17201e04c3fSmrg         intrin->src[1] = nir_src_for_ssa(src1);
17301e04c3fSmrg
17401e04c3fSmrg      nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest,
1757ec681f3Smrg                                 dest_type->type, NULL);
17601e04c3fSmrg      nir_builder_instr_insert(&b->nb, &intrin->instr);
17701e04c3fSmrg
1787ec681f3Smrg      vtn_push_nir_ssa(b, w[2], &intrin->dest.ssa);
17901e04c3fSmrg      break;
18001e04c3fSmrg   }
18101e04c3fSmrg
18201e04c3fSmrg   case SpvOpGroupNonUniformBroadcastFirst:
1837ec681f3Smrg   case SpvOpSubgroupFirstInvocationKHR: {
1847ec681f3Smrg      bool has_scope = (opcode != SpvOpSubgroupFirstInvocationKHR);
1857ec681f3Smrg      vtn_push_ssa_value(b, w[2],
1867ec681f3Smrg         vtn_build_subgroup_instr(b, nir_intrinsic_read_first_invocation,
1877ec681f3Smrg                                  vtn_ssa_value(b, w[3 + has_scope]),
1887ec681f3Smrg                                  NULL, 0, 0));
18901e04c3fSmrg      break;
1907ec681f3Smrg   }
19101e04c3fSmrg
19201e04c3fSmrg   case SpvOpGroupNonUniformBroadcast:
1937ec681f3Smrg   case SpvOpGroupBroadcast:
1947ec681f3Smrg   case SpvOpSubgroupReadInvocationKHR: {
1957ec681f3Smrg      bool has_scope = (opcode != SpvOpSubgroupReadInvocationKHR);
1967ec681f3Smrg      vtn_push_ssa_value(b, w[2],
1977ec681f3Smrg         vtn_build_subgroup_instr(b, nir_intrinsic_read_invocation,
1987ec681f3Smrg                                  vtn_ssa_value(b, w[3 + has_scope]),
1997ec681f3Smrg                                  vtn_get_nir_ssa(b, w[4 + has_scope]), 0, 0));
20001e04c3fSmrg      break;
2017ec681f3Smrg   }
20201e04c3fSmrg
20301e04c3fSmrg   case SpvOpGroupNonUniformAll:
20401e04c3fSmrg   case SpvOpGroupNonUniformAny:
2057ec681f3Smrg   case SpvOpGroupNonUniformAllEqual:
2067ec681f3Smrg   case SpvOpGroupAll:
2077ec681f3Smrg   case SpvOpGroupAny:
2087ec681f3Smrg   case SpvOpSubgroupAllKHR:
2097ec681f3Smrg   case SpvOpSubgroupAnyKHR:
2107ec681f3Smrg   case SpvOpSubgroupAllEqualKHR: {
2117ec681f3Smrg      vtn_fail_if(dest_type->type != glsl_bool_type(),
21201e04c3fSmrg                  "OpGroupNonUniform(All|Any|AllEqual) must return a bool");
21301e04c3fSmrg      nir_intrinsic_op op;
21401e04c3fSmrg      switch (opcode) {
21501e04c3fSmrg      case SpvOpGroupNonUniformAll:
2167ec681f3Smrg      case SpvOpGroupAll:
2177ec681f3Smrg      case SpvOpSubgroupAllKHR:
21801e04c3fSmrg         op = nir_intrinsic_vote_all;
21901e04c3fSmrg         break;
22001e04c3fSmrg      case SpvOpGroupNonUniformAny:
2217ec681f3Smrg      case SpvOpGroupAny:
2227ec681f3Smrg      case SpvOpSubgroupAnyKHR:
22301e04c3fSmrg         op = nir_intrinsic_vote_any;
22401e04c3fSmrg         break;
2257ec681f3Smrg      case SpvOpSubgroupAllEqualKHR:
2267ec681f3Smrg         op = nir_intrinsic_vote_ieq;
2277ec681f3Smrg         break;
2287ec681f3Smrg      case SpvOpGroupNonUniformAllEqual:
2297ec681f3Smrg         switch (glsl_get_base_type(vtn_ssa_value(b, w[4])->type)) {
23001e04c3fSmrg         case GLSL_TYPE_FLOAT:
2317ec681f3Smrg         case GLSL_TYPE_FLOAT16:
23201e04c3fSmrg         case GLSL_TYPE_DOUBLE:
23301e04c3fSmrg            op = nir_intrinsic_vote_feq;
23401e04c3fSmrg            break;
23501e04c3fSmrg         case GLSL_TYPE_UINT:
23601e04c3fSmrg         case GLSL_TYPE_INT:
2377ec681f3Smrg         case GLSL_TYPE_UINT8:
2387ec681f3Smrg         case GLSL_TYPE_INT8:
2397ec681f3Smrg         case GLSL_TYPE_UINT16:
2407ec681f3Smrg         case GLSL_TYPE_INT16:
24101e04c3fSmrg         case GLSL_TYPE_UINT64:
24201e04c3fSmrg         case GLSL_TYPE_INT64:
24301e04c3fSmrg         case GLSL_TYPE_BOOL:
24401e04c3fSmrg            op = nir_intrinsic_vote_ieq;
24501e04c3fSmrg            break;
24601e04c3fSmrg         default:
24701e04c3fSmrg            unreachable("Unhandled type");
24801e04c3fSmrg         }
24901e04c3fSmrg         break;
25001e04c3fSmrg      default:
25101e04c3fSmrg         unreachable("Unhandled opcode");
25201e04c3fSmrg      }
25301e04c3fSmrg
2547ec681f3Smrg      nir_ssa_def *src0;
2557ec681f3Smrg      if (opcode == SpvOpGroupNonUniformAll || opcode == SpvOpGroupAll ||
2567ec681f3Smrg          opcode == SpvOpGroupNonUniformAny || opcode == SpvOpGroupAny ||
2577ec681f3Smrg          opcode == SpvOpGroupNonUniformAllEqual) {
2587ec681f3Smrg         src0 = vtn_get_nir_ssa(b, w[4]);
2597ec681f3Smrg      } else {
2607ec681f3Smrg         src0 = vtn_get_nir_ssa(b, w[3]);
2617ec681f3Smrg      }
26201e04c3fSmrg      nir_intrinsic_instr *intrin =
26301e04c3fSmrg         nir_intrinsic_instr_create(b->nb.shader, op);
2647ec681f3Smrg      if (nir_intrinsic_infos[op].src_components[0] == 0)
2657ec681f3Smrg         intrin->num_components = src0->num_components;
26601e04c3fSmrg      intrin->src[0] = nir_src_for_ssa(src0);
26701e04c3fSmrg      nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest,
2687ec681f3Smrg                                 dest_type->type, NULL);
26901e04c3fSmrg      nir_builder_instr_insert(&b->nb, &intrin->instr);
27001e04c3fSmrg
2717ec681f3Smrg      vtn_push_nir_ssa(b, w[2], &intrin->dest.ssa);
27201e04c3fSmrg      break;
27301e04c3fSmrg   }
27401e04c3fSmrg
27501e04c3fSmrg   case SpvOpGroupNonUniformShuffle:
27601e04c3fSmrg   case SpvOpGroupNonUniformShuffleXor:
27701e04c3fSmrg   case SpvOpGroupNonUniformShuffleUp:
27801e04c3fSmrg   case SpvOpGroupNonUniformShuffleDown: {
27901e04c3fSmrg      nir_intrinsic_op op;
28001e04c3fSmrg      switch (opcode) {
28101e04c3fSmrg      case SpvOpGroupNonUniformShuffle:
28201e04c3fSmrg         op = nir_intrinsic_shuffle;
28301e04c3fSmrg         break;
28401e04c3fSmrg      case SpvOpGroupNonUniformShuffleXor:
28501e04c3fSmrg         op = nir_intrinsic_shuffle_xor;
28601e04c3fSmrg         break;
28701e04c3fSmrg      case SpvOpGroupNonUniformShuffleUp:
28801e04c3fSmrg         op = nir_intrinsic_shuffle_up;
28901e04c3fSmrg         break;
29001e04c3fSmrg      case SpvOpGroupNonUniformShuffleDown:
29101e04c3fSmrg         op = nir_intrinsic_shuffle_down;
29201e04c3fSmrg         break;
29301e04c3fSmrg      default:
29401e04c3fSmrg         unreachable("Invalid opcode");
29501e04c3fSmrg      }
2967ec681f3Smrg      vtn_push_ssa_value(b, w[2],
2977ec681f3Smrg         vtn_build_subgroup_instr(b, op, vtn_ssa_value(b, w[4]),
2987ec681f3Smrg                                  vtn_get_nir_ssa(b, w[5]), 0, 0));
2997ec681f3Smrg      break;
3007ec681f3Smrg   }
3017ec681f3Smrg
3027ec681f3Smrg   case SpvOpSubgroupShuffleINTEL:
3037ec681f3Smrg   case SpvOpSubgroupShuffleXorINTEL: {
3047ec681f3Smrg      nir_intrinsic_op op = opcode == SpvOpSubgroupShuffleINTEL ?
3057ec681f3Smrg         nir_intrinsic_shuffle : nir_intrinsic_shuffle_xor;
3067ec681f3Smrg      vtn_push_ssa_value(b, w[2],
3077ec681f3Smrg         vtn_build_subgroup_instr(b, op, vtn_ssa_value(b, w[3]),
3087ec681f3Smrg                                  vtn_get_nir_ssa(b, w[4]), 0, 0));
3097ec681f3Smrg      break;
3107ec681f3Smrg   }
3117ec681f3Smrg
3127ec681f3Smrg   case SpvOpSubgroupShuffleUpINTEL:
3137ec681f3Smrg   case SpvOpSubgroupShuffleDownINTEL: {
3147ec681f3Smrg      /* TODO: Move this lower on the compiler stack, where we can move the
3157ec681f3Smrg       * current/other data to adjacent registers to avoid doing a shuffle
3167ec681f3Smrg       * twice.
3177ec681f3Smrg       */
3187ec681f3Smrg
3197ec681f3Smrg      nir_builder *nb = &b->nb;
3207ec681f3Smrg      nir_ssa_def *size = nir_load_subgroup_size(nb);
3217ec681f3Smrg      nir_ssa_def *delta = vtn_get_nir_ssa(b, w[5]);
3227ec681f3Smrg
3237ec681f3Smrg      /* Rewrite UP in terms of DOWN.
3247ec681f3Smrg       *
3257ec681f3Smrg       *   UP(a, b, delta) == DOWN(a, b, size - delta)
3267ec681f3Smrg       */
3277ec681f3Smrg      if (opcode == SpvOpSubgroupShuffleUpINTEL)
3287ec681f3Smrg         delta = nir_isub(nb, size, delta);
3297ec681f3Smrg
3307ec681f3Smrg      nir_ssa_def *index = nir_iadd(nb, nir_load_subgroup_invocation(nb), delta);
3317ec681f3Smrg      struct vtn_ssa_value *current =
3327ec681f3Smrg         vtn_build_subgroup_instr(b, nir_intrinsic_shuffle, vtn_ssa_value(b, w[3]),
3337ec681f3Smrg                                  index, 0, 0);
3347ec681f3Smrg
3357ec681f3Smrg      struct vtn_ssa_value *next =
3367ec681f3Smrg         vtn_build_subgroup_instr(b, nir_intrinsic_shuffle, vtn_ssa_value(b, w[4]),
3377ec681f3Smrg                                  nir_isub(nb, index, size), 0, 0);
3387ec681f3Smrg
3397ec681f3Smrg      nir_ssa_def *cond = nir_ilt(nb, index, size);
3407ec681f3Smrg      vtn_push_nir_ssa(b, w[2], nir_bcsel(nb, cond, current->def, next->def));
3417ec681f3Smrg
34201e04c3fSmrg      break;
34301e04c3fSmrg   }
34401e04c3fSmrg
34501e04c3fSmrg   case SpvOpGroupNonUniformQuadBroadcast:
3467ec681f3Smrg      vtn_push_ssa_value(b, w[2],
3477ec681f3Smrg         vtn_build_subgroup_instr(b, nir_intrinsic_quad_broadcast,
3487ec681f3Smrg                                  vtn_ssa_value(b, w[4]),
3497ec681f3Smrg                                  vtn_get_nir_ssa(b, w[5]), 0, 0));
35001e04c3fSmrg      break;
35101e04c3fSmrg
35201e04c3fSmrg   case SpvOpGroupNonUniformQuadSwap: {
3537e102996Smaya      unsigned direction = vtn_constant_uint(b, w[5]);
35401e04c3fSmrg      nir_intrinsic_op op;
35501e04c3fSmrg      switch (direction) {
35601e04c3fSmrg      case 0:
35701e04c3fSmrg         op = nir_intrinsic_quad_swap_horizontal;
35801e04c3fSmrg         break;
35901e04c3fSmrg      case 1:
36001e04c3fSmrg         op = nir_intrinsic_quad_swap_vertical;
36101e04c3fSmrg         break;
36201e04c3fSmrg      case 2:
36301e04c3fSmrg         op = nir_intrinsic_quad_swap_diagonal;
36401e04c3fSmrg         break;
36501e04c3fSmrg      default:
36601e04c3fSmrg         vtn_fail("Invalid constant value in OpGroupNonUniformQuadSwap");
36701e04c3fSmrg      }
3687ec681f3Smrg      vtn_push_ssa_value(b, w[2],
3697ec681f3Smrg         vtn_build_subgroup_instr(b, op, vtn_ssa_value(b, w[4]), NULL, 0, 0));
37001e04c3fSmrg      break;
37101e04c3fSmrg   }
37201e04c3fSmrg
37301e04c3fSmrg   case SpvOpGroupNonUniformIAdd:
37401e04c3fSmrg   case SpvOpGroupNonUniformFAdd:
37501e04c3fSmrg   case SpvOpGroupNonUniformIMul:
37601e04c3fSmrg   case SpvOpGroupNonUniformFMul:
37701e04c3fSmrg   case SpvOpGroupNonUniformSMin:
37801e04c3fSmrg   case SpvOpGroupNonUniformUMin:
37901e04c3fSmrg   case SpvOpGroupNonUniformFMin:
38001e04c3fSmrg   case SpvOpGroupNonUniformSMax:
38101e04c3fSmrg   case SpvOpGroupNonUniformUMax:
38201e04c3fSmrg   case SpvOpGroupNonUniformFMax:
38301e04c3fSmrg   case SpvOpGroupNonUniformBitwiseAnd:
38401e04c3fSmrg   case SpvOpGroupNonUniformBitwiseOr:
38501e04c3fSmrg   case SpvOpGroupNonUniformBitwiseXor:
38601e04c3fSmrg   case SpvOpGroupNonUniformLogicalAnd:
38701e04c3fSmrg   case SpvOpGroupNonUniformLogicalOr:
3887ec681f3Smrg   case SpvOpGroupNonUniformLogicalXor:
3897ec681f3Smrg   case SpvOpGroupIAdd:
3907ec681f3Smrg   case SpvOpGroupFAdd:
3917ec681f3Smrg   case SpvOpGroupFMin:
3927ec681f3Smrg   case SpvOpGroupUMin:
3937ec681f3Smrg   case SpvOpGroupSMin:
3947ec681f3Smrg   case SpvOpGroupFMax:
3957ec681f3Smrg   case SpvOpGroupUMax:
3967ec681f3Smrg   case SpvOpGroupSMax:
3977ec681f3Smrg   case SpvOpGroupIAddNonUniformAMD:
3987ec681f3Smrg   case SpvOpGroupFAddNonUniformAMD:
3997ec681f3Smrg   case SpvOpGroupFMinNonUniformAMD:
4007ec681f3Smrg   case SpvOpGroupUMinNonUniformAMD:
4017ec681f3Smrg   case SpvOpGroupSMinNonUniformAMD:
4027ec681f3Smrg   case SpvOpGroupFMaxNonUniformAMD:
4037ec681f3Smrg   case SpvOpGroupUMaxNonUniformAMD:
4047ec681f3Smrg   case SpvOpGroupSMaxNonUniformAMD: {
40501e04c3fSmrg      nir_op reduction_op;
40601e04c3fSmrg      switch (opcode) {
40701e04c3fSmrg      case SpvOpGroupNonUniformIAdd:
4087ec681f3Smrg      case SpvOpGroupIAdd:
4097ec681f3Smrg      case SpvOpGroupIAddNonUniformAMD:
41001e04c3fSmrg         reduction_op = nir_op_iadd;
41101e04c3fSmrg         break;
41201e04c3fSmrg      case SpvOpGroupNonUniformFAdd:
4137ec681f3Smrg      case SpvOpGroupFAdd:
4147ec681f3Smrg      case SpvOpGroupFAddNonUniformAMD:
41501e04c3fSmrg         reduction_op = nir_op_fadd;
41601e04c3fSmrg         break;
41701e04c3fSmrg      case SpvOpGroupNonUniformIMul:
41801e04c3fSmrg         reduction_op = nir_op_imul;
41901e04c3fSmrg         break;
42001e04c3fSmrg      case SpvOpGroupNonUniformFMul:
42101e04c3fSmrg         reduction_op = nir_op_fmul;
42201e04c3fSmrg         break;
42301e04c3fSmrg      case SpvOpGroupNonUniformSMin:
4247ec681f3Smrg      case SpvOpGroupSMin:
4257ec681f3Smrg      case SpvOpGroupSMinNonUniformAMD:
42601e04c3fSmrg         reduction_op = nir_op_imin;
42701e04c3fSmrg         break;
42801e04c3fSmrg      case SpvOpGroupNonUniformUMin:
4297ec681f3Smrg      case SpvOpGroupUMin:
4307ec681f3Smrg      case SpvOpGroupUMinNonUniformAMD:
43101e04c3fSmrg         reduction_op = nir_op_umin;
43201e04c3fSmrg         break;
43301e04c3fSmrg      case SpvOpGroupNonUniformFMin:
4347ec681f3Smrg      case SpvOpGroupFMin:
4357ec681f3Smrg      case SpvOpGroupFMinNonUniformAMD:
43601e04c3fSmrg         reduction_op = nir_op_fmin;
43701e04c3fSmrg         break;
43801e04c3fSmrg      case SpvOpGroupNonUniformSMax:
4397ec681f3Smrg      case SpvOpGroupSMax:
4407ec681f3Smrg      case SpvOpGroupSMaxNonUniformAMD:
44101e04c3fSmrg         reduction_op = nir_op_imax;
44201e04c3fSmrg         break;
44301e04c3fSmrg      case SpvOpGroupNonUniformUMax:
4447ec681f3Smrg      case SpvOpGroupUMax:
4457ec681f3Smrg      case SpvOpGroupUMaxNonUniformAMD:
44601e04c3fSmrg         reduction_op = nir_op_umax;
44701e04c3fSmrg         break;
44801e04c3fSmrg      case SpvOpGroupNonUniformFMax:
4497ec681f3Smrg      case SpvOpGroupFMax:
4507ec681f3Smrg      case SpvOpGroupFMaxNonUniformAMD:
45101e04c3fSmrg         reduction_op = nir_op_fmax;
45201e04c3fSmrg         break;
45301e04c3fSmrg      case SpvOpGroupNonUniformBitwiseAnd:
45401e04c3fSmrg      case SpvOpGroupNonUniformLogicalAnd:
45501e04c3fSmrg         reduction_op = nir_op_iand;
45601e04c3fSmrg         break;
45701e04c3fSmrg      case SpvOpGroupNonUniformBitwiseOr:
45801e04c3fSmrg      case SpvOpGroupNonUniformLogicalOr:
45901e04c3fSmrg         reduction_op = nir_op_ior;
46001e04c3fSmrg         break;
46101e04c3fSmrg      case SpvOpGroupNonUniformBitwiseXor:
46201e04c3fSmrg      case SpvOpGroupNonUniformLogicalXor:
46301e04c3fSmrg         reduction_op = nir_op_ixor;
46401e04c3fSmrg         break;
46501e04c3fSmrg      default:
46601e04c3fSmrg         unreachable("Invalid reduction operation");
46701e04c3fSmrg      }
46801e04c3fSmrg
46901e04c3fSmrg      nir_intrinsic_op op;
47001e04c3fSmrg      unsigned cluster_size = 0;
47101e04c3fSmrg      switch ((SpvGroupOperation)w[4]) {
47201e04c3fSmrg      case SpvGroupOperationReduce:
47301e04c3fSmrg         op = nir_intrinsic_reduce;
47401e04c3fSmrg         break;
47501e04c3fSmrg      case SpvGroupOperationInclusiveScan:
47601e04c3fSmrg         op = nir_intrinsic_inclusive_scan;
47701e04c3fSmrg         break;
47801e04c3fSmrg      case SpvGroupOperationExclusiveScan:
47901e04c3fSmrg         op = nir_intrinsic_exclusive_scan;
48001e04c3fSmrg         break;
48101e04c3fSmrg      case SpvGroupOperationClusteredReduce:
48201e04c3fSmrg         op = nir_intrinsic_reduce;
48301e04c3fSmrg         assert(count == 7);
4847e102996Smaya         cluster_size = vtn_constant_uint(b, w[6]);
48501e04c3fSmrg         break;
48601e04c3fSmrg      default:
48701e04c3fSmrg         unreachable("Invalid group operation");
48801e04c3fSmrg      }
48901e04c3fSmrg
4907ec681f3Smrg      vtn_push_ssa_value(b, w[2],
4917ec681f3Smrg         vtn_build_subgroup_instr(b, op, vtn_ssa_value(b, w[5]), NULL,
4927ec681f3Smrg                                  reduction_op, cluster_size));
49301e04c3fSmrg      break;
49401e04c3fSmrg   }
49501e04c3fSmrg
49601e04c3fSmrg   default:
49701e04c3fSmrg      unreachable("Invalid SPIR-V opcode");
49801e04c3fSmrg   }
49901e04c3fSmrg}
500