101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2016 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#include "vtn_private.h" 2501e04c3fSmrg 267ec681f3Smrgstatic struct vtn_ssa_value * 2701e04c3fSmrgvtn_build_subgroup_instr(struct vtn_builder *b, 2801e04c3fSmrg nir_intrinsic_op nir_op, 2901e04c3fSmrg struct vtn_ssa_value *src0, 3001e04c3fSmrg nir_ssa_def *index, 3101e04c3fSmrg unsigned const_idx0, 3201e04c3fSmrg unsigned const_idx1) 3301e04c3fSmrg{ 3401e04c3fSmrg /* Some of the subgroup operations take an index. SPIR-V allows this to be 3501e04c3fSmrg * any integer type. To make things simpler for drivers, we only support 3601e04c3fSmrg * 32-bit indices. 3701e04c3fSmrg */ 3801e04c3fSmrg if (index && index->bit_size != 32) 3901e04c3fSmrg index = nir_u2u32(&b->nb, index); 4001e04c3fSmrg 417ec681f3Smrg struct vtn_ssa_value *dst = vtn_create_ssa_value(b, src0->type); 427ec681f3Smrg 4301e04c3fSmrg vtn_assert(dst->type == src0->type); 4401e04c3fSmrg if (!glsl_type_is_vector_or_scalar(dst->type)) { 4501e04c3fSmrg for (unsigned i = 0; i < glsl_get_length(dst->type); i++) { 467ec681f3Smrg dst->elems[0] = 477ec681f3Smrg vtn_build_subgroup_instr(b, nir_op, src0->elems[i], index, 487ec681f3Smrg const_idx0, const_idx1); 4901e04c3fSmrg } 507ec681f3Smrg return dst; 5101e04c3fSmrg } 5201e04c3fSmrg 5301e04c3fSmrg nir_intrinsic_instr *intrin = 5401e04c3fSmrg nir_intrinsic_instr_create(b->nb.shader, nir_op); 5501e04c3fSmrg nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest, 5601e04c3fSmrg dst->type, NULL); 5701e04c3fSmrg intrin->num_components = intrin->dest.ssa.num_components; 5801e04c3fSmrg 5901e04c3fSmrg intrin->src[0] = nir_src_for_ssa(src0->def); 6001e04c3fSmrg if (index) 6101e04c3fSmrg intrin->src[1] = nir_src_for_ssa(index); 6201e04c3fSmrg 6301e04c3fSmrg intrin->const_index[0] = const_idx0; 6401e04c3fSmrg intrin->const_index[1] = const_idx1; 6501e04c3fSmrg 6601e04c3fSmrg nir_builder_instr_insert(&b->nb, &intrin->instr); 6701e04c3fSmrg 6801e04c3fSmrg dst->def = &intrin->dest.ssa; 697ec681f3Smrg 707ec681f3Smrg return dst; 7101e04c3fSmrg} 7201e04c3fSmrg 7301e04c3fSmrgvoid 7401e04c3fSmrgvtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode, 7501e04c3fSmrg const uint32_t *w, unsigned count) 7601e04c3fSmrg{ 777ec681f3Smrg struct vtn_type *dest_type = vtn_get_type(b, w[1]); 7801e04c3fSmrg 7901e04c3fSmrg switch (opcode) { 8001e04c3fSmrg case SpvOpGroupNonUniformElect: { 817ec681f3Smrg vtn_fail_if(dest_type->type != glsl_bool_type(), 8201e04c3fSmrg "OpGroupNonUniformElect must return a Bool"); 8301e04c3fSmrg nir_intrinsic_instr *elect = 8401e04c3fSmrg nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_elect); 8501e04c3fSmrg nir_ssa_dest_init_for_type(&elect->instr, &elect->dest, 867ec681f3Smrg dest_type->type, NULL); 8701e04c3fSmrg nir_builder_instr_insert(&b->nb, &elect->instr); 887ec681f3Smrg vtn_push_nir_ssa(b, w[2], &elect->dest.ssa); 8901e04c3fSmrg break; 9001e04c3fSmrg } 9101e04c3fSmrg 927ec681f3Smrg case SpvOpGroupNonUniformBallot: 937ec681f3Smrg case SpvOpSubgroupBallotKHR: { 947ec681f3Smrg bool has_scope = (opcode != SpvOpSubgroupBallotKHR); 957ec681f3Smrg vtn_fail_if(dest_type->type != glsl_vector_type(GLSL_TYPE_UINT, 4), 9601e04c3fSmrg "OpGroupNonUniformBallot must return a uvec4"); 9701e04c3fSmrg nir_intrinsic_instr *ballot = 9801e04c3fSmrg nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_ballot); 997ec681f3Smrg ballot->src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[3 + has_scope])); 10001e04c3fSmrg nir_ssa_dest_init(&ballot->instr, &ballot->dest, 4, 32, NULL); 10101e04c3fSmrg ballot->num_components = 4; 10201e04c3fSmrg nir_builder_instr_insert(&b->nb, &ballot->instr); 1037ec681f3Smrg vtn_push_nir_ssa(b, w[2], &ballot->dest.ssa); 10401e04c3fSmrg break; 10501e04c3fSmrg } 10601e04c3fSmrg 10701e04c3fSmrg case SpvOpGroupNonUniformInverseBallot: { 10801e04c3fSmrg /* This one is just a BallotBitfieldExtract with subgroup invocation. 10901e04c3fSmrg * We could add a NIR intrinsic but it's easier to just lower it on the 11001e04c3fSmrg * spot. 11101e04c3fSmrg */ 11201e04c3fSmrg nir_intrinsic_instr *intrin = 11301e04c3fSmrg nir_intrinsic_instr_create(b->nb.shader, 11401e04c3fSmrg nir_intrinsic_ballot_bitfield_extract); 11501e04c3fSmrg 1167ec681f3Smrg intrin->src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[4])); 11701e04c3fSmrg intrin->src[1] = nir_src_for_ssa(nir_load_subgroup_invocation(&b->nb)); 11801e04c3fSmrg 11901e04c3fSmrg nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest, 1207ec681f3Smrg dest_type->type, NULL); 12101e04c3fSmrg nir_builder_instr_insert(&b->nb, &intrin->instr); 12201e04c3fSmrg 1237ec681f3Smrg vtn_push_nir_ssa(b, w[2], &intrin->dest.ssa); 12401e04c3fSmrg break; 12501e04c3fSmrg } 12601e04c3fSmrg 12701e04c3fSmrg case SpvOpGroupNonUniformBallotBitExtract: 12801e04c3fSmrg case SpvOpGroupNonUniformBallotBitCount: 12901e04c3fSmrg case SpvOpGroupNonUniformBallotFindLSB: 13001e04c3fSmrg case SpvOpGroupNonUniformBallotFindMSB: { 13101e04c3fSmrg nir_ssa_def *src0, *src1 = NULL; 13201e04c3fSmrg nir_intrinsic_op op; 13301e04c3fSmrg switch (opcode) { 13401e04c3fSmrg case SpvOpGroupNonUniformBallotBitExtract: 13501e04c3fSmrg op = nir_intrinsic_ballot_bitfield_extract; 1367ec681f3Smrg src0 = vtn_get_nir_ssa(b, w[4]); 1377ec681f3Smrg src1 = vtn_get_nir_ssa(b, w[5]); 13801e04c3fSmrg break; 13901e04c3fSmrg case SpvOpGroupNonUniformBallotBitCount: 14001e04c3fSmrg switch ((SpvGroupOperation)w[4]) { 14101e04c3fSmrg case SpvGroupOperationReduce: 14201e04c3fSmrg op = nir_intrinsic_ballot_bit_count_reduce; 14301e04c3fSmrg break; 14401e04c3fSmrg case SpvGroupOperationInclusiveScan: 14501e04c3fSmrg op = nir_intrinsic_ballot_bit_count_inclusive; 14601e04c3fSmrg break; 14701e04c3fSmrg case SpvGroupOperationExclusiveScan: 14801e04c3fSmrg op = nir_intrinsic_ballot_bit_count_exclusive; 14901e04c3fSmrg break; 15001e04c3fSmrg default: 15101e04c3fSmrg unreachable("Invalid group operation"); 15201e04c3fSmrg } 1537ec681f3Smrg src0 = vtn_get_nir_ssa(b, w[5]); 15401e04c3fSmrg break; 15501e04c3fSmrg case SpvOpGroupNonUniformBallotFindLSB: 15601e04c3fSmrg op = nir_intrinsic_ballot_find_lsb; 1577ec681f3Smrg src0 = vtn_get_nir_ssa(b, w[4]); 15801e04c3fSmrg break; 15901e04c3fSmrg case SpvOpGroupNonUniformBallotFindMSB: 16001e04c3fSmrg op = nir_intrinsic_ballot_find_msb; 1617ec681f3Smrg src0 = vtn_get_nir_ssa(b, w[4]); 16201e04c3fSmrg break; 16301e04c3fSmrg default: 16401e04c3fSmrg unreachable("Unhandled opcode"); 16501e04c3fSmrg } 16601e04c3fSmrg 16701e04c3fSmrg nir_intrinsic_instr *intrin = 16801e04c3fSmrg nir_intrinsic_instr_create(b->nb.shader, op); 16901e04c3fSmrg 17001e04c3fSmrg intrin->src[0] = nir_src_for_ssa(src0); 17101e04c3fSmrg if (src1) 17201e04c3fSmrg intrin->src[1] = nir_src_for_ssa(src1); 17301e04c3fSmrg 17401e04c3fSmrg nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest, 1757ec681f3Smrg dest_type->type, NULL); 17601e04c3fSmrg nir_builder_instr_insert(&b->nb, &intrin->instr); 17701e04c3fSmrg 1787ec681f3Smrg vtn_push_nir_ssa(b, w[2], &intrin->dest.ssa); 17901e04c3fSmrg break; 18001e04c3fSmrg } 18101e04c3fSmrg 18201e04c3fSmrg case SpvOpGroupNonUniformBroadcastFirst: 1837ec681f3Smrg case SpvOpSubgroupFirstInvocationKHR: { 1847ec681f3Smrg bool has_scope = (opcode != SpvOpSubgroupFirstInvocationKHR); 1857ec681f3Smrg vtn_push_ssa_value(b, w[2], 1867ec681f3Smrg vtn_build_subgroup_instr(b, nir_intrinsic_read_first_invocation, 1877ec681f3Smrg vtn_ssa_value(b, w[3 + has_scope]), 1887ec681f3Smrg NULL, 0, 0)); 18901e04c3fSmrg break; 1907ec681f3Smrg } 19101e04c3fSmrg 19201e04c3fSmrg case SpvOpGroupNonUniformBroadcast: 1937ec681f3Smrg case SpvOpGroupBroadcast: 1947ec681f3Smrg case SpvOpSubgroupReadInvocationKHR: { 1957ec681f3Smrg bool has_scope = (opcode != SpvOpSubgroupReadInvocationKHR); 1967ec681f3Smrg vtn_push_ssa_value(b, w[2], 1977ec681f3Smrg vtn_build_subgroup_instr(b, nir_intrinsic_read_invocation, 1987ec681f3Smrg vtn_ssa_value(b, w[3 + has_scope]), 1997ec681f3Smrg vtn_get_nir_ssa(b, w[4 + has_scope]), 0, 0)); 20001e04c3fSmrg break; 2017ec681f3Smrg } 20201e04c3fSmrg 20301e04c3fSmrg case SpvOpGroupNonUniformAll: 20401e04c3fSmrg case SpvOpGroupNonUniformAny: 2057ec681f3Smrg case SpvOpGroupNonUniformAllEqual: 2067ec681f3Smrg case SpvOpGroupAll: 2077ec681f3Smrg case SpvOpGroupAny: 2087ec681f3Smrg case SpvOpSubgroupAllKHR: 2097ec681f3Smrg case SpvOpSubgroupAnyKHR: 2107ec681f3Smrg case SpvOpSubgroupAllEqualKHR: { 2117ec681f3Smrg vtn_fail_if(dest_type->type != glsl_bool_type(), 21201e04c3fSmrg "OpGroupNonUniform(All|Any|AllEqual) must return a bool"); 21301e04c3fSmrg nir_intrinsic_op op; 21401e04c3fSmrg switch (opcode) { 21501e04c3fSmrg case SpvOpGroupNonUniformAll: 2167ec681f3Smrg case SpvOpGroupAll: 2177ec681f3Smrg case SpvOpSubgroupAllKHR: 21801e04c3fSmrg op = nir_intrinsic_vote_all; 21901e04c3fSmrg break; 22001e04c3fSmrg case SpvOpGroupNonUniformAny: 2217ec681f3Smrg case SpvOpGroupAny: 2227ec681f3Smrg case SpvOpSubgroupAnyKHR: 22301e04c3fSmrg op = nir_intrinsic_vote_any; 22401e04c3fSmrg break; 2257ec681f3Smrg case SpvOpSubgroupAllEqualKHR: 2267ec681f3Smrg op = nir_intrinsic_vote_ieq; 2277ec681f3Smrg break; 2287ec681f3Smrg case SpvOpGroupNonUniformAllEqual: 2297ec681f3Smrg switch (glsl_get_base_type(vtn_ssa_value(b, w[4])->type)) { 23001e04c3fSmrg case GLSL_TYPE_FLOAT: 2317ec681f3Smrg case GLSL_TYPE_FLOAT16: 23201e04c3fSmrg case GLSL_TYPE_DOUBLE: 23301e04c3fSmrg op = nir_intrinsic_vote_feq; 23401e04c3fSmrg break; 23501e04c3fSmrg case GLSL_TYPE_UINT: 23601e04c3fSmrg case GLSL_TYPE_INT: 2377ec681f3Smrg case GLSL_TYPE_UINT8: 2387ec681f3Smrg case GLSL_TYPE_INT8: 2397ec681f3Smrg case GLSL_TYPE_UINT16: 2407ec681f3Smrg case GLSL_TYPE_INT16: 24101e04c3fSmrg case GLSL_TYPE_UINT64: 24201e04c3fSmrg case GLSL_TYPE_INT64: 24301e04c3fSmrg case GLSL_TYPE_BOOL: 24401e04c3fSmrg op = nir_intrinsic_vote_ieq; 24501e04c3fSmrg break; 24601e04c3fSmrg default: 24701e04c3fSmrg unreachable("Unhandled type"); 24801e04c3fSmrg } 24901e04c3fSmrg break; 25001e04c3fSmrg default: 25101e04c3fSmrg unreachable("Unhandled opcode"); 25201e04c3fSmrg } 25301e04c3fSmrg 2547ec681f3Smrg nir_ssa_def *src0; 2557ec681f3Smrg if (opcode == SpvOpGroupNonUniformAll || opcode == SpvOpGroupAll || 2567ec681f3Smrg opcode == SpvOpGroupNonUniformAny || opcode == SpvOpGroupAny || 2577ec681f3Smrg opcode == SpvOpGroupNonUniformAllEqual) { 2587ec681f3Smrg src0 = vtn_get_nir_ssa(b, w[4]); 2597ec681f3Smrg } else { 2607ec681f3Smrg src0 = vtn_get_nir_ssa(b, w[3]); 2617ec681f3Smrg } 26201e04c3fSmrg nir_intrinsic_instr *intrin = 26301e04c3fSmrg nir_intrinsic_instr_create(b->nb.shader, op); 2647ec681f3Smrg if (nir_intrinsic_infos[op].src_components[0] == 0) 2657ec681f3Smrg intrin->num_components = src0->num_components; 26601e04c3fSmrg intrin->src[0] = nir_src_for_ssa(src0); 26701e04c3fSmrg nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest, 2687ec681f3Smrg dest_type->type, NULL); 26901e04c3fSmrg nir_builder_instr_insert(&b->nb, &intrin->instr); 27001e04c3fSmrg 2717ec681f3Smrg vtn_push_nir_ssa(b, w[2], &intrin->dest.ssa); 27201e04c3fSmrg break; 27301e04c3fSmrg } 27401e04c3fSmrg 27501e04c3fSmrg case SpvOpGroupNonUniformShuffle: 27601e04c3fSmrg case SpvOpGroupNonUniformShuffleXor: 27701e04c3fSmrg case SpvOpGroupNonUniformShuffleUp: 27801e04c3fSmrg case SpvOpGroupNonUniformShuffleDown: { 27901e04c3fSmrg nir_intrinsic_op op; 28001e04c3fSmrg switch (opcode) { 28101e04c3fSmrg case SpvOpGroupNonUniformShuffle: 28201e04c3fSmrg op = nir_intrinsic_shuffle; 28301e04c3fSmrg break; 28401e04c3fSmrg case SpvOpGroupNonUniformShuffleXor: 28501e04c3fSmrg op = nir_intrinsic_shuffle_xor; 28601e04c3fSmrg break; 28701e04c3fSmrg case SpvOpGroupNonUniformShuffleUp: 28801e04c3fSmrg op = nir_intrinsic_shuffle_up; 28901e04c3fSmrg break; 29001e04c3fSmrg case SpvOpGroupNonUniformShuffleDown: 29101e04c3fSmrg op = nir_intrinsic_shuffle_down; 29201e04c3fSmrg break; 29301e04c3fSmrg default: 29401e04c3fSmrg unreachable("Invalid opcode"); 29501e04c3fSmrg } 2967ec681f3Smrg vtn_push_ssa_value(b, w[2], 2977ec681f3Smrg vtn_build_subgroup_instr(b, op, vtn_ssa_value(b, w[4]), 2987ec681f3Smrg vtn_get_nir_ssa(b, w[5]), 0, 0)); 2997ec681f3Smrg break; 3007ec681f3Smrg } 3017ec681f3Smrg 3027ec681f3Smrg case SpvOpSubgroupShuffleINTEL: 3037ec681f3Smrg case SpvOpSubgroupShuffleXorINTEL: { 3047ec681f3Smrg nir_intrinsic_op op = opcode == SpvOpSubgroupShuffleINTEL ? 3057ec681f3Smrg nir_intrinsic_shuffle : nir_intrinsic_shuffle_xor; 3067ec681f3Smrg vtn_push_ssa_value(b, w[2], 3077ec681f3Smrg vtn_build_subgroup_instr(b, op, vtn_ssa_value(b, w[3]), 3087ec681f3Smrg vtn_get_nir_ssa(b, w[4]), 0, 0)); 3097ec681f3Smrg break; 3107ec681f3Smrg } 3117ec681f3Smrg 3127ec681f3Smrg case SpvOpSubgroupShuffleUpINTEL: 3137ec681f3Smrg case SpvOpSubgroupShuffleDownINTEL: { 3147ec681f3Smrg /* TODO: Move this lower on the compiler stack, where we can move the 3157ec681f3Smrg * current/other data to adjacent registers to avoid doing a shuffle 3167ec681f3Smrg * twice. 3177ec681f3Smrg */ 3187ec681f3Smrg 3197ec681f3Smrg nir_builder *nb = &b->nb; 3207ec681f3Smrg nir_ssa_def *size = nir_load_subgroup_size(nb); 3217ec681f3Smrg nir_ssa_def *delta = vtn_get_nir_ssa(b, w[5]); 3227ec681f3Smrg 3237ec681f3Smrg /* Rewrite UP in terms of DOWN. 3247ec681f3Smrg * 3257ec681f3Smrg * UP(a, b, delta) == DOWN(a, b, size - delta) 3267ec681f3Smrg */ 3277ec681f3Smrg if (opcode == SpvOpSubgroupShuffleUpINTEL) 3287ec681f3Smrg delta = nir_isub(nb, size, delta); 3297ec681f3Smrg 3307ec681f3Smrg nir_ssa_def *index = nir_iadd(nb, nir_load_subgroup_invocation(nb), delta); 3317ec681f3Smrg struct vtn_ssa_value *current = 3327ec681f3Smrg vtn_build_subgroup_instr(b, nir_intrinsic_shuffle, vtn_ssa_value(b, w[3]), 3337ec681f3Smrg index, 0, 0); 3347ec681f3Smrg 3357ec681f3Smrg struct vtn_ssa_value *next = 3367ec681f3Smrg vtn_build_subgroup_instr(b, nir_intrinsic_shuffle, vtn_ssa_value(b, w[4]), 3377ec681f3Smrg nir_isub(nb, index, size), 0, 0); 3387ec681f3Smrg 3397ec681f3Smrg nir_ssa_def *cond = nir_ilt(nb, index, size); 3407ec681f3Smrg vtn_push_nir_ssa(b, w[2], nir_bcsel(nb, cond, current->def, next->def)); 3417ec681f3Smrg 34201e04c3fSmrg break; 34301e04c3fSmrg } 34401e04c3fSmrg 34501e04c3fSmrg case SpvOpGroupNonUniformQuadBroadcast: 3467ec681f3Smrg vtn_push_ssa_value(b, w[2], 3477ec681f3Smrg vtn_build_subgroup_instr(b, nir_intrinsic_quad_broadcast, 3487ec681f3Smrg vtn_ssa_value(b, w[4]), 3497ec681f3Smrg vtn_get_nir_ssa(b, w[5]), 0, 0)); 35001e04c3fSmrg break; 35101e04c3fSmrg 35201e04c3fSmrg case SpvOpGroupNonUniformQuadSwap: { 3537e102996Smaya unsigned direction = vtn_constant_uint(b, w[5]); 35401e04c3fSmrg nir_intrinsic_op op; 35501e04c3fSmrg switch (direction) { 35601e04c3fSmrg case 0: 35701e04c3fSmrg op = nir_intrinsic_quad_swap_horizontal; 35801e04c3fSmrg break; 35901e04c3fSmrg case 1: 36001e04c3fSmrg op = nir_intrinsic_quad_swap_vertical; 36101e04c3fSmrg break; 36201e04c3fSmrg case 2: 36301e04c3fSmrg op = nir_intrinsic_quad_swap_diagonal; 36401e04c3fSmrg break; 36501e04c3fSmrg default: 36601e04c3fSmrg vtn_fail("Invalid constant value in OpGroupNonUniformQuadSwap"); 36701e04c3fSmrg } 3687ec681f3Smrg vtn_push_ssa_value(b, w[2], 3697ec681f3Smrg vtn_build_subgroup_instr(b, op, vtn_ssa_value(b, w[4]), NULL, 0, 0)); 37001e04c3fSmrg break; 37101e04c3fSmrg } 37201e04c3fSmrg 37301e04c3fSmrg case SpvOpGroupNonUniformIAdd: 37401e04c3fSmrg case SpvOpGroupNonUniformFAdd: 37501e04c3fSmrg case SpvOpGroupNonUniformIMul: 37601e04c3fSmrg case SpvOpGroupNonUniformFMul: 37701e04c3fSmrg case SpvOpGroupNonUniformSMin: 37801e04c3fSmrg case SpvOpGroupNonUniformUMin: 37901e04c3fSmrg case SpvOpGroupNonUniformFMin: 38001e04c3fSmrg case SpvOpGroupNonUniformSMax: 38101e04c3fSmrg case SpvOpGroupNonUniformUMax: 38201e04c3fSmrg case SpvOpGroupNonUniformFMax: 38301e04c3fSmrg case SpvOpGroupNonUniformBitwiseAnd: 38401e04c3fSmrg case SpvOpGroupNonUniformBitwiseOr: 38501e04c3fSmrg case SpvOpGroupNonUniformBitwiseXor: 38601e04c3fSmrg case SpvOpGroupNonUniformLogicalAnd: 38701e04c3fSmrg case SpvOpGroupNonUniformLogicalOr: 3887ec681f3Smrg case SpvOpGroupNonUniformLogicalXor: 3897ec681f3Smrg case SpvOpGroupIAdd: 3907ec681f3Smrg case SpvOpGroupFAdd: 3917ec681f3Smrg case SpvOpGroupFMin: 3927ec681f3Smrg case SpvOpGroupUMin: 3937ec681f3Smrg case SpvOpGroupSMin: 3947ec681f3Smrg case SpvOpGroupFMax: 3957ec681f3Smrg case SpvOpGroupUMax: 3967ec681f3Smrg case SpvOpGroupSMax: 3977ec681f3Smrg case SpvOpGroupIAddNonUniformAMD: 3987ec681f3Smrg case SpvOpGroupFAddNonUniformAMD: 3997ec681f3Smrg case SpvOpGroupFMinNonUniformAMD: 4007ec681f3Smrg case SpvOpGroupUMinNonUniformAMD: 4017ec681f3Smrg case SpvOpGroupSMinNonUniformAMD: 4027ec681f3Smrg case SpvOpGroupFMaxNonUniformAMD: 4037ec681f3Smrg case SpvOpGroupUMaxNonUniformAMD: 4047ec681f3Smrg case SpvOpGroupSMaxNonUniformAMD: { 40501e04c3fSmrg nir_op reduction_op; 40601e04c3fSmrg switch (opcode) { 40701e04c3fSmrg case SpvOpGroupNonUniformIAdd: 4087ec681f3Smrg case SpvOpGroupIAdd: 4097ec681f3Smrg case SpvOpGroupIAddNonUniformAMD: 41001e04c3fSmrg reduction_op = nir_op_iadd; 41101e04c3fSmrg break; 41201e04c3fSmrg case SpvOpGroupNonUniformFAdd: 4137ec681f3Smrg case SpvOpGroupFAdd: 4147ec681f3Smrg case SpvOpGroupFAddNonUniformAMD: 41501e04c3fSmrg reduction_op = nir_op_fadd; 41601e04c3fSmrg break; 41701e04c3fSmrg case SpvOpGroupNonUniformIMul: 41801e04c3fSmrg reduction_op = nir_op_imul; 41901e04c3fSmrg break; 42001e04c3fSmrg case SpvOpGroupNonUniformFMul: 42101e04c3fSmrg reduction_op = nir_op_fmul; 42201e04c3fSmrg break; 42301e04c3fSmrg case SpvOpGroupNonUniformSMin: 4247ec681f3Smrg case SpvOpGroupSMin: 4257ec681f3Smrg case SpvOpGroupSMinNonUniformAMD: 42601e04c3fSmrg reduction_op = nir_op_imin; 42701e04c3fSmrg break; 42801e04c3fSmrg case SpvOpGroupNonUniformUMin: 4297ec681f3Smrg case SpvOpGroupUMin: 4307ec681f3Smrg case SpvOpGroupUMinNonUniformAMD: 43101e04c3fSmrg reduction_op = nir_op_umin; 43201e04c3fSmrg break; 43301e04c3fSmrg case SpvOpGroupNonUniformFMin: 4347ec681f3Smrg case SpvOpGroupFMin: 4357ec681f3Smrg case SpvOpGroupFMinNonUniformAMD: 43601e04c3fSmrg reduction_op = nir_op_fmin; 43701e04c3fSmrg break; 43801e04c3fSmrg case SpvOpGroupNonUniformSMax: 4397ec681f3Smrg case SpvOpGroupSMax: 4407ec681f3Smrg case SpvOpGroupSMaxNonUniformAMD: 44101e04c3fSmrg reduction_op = nir_op_imax; 44201e04c3fSmrg break; 44301e04c3fSmrg case SpvOpGroupNonUniformUMax: 4447ec681f3Smrg case SpvOpGroupUMax: 4457ec681f3Smrg case SpvOpGroupUMaxNonUniformAMD: 44601e04c3fSmrg reduction_op = nir_op_umax; 44701e04c3fSmrg break; 44801e04c3fSmrg case SpvOpGroupNonUniformFMax: 4497ec681f3Smrg case SpvOpGroupFMax: 4507ec681f3Smrg case SpvOpGroupFMaxNonUniformAMD: 45101e04c3fSmrg reduction_op = nir_op_fmax; 45201e04c3fSmrg break; 45301e04c3fSmrg case SpvOpGroupNonUniformBitwiseAnd: 45401e04c3fSmrg case SpvOpGroupNonUniformLogicalAnd: 45501e04c3fSmrg reduction_op = nir_op_iand; 45601e04c3fSmrg break; 45701e04c3fSmrg case SpvOpGroupNonUniformBitwiseOr: 45801e04c3fSmrg case SpvOpGroupNonUniformLogicalOr: 45901e04c3fSmrg reduction_op = nir_op_ior; 46001e04c3fSmrg break; 46101e04c3fSmrg case SpvOpGroupNonUniformBitwiseXor: 46201e04c3fSmrg case SpvOpGroupNonUniformLogicalXor: 46301e04c3fSmrg reduction_op = nir_op_ixor; 46401e04c3fSmrg break; 46501e04c3fSmrg default: 46601e04c3fSmrg unreachable("Invalid reduction operation"); 46701e04c3fSmrg } 46801e04c3fSmrg 46901e04c3fSmrg nir_intrinsic_op op; 47001e04c3fSmrg unsigned cluster_size = 0; 47101e04c3fSmrg switch ((SpvGroupOperation)w[4]) { 47201e04c3fSmrg case SpvGroupOperationReduce: 47301e04c3fSmrg op = nir_intrinsic_reduce; 47401e04c3fSmrg break; 47501e04c3fSmrg case SpvGroupOperationInclusiveScan: 47601e04c3fSmrg op = nir_intrinsic_inclusive_scan; 47701e04c3fSmrg break; 47801e04c3fSmrg case SpvGroupOperationExclusiveScan: 47901e04c3fSmrg op = nir_intrinsic_exclusive_scan; 48001e04c3fSmrg break; 48101e04c3fSmrg case SpvGroupOperationClusteredReduce: 48201e04c3fSmrg op = nir_intrinsic_reduce; 48301e04c3fSmrg assert(count == 7); 4847e102996Smaya cluster_size = vtn_constant_uint(b, w[6]); 48501e04c3fSmrg break; 48601e04c3fSmrg default: 48701e04c3fSmrg unreachable("Invalid group operation"); 48801e04c3fSmrg } 48901e04c3fSmrg 4907ec681f3Smrg vtn_push_ssa_value(b, w[2], 4917ec681f3Smrg vtn_build_subgroup_instr(b, op, vtn_ssa_value(b, w[5]), NULL, 4927ec681f3Smrg reduction_op, cluster_size)); 49301e04c3fSmrg break; 49401e04c3fSmrg } 49501e04c3fSmrg 49601e04c3fSmrg default: 49701e04c3fSmrg unreachable("Invalid SPIR-V opcode"); 49801e04c3fSmrg } 49901e04c3fSmrg} 500