/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <math.h>
#include "vtn_private.h"
#include "spirv_info.h"

/*
 * Normally, column vectors in SPIR-V correspond to a single NIR SSA
 * definition. But for matrix multiplies, we want to do one routine for
 * multiplying a matrix by a matrix and then pretend that vectors are matrices
 * with one column. So we "wrap" these things, and unwrap the result before we
 * send it off.
3401e04c3fSmrg */ 3501e04c3fSmrg 3601e04c3fSmrgstatic struct vtn_ssa_value * 3701e04c3fSmrgwrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) 3801e04c3fSmrg{ 3901e04c3fSmrg if (val == NULL) 4001e04c3fSmrg return NULL; 4101e04c3fSmrg 4201e04c3fSmrg if (glsl_type_is_matrix(val->type)) 4301e04c3fSmrg return val; 4401e04c3fSmrg 4501e04c3fSmrg struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); 467ec681f3Smrg dest->type = glsl_get_bare_type(val->type); 4701e04c3fSmrg dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); 4801e04c3fSmrg dest->elems[0] = val; 4901e04c3fSmrg 5001e04c3fSmrg return dest; 5101e04c3fSmrg} 5201e04c3fSmrg 5301e04c3fSmrgstatic struct vtn_ssa_value * 5401e04c3fSmrgunwrap_matrix(struct vtn_ssa_value *val) 5501e04c3fSmrg{ 5601e04c3fSmrg if (glsl_type_is_matrix(val->type)) 5701e04c3fSmrg return val; 5801e04c3fSmrg 5901e04c3fSmrg return val->elems[0]; 6001e04c3fSmrg} 6101e04c3fSmrg 6201e04c3fSmrgstatic struct vtn_ssa_value * 6301e04c3fSmrgmatrix_multiply(struct vtn_builder *b, 6401e04c3fSmrg struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) 6501e04c3fSmrg{ 6601e04c3fSmrg 6701e04c3fSmrg struct vtn_ssa_value *src0 = wrap_matrix(b, _src0); 6801e04c3fSmrg struct vtn_ssa_value *src1 = wrap_matrix(b, _src1); 6901e04c3fSmrg struct vtn_ssa_value *src0_transpose = wrap_matrix(b, _src0->transposed); 7001e04c3fSmrg struct vtn_ssa_value *src1_transpose = wrap_matrix(b, _src1->transposed); 7101e04c3fSmrg 7201e04c3fSmrg unsigned src0_rows = glsl_get_vector_elements(src0->type); 7301e04c3fSmrg unsigned src0_columns = glsl_get_matrix_columns(src0->type); 7401e04c3fSmrg unsigned src1_columns = glsl_get_matrix_columns(src1->type); 7501e04c3fSmrg 7601e04c3fSmrg const struct glsl_type *dest_type; 7701e04c3fSmrg if (src1_columns > 1) { 7801e04c3fSmrg dest_type = glsl_matrix_type(glsl_get_base_type(src0->type), 7901e04c3fSmrg src0_rows, src1_columns); 8001e04c3fSmrg } else { 8101e04c3fSmrg dest_type = 
glsl_vector_type(glsl_get_base_type(src0->type), src0_rows); 8201e04c3fSmrg } 8301e04c3fSmrg struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type); 8401e04c3fSmrg 8501e04c3fSmrg dest = wrap_matrix(b, dest); 8601e04c3fSmrg 8701e04c3fSmrg bool transpose_result = false; 8801e04c3fSmrg if (src0_transpose && src1_transpose) { 8901e04c3fSmrg /* transpose(A) * transpose(B) = transpose(B * A) */ 9001e04c3fSmrg src1 = src0_transpose; 9101e04c3fSmrg src0 = src1_transpose; 9201e04c3fSmrg src0_transpose = NULL; 9301e04c3fSmrg src1_transpose = NULL; 9401e04c3fSmrg transpose_result = true; 9501e04c3fSmrg } 9601e04c3fSmrg 9701e04c3fSmrg if (src0_transpose && !src1_transpose && 9801e04c3fSmrg glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { 9901e04c3fSmrg /* We already have the rows of src0 and the columns of src1 available, 10001e04c3fSmrg * so we can just take the dot product of each row with each column to 10101e04c3fSmrg * get the result. 10201e04c3fSmrg */ 10301e04c3fSmrg 10401e04c3fSmrg for (unsigned i = 0; i < src1_columns; i++) { 10501e04c3fSmrg nir_ssa_def *vec_src[4]; 10601e04c3fSmrg for (unsigned j = 0; j < src0_rows; j++) { 10701e04c3fSmrg vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def, 10801e04c3fSmrg src1->elems[i]->def); 10901e04c3fSmrg } 11001e04c3fSmrg dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows); 11101e04c3fSmrg } 11201e04c3fSmrg } else { 11301e04c3fSmrg /* We don't handle the case where src1 is transposed but not src0, since 11401e04c3fSmrg * the general case only uses individual components of src1 so the 11501e04c3fSmrg * optimizer should chew through the transpose we emitted for src1. 
11601e04c3fSmrg */ 11701e04c3fSmrg 11801e04c3fSmrg for (unsigned i = 0; i < src1_columns; i++) { 11901e04c3fSmrg /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ 12001e04c3fSmrg dest->elems[i]->def = 1217ec681f3Smrg nir_fmul(&b->nb, src0->elems[src0_columns - 1]->def, 1227ec681f3Smrg nir_channel(&b->nb, src1->elems[i]->def, src0_columns - 1)); 1237ec681f3Smrg for (int j = src0_columns - 2; j >= 0; j--) { 12401e04c3fSmrg dest->elems[i]->def = 1257ec681f3Smrg nir_ffma(&b->nb, src0->elems[j]->def, 1267ec681f3Smrg nir_channel(&b->nb, src1->elems[i]->def, j), 1277ec681f3Smrg dest->elems[i]->def); 12801e04c3fSmrg } 12901e04c3fSmrg } 13001e04c3fSmrg } 13101e04c3fSmrg 13201e04c3fSmrg dest = unwrap_matrix(dest); 13301e04c3fSmrg 13401e04c3fSmrg if (transpose_result) 13501e04c3fSmrg dest = vtn_ssa_transpose(b, dest); 13601e04c3fSmrg 13701e04c3fSmrg return dest; 13801e04c3fSmrg} 13901e04c3fSmrg 14001e04c3fSmrgstatic struct vtn_ssa_value * 14101e04c3fSmrgmat_times_scalar(struct vtn_builder *b, 14201e04c3fSmrg struct vtn_ssa_value *mat, 14301e04c3fSmrg nir_ssa_def *scalar) 14401e04c3fSmrg{ 14501e04c3fSmrg struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); 14601e04c3fSmrg for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { 14701e04c3fSmrg if (glsl_base_type_is_integer(glsl_get_base_type(mat->type))) 14801e04c3fSmrg dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); 14901e04c3fSmrg else 15001e04c3fSmrg dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); 15101e04c3fSmrg } 15201e04c3fSmrg 15301e04c3fSmrg return dest; 15401e04c3fSmrg} 15501e04c3fSmrg 1567ec681f3Smrgstatic struct vtn_ssa_value * 15701e04c3fSmrgvtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, 15801e04c3fSmrg struct vtn_ssa_value *src0, struct vtn_ssa_value *src1) 15901e04c3fSmrg{ 16001e04c3fSmrg switch (opcode) { 16101e04c3fSmrg case SpvOpFNegate: { 1627ec681f3Smrg struct vtn_ssa_value *dest = vtn_create_ssa_value(b, src0->type); 
16301e04c3fSmrg unsigned cols = glsl_get_matrix_columns(src0->type); 16401e04c3fSmrg for (unsigned i = 0; i < cols; i++) 1657ec681f3Smrg dest->elems[i]->def = nir_fneg(&b->nb, src0->elems[i]->def); 1667ec681f3Smrg return dest; 16701e04c3fSmrg } 16801e04c3fSmrg 16901e04c3fSmrg case SpvOpFAdd: { 1707ec681f3Smrg struct vtn_ssa_value *dest = vtn_create_ssa_value(b, src0->type); 17101e04c3fSmrg unsigned cols = glsl_get_matrix_columns(src0->type); 17201e04c3fSmrg for (unsigned i = 0; i < cols; i++) 1737ec681f3Smrg dest->elems[i]->def = 17401e04c3fSmrg nir_fadd(&b->nb, src0->elems[i]->def, src1->elems[i]->def); 1757ec681f3Smrg return dest; 17601e04c3fSmrg } 17701e04c3fSmrg 17801e04c3fSmrg case SpvOpFSub: { 1797ec681f3Smrg struct vtn_ssa_value *dest = vtn_create_ssa_value(b, src0->type); 18001e04c3fSmrg unsigned cols = glsl_get_matrix_columns(src0->type); 18101e04c3fSmrg for (unsigned i = 0; i < cols; i++) 1827ec681f3Smrg dest->elems[i]->def = 18301e04c3fSmrg nir_fsub(&b->nb, src0->elems[i]->def, src1->elems[i]->def); 1847ec681f3Smrg return dest; 18501e04c3fSmrg } 18601e04c3fSmrg 18701e04c3fSmrg case SpvOpTranspose: 1887ec681f3Smrg return vtn_ssa_transpose(b, src0); 18901e04c3fSmrg 19001e04c3fSmrg case SpvOpMatrixTimesScalar: 19101e04c3fSmrg if (src0->transposed) { 1927ec681f3Smrg return vtn_ssa_transpose(b, mat_times_scalar(b, src0->transposed, 1937ec681f3Smrg src1->def)); 19401e04c3fSmrg } else { 1957ec681f3Smrg return mat_times_scalar(b, src0, src1->def); 19601e04c3fSmrg } 19701e04c3fSmrg break; 19801e04c3fSmrg 19901e04c3fSmrg case SpvOpVectorTimesMatrix: 20001e04c3fSmrg case SpvOpMatrixTimesVector: 20101e04c3fSmrg case SpvOpMatrixTimesMatrix: 20201e04c3fSmrg if (opcode == SpvOpVectorTimesMatrix) { 2037ec681f3Smrg return matrix_multiply(b, vtn_ssa_transpose(b, src1), src0); 20401e04c3fSmrg } else { 2057ec681f3Smrg return matrix_multiply(b, src0, src1); 20601e04c3fSmrg } 20701e04c3fSmrg break; 20801e04c3fSmrg 2097e102996Smaya default: vtn_fail_with_opcode("unknown matrix 
opcode", opcode); 21001e04c3fSmrg } 21101e04c3fSmrg} 21201e04c3fSmrg 2137ec681f3Smrgstatic nir_alu_type 2147ec681f3Smrgconvert_op_src_type(SpvOp opcode) 2157ec681f3Smrg{ 2167ec681f3Smrg switch (opcode) { 2177ec681f3Smrg case SpvOpFConvert: 2187ec681f3Smrg case SpvOpConvertFToS: 2197ec681f3Smrg case SpvOpConvertFToU: 2207ec681f3Smrg return nir_type_float; 2217ec681f3Smrg case SpvOpSConvert: 2227ec681f3Smrg case SpvOpConvertSToF: 2237ec681f3Smrg case SpvOpSatConvertSToU: 2247ec681f3Smrg return nir_type_int; 2257ec681f3Smrg case SpvOpUConvert: 2267ec681f3Smrg case SpvOpConvertUToF: 2277ec681f3Smrg case SpvOpSatConvertUToS: 2287ec681f3Smrg return nir_type_uint; 2297ec681f3Smrg default: 2307ec681f3Smrg unreachable("Unhandled conversion op"); 2317ec681f3Smrg } 2327ec681f3Smrg} 2337ec681f3Smrg 2347ec681f3Smrgstatic nir_alu_type 2357ec681f3Smrgconvert_op_dst_type(SpvOp opcode) 2367ec681f3Smrg{ 2377ec681f3Smrg switch (opcode) { 2387ec681f3Smrg case SpvOpFConvert: 2397ec681f3Smrg case SpvOpConvertSToF: 2407ec681f3Smrg case SpvOpConvertUToF: 2417ec681f3Smrg return nir_type_float; 2427ec681f3Smrg case SpvOpSConvert: 2437ec681f3Smrg case SpvOpConvertFToS: 2447ec681f3Smrg case SpvOpSatConvertUToS: 2457ec681f3Smrg return nir_type_int; 2467ec681f3Smrg case SpvOpUConvert: 2477ec681f3Smrg case SpvOpConvertFToU: 2487ec681f3Smrg case SpvOpSatConvertSToU: 2497ec681f3Smrg return nir_type_uint; 2507ec681f3Smrg default: 2517ec681f3Smrg unreachable("Unhandled conversion op"); 2527ec681f3Smrg } 2537ec681f3Smrg} 2547ec681f3Smrg 25501e04c3fSmrgnir_op 25601e04c3fSmrgvtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b, 2577ec681f3Smrg SpvOp opcode, bool *swap, bool *exact, 25801e04c3fSmrg unsigned src_bit_size, unsigned dst_bit_size) 25901e04c3fSmrg{ 26001e04c3fSmrg /* Indicates that the first two arguments should be swapped. This is 26101e04c3fSmrg * used for implementing greater-than and less-than-or-equal. 
26201e04c3fSmrg */ 26301e04c3fSmrg *swap = false; 26401e04c3fSmrg 2657ec681f3Smrg *exact = false; 2667ec681f3Smrg 26701e04c3fSmrg switch (opcode) { 26801e04c3fSmrg case SpvOpSNegate: return nir_op_ineg; 26901e04c3fSmrg case SpvOpFNegate: return nir_op_fneg; 27001e04c3fSmrg case SpvOpNot: return nir_op_inot; 27101e04c3fSmrg case SpvOpIAdd: return nir_op_iadd; 27201e04c3fSmrg case SpvOpFAdd: return nir_op_fadd; 27301e04c3fSmrg case SpvOpISub: return nir_op_isub; 27401e04c3fSmrg case SpvOpFSub: return nir_op_fsub; 27501e04c3fSmrg case SpvOpIMul: return nir_op_imul; 27601e04c3fSmrg case SpvOpFMul: return nir_op_fmul; 27701e04c3fSmrg case SpvOpUDiv: return nir_op_udiv; 27801e04c3fSmrg case SpvOpSDiv: return nir_op_idiv; 27901e04c3fSmrg case SpvOpFDiv: return nir_op_fdiv; 28001e04c3fSmrg case SpvOpUMod: return nir_op_umod; 28101e04c3fSmrg case SpvOpSMod: return nir_op_imod; 28201e04c3fSmrg case SpvOpFMod: return nir_op_fmod; 28301e04c3fSmrg case SpvOpSRem: return nir_op_irem; 28401e04c3fSmrg case SpvOpFRem: return nir_op_frem; 28501e04c3fSmrg 28601e04c3fSmrg case SpvOpShiftRightLogical: return nir_op_ushr; 28701e04c3fSmrg case SpvOpShiftRightArithmetic: return nir_op_ishr; 28801e04c3fSmrg case SpvOpShiftLeftLogical: return nir_op_ishl; 28901e04c3fSmrg case SpvOpLogicalOr: return nir_op_ior; 29001e04c3fSmrg case SpvOpLogicalEqual: return nir_op_ieq; 29101e04c3fSmrg case SpvOpLogicalNotEqual: return nir_op_ine; 29201e04c3fSmrg case SpvOpLogicalAnd: return nir_op_iand; 29301e04c3fSmrg case SpvOpLogicalNot: return nir_op_inot; 29401e04c3fSmrg case SpvOpBitwiseOr: return nir_op_ior; 29501e04c3fSmrg case SpvOpBitwiseXor: return nir_op_ixor; 29601e04c3fSmrg case SpvOpBitwiseAnd: return nir_op_iand; 29701e04c3fSmrg case SpvOpSelect: return nir_op_bcsel; 29801e04c3fSmrg case SpvOpIEqual: return nir_op_ieq; 29901e04c3fSmrg 30001e04c3fSmrg case SpvOpBitFieldInsert: return nir_op_bitfield_insert; 30101e04c3fSmrg case SpvOpBitFieldSExtract: return nir_op_ibitfield_extract; 
30201e04c3fSmrg case SpvOpBitFieldUExtract: return nir_op_ubitfield_extract; 30301e04c3fSmrg case SpvOpBitReverse: return nir_op_bitfield_reverse; 3047ec681f3Smrg 3057ec681f3Smrg case SpvOpUCountLeadingZerosINTEL: return nir_op_uclz; 3067ec681f3Smrg /* SpvOpUCountTrailingZerosINTEL is handled elsewhere. */ 3077ec681f3Smrg case SpvOpAbsISubINTEL: return nir_op_uabs_isub; 3087ec681f3Smrg case SpvOpAbsUSubINTEL: return nir_op_uabs_usub; 3097ec681f3Smrg case SpvOpIAddSatINTEL: return nir_op_iadd_sat; 3107ec681f3Smrg case SpvOpUAddSatINTEL: return nir_op_uadd_sat; 3117ec681f3Smrg case SpvOpIAverageINTEL: return nir_op_ihadd; 3127ec681f3Smrg case SpvOpUAverageINTEL: return nir_op_uhadd; 3137ec681f3Smrg case SpvOpIAverageRoundedINTEL: return nir_op_irhadd; 3147ec681f3Smrg case SpvOpUAverageRoundedINTEL: return nir_op_urhadd; 3157ec681f3Smrg case SpvOpISubSatINTEL: return nir_op_isub_sat; 3167ec681f3Smrg case SpvOpUSubSatINTEL: return nir_op_usub_sat; 3177ec681f3Smrg case SpvOpIMul32x16INTEL: return nir_op_imul_32x16; 3187ec681f3Smrg case SpvOpUMul32x16INTEL: return nir_op_umul_32x16; 31901e04c3fSmrg 32001e04c3fSmrg /* The ordered / unordered operators need special implementation besides 32101e04c3fSmrg * the logical operator to use since they also need to check if operands are 32201e04c3fSmrg * ordered. 
32301e04c3fSmrg */ 3247ec681f3Smrg case SpvOpFOrdEqual: *exact = true; return nir_op_feq; 3257ec681f3Smrg case SpvOpFUnordEqual: *exact = true; return nir_op_feq; 3267ec681f3Smrg case SpvOpINotEqual: return nir_op_ine; 3277ec681f3Smrg case SpvOpLessOrGreater: /* Deprecated, use OrdNotEqual */ 3287ec681f3Smrg case SpvOpFOrdNotEqual: *exact = true; return nir_op_fneu; 3297ec681f3Smrg case SpvOpFUnordNotEqual: *exact = true; return nir_op_fneu; 3307ec681f3Smrg case SpvOpULessThan: return nir_op_ult; 3317ec681f3Smrg case SpvOpSLessThan: return nir_op_ilt; 3327ec681f3Smrg case SpvOpFOrdLessThan: *exact = true; return nir_op_flt; 3337ec681f3Smrg case SpvOpFUnordLessThan: *exact = true; return nir_op_flt; 3347ec681f3Smrg case SpvOpUGreaterThan: *swap = true; return nir_op_ult; 3357ec681f3Smrg case SpvOpSGreaterThan: *swap = true; return nir_op_ilt; 3367ec681f3Smrg case SpvOpFOrdGreaterThan: *swap = true; *exact = true; return nir_op_flt; 3377ec681f3Smrg case SpvOpFUnordGreaterThan: *swap = true; *exact = true; return nir_op_flt; 3387ec681f3Smrg case SpvOpULessThanEqual: *swap = true; return nir_op_uge; 3397ec681f3Smrg case SpvOpSLessThanEqual: *swap = true; return nir_op_ige; 3407ec681f3Smrg case SpvOpFOrdLessThanEqual: *swap = true; *exact = true; return nir_op_fge; 3417ec681f3Smrg case SpvOpFUnordLessThanEqual: *swap = true; *exact = true; return nir_op_fge; 3427ec681f3Smrg case SpvOpUGreaterThanEqual: return nir_op_uge; 3437ec681f3Smrg case SpvOpSGreaterThanEqual: return nir_op_ige; 3447ec681f3Smrg case SpvOpFOrdGreaterThanEqual: *exact = true; return nir_op_fge; 3457ec681f3Smrg case SpvOpFUnordGreaterThanEqual: *exact = true; return nir_op_fge; 34601e04c3fSmrg 34701e04c3fSmrg /* Conversions: */ 34801e04c3fSmrg case SpvOpQuantizeToF16: return nir_op_fquantize2f16; 34901e04c3fSmrg case SpvOpUConvert: 35001e04c3fSmrg case SpvOpConvertFToU: 35101e04c3fSmrg case SpvOpConvertFToS: 35201e04c3fSmrg case SpvOpConvertSToF: 35301e04c3fSmrg case SpvOpConvertUToF: 35401e04c3fSmrg 
case SpvOpSConvert: 35501e04c3fSmrg case SpvOpFConvert: { 3567ec681f3Smrg nir_alu_type src_type = convert_op_src_type(opcode) | src_bit_size; 3577ec681f3Smrg nir_alu_type dst_type = convert_op_dst_type(opcode) | dst_bit_size; 35801e04c3fSmrg return nir_type_conversion_op(src_type, dst_type, nir_rounding_mode_undef); 35901e04c3fSmrg } 3607ec681f3Smrg 3617ec681f3Smrg case SpvOpPtrCastToGeneric: return nir_op_mov; 3627ec681f3Smrg case SpvOpGenericCastToPtr: return nir_op_mov; 3637ec681f3Smrg 36401e04c3fSmrg /* Derivatives: */ 36501e04c3fSmrg case SpvOpDPdx: return nir_op_fddx; 36601e04c3fSmrg case SpvOpDPdy: return nir_op_fddy; 36701e04c3fSmrg case SpvOpDPdxFine: return nir_op_fddx_fine; 36801e04c3fSmrg case SpvOpDPdyFine: return nir_op_fddy_fine; 36901e04c3fSmrg case SpvOpDPdxCoarse: return nir_op_fddx_coarse; 37001e04c3fSmrg case SpvOpDPdyCoarse: return nir_op_fddy_coarse; 37101e04c3fSmrg 3727ec681f3Smrg case SpvOpIsNormal: return nir_op_fisnormal; 3737ec681f3Smrg case SpvOpIsFinite: return nir_op_fisfinite; 3747ec681f3Smrg 37501e04c3fSmrg default: 37601e04c3fSmrg vtn_fail("No NIR equivalent: %u", opcode); 37701e04c3fSmrg } 37801e04c3fSmrg} 37901e04c3fSmrg 38001e04c3fSmrgstatic void 3817ec681f3Smrghandle_no_contraction(struct vtn_builder *b, UNUSED struct vtn_value *val, 3827ec681f3Smrg UNUSED int member, const struct vtn_decoration *dec, 3837ec681f3Smrg UNUSED void *_void) 38401e04c3fSmrg{ 38501e04c3fSmrg vtn_assert(dec->scope == VTN_DEC_DECORATION); 38601e04c3fSmrg if (dec->decoration != SpvDecorationNoContraction) 38701e04c3fSmrg return; 38801e04c3fSmrg 38901e04c3fSmrg b->nb.exact = true; 39001e04c3fSmrg} 39101e04c3fSmrg 3927ec681f3Smrgvoid 3937ec681f3Smrgvtn_handle_no_contraction(struct vtn_builder *b, struct vtn_value *val) 39401e04c3fSmrg{ 3957ec681f3Smrg vtn_foreach_decoration(b, val, handle_no_contraction, NULL); 3967ec681f3Smrg} 3977ec681f3Smrg 3987ec681f3Smrgnir_rounding_mode 3997ec681f3Smrgvtn_rounding_mode_to_nir(struct vtn_builder *b, SpvFPRoundingMode 
mode) 4007ec681f3Smrg{ 4017ec681f3Smrg switch (mode) { 40201e04c3fSmrg case SpvFPRoundingModeRTE: 4037ec681f3Smrg return nir_rounding_mode_rtne; 40401e04c3fSmrg case SpvFPRoundingModeRTZ: 4057ec681f3Smrg return nir_rounding_mode_rtz; 4067ec681f3Smrg case SpvFPRoundingModeRTP: 4077ec681f3Smrg vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL, 4087ec681f3Smrg "FPRoundingModeRTP is only supported in kernels"); 4097ec681f3Smrg return nir_rounding_mode_ru; 4107ec681f3Smrg case SpvFPRoundingModeRTN: 4117ec681f3Smrg vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL, 4127ec681f3Smrg "FPRoundingModeRTN is only supported in kernels"); 4137ec681f3Smrg return nir_rounding_mode_rd; 4147ec681f3Smrg default: 4157ec681f3Smrg vtn_fail("Unsupported rounding mode: %s", 4167ec681f3Smrg spirv_fproundingmode_to_string(mode)); 4177ec681f3Smrg break; 4187ec681f3Smrg } 4197ec681f3Smrg} 4207ec681f3Smrg 4217ec681f3Smrgstruct conversion_opts { 4227ec681f3Smrg nir_rounding_mode rounding_mode; 4237ec681f3Smrg bool saturate; 4247ec681f3Smrg}; 4257ec681f3Smrg 4267ec681f3Smrgstatic void 4277ec681f3Smrghandle_conversion_opts(struct vtn_builder *b, UNUSED struct vtn_value *val, 4287ec681f3Smrg UNUSED int member, 4297ec681f3Smrg const struct vtn_decoration *dec, void *_opts) 4307ec681f3Smrg{ 4317ec681f3Smrg struct conversion_opts *opts = _opts; 4327ec681f3Smrg 4337ec681f3Smrg switch (dec->decoration) { 4347ec681f3Smrg case SpvDecorationFPRoundingMode: 4357ec681f3Smrg opts->rounding_mode = vtn_rounding_mode_to_nir(b, dec->operands[0]); 43601e04c3fSmrg break; 4377ec681f3Smrg 4387ec681f3Smrg case SpvDecorationSaturatedConversion: 4397ec681f3Smrg vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL, 4407ec681f3Smrg "Saturated conversions are only allowed in kernels"); 4417ec681f3Smrg opts->saturate = true; 4427ec681f3Smrg break; 4437ec681f3Smrg 44401e04c3fSmrg default: 4457ec681f3Smrg break; 4467ec681f3Smrg } 4477ec681f3Smrg} 4487ec681f3Smrg 4497ec681f3Smrgstatic void 
4507ec681f3Smrghandle_no_wrap(UNUSED struct vtn_builder *b, UNUSED struct vtn_value *val, 4517ec681f3Smrg UNUSED int member, 4527ec681f3Smrg const struct vtn_decoration *dec, void *_alu) 4537ec681f3Smrg{ 4547ec681f3Smrg nir_alu_instr *alu = _alu; 4557ec681f3Smrg switch (dec->decoration) { 4567ec681f3Smrg case SpvDecorationNoSignedWrap: 4577ec681f3Smrg alu->no_signed_wrap = true; 4587ec681f3Smrg break; 4597ec681f3Smrg case SpvDecorationNoUnsignedWrap: 4607ec681f3Smrg alu->no_unsigned_wrap = true; 4617ec681f3Smrg break; 4627ec681f3Smrg default: 4637ec681f3Smrg /* Do nothing. */ 46401e04c3fSmrg break; 46501e04c3fSmrg } 46601e04c3fSmrg} 46701e04c3fSmrg 46801e04c3fSmrgvoid 46901e04c3fSmrgvtn_handle_alu(struct vtn_builder *b, SpvOp opcode, 47001e04c3fSmrg const uint32_t *w, unsigned count) 47101e04c3fSmrg{ 4727ec681f3Smrg struct vtn_value *dest_val = vtn_untyped_value(b, w[2]); 4737ec681f3Smrg const struct glsl_type *dest_type = vtn_get_type(b, w[1])->type; 47401e04c3fSmrg 4757ec681f3Smrg vtn_handle_no_contraction(b, dest_val); 47601e04c3fSmrg 47701e04c3fSmrg /* Collect the various SSA sources */ 47801e04c3fSmrg const unsigned num_inputs = count - 3; 47901e04c3fSmrg struct vtn_ssa_value *vtn_src[4] = { NULL, }; 48001e04c3fSmrg for (unsigned i = 0; i < num_inputs; i++) 48101e04c3fSmrg vtn_src[i] = vtn_ssa_value(b, w[i + 3]); 48201e04c3fSmrg 48301e04c3fSmrg if (glsl_type_is_matrix(vtn_src[0]->type) || 48401e04c3fSmrg (num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) { 4857ec681f3Smrg vtn_push_ssa_value(b, w[2], 4867ec681f3Smrg vtn_handle_matrix_alu(b, opcode, vtn_src[0], vtn_src[1])); 4877e102996Smaya b->nb.exact = b->exact; 48801e04c3fSmrg return; 48901e04c3fSmrg } 49001e04c3fSmrg 4917ec681f3Smrg struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type); 49201e04c3fSmrg nir_ssa_def *src[4] = { NULL, }; 49301e04c3fSmrg for (unsigned i = 0; i < num_inputs; i++) { 49401e04c3fSmrg vtn_assert(glsl_type_is_vector_or_scalar(vtn_src[i]->type)); 49501e04c3fSmrg 
src[i] = vtn_src[i]->def; 49601e04c3fSmrg } 49701e04c3fSmrg 49801e04c3fSmrg switch (opcode) { 49901e04c3fSmrg case SpvOpAny: 5007ec681f3Smrg dest->def = nir_bany(&b->nb, src[0]); 50101e04c3fSmrg break; 50201e04c3fSmrg 50301e04c3fSmrg case SpvOpAll: 5047ec681f3Smrg dest->def = nir_ball(&b->nb, src[0]); 50501e04c3fSmrg break; 50601e04c3fSmrg 50701e04c3fSmrg case SpvOpOuterProduct: { 50801e04c3fSmrg for (unsigned i = 0; i < src[1]->num_components; i++) { 5097ec681f3Smrg dest->elems[i]->def = 51001e04c3fSmrg nir_fmul(&b->nb, src[0], nir_channel(&b->nb, src[1], i)); 51101e04c3fSmrg } 51201e04c3fSmrg break; 51301e04c3fSmrg } 51401e04c3fSmrg 51501e04c3fSmrg case SpvOpDot: 5167ec681f3Smrg dest->def = nir_fdot(&b->nb, src[0], src[1]); 51701e04c3fSmrg break; 51801e04c3fSmrg 51901e04c3fSmrg case SpvOpIAddCarry: 5207ec681f3Smrg vtn_assert(glsl_type_is_struct_or_ifc(dest_type)); 5217ec681f3Smrg dest->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]); 5227ec681f3Smrg dest->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]); 52301e04c3fSmrg break; 52401e04c3fSmrg 52501e04c3fSmrg case SpvOpISubBorrow: 5267ec681f3Smrg vtn_assert(glsl_type_is_struct_or_ifc(dest_type)); 5277ec681f3Smrg dest->elems[0]->def = nir_isub(&b->nb, src[0], src[1]); 5287ec681f3Smrg dest->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]); 52901e04c3fSmrg break; 53001e04c3fSmrg 5317e102996Smaya case SpvOpUMulExtended: { 5327ec681f3Smrg vtn_assert(glsl_type_is_struct_or_ifc(dest_type)); 5337e102996Smaya nir_ssa_def *umul = nir_umul_2x32_64(&b->nb, src[0], src[1]); 5347ec681f3Smrg dest->elems[0]->def = nir_unpack_64_2x32_split_x(&b->nb, umul); 5357ec681f3Smrg dest->elems[1]->def = nir_unpack_64_2x32_split_y(&b->nb, umul); 53601e04c3fSmrg break; 5377e102996Smaya } 53801e04c3fSmrg 5397e102996Smaya case SpvOpSMulExtended: { 5407ec681f3Smrg vtn_assert(glsl_type_is_struct_or_ifc(dest_type)); 5417e102996Smaya nir_ssa_def *smul = nir_imul_2x32_64(&b->nb, src[0], src[1]); 5427ec681f3Smrg dest->elems[0]->def = 
nir_unpack_64_2x32_split_x(&b->nb, smul); 5437ec681f3Smrg dest->elems[1]->def = nir_unpack_64_2x32_split_y(&b->nb, smul); 54401e04c3fSmrg break; 5457e102996Smaya } 54601e04c3fSmrg 54701e04c3fSmrg case SpvOpFwidth: 5487ec681f3Smrg dest->def = nir_fadd(&b->nb, 54901e04c3fSmrg nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), 55001e04c3fSmrg nir_fabs(&b->nb, nir_fddy(&b->nb, src[0]))); 55101e04c3fSmrg break; 55201e04c3fSmrg case SpvOpFwidthFine: 5537ec681f3Smrg dest->def = nir_fadd(&b->nb, 55401e04c3fSmrg nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), 55501e04c3fSmrg nir_fabs(&b->nb, nir_fddy_fine(&b->nb, src[0]))); 55601e04c3fSmrg break; 55701e04c3fSmrg case SpvOpFwidthCoarse: 5587ec681f3Smrg dest->def = nir_fadd(&b->nb, 55901e04c3fSmrg nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), 56001e04c3fSmrg nir_fabs(&b->nb, nir_fddy_coarse(&b->nb, src[0]))); 56101e04c3fSmrg break; 56201e04c3fSmrg 56301e04c3fSmrg case SpvOpVectorTimesScalar: 56401e04c3fSmrg /* The builder will take care of splatting for us. 
*/ 5657ec681f3Smrg dest->def = nir_fmul(&b->nb, src[0], src[1]); 5667ec681f3Smrg break; 5677ec681f3Smrg 5687ec681f3Smrg case SpvOpIsNan: { 5697ec681f3Smrg const bool save_exact = b->nb.exact; 5707ec681f3Smrg 5717ec681f3Smrg b->nb.exact = true; 5727ec681f3Smrg dest->def = nir_fneu(&b->nb, src[0], src[0]); 5737ec681f3Smrg b->nb.exact = save_exact; 57401e04c3fSmrg break; 5757ec681f3Smrg } 5767ec681f3Smrg 5777ec681f3Smrg case SpvOpOrdered: { 5787ec681f3Smrg const bool save_exact = b->nb.exact; 57901e04c3fSmrg 5807ec681f3Smrg b->nb.exact = true; 5817ec681f3Smrg dest->def = nir_iand(&b->nb, nir_feq(&b->nb, src[0], src[0]), 5827ec681f3Smrg nir_feq(&b->nb, src[1], src[1])); 5837ec681f3Smrg b->nb.exact = save_exact; 58401e04c3fSmrg break; 5857ec681f3Smrg } 5867ec681f3Smrg 5877ec681f3Smrg case SpvOpUnordered: { 5887ec681f3Smrg const bool save_exact = b->nb.exact; 5897ec681f3Smrg 5907ec681f3Smrg b->nb.exact = true; 5917ec681f3Smrg dest->def = nir_ior(&b->nb, nir_fneu(&b->nb, src[0], src[0]), 5927ec681f3Smrg nir_fneu(&b->nb, src[1], src[1])); 5937ec681f3Smrg b->nb.exact = save_exact; 5947ec681f3Smrg break; 5957ec681f3Smrg } 59601e04c3fSmrg 59701e04c3fSmrg case SpvOpIsInf: { 59801e04c3fSmrg nir_ssa_def *inf = nir_imm_floatN_t(&b->nb, INFINITY, src[0]->bit_size); 5997ec681f3Smrg dest->def = nir_ieq(&b->nb, nir_fabs(&b->nb, src[0]), inf); 6007ec681f3Smrg break; 6017ec681f3Smrg } 6027ec681f3Smrg 6037ec681f3Smrg case SpvOpFUnordEqual: { 6047ec681f3Smrg const bool save_exact = b->nb.exact; 6057ec681f3Smrg 6067ec681f3Smrg b->nb.exact = true; 6077ec681f3Smrg 6087ec681f3Smrg /* This could also be implemented as !(a < b || b < a). If one or both 6097ec681f3Smrg * of the source are numbers, later optimization passes can easily 6107ec681f3Smrg * eliminate the isnan() checks. This may trim the sequence down to a 6117ec681f3Smrg * single (a == b) operation. Otherwise, the optimizer can transform 6127ec681f3Smrg * whatever is left to !(a < b || b < a). 
Since some applications will 6137ec681f3Smrg * open-code this sequence, these optimizations are needed anyway. 6147ec681f3Smrg */ 6157ec681f3Smrg dest->def = 6167ec681f3Smrg nir_ior(&b->nb, 6177ec681f3Smrg nir_feq(&b->nb, src[0], src[1]), 6187ec681f3Smrg nir_ior(&b->nb, 6197ec681f3Smrg nir_fneu(&b->nb, src[0], src[0]), 6207ec681f3Smrg nir_fneu(&b->nb, src[1], src[1]))); 6217ec681f3Smrg 6227ec681f3Smrg b->nb.exact = save_exact; 62301e04c3fSmrg break; 62401e04c3fSmrg } 62501e04c3fSmrg 62601e04c3fSmrg case SpvOpFUnordLessThan: 62701e04c3fSmrg case SpvOpFUnordGreaterThan: 62801e04c3fSmrg case SpvOpFUnordLessThanEqual: 62901e04c3fSmrg case SpvOpFUnordGreaterThanEqual: { 63001e04c3fSmrg bool swap; 6317ec681f3Smrg bool unused_exact; 63201e04c3fSmrg unsigned src_bit_size = glsl_get_bit_size(vtn_src[0]->type); 6337ec681f3Smrg unsigned dst_bit_size = glsl_get_bit_size(dest_type); 63401e04c3fSmrg nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, 6357ec681f3Smrg &unused_exact, 63601e04c3fSmrg src_bit_size, dst_bit_size); 63701e04c3fSmrg 63801e04c3fSmrg if (swap) { 63901e04c3fSmrg nir_ssa_def *tmp = src[0]; 64001e04c3fSmrg src[0] = src[1]; 64101e04c3fSmrg src[1] = tmp; 64201e04c3fSmrg } 64301e04c3fSmrg 6447ec681f3Smrg const bool save_exact = b->nb.exact; 6457ec681f3Smrg 6467ec681f3Smrg b->nb.exact = true; 6477ec681f3Smrg 6487ec681f3Smrg /* Use the property FUnordLessThan(a, b) ≡ !FOrdGreaterThanEqual(a, b). 
*/ 6497ec681f3Smrg switch (op) { 6507ec681f3Smrg case nir_op_fge: op = nir_op_flt; break; 6517ec681f3Smrg case nir_op_flt: op = nir_op_fge; break; 6527ec681f3Smrg default: unreachable("Impossible opcode."); 6537ec681f3Smrg } 6547ec681f3Smrg 6557ec681f3Smrg dest->def = 6567ec681f3Smrg nir_inot(&b->nb, 6577ec681f3Smrg nir_build_alu(&b->nb, op, src[0], src[1], NULL, NULL)); 6587ec681f3Smrg 6597ec681f3Smrg b->nb.exact = save_exact; 66001e04c3fSmrg break; 66101e04c3fSmrg } 66201e04c3fSmrg 6637ec681f3Smrg case SpvOpLessOrGreater: 66401e04c3fSmrg case SpvOpFOrdNotEqual: { 66501e04c3fSmrg /* For all the SpvOpFOrd* comparisons apart from NotEqual, the value 66601e04c3fSmrg * from the ALU will probably already be false if the operands are not 66701e04c3fSmrg * ordered so we don’t need to handle it specially. 66801e04c3fSmrg */ 6697ec681f3Smrg const bool save_exact = b->nb.exact; 67001e04c3fSmrg 6717ec681f3Smrg b->nb.exact = true; 67201e04c3fSmrg 6737ec681f3Smrg /* This could also be implemented as (a < b || b < a). If one or both 6747ec681f3Smrg * of the source are numbers, later optimization passes can easily 6757ec681f3Smrg * eliminate the isnan() checks. This may trim the sequence down to a 6767ec681f3Smrg * single (a != b) operation. Otherwise, the optimizer can transform 6777ec681f3Smrg * whatever is left to (a < b || b < a). Since some applications will 6787ec681f3Smrg * open-code this sequence, these optimizations are needed anyway. 
6797ec681f3Smrg */ 6807ec681f3Smrg dest->def = 68101e04c3fSmrg nir_iand(&b->nb, 6827ec681f3Smrg nir_fneu(&b->nb, src[0], src[1]), 68301e04c3fSmrg nir_iand(&b->nb, 68401e04c3fSmrg nir_feq(&b->nb, src[0], src[0]), 68501e04c3fSmrg nir_feq(&b->nb, src[1], src[1]))); 6867ec681f3Smrg 6877ec681f3Smrg b->nb.exact = save_exact; 68801e04c3fSmrg break; 68901e04c3fSmrg } 69001e04c3fSmrg 6917ec681f3Smrg case SpvOpUConvert: 6927ec681f3Smrg case SpvOpConvertFToU: 6937ec681f3Smrg case SpvOpConvertFToS: 6947ec681f3Smrg case SpvOpConvertSToF: 6957ec681f3Smrg case SpvOpConvertUToF: 6967ec681f3Smrg case SpvOpSConvert: 6977ec681f3Smrg case SpvOpFConvert: 6987ec681f3Smrg case SpvOpSatConvertSToU: 6997ec681f3Smrg case SpvOpSatConvertUToS: { 7007ec681f3Smrg unsigned src_bit_size = glsl_get_bit_size(vtn_src[0]->type); 7017ec681f3Smrg unsigned dst_bit_size = glsl_get_bit_size(dest_type); 7027ec681f3Smrg nir_alu_type src_type = convert_op_src_type(opcode) | src_bit_size; 7037ec681f3Smrg nir_alu_type dst_type = convert_op_dst_type(opcode) | dst_bit_size; 7047ec681f3Smrg 7057ec681f3Smrg struct conversion_opts opts = { 7067ec681f3Smrg .rounding_mode = nir_rounding_mode_undef, 7077ec681f3Smrg .saturate = false, 7087ec681f3Smrg }; 7097ec681f3Smrg vtn_foreach_decoration(b, dest_val, handle_conversion_opts, &opts); 7107ec681f3Smrg 7117ec681f3Smrg if (opcode == SpvOpSatConvertSToU || opcode == SpvOpSatConvertUToS) 7127ec681f3Smrg opts.saturate = true; 7137ec681f3Smrg 7147ec681f3Smrg if (b->shader->info.stage == MESA_SHADER_KERNEL) { 7157ec681f3Smrg if (opts.rounding_mode == nir_rounding_mode_undef && !opts.saturate) { 7167ec681f3Smrg nir_op op = nir_type_conversion_op(src_type, dst_type, 7177ec681f3Smrg nir_rounding_mode_undef); 7187ec681f3Smrg dest->def = nir_build_alu(&b->nb, op, src[0], NULL, NULL, NULL); 7197ec681f3Smrg } else { 7207ec681f3Smrg dest->def = nir_convert_alu_types(&b->nb, dst_bit_size, src[0], 7217ec681f3Smrg src_type, dst_type, 7227ec681f3Smrg opts.rounding_mode, opts.saturate); 
7237ec681f3Smrg } 7247ec681f3Smrg } else { 7257ec681f3Smrg vtn_fail_if(opts.rounding_mode != nir_rounding_mode_undef && 7267ec681f3Smrg dst_type != nir_type_float16, 7277ec681f3Smrg "Rounding modes are only allowed on conversions to " 7287ec681f3Smrg "16-bit float types"); 7297ec681f3Smrg nir_op op = nir_type_conversion_op(src_type, dst_type, 7307ec681f3Smrg opts.rounding_mode); 7317ec681f3Smrg dest->def = nir_build_alu(&b->nb, op, src[0], NULL, NULL, NULL); 7327ec681f3Smrg } 73301e04c3fSmrg break; 73401e04c3fSmrg } 73501e04c3fSmrg 73601e04c3fSmrg case SpvOpBitFieldInsert: 73701e04c3fSmrg case SpvOpBitFieldSExtract: 73801e04c3fSmrg case SpvOpBitFieldUExtract: 73901e04c3fSmrg case SpvOpShiftLeftLogical: 74001e04c3fSmrg case SpvOpShiftRightArithmetic: 74101e04c3fSmrg case SpvOpShiftRightLogical: { 74201e04c3fSmrg bool swap; 7437ec681f3Smrg bool exact; 74401e04c3fSmrg unsigned src0_bit_size = glsl_get_bit_size(vtn_src[0]->type); 7457ec681f3Smrg unsigned dst_bit_size = glsl_get_bit_size(dest_type); 7467ec681f3Smrg nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, &exact, 74701e04c3fSmrg src0_bit_size, dst_bit_size); 74801e04c3fSmrg 7497ec681f3Smrg assert(!exact); 7507ec681f3Smrg 75101e04c3fSmrg assert (op == nir_op_ushr || op == nir_op_ishr || op == nir_op_ishl || 75201e04c3fSmrg op == nir_op_bitfield_insert || op == nir_op_ubitfield_extract || 75301e04c3fSmrg op == nir_op_ibitfield_extract); 75401e04c3fSmrg 75501e04c3fSmrg for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) { 75601e04c3fSmrg unsigned src_bit_size = 75701e04c3fSmrg nir_alu_type_get_type_size(nir_op_infos[op].input_types[i]); 75801e04c3fSmrg if (src_bit_size == 0) 75901e04c3fSmrg continue; 76001e04c3fSmrg if (src_bit_size != src[i]->bit_size) { 76101e04c3fSmrg assert(src_bit_size == 32); 76201e04c3fSmrg /* Convert the Shift, Offset and Count operands to 32 bits, which is the bitsize 76301e04c3fSmrg * supported by the NIR instructions. 
See discussion here: 76401e04c3fSmrg * 76501e04c3fSmrg * https://lists.freedesktop.org/archives/mesa-dev/2018-April/193026.html 76601e04c3fSmrg */ 76701e04c3fSmrg src[i] = nir_u2u32(&b->nb, src[i]); 76801e04c3fSmrg } 76901e04c3fSmrg } 7707ec681f3Smrg dest->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); 77101e04c3fSmrg break; 77201e04c3fSmrg } 77301e04c3fSmrg 7747ec681f3Smrg case SpvOpSignBitSet: 7757ec681f3Smrg dest->def = nir_i2b(&b->nb, 7767ec681f3Smrg nir_ushr(&b->nb, src[0], nir_imm_int(&b->nb, src[0]->bit_size - 1))); 7777ec681f3Smrg break; 7787e102996Smaya 7797ec681f3Smrg case SpvOpUCountTrailingZerosINTEL: 7807ec681f3Smrg dest->def = nir_umin(&b->nb, 7817ec681f3Smrg nir_find_lsb(&b->nb, src[0]), 7827ec681f3Smrg nir_imm_int(&b->nb, 32u)); 7837ec681f3Smrg break; 7847ec681f3Smrg 7857ec681f3Smrg case SpvOpBitCount: { 7867ec681f3Smrg /* bit_count always returns int32, but the SPIR-V opcode just says the return 7877ec681f3Smrg * value needs to be big enough to store the number of bits. 
7887ec681f3Smrg */ 7897ec681f3Smrg dest->def = nir_u2u(&b->nb, nir_bit_count(&b->nb, src[0]), glsl_get_bit_size(dest_type)); 7907e102996Smaya break; 7917e102996Smaya } 7927e102996Smaya 7937ec681f3Smrg case SpvOpSDotKHR: 7947ec681f3Smrg case SpvOpUDotKHR: 7957ec681f3Smrg case SpvOpSUDotKHR: 7967ec681f3Smrg case SpvOpSDotAccSatKHR: 7977ec681f3Smrg case SpvOpUDotAccSatKHR: 7987ec681f3Smrg case SpvOpSUDotAccSatKHR: 7997ec681f3Smrg unreachable("Should have called vtn_handle_integer_dot instead."); 8007ec681f3Smrg 80101e04c3fSmrg default: { 80201e04c3fSmrg bool swap; 8037ec681f3Smrg bool exact; 80401e04c3fSmrg unsigned src_bit_size = glsl_get_bit_size(vtn_src[0]->type); 8057ec681f3Smrg unsigned dst_bit_size = glsl_get_bit_size(dest_type); 80601e04c3fSmrg nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, 8077ec681f3Smrg &exact, 80801e04c3fSmrg src_bit_size, dst_bit_size); 80901e04c3fSmrg 81001e04c3fSmrg if (swap) { 81101e04c3fSmrg nir_ssa_def *tmp = src[0]; 81201e04c3fSmrg src[0] = src[1]; 81301e04c3fSmrg src[1] = tmp; 81401e04c3fSmrg } 81501e04c3fSmrg 81601e04c3fSmrg switch (op) { 81701e04c3fSmrg case nir_op_ishl: 81801e04c3fSmrg case nir_op_ishr: 81901e04c3fSmrg case nir_op_ushr: 82001e04c3fSmrg if (src[1]->bit_size != 32) 82101e04c3fSmrg src[1] = nir_u2u32(&b->nb, src[1]); 82201e04c3fSmrg break; 82301e04c3fSmrg default: 82401e04c3fSmrg break; 82501e04c3fSmrg } 82601e04c3fSmrg 8277ec681f3Smrg const bool save_exact = b->nb.exact; 8287ec681f3Smrg 8297ec681f3Smrg if (exact) 8307ec681f3Smrg b->nb.exact = true; 8317ec681f3Smrg 8327ec681f3Smrg dest->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); 8337ec681f3Smrg 8347ec681f3Smrg b->nb.exact = save_exact; 83501e04c3fSmrg break; 83601e04c3fSmrg } /* default */ 83701e04c3fSmrg } 83801e04c3fSmrg 8397ec681f3Smrg switch (opcode) { 8407ec681f3Smrg case SpvOpIAdd: 8417ec681f3Smrg case SpvOpIMul: 8427ec681f3Smrg case SpvOpISub: 8437ec681f3Smrg case SpvOpShiftLeftLogical: 8447ec681f3Smrg case SpvOpSNegate: { 
      /* Fetch the ALU instruction just created for dest->def and let the
       * decoration walker apply any no-wrap decorations to it via
       * handle_no_wrap.
       */
      nir_alu_instr *alu = nir_instr_as_alu(dest->def->parent_instr);
      vtn_foreach_decoration(b, dest_val, handle_no_wrap, alu);
      break;
   }
   default:
      /* Do nothing. */
      break;
   }

   vtn_push_ssa_value(b, w[2], dest);

   /* Restore b->nb.exact from b->exact; several cases above temporarily set
    * the builder's exact flag.
    */
   b->nb.exact = b->exact;
}

/* Convert the SPV_KHR_integer_dot_product opcodes (SpvOpSDotKHR,
 * SpvOpUDotKHR, SpvOpSUDotKHR and their *AccSat variants) into NIR.
 *
 * Sources that match a packed form NIR has intrinsics for are packed into a
 * single 32-bit value and lowered to the nir_*dot_* helpers; everything else
 * is expanded into per-component conversion, multiply, and add.
 */
void
vtn_handle_integer_dot(struct vtn_builder *b, SpvOp opcode,
                       const uint32_t *w, unsigned count)
{
   struct vtn_value *dest_val = vtn_untyped_value(b, w[2]);
   const struct glsl_type *dest_type = vtn_get_type(b, w[1])->type;
   const unsigned dest_size = glsl_get_bit_size(dest_type);

   vtn_handle_no_contraction(b, dest_val);

   /* Collect the various SSA sources.
    *
    * Due to the optional "Packed Vector Format" field, determine number of
    * inputs from the opcode.  This differs from vtn_handle_alu.
    */
   const unsigned num_inputs = (opcode == SpvOpSDotAccSatKHR ||
                                opcode == SpvOpUDotAccSatKHR ||
                                opcode == SpvOpSUDotAccSatKHR) ? 3 : 2;

   vtn_assert(count >= num_inputs + 3);

   struct vtn_ssa_value *vtn_src[3] = { NULL, };
   nir_ssa_def *src[3] = { NULL, };

   for (unsigned i = 0; i < num_inputs; i++) {
      vtn_src[i] = vtn_ssa_value(b, w[i + 3]);
      src[i] = vtn_src[i]->def;

      vtn_assert(glsl_type_is_vector_or_scalar(vtn_src[i]->type));
   }

   /* For all of the opcodes *except* SpvOpSUDotKHR and SpvOpSUDotAccSatKHR,
    * the SPV_KHR_integer_dot_product spec says:
    *
    *    _Vector 1_ and _Vector 2_ must have the same type.
    *
    * The practical requirement is the same bit-size and the same number of
    * components.
    */
   vtn_fail_if(glsl_get_bit_size(vtn_src[0]->type) !=
               glsl_get_bit_size(vtn_src[1]->type) ||
               glsl_get_vector_elements(vtn_src[0]->type) !=
               glsl_get_vector_elements(vtn_src[1]->type),
               "Vector 1 and vector 2 source of opcode %s must have the same "
               "type",
               spirv_op_to_string(opcode));

   if (num_inputs == 3) {
      /* The SPV_KHR_integer_dot_product spec says:
       *
       *    The type of Accumulator must be the same as Result Type.
       *
       * The handling of SpvOpSDotAccSatKHR and friends with the packed 4x8
       * types (far below) assumes these types have the same size.
       */
      vtn_fail_if(dest_type != vtn_src[2]->type,
                  "Accumulator type must be the same as Result Type for "
                  "opcode %s",
                  spirv_op_to_string(opcode));
   }

   /* Bit size of each packed component when a packed path is taken below:
    * 8 for the 4x8 intrinsics (default, also used by the scalar 32-bit
    * source case), 16 for the 2x16 intrinsics.
    */
   unsigned packed_bit_size = 8;
   if (glsl_type_is_vector(vtn_src[0]->type)) {
      /* FINISHME: Is this actually as good or better for platforms that don't
       * have the special instructions (i.e., one or both of has_dot_4x8 or
       * has_sudot_4x8 is false)?
       */
      if (glsl_get_vector_elements(vtn_src[0]->type) == 4 &&
          glsl_get_bit_size(vtn_src[0]->type) == 8 &&
          glsl_get_bit_size(dest_type) <= 32) {
         src[0] = nir_pack_32_4x8(&b->nb, src[0]);
         src[1] = nir_pack_32_4x8(&b->nb, src[1]);
      } else if (glsl_get_vector_elements(vtn_src[0]->type) == 2 &&
                 glsl_get_bit_size(vtn_src[0]->type) == 16 &&
                 glsl_get_bit_size(dest_type) <= 32 &&
                 opcode != SpvOpSUDotKHR &&
                 opcode != SpvOpSUDotAccSatKHR) {
         src[0] = nir_pack_32_2x16(&b->nb, src[0]);
         src[1] = nir_pack_32_2x16(&b->nb, src[1]);
         packed_bit_size = 16;
      }
   } else if (glsl_type_is_scalar(vtn_src[0]->type) &&
              glsl_type_is_32bit(vtn_src[0]->type)) {
      /* The SPV_KHR_integer_dot_product spec says:
       *
       *    When _Vector 1_ and _Vector 2_ are scalar integer types, _Packed
       *    Vector Format_ must be specified to select how the integers are to
       *    be interpreted as vectors.
       *
       * The "Packed Vector Format" value follows the last input.
       */
      vtn_assert(count == (num_inputs + 4));
      const SpvPackedVectorFormat pack_format = w[num_inputs + 3];
      vtn_fail_if(pack_format != SpvPackedVectorFormatPackedVectorFormat4x8BitKHR,
                  "Unsupported vector packing format %d for opcode %s",
                  pack_format, spirv_op_to_string(opcode));
   } else {
      vtn_fail_with_opcode("Invalid source types.", opcode);
   }

   nir_ssa_def *dest = NULL;

   if (src[0]->num_components > 1) {
      /* Unpacked path: widen each component to the result size with the
       * signedness the opcode requires, then multiply and accumulate
       * component-wise.
       */
      const nir_op s_conversion_op =
         nir_type_conversion_op(nir_type_int, nir_type_int | dest_size,
                                nir_rounding_mode_undef);

      const nir_op u_conversion_op =
         nir_type_conversion_op(nir_type_uint, nir_type_uint | dest_size,
                                nir_rounding_mode_undef);

      nir_op src0_conversion_op;
      nir_op src1_conversion_op;

      switch (opcode) {
      case SpvOpSDotKHR:
      case SpvOpSDotAccSatKHR:
         src0_conversion_op = s_conversion_op;
         src1_conversion_op = s_conversion_op;
         break;

      case SpvOpUDotKHR:
      case SpvOpUDotAccSatKHR:
         src0_conversion_op = u_conversion_op;
         src1_conversion_op = u_conversion_op;
         break;

      case SpvOpSUDotKHR:
      case SpvOpSUDotAccSatKHR:
         /* Mixed signedness: vector 1 is sign-extended, vector 2 is
          * zero-extended.
          */
         src0_conversion_op = s_conversion_op;
         src1_conversion_op = u_conversion_op;
         break;

      default:
         unreachable("Invalid opcode.");
      }

      /* The SPV_KHR_integer_dot_product spec says:
       *
       *    All components of the input vectors are sign-extended to the bit
       *    width of the result's type. The sign-extended input vectors are
       *    then multiplied component-wise and all components of the vector
       *    resulting from the component-wise multiplication are added
       *    together. The resulting value will equal the low-order N bits of
       *    the correct result R, where N is the result width and R is
       *    computed with enough precision to avoid overflow and underflow.
       */
      const unsigned vector_components =
         glsl_get_vector_elements(vtn_src[0]->type);

      for (unsigned i = 0; i < vector_components; i++) {
         nir_ssa_def *const src0 =
            nir_build_alu(&b->nb, src0_conversion_op,
                          nir_channel(&b->nb, src[0], i), NULL, NULL, NULL);

         nir_ssa_def *const src1 =
            nir_build_alu(&b->nb, src1_conversion_op,
                          nir_channel(&b->nb, src[1], i), NULL, NULL, NULL);

         nir_ssa_def *const mul_result = nir_imul(&b->nb, src0, src1);

         dest = (i == 0) ? mul_result : nir_iadd(&b->nb, dest, mul_result);
      }

      if (num_inputs == 3) {
         /* For SpvOpSDotAccSatKHR, the SPV_KHR_integer_dot_product spec says:
          *
          *    Signed integer dot product of _Vector 1_ and _Vector 2_ and
          *    signed saturating addition of the result with _Accumulator_.
          *
          * For SpvOpUDotAccSatKHR, the SPV_KHR_integer_dot_product spec says:
          *
          *    Unsigned integer dot product of _Vector 1_ and _Vector 2_ and
          *    unsigned saturating addition of the result with _Accumulator_.
          *
          * For SpvOpSUDotAccSatKHR, the SPV_KHR_integer_dot_product spec says:
          *
          *    Mixed-signedness integer dot product of _Vector 1_ and _Vector
          *    2_ and signed saturating addition of the result with
          *    _Accumulator_.
          */
         dest = (opcode == SpvOpUDotAccSatKHR)
            ? nir_uadd_sat(&b->nb, dest, src[2])
            : nir_iadd_sat(&b->nb, dest, src[2]);
      }
   } else {
      /* Packed path: both sources were reduced to a single 32-bit scalar
       * above (either packed here or supplied packed by the shader), so use
       * the NIR packed dot-product intrinsics.
       */
      assert(src[0]->num_components == 1 && src[1]->num_components == 1);
      assert(src[0]->bit_size == 32 && src[1]->bit_size == 32);

      nir_ssa_def *const zero = nir_imm_zero(&b->nb, 1, 32);
      bool is_signed = opcode == SpvOpSDotKHR || opcode == SpvOpSUDotKHR ||
                       opcode == SpvOpSDotAccSatKHR || opcode == SpvOpSUDotAccSatKHR;

      if (packed_bit_size == 16) {
         /* The SU opcodes never take this path (excluded when packing
          * above), so there are no nir_sudot_2x16 cases here.
          */
         switch (opcode) {
         case SpvOpSDotKHR:
            dest = nir_sdot_2x16_iadd(&b->nb, src[0], src[1], zero);
            break;
         case SpvOpUDotKHR:
            dest = nir_udot_2x16_uadd(&b->nb, src[0], src[1], zero);
            break;
         case SpvOpSDotAccSatKHR:
            if (dest_size == 32)
               dest = nir_sdot_2x16_iadd_sat(&b->nb, src[0], src[1], src[2]);
            else
               dest = nir_sdot_2x16_iadd(&b->nb, src[0], src[1], zero);
            break;
         case SpvOpUDotAccSatKHR:
            if (dest_size == 32)
               dest = nir_udot_2x16_uadd_sat(&b->nb, src[0], src[1], src[2]);
            else
               dest = nir_udot_2x16_uadd(&b->nb, src[0], src[1], zero);
            break;
         default:
            unreachable("Invalid opcode.");
         }
      } else {
         switch (opcode) {
         case SpvOpSDotKHR:
            dest = nir_sdot_4x8_iadd(&b->nb, src[0], src[1], zero);
            break;
         case SpvOpUDotKHR:
            dest = nir_udot_4x8_uadd(&b->nb, src[0], src[1], zero);
            break;
         case SpvOpSUDotKHR:
            dest = nir_sudot_4x8_iadd(&b->nb, src[0], src[1], zero);
            break;
         case SpvOpSDotAccSatKHR:
            if (dest_size == 32)
               dest = nir_sdot_4x8_iadd_sat(&b->nb, src[0], src[1], src[2]);
            else
               dest = nir_sdot_4x8_iadd(&b->nb, src[0], src[1], zero);
            break;
         case SpvOpUDotAccSatKHR:
            if (dest_size == 32)
               dest = nir_udot_4x8_uadd_sat(&b->nb, src[0], src[1], src[2]);
            else
               dest = nir_udot_4x8_uadd(&b->nb, src[0], src[1], zero);
            break;
         case SpvOpSUDotAccSatKHR:
            if (dest_size == 32)
               dest = nir_sudot_4x8_iadd_sat(&b->nb, src[0], src[1], src[2]);
            else
               dest = nir_sudot_4x8_iadd(&b->nb, src[0], src[1], zero);
            break;
         default:
            unreachable("Invalid opcode.");
         }
      }

      if (dest_size != 32) {
         /* When the accumulator is 32-bits, a NIR dot-product with saturate
          * is generated above.  In all other cases a regular dot-product is
          * generated above, and separate addition with saturate is generated
          * here.
          *
          * The SPV_KHR_integer_dot_product spec says:
          *
          *    If any of the multiplications or additions, with the exception
          *    of the final accumulation, overflow or underflow, the result of
          *    the instruction is undefined.
          *
          * Therefore it is safe to cast the dot-product result down to the
          * size of the accumulator before doing the addition.  Since the
          * result of the dot-product cannot overflow 32-bits, this is also
          * safe to cast up.
          */
         if (num_inputs == 3) {
            dest = is_signed
               ? nir_iadd_sat(&b->nb, nir_i2i(&b->nb, dest, dest_size), src[2])
               : nir_uadd_sat(&b->nb, nir_u2u(&b->nb, dest, dest_size), src[2]);
         } else {
            dest = is_signed
               ? nir_i2i(&b->nb, dest, dest_size)
               : nir_u2u(&b->nb, dest, dest_size);
         }
      }
   }

   vtn_push_nir_ssa(b, w[2], dest);

   /* Restore b->nb.exact from b->exact (vtn_handle_no_contraction may have
    * changed it).
    */
   b->nb.exact = b->exact;
}

/* Translate OpBitcast: reinterpret the source's bits as the result type,
 * after validating that both sides carry the same total number of bits.
 */
void
vtn_handle_bitcast(struct vtn_builder *b, const uint32_t *w, unsigned count)
{
   vtn_assert(count == 4);
   /* From the definition of OpBitcast in the SPIR-V 1.2 spec:
    *
    *    "If Result Type has the same number of components as Operand, they
    *    must also have the same component width, and results are computed per
    *    component.
    *
    *    If Result Type has a different number of components than Operand, the
    *    total number of bits in Result Type must equal the total number of
    *    bits in Operand. Let L be the type, either Result Type or Operand’s
    *    type, that has the larger number of components. Let S be the other
    *    type, with the smaller number of components. The number of components
    *    in L must be an integer multiple of the number of components in S.
    *    The first component (that is, the only or lowest-numbered component)
    *    of S maps to the first components of L, and so on, up to the last
    *    component of S mapping to the last components of L. Within this
    *    mapping, any single component of S (mapping to multiple components of
    *    L) maps its lower-ordered bits to the lower-numbered components of L."
    */

   struct vtn_type *type = vtn_get_type(b, w[1]);
   struct nir_ssa_def *src = vtn_get_nir_ssa(b, w[3]);

   vtn_fail_if(src->num_components * src->bit_size !=
               glsl_get_vector_elements(type->type) * glsl_get_bit_size(type->type),
               "Source and destination of OpBitcast must have the same "
               "total number of bits");
   /* nir_bitcast_vector re-slices the bit pattern into components of the
    * destination's bit size; the component count follows from the size
    * equality checked above.
    */
   nir_ssa_def *val =
      nir_bitcast_vector(&b->nb, src, glsl_get_bit_size(type->type));
   vtn_push_nir_ssa(b, w[2], val);
}