compiler/nir/nir_constant_expressions.py

01e04c3fSmrgimport re
7e102996Smayafrom nir_opcodes import opcodes
7e102996Smayafrom nir_opcodes import type_has_size, type_size, type_sizes, type_base_type
01e04c3fSmrg
def type_add_size(type_, size):
    """Return *type_* with an explicit bit size attached.

    A type name for which ``type_has_size`` is already true is returned
    untouched; otherwise ``str(size)`` is appended to the name.
    """
    return type_ if type_has_size(type_) else type_ + str(size)
01e04c3fSmrg
def op_bit_sizes(op):
    """Return the sorted bit sizes shared by every unsized type of *op*.

    The output type and each input type are considered.  Types that already
    carry a size are ignored; for the rest, the result is the intersection
    of what ``type_sizes`` reports for each of them.  ``None`` is returned
    when the output and all inputs are sized.
    """
    # Output type first, then the inputs, keeping only the unsized ones.
    unsized = [
        candidate
        for candidate in [op.output_type] + list(op.input_types)
        if not type_has_size(candidate)
    ]
    if not unsized:
        return None

    common = set(type_sizes(unsized[0]))
    for candidate in unsized[1:]:
        common &= set(type_sizes(candidate))
    return sorted(common)
01e04c3fSmrg
def get_const_field(type_):
    """Return the ``nir_const_value`` member name used to access *type_*.

    * any 1-bit type         -> ``b``
    * wider booleans         -> ``i<bits>``
    * ``float16``            -> ``u16``
    * everything else        -> first letter of the base type + ``<bits>``
    """
    bits = type_size(type_)
    if bits == 1:
        return 'b'

    base = type_base_type(type_)
    if base == 'bool':
        return 'i' + str(bits)
    if type_ == "float16":
        return "u16"
    return base[0] + str(bits)
01e04c3fSmrg
# Mako template for the generated C source.  It emits a set of static helper
# functions, one static evaluate_<name>() function per entry in `opcodes`,
# and the nir_eval_const_opcode() dispatcher that switches over them.
#
# NOTE: this is a regular (non-raw) Python string because the leading
# backslash-newline is needed to suppress the first empty line.  Any other
# backslash that must reach the C output therefore has to be doubled; in
# particular the Doxygen "\\brief" below must be written "\\brief", otherwise
# Python turns "\b" into a backspace character.
template = """\
/*
 * Copyright (C) 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 */

#include <math.h>
#include "util/rounding.h" /* for _mesa_roundeven */
#include "util/half_float.h"
#include "util/double.h"
#include "util/softfloat.h"
#include "util/bigmath.h"
#include "nir_constant_expressions.h"

/**
 * \\brief Checks if the provided value is a denorm and flushes it to zero.
 */
static void
constant_denorm_flush_to_zero(nir_const_value *value, unsigned bit_size)
{
    switch(bit_size) {
    case 64:
        if (0 == (value->u64 & 0x7ff0000000000000))
            value->u64 &= 0x8000000000000000;
        break;
    case 32:
        if (0 == (value->u32 & 0x7f800000))
            value->u32 &= 0x80000000;
        break;
    case 16:
        if (0 == (value->u16 & 0x7c00))
            value->u16 &= 0x8000;
    }
}

/**
 * Evaluate one component of packSnorm4x8.
 */
static uint8_t
pack_snorm_1x8(float x)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    packSnorm4x8
     *    ------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
     *
     * We must first cast the float to an int, because casting a negative
     * float to a uint is undefined.
     */
   return (uint8_t) (int)
          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
}

/**
 * Evaluate one component of packSnorm2x16.
 */
static uint16_t
pack_snorm_1x16(float x)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    packSnorm2x16
     *    -------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
     *
     * We must first cast the float to an int, because casting a negative
     * float to a uint is undefined.
     */
   return (uint16_t) (int)
          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
}

/**
 * Evaluate one component of unpackSnorm4x8.
 */
static float
unpack_snorm_1x8(uint8_t u)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    unpackSnorm4x8
     *    --------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
     */
   return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
}

/**
 * Evaluate one component of unpackSnorm2x16.
 */
static float
unpack_snorm_1x16(uint16_t u)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    unpackSnorm2x16
     *    ---------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
     */
   return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
}

/**
 * Evaluate one component packUnorm4x8.
 */
static uint8_t
pack_unorm_1x8(float x)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    packUnorm4x8
     *    ------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
     */
   return (uint8_t) (int)
          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
}

/**
 * Evaluate one component packUnorm2x16.
 */
static uint16_t
pack_unorm_1x16(float x)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    packUnorm2x16
     *    -------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
     */
   return (uint16_t) (int)
          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
}

/**
 * Evaluate one component of unpackUnorm4x8.
 */
static float
unpack_unorm_1x8(uint8_t u)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    unpackUnorm4x8
     *    --------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackUnorm4x8: f / 255.0
     */
   return (float) u / 255.0f;
}

/**
 * Evaluate one component of unpackUnorm2x16.
 */
static float
unpack_unorm_1x16(uint16_t u)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    unpackUnorm2x16
     *    ---------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackUnorm2x16: f / 65535.0
     */
   return (float) u / 65535.0f;
}

/**
 * Evaluate one component of packHalf2x16.
 */
static uint16_t
pack_half_1x16(float x)
{
   return _mesa_float_to_half(x);
}

/**
 * Evaluate one component of unpackHalf2x16.
 */
static float
unpack_half_1x16_flush_to_zero(uint16_t u)
{
   if (0 == (u & 0x7c00))
      u &= 0x8000;
   return _mesa_half_to_float(u);
}

/**
 * Evaluate one component of unpackHalf2x16.
 */
static float
unpack_half_1x16(uint16_t u)
{
   return _mesa_half_to_float(u);
}

/* Some typed vector structures to make things like src0.y work */
typedef int8_t int1_t;
typedef uint8_t uint1_t;
typedef float float16_t;
typedef float float32_t;
typedef double float64_t;
typedef bool bool1_t;
typedef bool bool8_t;
typedef bool bool16_t;
typedef bool bool32_t;
typedef bool bool64_t;
% for type in ["float", "int", "uint", "bool"]:
% for width in type_sizes(type):
struct ${type}${width}_vec {
   ${type}${width}_t x;
   ${type}${width}_t y;
   ${type}${width}_t z;
   ${type}${width}_t w;
   ${type}${width}_t e;
   ${type}${width}_t f;
   ${type}${width}_t g;
   ${type}${width}_t h;
   ${type}${width}_t i;
   ${type}${width}_t j;
   ${type}${width}_t k;
   ${type}${width}_t l;
   ${type}${width}_t m;
   ${type}${width}_t n;
   ${type}${width}_t o;
   ${type}${width}_t p;
};
% endfor
% endfor

<%def name="evaluate_op(op, bit_size, execution_mode)">
   <%
   output_type = type_add_size(op.output_type, bit_size)
   input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
   %>

   ## For each non-per-component input, create a variable srcN that
   ## contains x, y, z, and w elements which are filled in with the
   ## appropriately-typed values.
   % for j in range(op.num_inputs):
      % if op.input_sizes[j] == 0:
         <% continue %>
      % elif "src" + str(j) not in op.const_expr:
         ## Avoid unused variable warnings
         <% continue %>
      %endif

      const struct ${input_types[j]}_vec src${j} = {
      % for k in range(op.input_sizes[j]):
         % if input_types[j] == "int1":
             /* 1-bit integers use a 0/-1 convention */
             -(int1_t)_src[${j}][${k}].b,
         % elif input_types[j] == "float16":
            _mesa_half_to_float(_src[${j}][${k}].u16),
         % else:
            _src[${j}][${k}].${get_const_field(input_types[j])},
         % endif
      % endfor
      % for k in range(op.input_sizes[j], 16):
         0,
      % endfor
      };
   % endfor

   % if op.output_size == 0:
      ## For per-component instructions, we need to iterate over the
      ## components and apply the constant expression one component
      ## at a time.
      for (unsigned _i = 0; _i < num_components; _i++) {
         ## For each per-component input, create a variable srcN that
         ## contains the value of the current (_i'th) component.
         % for j in range(op.num_inputs):
            % if op.input_sizes[j] != 0:
               <% continue %>
            % elif "src" + str(j) not in op.const_expr:
               ## Avoid unused variable warnings
               <% continue %>
            % elif input_types[j] == "int1":
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src${j} = -(int1_t)_src[${j}][_i].b;
            % elif input_types[j] == "float16":
               const float src${j} =
                  _mesa_half_to_float(_src[${j}][_i].u16);
            % else:
               const ${input_types[j]}_t src${j} =
                  _src[${j}][_i].${get_const_field(input_types[j])};
            % endif
         % endfor

         ## Create an appropriately-typed variable dst and assign the
         ## result of the const_expr to it.  If const_expr already contains
         ## writes to dst, just include const_expr directly.
         % if "dst" in op.const_expr:
            ${output_type}_t dst;

            ${op.const_expr}
         % else:
            ${output_type}_t dst = ${op.const_expr};
         % endif

         ## Store the current component of the actual destination to the
         ## value of dst.
         % if output_type == "int1" or output_type == "uint1":
            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;
         % elif output_type.startswith("bool"):
            ## Sanitize the C value to a proper NIR 0/-1 bool
            _dst_val[_i].${get_const_field(output_type)} = -(int)dst;
         % elif output_type == "float16":
            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }
         % else:
            _dst_val[_i].${get_const_field(output_type)} = dst;
         % endif

         % if op.name != "fquantize2f16" and type_base_type(output_type) == "float":
            % if type_has_size(output_type):
               if (nir_is_denorm_flush_to_zero(execution_mode, ${type_size(output_type)})) {
                  constant_denorm_flush_to_zero(&_dst_val[_i], ${type_size(output_type)});
               }
            % else:
               if (nir_is_denorm_flush_to_zero(execution_mode, ${bit_size})) {
                  constant_denorm_flush_to_zero(&_dst_val[_i], bit_size);
               }
            %endif
         % endif
      }
   % else:
      ## In the non-per-component case, create a struct dst with
      ## appropriately-typed elements x, y, z, and w and assign the result
      ## of the const_expr to all components of dst, or include the
      ## const_expr directly if it writes to dst already.
      struct ${output_type}_vec dst;

      % if "dst" in op.const_expr:
         ${op.const_expr}
      % else:
         ## Splat the value to all components.  This way expressions which
         ## write the same value to all components don't need to explicitly
         ## write to dest.
         dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
      % endif

      ## For each component in the destination, copy the value of dst to
      ## the actual destination.
      % for k in range(op.output_size):
         % if output_type == "int1" or output_type == "uint1":
            /* 1-bit integers get truncated */
            _dst_val[${k}].b = dst.${"xyzwefghijklmnop"[k]} & 1;
         % elif output_type.startswith("bool"):
            ## Sanitize the C value to a proper NIR 0/-1 bool
            _dst_val[${k}].${get_const_field(output_type)} = -(int)dst.${"xyzwefghijklmnop"[k]};
         % elif output_type == "float16":
            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[${k}].u16 = _mesa_float_to_float16_rtz(dst.${"xyzwefghijklmnop"[k]});
            } else {
               _dst_val[${k}].u16 = _mesa_float_to_float16_rtne(dst.${"xyzwefghijklmnop"[k]});
            }
         % else:
            _dst_val[${k}].${get_const_field(output_type)} = dst.${"xyzwefghijklmnop"[k]};
         % endif

         % if op.name != "fquantize2f16" and type_base_type(output_type) == "float":
            % if type_has_size(output_type):
               if (nir_is_denorm_flush_to_zero(execution_mode, ${type_size(output_type)})) {
                  constant_denorm_flush_to_zero(&_dst_val[${k}], ${type_size(output_type)});
               }
            % else:
               if (nir_is_denorm_flush_to_zero(execution_mode, ${bit_size})) {
                  constant_denorm_flush_to_zero(&_dst_val[${k}], bit_size);
               }
            % endif
         % endif
      % endfor
   % endif
</%def>

% for name, op in sorted(opcodes.items()):
% if op.name == "fsat":
#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
#pragma optimize("", off) /* Temporary work-around for MSVC compiler bug, present in VS2019 16.9.2 */
#endif
% endif
static void
evaluate_${name}(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 ${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   % if op_bit_sizes(op) is not None:
      switch (bit_size) {
      % for bit_size in op_bit_sizes(op):
      case ${bit_size}: {
         ${evaluate_op(op, bit_size, execution_mode)}
         break;
      }
      % endfor

      default:
         unreachable("unknown bit width");
      }
   % else:
      ${evaluate_op(op, 0, execution_mode)}
   % endif
}
% if op.name == "fsat":
#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
#pragma optimize("", on) /* Temporary work-around for MSVC compiler bug, present in VS2019 16.9.2 */
#endif
% endif
% endfor

void
nir_eval_const_opcode(nir_op op, nir_const_value *dest,
                      unsigned num_components, unsigned bit_width,
                      nir_const_value **src,
                      unsigned float_controls_execution_mode)
{
   switch (op) {
% for name in sorted(opcodes.keys()):
   case nir_op_${name}:
      evaluate_${name}(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
% endfor
   default:
      unreachable("shouldn't get here");
   }
}"""
01e04c3fSmrg
01e04c3fSmrgfrom mako.template import Template
01e04c3fSmrg
# Names made available to the Mako template while it is rendered; the
# generated C source is written to stdout.
render_context = {
    "opcodes": opcodes,
    "type_sizes": type_sizes,
    "type_base_type": type_base_type,
    "type_size": type_size,
    "type_has_size": type_has_size,
    "type_add_size": type_add_size,
    "op_bit_sizes": op_bit_sizes,
    "get_const_field": get_const_field,
}

print(Template(template).render(**render_context))