101e04c3fSmrgimport re
27e102996Smayafrom nir_opcodes import opcodes
37e102996Smayafrom nir_opcodes import type_has_size, type_size, type_sizes, type_base_type
401e04c3fSmrg
def type_add_size(type_, size):
    """Return ``type_`` as a type name that carries an explicit size.

    A name for which ``type_has_size`` is already true is handed back
    untouched; otherwise ``size`` is appended to it as decimal text.
    """
    return type_ if type_has_size(type_) else type_ + str(size)
901e04c3fSmrg
def op_bit_sizes(op):
    """Return the sorted sizes shared by every unsized type of ``op``.

    The output type and each input type are considered; those for which
    ``type_has_size`` is true are ignored.  The result is the sorted
    intersection of ``type_sizes`` over the remaining types, or ``None``
    when every type of the opcode already has a size.
    """
    # Output type first, then the inputs in declaration order.
    candidates = [op.output_type] + list(op.input_types)
    unsized = [name for name in candidates if not type_has_size(name)]
    if not unsized:
        return None

    common = set(type_sizes(unsized[0]))
    for name in unsized[1:]:
        common = common.intersection(set(type_sizes(name)))
    return sorted(common)
2301e04c3fSmrg
def get_const_field(type_):
    """Return the ``nir_const_value`` member name used for ``type_``.

    The template uses the result as the field selected on ``_src[..][..]``
    and ``_dst_val[..]``.  Any type of size 1 maps to ``b``; other bool
    types map to ``i<size>``; ``float16`` maps to ``u16``; everything else
    maps to the first letter of its base type followed by its size.
    """
    bits = type_size(type_)
    if bits == 1:
        return 'b'

    base = type_base_type(type_)
    if base == 'bool':
        return 'i' + str(bits)
    if type_ == "float16":
        return "u16"
    return base[0] + str(bits)
3301e04c3fSmrg
# Mako template for the generated C source.  It is rendered at the bottom of
# this script with `opcodes` and the helper functions defined above, and
# emits, in order:
#   * the license header and #include lines,
#   * static helpers (denorm flush, pack/unpack snorm/unorm/half),
#   * typedefs and 16-component `<type><width>_vec` structs,
#   * one static `evaluate_<name>()` function per entry in `opcodes`
#     (its body comes from the `evaluate_op` def), and
#   * the `nir_eval_const_opcode()` dispatcher.
#
# NOTE: this is a regular (non-raw) string literal, so a backslash that must
# reach the C output has to be doubled (see `\\brief` below); a bare `\b`
# would be turned into a backspace character by Python.
template = """\
/*
 * Copyright (C) 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 */

#include <math.h>
#include "util/rounding.h" /* for _mesa_roundeven */
#include "util/half_float.h"
#include "util/double.h"
#include "util/softfloat.h"
#include "util/bigmath.h"
#include "nir_constant_expressions.h"

/**
 * \\brief Checks if the provided value is a denorm and flushes it to zero.
 */
static void
constant_denorm_flush_to_zero(nir_const_value *value, unsigned bit_size)
{
    switch(bit_size) {
    case 64:
        if (0 == (value->u64 & 0x7ff0000000000000))
            value->u64 &= 0x8000000000000000;
        break;
    case 32:
        if (0 == (value->u32 & 0x7f800000))
            value->u32 &= 0x80000000;
        break;
    case 16:
        if (0 == (value->u16 & 0x7c00))
            value->u16 &= 0x8000;
    }
}

/**
 * Evaluate one component of packSnorm4x8.
 */
static uint8_t
pack_snorm_1x8(float x)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    packSnorm4x8
     *    ------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
     *
     * We must first cast the float to an int, because casting a negative
     * float to a uint is undefined.
     */
   return (uint8_t) (int)
          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
}

/**
 * Evaluate one component of packSnorm2x16.
 */
static uint16_t
pack_snorm_1x16(float x)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    packSnorm2x16
     *    -------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
     *
     * We must first cast the float to an int, because casting a negative
     * float to a uint is undefined.
     */
   return (uint16_t) (int)
          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
}

/**
 * Evaluate one component of unpackSnorm4x8.
 */
static float
unpack_snorm_1x8(uint8_t u)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    unpackSnorm4x8
     *    --------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
     */
   return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
}

/**
 * Evaluate one component of unpackSnorm2x16.
 */
static float
unpack_snorm_1x16(uint16_t u)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    unpackSnorm2x16
     *    ---------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
     */
   return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
}

/**
 * Evaluate one component packUnorm4x8.
 */
static uint8_t
pack_unorm_1x8(float x)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    packUnorm4x8
     *    ------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
     */
   return (uint8_t) (int)
          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
}

/**
 * Evaluate one component packUnorm2x16.
 */
static uint16_t
pack_unorm_1x16(float x)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    packUnorm2x16
     *    -------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
     */
   return (uint16_t) (int)
          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
}

/**
 * Evaluate one component of unpackUnorm4x8.
 */
static float
unpack_unorm_1x8(uint8_t u)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    unpackUnorm4x8
     *    --------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackUnorm4x8: f / 255.0
     */
   return (float) u / 255.0f;
}

/**
 * Evaluate one component of unpackUnorm2x16.
 */
static float
unpack_unorm_1x16(uint16_t u)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    unpackUnorm2x16
     *    ---------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackUnorm2x16: f / 65535.0
     */
   return (float) u / 65535.0f;
}

/**
 * Evaluate one component of packHalf2x16.
 */
static uint16_t
pack_half_1x16(float x)
{
   return _mesa_float_to_half(x);
}

/**
 * Evaluate one component of unpackHalf2x16.
 */
static float
unpack_half_1x16_flush_to_zero(uint16_t u)
{
   if (0 == (u & 0x7c00))
      u &= 0x8000;
   return _mesa_half_to_float(u);
}

/**
 * Evaluate one component of unpackHalf2x16.
 */
static float
unpack_half_1x16(uint16_t u)
{
   return _mesa_half_to_float(u);
}

/* Some typed vector structures to make things like src0.y work */
typedef int8_t int1_t;
typedef uint8_t uint1_t;
typedef float float16_t;
typedef float float32_t;
typedef double float64_t;
typedef bool bool1_t;
typedef bool bool8_t;
typedef bool bool16_t;
typedef bool bool32_t;
typedef bool bool64_t;
% for type in ["float", "int", "uint", "bool"]:
% for width in type_sizes(type):
struct ${type}${width}_vec {
   ${type}${width}_t x;
   ${type}${width}_t y;
   ${type}${width}_t z;
   ${type}${width}_t w;
   ${type}${width}_t e;
   ${type}${width}_t f;
   ${type}${width}_t g;
   ${type}${width}_t h;
   ${type}${width}_t i;
   ${type}${width}_t j;
   ${type}${width}_t k;
   ${type}${width}_t l;
   ${type}${width}_t m;
   ${type}${width}_t n;
   ${type}${width}_t o;
   ${type}${width}_t p;
};
% endfor
% endfor

<%def name="evaluate_op(op, bit_size, execution_mode)">
   <%
   output_type = type_add_size(op.output_type, bit_size)
   input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
   %>

   ## For each non-per-component input, create a variable srcN that
   ## contains x, y, z, and w elements which are filled in with the
   ## appropriately-typed values.
   % for j in range(op.num_inputs):
      % if op.input_sizes[j] == 0:
         <% continue %>
      % elif "src" + str(j) not in op.const_expr:
         ## Avoid unused variable warnings
         <% continue %>
      %endif

      const struct ${input_types[j]}_vec src${j} = {
      % for k in range(op.input_sizes[j]):
         % if input_types[j] == "int1":
             /* 1-bit integers use a 0/-1 convention */
             -(int1_t)_src[${j}][${k}].b,
         % elif input_types[j] == "float16":
            _mesa_half_to_float(_src[${j}][${k}].u16),
         % else:
            _src[${j}][${k}].${get_const_field(input_types[j])},
         % endif
      % endfor
      % for k in range(op.input_sizes[j], 16):
         0,
      % endfor
      };
   % endfor

   % if op.output_size == 0:
      ## For per-component instructions, we need to iterate over the
      ## components and apply the constant expression one component
      ## at a time.
      for (unsigned _i = 0; _i < num_components; _i++) {
         ## For each per-component input, create a variable srcN that
         ## contains the value of the current (_i'th) component.
         % for j in range(op.num_inputs):
            % if op.input_sizes[j] != 0:
               <% continue %>
            % elif "src" + str(j) not in op.const_expr:
               ## Avoid unused variable warnings
               <% continue %>
            % elif input_types[j] == "int1":
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src${j} = -(int1_t)_src[${j}][_i].b;
            % elif input_types[j] == "float16":
               const float src${j} =
                  _mesa_half_to_float(_src[${j}][_i].u16);
            % else:
               const ${input_types[j]}_t src${j} =
                  _src[${j}][_i].${get_const_field(input_types[j])};
            % endif
         % endfor

         ## Create an appropriately-typed variable dst and assign the
         ## result of the const_expr to it.  If const_expr already contains
         ## writes to dst, just include const_expr directly.
         % if "dst" in op.const_expr:
            ${output_type}_t dst;

            ${op.const_expr}
         % else:
            ${output_type}_t dst = ${op.const_expr};
         % endif

         ## Store the current component of the actual destination to the
         ## value of dst.
         % if output_type == "int1" or output_type == "uint1":
            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;
         % elif output_type.startswith("bool"):
            ## Sanitize the C value to a proper NIR 0/-1 bool
            _dst_val[_i].${get_const_field(output_type)} = -(int)dst;
         % elif output_type == "float16":
            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }
         % else:
            _dst_val[_i].${get_const_field(output_type)} = dst;
         % endif

         % if op.name != "fquantize2f16" and type_base_type(output_type) == "float":
            % if type_has_size(output_type):
               if (nir_is_denorm_flush_to_zero(execution_mode, ${type_size(output_type)})) {
                  constant_denorm_flush_to_zero(&_dst_val[_i], ${type_size(output_type)});
               }
            % else:
               if (nir_is_denorm_flush_to_zero(execution_mode, ${bit_size})) {
                  constant_denorm_flush_to_zero(&_dst_val[_i], bit_size);
               }
            %endif
         % endif
      }
   % else:
      ## In the non-per-component case, create a struct dst with
      ## appropriately-typed elements x, y, z, and w and assign the result
      ## of the const_expr to all components of dst, or include the
      ## const_expr directly if it writes to dst already.
      struct ${output_type}_vec dst;

      % if "dst" in op.const_expr:
         ${op.const_expr}
      % else:
         ## Splat the value to all components.  This way expressions which
         ## write the same value to all components don't need to explicitly
         ## write to dest.
         dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
      % endif

      ## For each component in the destination, copy the value of dst to
      ## the actual destination.
      % for k in range(op.output_size):
         % if output_type == "int1" or output_type == "uint1":
            /* 1-bit integers get truncated */
            _dst_val[${k}].b = dst.${"xyzwefghijklmnop"[k]} & 1;
         % elif output_type.startswith("bool"):
            ## Sanitize the C value to a proper NIR 0/-1 bool
            _dst_val[${k}].${get_const_field(output_type)} = -(int)dst.${"xyzwefghijklmnop"[k]};
         % elif output_type == "float16":
            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[${k}].u16 = _mesa_float_to_float16_rtz(dst.${"xyzwefghijklmnop"[k]});
            } else {
               _dst_val[${k}].u16 = _mesa_float_to_float16_rtne(dst.${"xyzwefghijklmnop"[k]});
            }
         % else:
            _dst_val[${k}].${get_const_field(output_type)} = dst.${"xyzwefghijklmnop"[k]};
         % endif

         % if op.name != "fquantize2f16" and type_base_type(output_type) == "float":
            % if type_has_size(output_type):
               if (nir_is_denorm_flush_to_zero(execution_mode, ${type_size(output_type)})) {
                  constant_denorm_flush_to_zero(&_dst_val[${k}], ${type_size(output_type)});
               }
            % else:
               if (nir_is_denorm_flush_to_zero(execution_mode, ${bit_size})) {
                  constant_denorm_flush_to_zero(&_dst_val[${k}], bit_size);
               }
            % endif
         % endif
      % endfor
   % endif
</%def>

% for name, op in sorted(opcodes.items()):
% if op.name == "fsat":
#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
#pragma optimize("", off) /* Temporary work-around for MSVC compiler bug, present in VS2019 16.9.2 */
#endif
% endif
static void
evaluate_${name}(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 ${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   % if op_bit_sizes(op) is not None:
      switch (bit_size) {
      % for bit_size in op_bit_sizes(op):
      case ${bit_size}: {
         ${evaluate_op(op, bit_size, execution_mode)}
         break;
      }
      % endfor

      default:
         unreachable("unknown bit width");
      }
   % else:
      ${evaluate_op(op, 0, execution_mode)}
   % endif
}
% if op.name == "fsat":
#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
#pragma optimize("", on) /* Temporary work-around for MSVC compiler bug, present in VS2019 16.9.2 */
#endif
% endif
% endfor

void
nir_eval_const_opcode(nir_op op, nir_const_value *dest,
                      unsigned num_components, unsigned bit_width,
                      nir_const_value **src,
                      unsigned float_controls_execution_mode)
{
   switch (op) {
% for name in sorted(opcodes.keys()):
   case nir_op_${name}:
      evaluate_${name}(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
% endfor
   default:
      unreachable("shouldn't get here");
   }
}"""
50901e04c3fSmrg
51001e04c3fSmrgfrom mako.template import Template
51101e04c3fSmrg
# Render the Mako template and write the generated C source to stdout.
# The mapping below supplies the names the template body refers to.
print(Template(template).render(**{
    "opcodes": opcodes,
    "type_sizes": type_sizes,
    "type_base_type": type_base_type,
    "type_size": type_size,
    "type_has_size": type_has_size,
    "type_add_size": type_add_size,
    "op_bit_sizes": op_bit_sizes,
    "get_const_field": get_const_field,
}))
519