1from __future__ import print_function 2 3import re 4from nir_opcodes import opcodes 5from nir_opcodes import type_has_size, type_size, type_sizes, type_base_type 6 7def type_add_size(type_, size): 8 if type_has_size(type_): 9 return type_ 10 return type_ + str(size) 11 12def op_bit_sizes(op): 13 sizes = None 14 if not type_has_size(op.output_type): 15 sizes = set(type_sizes(op.output_type)) 16 17 for input_type in op.input_types: 18 if not type_has_size(input_type): 19 if sizes is None: 20 sizes = set(type_sizes(input_type)) 21 else: 22 sizes = sizes.intersection(set(type_sizes(input_type))) 23 24 return sorted(list(sizes)) if sizes is not None else None 25 26def get_const_field(type_): 27 if type_size(type_) == 1: 28 return 'b' 29 elif type_base_type(type_) == 'bool': 30 return 'i' + str(type_size(type_)) 31 elif type_ == "float16": 32 return "u16" 33 else: 34 return type_base_type(type_)[0] + str(type_size(type_)) 35 36template = """\ 37/* 38 * Copyright (C) 2014 Intel Corporation 39 * 40 * Permission is hereby granted, free of charge, to any person obtaining a 41 * copy of this software and associated documentation files (the "Software"), 42 * to deal in the Software without restriction, including without limitation 43 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 44 * and/or sell copies of the Software, and to permit persons to whom the 45 * Software is furnished to do so, subject to the following conditions: 46 * 47 * The above copyright notice and this permission notice (including the next 48 * paragraph) shall be included in all copies or substantial portions of the 49 * Software. 50 * 51 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 52 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 53 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 54 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 55 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 56 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 57 * IN THE SOFTWARE. 58 * 59 * Authors: 60 * Jason Ekstrand (jason@jlekstrand.net) 61 */ 62 63#include <math.h> 64#include "util/rounding.h" /* for _mesa_roundeven */ 65#include "util/half_float.h" 66#include "util/bigmath.h" 67#include "nir_constant_expressions.h" 68 69#define MAX_UINT_FOR_SIZE(bits) (UINT64_MAX >> (64 - (bits))) 70 71/** 72 * Evaluate one component of packSnorm4x8. 73 */ 74static uint8_t 75pack_snorm_1x8(float x) 76{ 77 /* From section 8.4 of the GLSL 4.30 spec: 78 * 79 * packSnorm4x8 80 * ------------ 81 * The conversion for component c of v to fixed point is done as 82 * follows: 83 * 84 * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) 85 * 86 * We must first cast the float to an int, because casting a negative 87 * float to a uint is undefined. 88 */ 89 return (uint8_t) (int) 90 _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f); 91} 92 93/** 94 * Evaluate one component of packSnorm2x16. 95 */ 96static uint16_t 97pack_snorm_1x16(float x) 98{ 99 /* From section 8.4 of the GLSL ES 3.00 spec: 100 * 101 * packSnorm2x16 102 * ------------- 103 * The conversion for component c of v to fixed point is done as 104 * follows: 105 * 106 * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) 107 * 108 * We must first cast the float to an int, because casting a negative 109 * float to a uint is undefined. 110 */ 111 return (uint16_t) (int) 112 _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f); 113} 114 115/** 116 * Evaluate one component of unpackSnorm4x8. 117 */ 118static float 119unpack_snorm_1x8(uint8_t u) 120{ 121 /* From section 8.4 of the GLSL 4.30 spec: 122 * 123 * unpackSnorm4x8 124 * -------------- 125 * The conversion for unpacked fixed-point value f to floating point is 126 * done as follows: 127 * 128 * unpackSnorm4x8: clamp(f / 127.0, -1, +1) 129 */ 130 return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f); 131} 132 133/** 134 * Evaluate one component of unpackSnorm2x16. 135 */ 136static float 137unpack_snorm_1x16(uint16_t u) 138{ 139 /* From section 8.4 of the GLSL ES 3.00 spec: 140 * 141 * unpackSnorm2x16 142 * --------------- 143 * The conversion for unpacked fixed-point value f to floating point is 144 * done as follows: 145 * 146 * unpackSnorm2x16: clamp(f / 32767.0, -1, +1) 147 */ 148 return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f); 149} 150 151/** 152 * Evaluate one component packUnorm4x8. 153 */ 154static uint8_t 155pack_unorm_1x8(float x) 156{ 157 /* From section 8.4 of the GLSL 4.30 spec: 158 * 159 * packUnorm4x8 160 * ------------ 161 * The conversion for component c of v to fixed point is done as 162 * follows: 163 * 164 * packUnorm4x8: round(clamp(c, 0, +1) * 255.0) 165 */ 166 return (uint8_t) (int) 167 _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f); 168} 169 170/** 171 * Evaluate one component packUnorm2x16. 172 */ 173static uint16_t 174pack_unorm_1x16(float x) 175{ 176 /* From section 8.4 of the GLSL ES 3.00 spec: 177 * 178 * packUnorm2x16 179 * ------------- 180 * The conversion for component c of v to fixed point is done as 181 * follows: 182 * 183 * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) 184 */ 185 return (uint16_t) (int) 186 _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f); 187} 188 189/** 190 * Evaluate one component of unpackUnorm4x8. 191 */ 192static float 193unpack_unorm_1x8(uint8_t u) 194{ 195 /* From section 8.4 of the GLSL 4.30 spec: 196 * 197 * unpackUnorm4x8 198 * -------------- 199 * The conversion for unpacked fixed-point value f to floating point is 200 * done as follows: 201 * 202 * unpackUnorm4x8: f / 255.0 203 */ 204 return (float) u / 255.0f; 205} 206 207/** 208 * Evaluate one component of unpackUnorm2x16. 209 */ 210static float 211unpack_unorm_1x16(uint16_t u) 212{ 213 /* From section 8.4 of the GLSL ES 3.00 spec: 214 * 215 * unpackUnorm2x16 216 * --------------- 217 * The conversion for unpacked fixed-point value f to floating point is 218 * done as follows: 219 * 220 * unpackUnorm2x16: f / 65535.0 221 */ 222 return (float) u / 65535.0f; 223} 224 225/** 226 * Evaluate one component of packHalf2x16. 227 */ 228static uint16_t 229pack_half_1x16(float x) 230{ 231 return _mesa_float_to_half(x); 232} 233 234/** 235 * Evaluate one component of unpackHalf2x16. 236 */ 237static float 238unpack_half_1x16(uint16_t u) 239{ 240 return _mesa_half_to_float(u); 241} 242 243/* Some typed vector structures to make things like src0.y work */ 244typedef int8_t int1_t; 245typedef uint8_t uint1_t; 246typedef float float16_t; 247typedef float float32_t; 248typedef double float64_t; 249typedef bool bool1_t; 250typedef bool bool8_t; 251typedef bool bool16_t; 252typedef bool bool32_t; 253typedef bool bool64_t; 254% for type in ["float", "int", "uint", "bool"]: 255% for width in type_sizes(type): 256struct ${type}${width}_vec { 257 ${type}${width}_t x; 258 ${type}${width}_t y; 259 ${type}${width}_t z; 260 ${type}${width}_t w; 261}; 262% endfor 263% endfor 264 265<%def name="evaluate_op(op, bit_size)"> 266 <% 267 output_type = type_add_size(op.output_type, bit_size) 268 input_types = [type_add_size(type_, bit_size) for type_ in op.input_types] 269 %> 270 271 ## For each non-per-component input, create a variable srcN that 272 ## contains x, y, z, and w elements which are filled in with the 273 ## appropriately-typed values. 274 % for j in range(op.num_inputs): 275 % if op.input_sizes[j] == 0: 276 <% continue %> 277 % elif "src" + str(j) not in op.const_expr: 278 ## Avoid unused variable warnings 279 <% continue %> 280 %endif 281 282 const struct ${input_types[j]}_vec src${j} = { 283 % for k in range(op.input_sizes[j]): 284 % if input_types[j] == "int1": 285 /* 1-bit integers use a 0/-1 convention */ 286 -(int1_t)_src[${j}][${k}].b, 287 % elif input_types[j] == "float16": 288 _mesa_half_to_float(_src[${j}][${k}].u16), 289 % else: 290 _src[${j}][${k}].${get_const_field(input_types[j])}, 291 % endif 292 % endfor 293 % for k in range(op.input_sizes[j], 4): 294 0, 295 % endfor 296 }; 297 % endfor 298 299 % if op.output_size == 0: 300 ## For per-component instructions, we need to iterate over the 301 ## components and apply the constant expression one component 302 ## at a time. 303 for (unsigned _i = 0; _i < num_components; _i++) { 304 ## For each per-component input, create a variable srcN that 305 ## contains the value of the current (_i'th) component. 306 % for j in range(op.num_inputs): 307 % if op.input_sizes[j] != 0: 308 <% continue %> 309 % elif "src" + str(j) not in op.const_expr: 310 ## Avoid unused variable warnings 311 <% continue %> 312 % elif input_types[j] == "int1": 313 /* 1-bit integers use a 0/-1 convention */ 314 const int1_t src${j} = -(int1_t)_src[${j}][_i].b; 315 % elif input_types[j] == "float16": 316 const float src${j} = 317 _mesa_half_to_float(_src[${j}][_i].u16); 318 % else: 319 const ${input_types[j]}_t src${j} = 320 _src[${j}][_i].${get_const_field(input_types[j])}; 321 % endif 322 % endfor 323 324 ## Create an appropriately-typed variable dst and assign the 325 ## result of the const_expr to it. If const_expr already contains 326 ## writes to dst, just include const_expr directly. 327 % if "dst" in op.const_expr: 328 ${output_type}_t dst; 329 330 ${op.const_expr} 331 % else: 332 ${output_type}_t dst = ${op.const_expr}; 333 % endif 334 335 ## Store the current component of the actual destination to the 336 ## value of dst. 337 % if output_type == "int1" or output_type == "uint1": 338 /* 1-bit integers get truncated */ 339 _dst_val[_i].b = dst & 1; 340 % elif output_type.startswith("bool"): 341 ## Sanitize the C value to a proper NIR 0/-1 bool 342 _dst_val[_i].${get_const_field(output_type)} = -(int)dst; 343 % elif output_type == "float16": 344 _dst_val[_i].u16 = _mesa_float_to_half(dst); 345 % else: 346 _dst_val[_i].${get_const_field(output_type)} = dst; 347 % endif 348 } 349 % else: 350 ## In the non-per-component case, create a struct dst with 351 ## appropriately-typed elements x, y, z, and w and assign the result 352 ## of the const_expr to all components of dst, or include the 353 ## const_expr directly if it writes to dst already. 354 struct ${output_type}_vec dst; 355 356 % if "dst" in op.const_expr: 357 ${op.const_expr} 358 % else: 359 ## Splat the value to all components. This way expressions which 360 ## write the same value to all components don't need to explicitly 361 ## write to dest. One such example is fnoise which has a 362 ## const_expr of 0.0f. 363 dst.x = dst.y = dst.z = dst.w = ${op.const_expr}; 364 % endif 365 366 ## For each component in the destination, copy the value of dst to 367 ## the actual destination. 368 % for k in range(op.output_size): 369 % if output_type == "int1" or output_type == "uint1": 370 /* 1-bit integers get truncated */ 371 _dst_val[${k}].b = dst.${"xyzw"[k]} & 1; 372 % elif output_type.startswith("bool"): 373 ## Sanitize the C value to a proper NIR 0/-1 bool 374 _dst_val[${k}].${get_const_field(output_type)} = -(int)dst.${"xyzw"[k]}; 375 % elif output_type == "float16": 376 _dst_val[${k}].u16 = _mesa_float_to_half(dst.${"xyzw"[k]}); 377 % else: 378 _dst_val[${k}].${get_const_field(output_type)} = dst.${"xyzw"[k]}; 379 % endif 380 % endfor 381 % endif 382</%def> 383 384% for name, op in sorted(opcodes.items()): 385static void 386evaluate_${name}(nir_const_value *_dst_val, 387 MAYBE_UNUSED unsigned num_components, 388 ${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned bit_size, 389 MAYBE_UNUSED nir_const_value **_src) 390{ 391 % if op_bit_sizes(op) is not None: 392 switch (bit_size) { 393 % for bit_size in op_bit_sizes(op): 394 case ${bit_size}: { 395 ${evaluate_op(op, bit_size)} 396 break; 397 } 398 % endfor 399 400 default: 401 unreachable("unknown bit width"); 402 } 403 % else: 404 ${evaluate_op(op, 0)} 405 % endif 406} 407% endfor 408 409void 410nir_eval_const_opcode(nir_op op, nir_const_value *dest, 411 unsigned num_components, unsigned bit_width, 412 nir_const_value **src) 413{ 414 switch (op) { 415% for name in sorted(opcodes.keys()): 416 case nir_op_${name}: 417 evaluate_${name}(dest, num_components, bit_width, src); 418 return; 419% endfor 420 default: 421 unreachable("shouldn't get here"); 422 } 423}""" 424 425from mako.template import Template 426 427print(Template(template).render(opcodes=opcodes, type_sizes=type_sizes, 428 type_has_size=type_has_size, 429 type_add_size=type_add_size, 430 op_bit_sizes=op_bit_sizes, 431 get_const_field=get_const_field)) 432