/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "ir.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "ir_rvalue_visitor.h"

namespace {

using namespace ir_builder;

/**
 * A visitor that lowers built-in floating-point pack/unpack expressions
 * such as packSnorm2x16.
36b8e80941Smrg */ 37b8e80941Smrgclass lower_packing_builtins_visitor : public ir_rvalue_visitor { 38b8e80941Smrgpublic: 39b8e80941Smrg /** 40b8e80941Smrg * \param op_mask is a bitmask of `enum lower_packing_builtins_op` 41b8e80941Smrg */ 42b8e80941Smrg explicit lower_packing_builtins_visitor(int op_mask) 43b8e80941Smrg : op_mask(op_mask), 44b8e80941Smrg progress(false) 45b8e80941Smrg { 46b8e80941Smrg factory.instructions = &factory_instructions; 47b8e80941Smrg } 48b8e80941Smrg 49b8e80941Smrg virtual ~lower_packing_builtins_visitor() 50b8e80941Smrg { 51b8e80941Smrg assert(factory_instructions.is_empty()); 52b8e80941Smrg } 53b8e80941Smrg 54b8e80941Smrg bool get_progress() { return progress; } 55b8e80941Smrg 56b8e80941Smrg void handle_rvalue(ir_rvalue **rvalue) 57b8e80941Smrg { 58b8e80941Smrg if (!*rvalue) 59b8e80941Smrg return; 60b8e80941Smrg 61b8e80941Smrg ir_expression *expr = (*rvalue)->as_expression(); 62b8e80941Smrg if (!expr) 63b8e80941Smrg return; 64b8e80941Smrg 65b8e80941Smrg enum lower_packing_builtins_op lowering_op = 66b8e80941Smrg choose_lowering_op(expr->operation); 67b8e80941Smrg 68b8e80941Smrg if (lowering_op == LOWER_PACK_UNPACK_NONE) 69b8e80941Smrg return; 70b8e80941Smrg 71b8e80941Smrg setup_factory(ralloc_parent(expr)); 72b8e80941Smrg 73b8e80941Smrg ir_rvalue *op0 = expr->operands[0]; 74b8e80941Smrg ralloc_steal(factory.mem_ctx, op0); 75b8e80941Smrg 76b8e80941Smrg switch (lowering_op) { 77b8e80941Smrg case LOWER_PACK_SNORM_2x16: 78b8e80941Smrg *rvalue = lower_pack_snorm_2x16(op0); 79b8e80941Smrg break; 80b8e80941Smrg case LOWER_PACK_SNORM_4x8: 81b8e80941Smrg *rvalue = lower_pack_snorm_4x8(op0); 82b8e80941Smrg break; 83b8e80941Smrg case LOWER_PACK_UNORM_2x16: 84b8e80941Smrg *rvalue = lower_pack_unorm_2x16(op0); 85b8e80941Smrg break; 86b8e80941Smrg case LOWER_PACK_UNORM_4x8: 87b8e80941Smrg *rvalue = lower_pack_unorm_4x8(op0); 88b8e80941Smrg break; 89b8e80941Smrg case LOWER_PACK_HALF_2x16: 90b8e80941Smrg *rvalue = lower_pack_half_2x16(op0); 
91b8e80941Smrg break; 92b8e80941Smrg case LOWER_UNPACK_SNORM_2x16: 93b8e80941Smrg *rvalue = lower_unpack_snorm_2x16(op0); 94b8e80941Smrg break; 95b8e80941Smrg case LOWER_UNPACK_SNORM_4x8: 96b8e80941Smrg *rvalue = lower_unpack_snorm_4x8(op0); 97b8e80941Smrg break; 98b8e80941Smrg case LOWER_UNPACK_UNORM_2x16: 99b8e80941Smrg *rvalue = lower_unpack_unorm_2x16(op0); 100b8e80941Smrg break; 101b8e80941Smrg case LOWER_UNPACK_UNORM_4x8: 102b8e80941Smrg *rvalue = lower_unpack_unorm_4x8(op0); 103b8e80941Smrg break; 104b8e80941Smrg case LOWER_UNPACK_HALF_2x16: 105b8e80941Smrg *rvalue = lower_unpack_half_2x16(op0); 106b8e80941Smrg break; 107b8e80941Smrg case LOWER_PACK_UNPACK_NONE: 108b8e80941Smrg case LOWER_PACK_USE_BFI: 109b8e80941Smrg case LOWER_PACK_USE_BFE: 110b8e80941Smrg assert(!"not reached"); 111b8e80941Smrg break; 112b8e80941Smrg } 113b8e80941Smrg 114b8e80941Smrg teardown_factory(); 115b8e80941Smrg progress = true; 116b8e80941Smrg } 117b8e80941Smrg 118b8e80941Smrgprivate: 119b8e80941Smrg const int op_mask; 120b8e80941Smrg bool progress; 121b8e80941Smrg ir_factory factory; 122b8e80941Smrg exec_list factory_instructions; 123b8e80941Smrg 124b8e80941Smrg /** 125b8e80941Smrg * Determine the needed lowering operation by filtering \a expr_op 126b8e80941Smrg * through \ref op_mask. 127b8e80941Smrg */ 128b8e80941Smrg enum lower_packing_builtins_op 129b8e80941Smrg choose_lowering_op(ir_expression_operation expr_op) 130b8e80941Smrg { 131b8e80941Smrg /* C++ regards int and enum as fundamentally different types. 132b8e80941Smrg * So, we can't simply return from each case; we must cast the return 133b8e80941Smrg * value. 
134b8e80941Smrg */ 135b8e80941Smrg int result; 136b8e80941Smrg 137b8e80941Smrg switch (expr_op) { 138b8e80941Smrg case ir_unop_pack_snorm_2x16: 139b8e80941Smrg result = op_mask & LOWER_PACK_SNORM_2x16; 140b8e80941Smrg break; 141b8e80941Smrg case ir_unop_pack_snorm_4x8: 142b8e80941Smrg result = op_mask & LOWER_PACK_SNORM_4x8; 143b8e80941Smrg break; 144b8e80941Smrg case ir_unop_pack_unorm_2x16: 145b8e80941Smrg result = op_mask & LOWER_PACK_UNORM_2x16; 146b8e80941Smrg break; 147b8e80941Smrg case ir_unop_pack_unorm_4x8: 148b8e80941Smrg result = op_mask & LOWER_PACK_UNORM_4x8; 149b8e80941Smrg break; 150b8e80941Smrg case ir_unop_pack_half_2x16: 151b8e80941Smrg result = op_mask & LOWER_PACK_HALF_2x16; 152b8e80941Smrg break; 153b8e80941Smrg case ir_unop_unpack_snorm_2x16: 154b8e80941Smrg result = op_mask & LOWER_UNPACK_SNORM_2x16; 155b8e80941Smrg break; 156b8e80941Smrg case ir_unop_unpack_snorm_4x8: 157b8e80941Smrg result = op_mask & LOWER_UNPACK_SNORM_4x8; 158b8e80941Smrg break; 159b8e80941Smrg case ir_unop_unpack_unorm_2x16: 160b8e80941Smrg result = op_mask & LOWER_UNPACK_UNORM_2x16; 161b8e80941Smrg break; 162b8e80941Smrg case ir_unop_unpack_unorm_4x8: 163b8e80941Smrg result = op_mask & LOWER_UNPACK_UNORM_4x8; 164b8e80941Smrg break; 165b8e80941Smrg case ir_unop_unpack_half_2x16: 166b8e80941Smrg result = op_mask & LOWER_UNPACK_HALF_2x16; 167b8e80941Smrg break; 168b8e80941Smrg default: 169b8e80941Smrg result = LOWER_PACK_UNPACK_NONE; 170b8e80941Smrg break; 171b8e80941Smrg } 172b8e80941Smrg 173b8e80941Smrg return static_cast<enum lower_packing_builtins_op>(result); 174b8e80941Smrg } 175b8e80941Smrg 176b8e80941Smrg void 177b8e80941Smrg setup_factory(void *mem_ctx) 178b8e80941Smrg { 179b8e80941Smrg assert(factory.mem_ctx == NULL); 180b8e80941Smrg assert(factory.instructions->is_empty()); 181b8e80941Smrg 182b8e80941Smrg factory.mem_ctx = mem_ctx; 183b8e80941Smrg } 184b8e80941Smrg 185b8e80941Smrg void 186b8e80941Smrg teardown_factory() 187b8e80941Smrg { 188b8e80941Smrg 
base_ir->insert_before(factory.instructions); 189b8e80941Smrg assert(factory.instructions->is_empty()); 190b8e80941Smrg factory.mem_ctx = NULL; 191b8e80941Smrg } 192b8e80941Smrg 193b8e80941Smrg template <typename T> 194b8e80941Smrg ir_constant* 195b8e80941Smrg constant(T x) 196b8e80941Smrg { 197b8e80941Smrg return factory.constant(x); 198b8e80941Smrg } 199b8e80941Smrg 200b8e80941Smrg /** 201b8e80941Smrg * \brief Pack two uint16's into a single uint32. 202b8e80941Smrg * 203b8e80941Smrg * Interpret the given uvec2 as a uint16 pair. Pack the pair into a uint32 204b8e80941Smrg * where the least significant bits specify the first element of the pair. 205b8e80941Smrg * Return the uint32. 206b8e80941Smrg */ 207b8e80941Smrg ir_rvalue* 208b8e80941Smrg pack_uvec2_to_uint(ir_rvalue *uvec2_rval) 209b8e80941Smrg { 210b8e80941Smrg assert(uvec2_rval->type == glsl_type::uvec2_type); 211b8e80941Smrg 212b8e80941Smrg /* uvec2 u = UVEC2_RVAL; */ 213b8e80941Smrg ir_variable *u = factory.make_temp(glsl_type::uvec2_type, 214b8e80941Smrg "tmp_pack_uvec2_to_uint"); 215b8e80941Smrg factory.emit(assign(u, uvec2_rval)); 216b8e80941Smrg 217b8e80941Smrg if (op_mask & LOWER_PACK_USE_BFI) { 218b8e80941Smrg return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)), 219b8e80941Smrg swizzle_y(u), 220b8e80941Smrg constant(16u), 221b8e80941Smrg constant(16u)); 222b8e80941Smrg } 223b8e80941Smrg 224b8e80941Smrg /* return (u.y << 16) | (u.x & 0xffff); */ 225b8e80941Smrg return bit_or(lshift(swizzle_y(u), constant(16u)), 226b8e80941Smrg bit_and(swizzle_x(u), constant(0xffffu))); 227b8e80941Smrg } 228b8e80941Smrg 229b8e80941Smrg /** 230b8e80941Smrg * \brief Pack four uint8's into a single uint32. 231b8e80941Smrg * 232b8e80941Smrg * Interpret the given uvec4 as a uint32 4-typle. Pack the 4-tuple into a 233b8e80941Smrg * uint32 where the least significant bits specify the first element of the 234b8e80941Smrg * 4-tuple. Return the uint32. 
235b8e80941Smrg */ 236b8e80941Smrg ir_rvalue* 237b8e80941Smrg pack_uvec4_to_uint(ir_rvalue *uvec4_rval) 238b8e80941Smrg { 239b8e80941Smrg assert(uvec4_rval->type == glsl_type::uvec4_type); 240b8e80941Smrg 241b8e80941Smrg ir_variable *u = factory.make_temp(glsl_type::uvec4_type, 242b8e80941Smrg "tmp_pack_uvec4_to_uint"); 243b8e80941Smrg 244b8e80941Smrg if (op_mask & LOWER_PACK_USE_BFI) { 245b8e80941Smrg /* uvec4 u = UVEC4_RVAL; */ 246b8e80941Smrg factory.emit(assign(u, uvec4_rval)); 247b8e80941Smrg 248b8e80941Smrg return bitfield_insert(bitfield_insert( 249b8e80941Smrg bitfield_insert( 250b8e80941Smrg bit_and(swizzle_x(u), constant(0xffu)), 251b8e80941Smrg swizzle_y(u), constant(8u), constant(8u)), 252b8e80941Smrg swizzle_z(u), constant(16u), constant(8u)), 253b8e80941Smrg swizzle_w(u), constant(24u), constant(8u)); 254b8e80941Smrg } 255b8e80941Smrg 256b8e80941Smrg /* uvec4 u = UVEC4_RVAL & 0xff */ 257b8e80941Smrg factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu)))); 258b8e80941Smrg 259b8e80941Smrg /* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */ 260b8e80941Smrg return bit_or(bit_or(lshift(swizzle_w(u), constant(24u)), 261b8e80941Smrg lshift(swizzle_z(u), constant(16u))), 262b8e80941Smrg bit_or(lshift(swizzle_y(u), constant(8u)), 263b8e80941Smrg swizzle_x(u))); 264b8e80941Smrg } 265b8e80941Smrg 266b8e80941Smrg /** 267b8e80941Smrg * \brief Unpack a uint32 into two uint16's. 268b8e80941Smrg * 269b8e80941Smrg * Interpret the given uint32 as a uint16 pair where the uint32's least 270b8e80941Smrg * significant bits specify the pair's first element. Return the uint16 271b8e80941Smrg * pair as a uvec2. 
272b8e80941Smrg */ 273b8e80941Smrg ir_rvalue* 274b8e80941Smrg unpack_uint_to_uvec2(ir_rvalue *uint_rval) 275b8e80941Smrg { 276b8e80941Smrg assert(uint_rval->type == glsl_type::uint_type); 277b8e80941Smrg 278b8e80941Smrg /* uint u = UINT_RVAL; */ 279b8e80941Smrg ir_variable *u = factory.make_temp(glsl_type::uint_type, 280b8e80941Smrg "tmp_unpack_uint_to_uvec2_u"); 281b8e80941Smrg factory.emit(assign(u, uint_rval)); 282b8e80941Smrg 283b8e80941Smrg /* uvec2 u2; */ 284b8e80941Smrg ir_variable *u2 = factory.make_temp(glsl_type::uvec2_type, 285b8e80941Smrg "tmp_unpack_uint_to_uvec2_u2"); 286b8e80941Smrg 287b8e80941Smrg /* u2.x = u & 0xffffu; */ 288b8e80941Smrg factory.emit(assign(u2, bit_and(u, constant(0xffffu)), WRITEMASK_X)); 289b8e80941Smrg 290b8e80941Smrg /* u2.y = u >> 16u; */ 291b8e80941Smrg factory.emit(assign(u2, rshift(u, constant(16u)), WRITEMASK_Y)); 292b8e80941Smrg 293b8e80941Smrg return deref(u2).val; 294b8e80941Smrg } 295b8e80941Smrg 296b8e80941Smrg /** 297b8e80941Smrg * \brief Unpack a uint32 into two int16's. 298b8e80941Smrg * 299b8e80941Smrg * Specifically each 16-bit value is sign-extended to the full width of an 300b8e80941Smrg * int32 on return. 
301b8e80941Smrg */ 302b8e80941Smrg ir_rvalue * 303b8e80941Smrg unpack_uint_to_ivec2(ir_rvalue *uint_rval) 304b8e80941Smrg { 305b8e80941Smrg assert(uint_rval->type == glsl_type::uint_type); 306b8e80941Smrg 307b8e80941Smrg if (!(op_mask & LOWER_PACK_USE_BFE)) { 308b8e80941Smrg return rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)), 309b8e80941Smrg constant(16u)), 310b8e80941Smrg constant(16u)); 311b8e80941Smrg } 312b8e80941Smrg 313b8e80941Smrg ir_variable *i = factory.make_temp(glsl_type::int_type, 314b8e80941Smrg "tmp_unpack_uint_to_ivec2_i"); 315b8e80941Smrg factory.emit(assign(i, u2i(uint_rval))); 316b8e80941Smrg 317b8e80941Smrg /* ivec2 i2; */ 318b8e80941Smrg ir_variable *i2 = factory.make_temp(glsl_type::ivec2_type, 319b8e80941Smrg "tmp_unpack_uint_to_ivec2_i2"); 320b8e80941Smrg 321b8e80941Smrg factory.emit(assign(i2, bitfield_extract(i, constant(0), constant(16)), 322b8e80941Smrg WRITEMASK_X)); 323b8e80941Smrg factory.emit(assign(i2, bitfield_extract(i, constant(16), constant(16)), 324b8e80941Smrg WRITEMASK_Y)); 325b8e80941Smrg 326b8e80941Smrg return deref(i2).val; 327b8e80941Smrg } 328b8e80941Smrg 329b8e80941Smrg /** 330b8e80941Smrg * \brief Unpack a uint32 into four uint8's. 331b8e80941Smrg * 332b8e80941Smrg * Interpret the given uint32 as a uint8 4-tuple where the uint32's least 333b8e80941Smrg * significant bits specify the 4-tuple's first element. Return the uint8 334b8e80941Smrg * 4-tuple as a uvec4. 
335b8e80941Smrg */ 336b8e80941Smrg ir_rvalue* 337b8e80941Smrg unpack_uint_to_uvec4(ir_rvalue *uint_rval) 338b8e80941Smrg { 339b8e80941Smrg assert(uint_rval->type == glsl_type::uint_type); 340b8e80941Smrg 341b8e80941Smrg /* uint u = UINT_RVAL; */ 342b8e80941Smrg ir_variable *u = factory.make_temp(glsl_type::uint_type, 343b8e80941Smrg "tmp_unpack_uint_to_uvec4_u"); 344b8e80941Smrg factory.emit(assign(u, uint_rval)); 345b8e80941Smrg 346b8e80941Smrg /* uvec4 u4; */ 347b8e80941Smrg ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type, 348b8e80941Smrg "tmp_unpack_uint_to_uvec4_u4"); 349b8e80941Smrg 350b8e80941Smrg /* u4.x = u & 0xffu; */ 351b8e80941Smrg factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X)); 352b8e80941Smrg 353b8e80941Smrg if (op_mask & LOWER_PACK_USE_BFE) { 354b8e80941Smrg /* u4.y = bitfield_extract(u, 8, 8); */ 355b8e80941Smrg factory.emit(assign(u4, bitfield_extract(u, constant(8u), constant(8u)), 356b8e80941Smrg WRITEMASK_Y)); 357b8e80941Smrg 358b8e80941Smrg /* u4.z = bitfield_extract(u, 16, 8); */ 359b8e80941Smrg factory.emit(assign(u4, bitfield_extract(u, constant(16u), constant(8u)), 360b8e80941Smrg WRITEMASK_Z)); 361b8e80941Smrg } else { 362b8e80941Smrg /* u4.y = (u >> 8u) & 0xffu; */ 363b8e80941Smrg factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), 364b8e80941Smrg constant(0xffu)), WRITEMASK_Y)); 365b8e80941Smrg 366b8e80941Smrg /* u4.z = (u >> 16u) & 0xffu; */ 367b8e80941Smrg factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), 368b8e80941Smrg constant(0xffu)), WRITEMASK_Z)); 369b8e80941Smrg } 370b8e80941Smrg 371b8e80941Smrg /* u4.w = (u >> 24u) */ 372b8e80941Smrg factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W)); 373b8e80941Smrg 374b8e80941Smrg return deref(u4).val; 375b8e80941Smrg } 376b8e80941Smrg 377b8e80941Smrg /** 378b8e80941Smrg * \brief Unpack a uint32 into four int8's. 
379b8e80941Smrg * 380b8e80941Smrg * Specifically each 8-bit value is sign-extended to the full width of an 381b8e80941Smrg * int32 on return. 382b8e80941Smrg */ 383b8e80941Smrg ir_rvalue * 384b8e80941Smrg unpack_uint_to_ivec4(ir_rvalue *uint_rval) 385b8e80941Smrg { 386b8e80941Smrg assert(uint_rval->type == glsl_type::uint_type); 387b8e80941Smrg 388b8e80941Smrg if (!(op_mask & LOWER_PACK_USE_BFE)) { 389b8e80941Smrg return rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), 390b8e80941Smrg constant(24u)), 391b8e80941Smrg constant(24u)); 392b8e80941Smrg } 393b8e80941Smrg 394b8e80941Smrg ir_variable *i = factory.make_temp(glsl_type::int_type, 395b8e80941Smrg "tmp_unpack_uint_to_ivec4_i"); 396b8e80941Smrg factory.emit(assign(i, u2i(uint_rval))); 397b8e80941Smrg 398b8e80941Smrg /* ivec4 i4; */ 399b8e80941Smrg ir_variable *i4 = factory.make_temp(glsl_type::ivec4_type, 400b8e80941Smrg "tmp_unpack_uint_to_ivec4_i4"); 401b8e80941Smrg 402b8e80941Smrg factory.emit(assign(i4, bitfield_extract(i, constant(0), constant(8)), 403b8e80941Smrg WRITEMASK_X)); 404b8e80941Smrg factory.emit(assign(i4, bitfield_extract(i, constant(8), constant(8)), 405b8e80941Smrg WRITEMASK_Y)); 406b8e80941Smrg factory.emit(assign(i4, bitfield_extract(i, constant(16), constant(8)), 407b8e80941Smrg WRITEMASK_Z)); 408b8e80941Smrg factory.emit(assign(i4, bitfield_extract(i, constant(24), constant(8)), 409b8e80941Smrg WRITEMASK_W)); 410b8e80941Smrg 411b8e80941Smrg return deref(i4).val; 412b8e80941Smrg } 413b8e80941Smrg 414b8e80941Smrg /** 415b8e80941Smrg * \brief Lower a packSnorm2x16 expression. 
416b8e80941Smrg * 417b8e80941Smrg * \param vec2_rval is packSnorm2x16's input 418b8e80941Smrg * \return packSnorm2x16's output as a uint rvalue 419b8e80941Smrg */ 420b8e80941Smrg ir_rvalue* 421b8e80941Smrg lower_pack_snorm_2x16(ir_rvalue *vec2_rval) 422b8e80941Smrg { 423b8e80941Smrg /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: 424b8e80941Smrg * 425b8e80941Smrg * highp uint packSnorm2x16(vec2 v) 426b8e80941Smrg * -------------------------------- 427b8e80941Smrg * First, converts each component of the normalized floating-point value 428b8e80941Smrg * v into 16-bit integer values. Then, the results are packed into the 429b8e80941Smrg * returned 32-bit unsigned integer. 430b8e80941Smrg * 431b8e80941Smrg * The conversion for component c of v to fixed point is done as 432b8e80941Smrg * follows: 433b8e80941Smrg * 434b8e80941Smrg * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) 435b8e80941Smrg * 436b8e80941Smrg * The first component of the vector will be written to the least 437b8e80941Smrg * significant bits of the output; the last component will be written to 438b8e80941Smrg * the most significant bits. 439b8e80941Smrg * 440b8e80941Smrg * This function generates IR that approximates the following pseudo-GLSL: 441b8e80941Smrg * 442b8e80941Smrg * return pack_uvec2_to_uint( 443b8e80941Smrg * uvec2(ivec2( 444b8e80941Smrg * round(clamp(VEC2_RVALUE, -1.0f, 1.0f) * 32767.0f)))); 445b8e80941Smrg * 446b8e80941Smrg * It is necessary to first convert the vec2 to ivec2 rather than directly 447b8e80941Smrg * converting vec2 to uvec2 because the latter conversion is undefined. 448b8e80941Smrg * From page 56 (62 of pdf) of the GLSL ES 3.00 spec: "It is undefined to 449b8e80941Smrg * convert a negative floating point value to an uint". 
450b8e80941Smrg */ 451b8e80941Smrg assert(vec2_rval->type == glsl_type::vec2_type); 452b8e80941Smrg 453b8e80941Smrg ir_rvalue *result = pack_uvec2_to_uint( 454b8e80941Smrg i2u(f2i(round_even(mul(clamp(vec2_rval, 455b8e80941Smrg constant(-1.0f), 456b8e80941Smrg constant(1.0f)), 457b8e80941Smrg constant(32767.0f)))))); 458b8e80941Smrg 459b8e80941Smrg assert(result->type == glsl_type::uint_type); 460b8e80941Smrg return result; 461b8e80941Smrg } 462b8e80941Smrg 463b8e80941Smrg /** 464b8e80941Smrg * \brief Lower a packSnorm4x8 expression. 465b8e80941Smrg * 466b8e80941Smrg * \param vec4_rval is packSnorm4x8's input 467b8e80941Smrg * \return packSnorm4x8's output as a uint rvalue 468b8e80941Smrg */ 469b8e80941Smrg ir_rvalue* 470b8e80941Smrg lower_pack_snorm_4x8(ir_rvalue *vec4_rval) 471b8e80941Smrg { 472b8e80941Smrg /* From page 137 (143 of pdf) of the GLSL 4.30 spec: 473b8e80941Smrg * 474b8e80941Smrg * highp uint packSnorm4x8(vec4 v) 475b8e80941Smrg * ------------------------------- 476b8e80941Smrg * First, converts each component of the normalized floating-point value 477b8e80941Smrg * v into 8-bit integer values. Then, the results are packed into the 478b8e80941Smrg * returned 32-bit unsigned integer. 479b8e80941Smrg * 480b8e80941Smrg * The conversion for component c of v to fixed point is done as 481b8e80941Smrg * follows: 482b8e80941Smrg * 483b8e80941Smrg * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) 484b8e80941Smrg * 485b8e80941Smrg * The first component of the vector will be written to the least 486b8e80941Smrg * significant bits of the output; the last component will be written to 487b8e80941Smrg * the most significant bits. 
488b8e80941Smrg * 489b8e80941Smrg * This function generates IR that approximates the following pseudo-GLSL: 490b8e80941Smrg * 491b8e80941Smrg * return pack_uvec4_to_uint( 492b8e80941Smrg * uvec4(ivec4( 493b8e80941Smrg * round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f)))); 494b8e80941Smrg * 495b8e80941Smrg * It is necessary to first convert the vec4 to ivec4 rather than directly 496b8e80941Smrg * converting vec4 to uvec4 because the latter conversion is undefined. 497b8e80941Smrg * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined to 498b8e80941Smrg * convert a negative floating point value to an uint". 499b8e80941Smrg */ 500b8e80941Smrg assert(vec4_rval->type == glsl_type::vec4_type); 501b8e80941Smrg 502b8e80941Smrg ir_rvalue *result = pack_uvec4_to_uint( 503b8e80941Smrg i2u(f2i(round_even(mul(clamp(vec4_rval, 504b8e80941Smrg constant(-1.0f), 505b8e80941Smrg constant(1.0f)), 506b8e80941Smrg constant(127.0f)))))); 507b8e80941Smrg 508b8e80941Smrg assert(result->type == glsl_type::uint_type); 509b8e80941Smrg return result; 510b8e80941Smrg } 511b8e80941Smrg 512b8e80941Smrg /** 513b8e80941Smrg * \brief Lower an unpackSnorm2x16 expression. 514b8e80941Smrg * 515b8e80941Smrg * \param uint_rval is unpackSnorm2x16's input 516b8e80941Smrg * \return unpackSnorm2x16's output as a vec2 rvalue 517b8e80941Smrg */ 518b8e80941Smrg ir_rvalue* 519b8e80941Smrg lower_unpack_snorm_2x16(ir_rvalue *uint_rval) 520b8e80941Smrg { 521b8e80941Smrg /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: 522b8e80941Smrg * 523b8e80941Smrg * highp vec2 unpackSnorm2x16 (highp uint p) 524b8e80941Smrg * ----------------------------------------- 525b8e80941Smrg * First, unpacks a single 32-bit unsigned integer p into a pair of 526b8e80941Smrg * 16-bit unsigned integers. Then, each component is converted to 527b8e80941Smrg * a normalized floating-point value to generate the returned 528b8e80941Smrg * two-component vector. 
529b8e80941Smrg * 530b8e80941Smrg * The conversion for unpacked fixed-point value f to floating point is 531b8e80941Smrg * done as follows: 532b8e80941Smrg * 533b8e80941Smrg * unpackSnorm2x16: clamp(f / 32767.0, -1,+1) 534b8e80941Smrg * 535b8e80941Smrg * The first component of the returned vector will be extracted from the 536b8e80941Smrg * least significant bits of the input; the last component will be 537b8e80941Smrg * extracted from the most significant bits. 538b8e80941Smrg * 539b8e80941Smrg * This function generates IR that approximates the following pseudo-GLSL: 540b8e80941Smrg * 541b8e80941Smrg * return clamp( 542b8e80941Smrg * ((ivec2(unpack_uint_to_uvec2(UINT_RVALUE)) << 16) >> 16) / 32767.0f, 543b8e80941Smrg * -1.0f, 1.0f); 544b8e80941Smrg * 545b8e80941Smrg * The above IR may appear unnecessarily complex, but the intermediate 546b8e80941Smrg * conversion to ivec2 and the bit shifts are necessary to correctly unpack 547b8e80941Smrg * negative floats. 548b8e80941Smrg * 549b8e80941Smrg * To see why, consider packing and then unpacking vec2(-1.0, 0.0). 550b8e80941Smrg * packSnorm2x16 encodes -1.0 as the int16 0xffff. During unpacking, we 551b8e80941Smrg * place that int16 into an int32, which results in the *positive* integer 552b8e80941Smrg * 0x0000ffff. The int16's sign bit becomes, in the int32, the rather 553b8e80941Smrg * unimportant bit 16. We must now extend the int16's sign bit into bits 554b8e80941Smrg * 17-32, which is accomplished by left-shifting then right-shifting. 
555b8e80941Smrg */ 556b8e80941Smrg 557b8e80941Smrg assert(uint_rval->type == glsl_type::uint_type); 558b8e80941Smrg 559b8e80941Smrg ir_rvalue *result = 560b8e80941Smrg clamp(div(i2f(unpack_uint_to_ivec2(uint_rval)), 561b8e80941Smrg constant(32767.0f)), 562b8e80941Smrg constant(-1.0f), 563b8e80941Smrg constant(1.0f)); 564b8e80941Smrg 565b8e80941Smrg assert(result->type == glsl_type::vec2_type); 566b8e80941Smrg return result; 567b8e80941Smrg } 568b8e80941Smrg 569b8e80941Smrg /** 570b8e80941Smrg * \brief Lower an unpackSnorm4x8 expression. 571b8e80941Smrg * 572b8e80941Smrg * \param uint_rval is unpackSnorm4x8's input 573b8e80941Smrg * \return unpackSnorm4x8's output as a vec4 rvalue 574b8e80941Smrg */ 575b8e80941Smrg ir_rvalue* 576b8e80941Smrg lower_unpack_snorm_4x8(ir_rvalue *uint_rval) 577b8e80941Smrg { 578b8e80941Smrg /* From page 137 (143 of pdf) of the GLSL 4.30 spec: 579b8e80941Smrg * 580b8e80941Smrg * highp vec4 unpackSnorm4x8 (highp uint p) 581b8e80941Smrg * ---------------------------------------- 582b8e80941Smrg * First, unpacks a single 32-bit unsigned integer p into four 583b8e80941Smrg * 8-bit unsigned integers. Then, each component is converted to 584b8e80941Smrg * a normalized floating-point value to generate the returned 585b8e80941Smrg * four-component vector. 586b8e80941Smrg * 587b8e80941Smrg * The conversion for unpacked fixed-point value f to floating point is 588b8e80941Smrg * done as follows: 589b8e80941Smrg * 590b8e80941Smrg * unpackSnorm4x8: clamp(f / 127.0, -1, +1) 591b8e80941Smrg * 592b8e80941Smrg * The first component of the returned vector will be extracted from the 593b8e80941Smrg * least significant bits of the input; the last component will be 594b8e80941Smrg * extracted from the most significant bits. 
595b8e80941Smrg * 596b8e80941Smrg * This function generates IR that approximates the following pseudo-GLSL: 597b8e80941Smrg * 598b8e80941Smrg * return clamp( 599b8e80941Smrg * ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) / 127.0f, 600b8e80941Smrg * -1.0f, 1.0f); 601b8e80941Smrg * 602b8e80941Smrg * The above IR may appear unnecessarily complex, but the intermediate 603b8e80941Smrg * conversion to ivec4 and the bit shifts are necessary to correctly unpack 604b8e80941Smrg * negative floats. 605b8e80941Smrg * 606b8e80941Smrg * To see why, consider packing and then unpacking vec4(-1.0, 0.0, 0.0, 607b8e80941Smrg * 0.0). packSnorm4x8 encodes -1.0 as the int8 0xff. During unpacking, we 608b8e80941Smrg * place that int8 into an int32, which results in the *positive* integer 609b8e80941Smrg * 0x000000ff. The int8's sign bit becomes, in the int32, the rather 610b8e80941Smrg * unimportant bit 8. We must now extend the int8's sign bit into bits 611b8e80941Smrg * 9-32, which is accomplished by left-shifting then right-shifting. 612b8e80941Smrg */ 613b8e80941Smrg 614b8e80941Smrg assert(uint_rval->type == glsl_type::uint_type); 615b8e80941Smrg 616b8e80941Smrg ir_rvalue *result = 617b8e80941Smrg clamp(div(i2f(unpack_uint_to_ivec4(uint_rval)), 618b8e80941Smrg constant(127.0f)), 619b8e80941Smrg constant(-1.0f), 620b8e80941Smrg constant(1.0f)); 621b8e80941Smrg 622b8e80941Smrg assert(result->type == glsl_type::vec4_type); 623b8e80941Smrg return result; 624b8e80941Smrg } 625b8e80941Smrg 626b8e80941Smrg /** 627b8e80941Smrg * \brief Lower a packUnorm2x16 expression. 
628b8e80941Smrg * 629b8e80941Smrg * \param vec2_rval is packUnorm2x16's input 630b8e80941Smrg * \return packUnorm2x16's output as a uint rvalue 631b8e80941Smrg */ 632b8e80941Smrg ir_rvalue* 633b8e80941Smrg lower_pack_unorm_2x16(ir_rvalue *vec2_rval) 634b8e80941Smrg { 635b8e80941Smrg /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: 636b8e80941Smrg * 637b8e80941Smrg * highp uint packUnorm2x16 (vec2 v) 638b8e80941Smrg * --------------------------------- 639b8e80941Smrg * First, converts each component of the normalized floating-point value 640b8e80941Smrg * v into 16-bit integer values. Then, the results are packed into the 641b8e80941Smrg * returned 32-bit unsigned integer. 642b8e80941Smrg * 643b8e80941Smrg * The conversion for component c of v to fixed point is done as 644b8e80941Smrg * follows: 645b8e80941Smrg * 646b8e80941Smrg * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) 647b8e80941Smrg * 648b8e80941Smrg * The first component of the vector will be written to the least 649b8e80941Smrg * significant bits of the output; the last component will be written to 650b8e80941Smrg * the most significant bits. 651b8e80941Smrg * 652b8e80941Smrg * This function generates IR that approximates the following pseudo-GLSL: 653b8e80941Smrg * 654b8e80941Smrg * return pack_uvec2_to_uint(uvec2( 655b8e80941Smrg * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 65535.0f))); 656b8e80941Smrg * 657b8e80941Smrg * Here it is safe to directly convert the vec2 to uvec2 because the vec2 658b8e80941Smrg * has been clamped to a non-negative range. 
       */

      assert(vec2_rval->type == glsl_type::vec2_type);

      ir_rvalue *result = pack_uvec2_to_uint(
         f2u(round_even(mul(saturate(vec2_rval), constant(65535.0f)))));

      assert(result->type == glsl_type::uint_type);
      return result;
   }

   /**
    * \brief Lower a packUnorm4x8 expression.
    *
    * \param vec4_rval is packUnorm4x8's input
    * \return packUnorm4x8's output as a uint rvalue
    */
   ir_rvalue*
   lower_pack_unorm_4x8(ir_rvalue *vec4_rval)
   {
      /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
       *
       *    highp uint packUnorm4x8 (vec4 v)
       *    --------------------------------
       *    First, converts each component of the normalized floating-point value
       *    v into 8-bit integer values. Then, the results are packed into the
       *    returned 32-bit unsigned integer.
       *
       *    The conversion for component c of v to fixed point is done as
       *    follows:
       *
       *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
       *
       *    The first component of the vector will be written to the least
       *    significant bits of the output; the last component will be written to
       *    the most significant bits.
       *
       * This function generates IR that approximates the following pseudo-GLSL:
       *
       *     return pack_uvec4_to_uint(uvec4(
       *                round(clamp(VEC4_RVALUE, 0.0f, 1.0f) * 255.0f)));
       *
       * Here it is safe to directly convert the vec4 to uvec4 because the vec4
       * has been clamped to a non-negative range.
       */

      assert(vec4_rval->type == glsl_type::vec4_type);

      ir_rvalue *result = pack_uvec4_to_uint(
         f2u(round_even(mul(saturate(vec4_rval), constant(255.0f)))));

      assert(result->type == glsl_type::uint_type);
      return result;
   }

   /**
    * \brief Lower an unpackUnorm2x16 expression.
    *
    * \param uint_rval is unpackUnorm2x16's input
    * \return unpackUnorm2x16's output as a vec2 rvalue
    */
   ir_rvalue*
   lower_unpack_unorm_2x16(ir_rvalue *uint_rval)
   {
      /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec:
       *
       *    highp vec2 unpackUnorm2x16 (highp uint p)
       *    -----------------------------------------
       *    First, unpacks a single 32-bit unsigned integer p into a pair of
       *    16-bit unsigned integers. Then, each component is converted to
       *    a normalized floating-point value to generate the returned
       *    two-component vector.
       *
       *    The conversion for unpacked fixed-point value f to floating point is
       *    done as follows:
       *
       *       unpackUnorm2x16: f / 65535.0
       *
       *    The first component of the returned vector will be extracted from the
       *    least significant bits of the input; the last component will be
       *    extracted from the most significant bits.
       *
       * This function generates IR that approximates the following pseudo-GLSL:
       *
       *     return vec2(unpack_uint_to_uvec2(UINT_RVALUE)) / 65535.0;
       */

      assert(uint_rval->type == glsl_type::uint_type);

      ir_rvalue *result = div(u2f(unpack_uint_to_uvec2(uint_rval)),
                              constant(65535.0f));

      assert(result->type == glsl_type::vec2_type);
      return result;
   }

   /**
    * \brief Lower an unpackUnorm4x8 expression.
    *
    * \param uint_rval is unpackUnorm4x8's input
    * \return unpackUnorm4x8's output as a vec4 rvalue
    */
   ir_rvalue*
   lower_unpack_unorm_4x8(ir_rvalue *uint_rval)
   {
      /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
       *
       *    highp vec4 unpackUnorm4x8 (highp uint p)
       *    ----------------------------------------
       *    First, unpacks a single 32-bit unsigned integer p into four
       *    8-bit unsigned integers. Then, each component is converted to
       *    a normalized floating-point value to generate the returned
       *    four-component vector.
       *
       *    The conversion for unpacked fixed-point value f to floating point is
       *    done as follows:
       *
       *       unpackUnorm4x8: f / 255.0
       *
       *    The first component of the returned vector will be extracted from the
       *    least significant bits of the input; the last component will be
       *    extracted from the most significant bits.
       *
       * This function generates IR that approximates the following pseudo-GLSL:
       *
       *     return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0;
       */

      assert(uint_rval->type == glsl_type::uint_type);

      ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)),
                              constant(255.0f));

      assert(result->type == glsl_type::vec4_type);
      return result;
   }

   /**
    * \brief Lower the component-wise calculation of packHalf2x16.
    *
    * \param f_rval is one component of packHalf2x16's input
    * \param e_rval is the unshifted exponent bits of f_rval
    * \param m_rval is the unshifted mantissa bits of f_rval
    *
    * \return a uint rvalue that encodes a float16 in its lower 16 bits
    */
   ir_rvalue*
   pack_half_1x16_nosign(ir_rvalue *f_rval,
                         ir_rvalue *e_rval,
                         ir_rvalue *m_rval)
   {
      assert(e_rval->type == glsl_type::uint_type);
      assert(m_rval->type == glsl_type::uint_type);

      /* uint u16; */
      ir_variable *u16 = factory.make_temp(glsl_type::uint_type,
                                           "tmp_pack_half_1x16_u16");

      /* float f = FLOAT_RVAL; */
      ir_variable *f = factory.make_temp(glsl_type::float_type,
                                         "tmp_pack_half_1x16_f");
      factory.emit(assign(f, f_rval));

      /* uint e = E_RVAL; */
      ir_variable *e = factory.make_temp(glsl_type::uint_type,
                                         "tmp_pack_half_1x16_e");
      factory.emit(assign(e, e_rval));

      /* uint m = M_RVAL; */
      ir_variable *m = factory.make_temp(glsl_type::uint_type,
                                         "tmp_pack_half_1x16_m");
      factory.emit(assign(m, m_rval));

      /* Preliminaries
       * -------------
       *
       * For a float16, the bit layout is:
       *
       *   sign:     15
       *   exponent: 10:14
       *   mantissa: 0:9
       *
       * Let f16 be a float16 value. The sign, exponent, and mantissa
       * determine its value thus:
       *
       *   if e16 = 0 and m16 = 0, then zero:       (-1)^s16 * 0                               (1)
       *   if e16 = 0 and m16 != 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10)     (2)
       *   if 0 < e16 < 31, then normal:            (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3)
       *   if e16 = 31 and m16 = 0, then infinite:  (-1)^s16 * inf                             (4)
       *   if e16 = 31 and m16 != 0, then NaN                                                  (5)
       *
       * where 0 <= m16 < 2^10.
       *
       * For a float32, the bit layout is:
       *
       *   sign:     31
       *   exponent: 23:30
       *   mantissa: 0:22
       *
       * Let f32 be a float32 value. The sign, exponent, and mantissa
       * determine its value thus:
       *
       *   if e32 = 0 and m32 = 0, then zero:        (-1)^s * 0                                (10)
       *   if e32 = 0 and m32 != 0, then subnormal:  (-1)^s * 2^(e32 - 126) * (m32 / 2^23)     (11)
       *   if 0 < e32 < 255, then normal:            (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12)
       *   if e32 = 255 and m32 = 0, then infinite:  (-1)^s * inf                              (13)
       *   if e32 = 255 and m32 != 0, then NaN                                                 (14)
       *
       * where 0 <= m32 < 2^23.
       *
       * The minimum and maximum normal float16 values are
       *
       *   min_norm16 = 2^(1 - 15) * (1 + 0 / 2^10) = 2^(-14)  (20)
       *   max_norm16 = 2^(30 - 15) * (1 + 1023 / 2^10)        (21)
       *
       * The step at max_norm16 is
       *
       *   max_step16 = 2^5                                    (22)
       *
       * Observe that the float16 boundary values in equations 20-21 lie in the
       * range of normal float32 values.
       *
       *
       * Rounding Behavior
       * -----------------
       * Not all float32 values can be exactly represented as a float16. We
       * round all such intermediate float32 values to the nearest float16; if
       * the float32 is exactly between two float16 values, we round to the one
       * with an even mantissa. This rounding behavior has several benefits:
       *
       *   - It has no sign bias.
       *
       *   - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
       *     GPU ISA.
       *
       *   - By reproducing the behavior of the GPU (at least on Intel hardware),
       *     compile-time evaluation of constant packHalf2x16 GLSL expressions will
       *     result in the same value as if the expression were executed on the
       *     GPU.
       *
       * Calculation
       * -----------
       * Our task is to compute s16, e16, m16 given f32. Since this function
       * ignores the sign bit, assume that s32 = s16 = 0. There are several
       * cases to consider.
       */

      factory.emit(

         /* Case 1) f32 is NaN
          *
          * The resultant f16 will also be NaN.
          */

         /* if (e32 == 255 && m32 != 0) { */
         if_tree(logic_and(equal(e, constant(0xffu << 23u)),
                           logic_not(equal(m, constant(0u)))),

            assign(u16, constant(0x7fffu)),

         /* Case 2) f32 lies in the range [0, min_norm16).
          *
          * The resultant float16 will be either zero, subnormal, or normal.
          *
          * Solving
          *
          *   f32 = min_norm16       (30)
          *
          * gives
          *
          *   e32 = 113 and m32 = 0  (31)
          *
          * Therefore this case occurs if and only if
          *
          *   e32 < 113              (32)
          */

         /* } else if (e32 < 113) { */
         if_tree(less(e, constant(113u << 23u)),

            /* u16 = uint(round_to_even(abs(f32) * float(1u << 24u))); */
            assign(u16, f2u(round_even(mul(expr(ir_unop_abs, f),
                                           constant((float) (1 << 24)))))),

         /* Case 3) f32 lies in the range
          * [min_norm16, max_norm16 + max_step16).
          *
          * The resultant float16 will be either normal or infinite.
          *
          * Solving
          *
          *   f32 = max_norm16 + max_step16           (40)
          *       = 2^15 * (1 + 1023 / 2^10) + 2^5    (41)
          *       = 2^16                              (42)
          * gives
          *
          *   e32 = 143 and m32 = 0                   (43)
          *
          * We already solved the boundary condition f32 = min_norm16 above
          * in equation 31. Therefore this case occurs if and only if
          *
          *   113 <= e32 and e32 < 143
          */

         /* } else if (e32 < 143) { */
         if_tree(less(e, constant(143u << 23u)),

            /* The addition below handles the case where the mantissa rounds
             * up to 1024 and bumps the exponent.
             *
             * u16 = ((e - (112u << 23u)) >> 13u)
             *     + round_to_even(float(m) / (1u << 13u));
             */
            assign(u16, add(rshift(sub(e, constant(112u << 23u)),
                                   constant(13u)),
                            f2u(round_even(
                                  div(u2f(m), constant((float) (1 << 13))))))),

         /* Case 4) f32 lies in the range [max_norm16 + max_step16, inf].
          *
          * The resultant float16 will be infinite.
          *
          * The cases above caught all float32 values in the range
          * [0, max_norm16 + max_step16), so this is the fall-through case.
          */

         /* } else { */

            assign(u16, constant(31u << 10u))))));

         /* } */

      return deref(u16).val;
   }

   /**
    * \brief Lower a packHalf2x16 expression.
    *
    * \param vec2_rval is packHalf2x16's input
    * \return packHalf2x16's output as a uint rvalue
    */
   ir_rvalue*
   lower_pack_half_2x16(ir_rvalue *vec2_rval)
   {
      /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec:
       *
       *    highp uint packHalf2x16 (mediump vec2 v)
       *    ----------------------------------------
       *    Returns an unsigned integer obtained by converting the components of
       *    a two-component floating-point vector to the 16-bit floating-point
       *    representation found in the OpenGL ES Specification, and then packing
       *    these two 16-bit integers into a 32-bit unsigned integer.
       *
       *    The first vector component specifies the 16 least-significant bits
       *    of the result; the second component specifies the 16 most-significant
       *    bits.
       */

      assert(vec2_rval->type == glsl_type::vec2_type);

      /* vec2 f = VEC2_RVAL; */
      ir_variable *f = factory.make_temp(glsl_type::vec2_type,
                                         "tmp_pack_half_2x16_f");
      factory.emit(assign(f, vec2_rval));

      /* uvec2 f32 = bitcast_f2u(f); */
      ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type,
                                           "tmp_pack_half_2x16_f32");
      factory.emit(assign(f32, expr(ir_unop_bitcast_f2u, f)));

      /* uvec2 f16; */
      ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type,
                                           "tmp_pack_half_2x16_f16");

      /* Get f32's unshifted exponent bits.
       *
       *    uvec2 e = f32 & 0x7f800000u;
       */
      ir_variable *e = factory.make_temp(glsl_type::uvec2_type,
                                         "tmp_pack_half_2x16_e");
      factory.emit(assign(e, bit_and(f32, constant(0x7f800000u))));

      /* Get f32's unshifted mantissa bits.
       *
       *    uvec2 m = f32 & 0x007fffffu;
       */
      ir_variable *m = factory.make_temp(glsl_type::uvec2_type,
                                         "tmp_pack_half_2x16_m");
      factory.emit(assign(m, bit_and(f32, constant(0x007fffffu))));

      /* Set f16's exponent and mantissa bits.
       *
       *   f16.x = pack_half_1x16_nosign(f.x, e.x, m.x);
       *   f16.y = pack_half_1x16_nosign(f.y, e.y, m.y);
       */
      factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_x(f),
                                                     swizzle_x(e),
                                                     swizzle_x(m)),
                           WRITEMASK_X));
      factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_y(f),
                                                     swizzle_y(e),
                                                     swizzle_y(m)),
                           WRITEMASK_Y));

      /* Set f16's sign bits.
       *
       *    f16 |= (f32 & (1u << 31u)) >> 16u;
       */
      factory.emit(
         assign(f16, bit_or(f16,
                            rshift(bit_and(f32, constant(1u << 31u)),
                                   constant(16u)))));


      /* return (f16.y << 16u) | f16.x; */
      ir_rvalue *result = bit_or(lshift(swizzle_y(f16),
                                        constant(16u)),
                                 swizzle_x(f16));

      assert(result->type == glsl_type::uint_type);
      return result;
   }

   /**
    * \brief Lower the component-wise calculation of unpackHalf2x16.
    *
    * Given a uint that encodes a float16 in its lower 16 bits, this function
    * returns a uint that encodes a float32 with the same value. The sign bit
    * of the float16 is ignored.
    *
    * \param e_rval is the unshifted exponent bits of a float16
    * \param m_rval is the unshifted mantissa bits of a float16
    * \return a uint rvalue that encodes a float32
    */
   ir_rvalue*
   unpack_half_1x16_nosign(ir_rvalue *e_rval, ir_rvalue *m_rval)
   {
      assert(e_rval->type == glsl_type::uint_type);
      assert(m_rval->type == glsl_type::uint_type);

      /* uint u32; */
      ir_variable *u32 = factory.make_temp(glsl_type::uint_type,
                                           "tmp_unpack_half_1x16_u32");

      /* uint e = E_RVAL; */
      ir_variable *e = factory.make_temp(glsl_type::uint_type,
                                         "tmp_unpack_half_1x16_e");
      factory.emit(assign(e, e_rval));

      /* uint m = M_RVAL; */
      ir_variable *m = factory.make_temp(glsl_type::uint_type,
                                         "tmp_unpack_half_1x16_m");
      factory.emit(assign(m, m_rval));

      /* Preliminaries
       * -------------
       *
       * For a float16, the bit layout is:
       *
       *   sign:     15
       *   exponent: 10:14
       *   mantissa: 0:9
       *
       * Let f16 be a float16 value. The sign, exponent, and mantissa
       * determine its value thus:
       *
       *   if e16 = 0 and m16 = 0, then zero:       (-1)^s16 * 0                               (1)
       *   if e16 = 0 and m16 != 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10)     (2)
       *   if 0 < e16 < 31, then normal:            (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3)
       *   if e16 = 31 and m16 = 0, then infinite:  (-1)^s16 * inf                             (4)
       *   if e16 = 31 and m16 != 0, then NaN                                                  (5)
       *
       * where 0 <= m16 < 2^10.
       *
       * For a float32, the bit layout is:
       *
       *   sign:     31
       *   exponent: 23:30
       *   mantissa: 0:22
       *
       * Let f32 be a float32 value. The sign, exponent, and mantissa
       * determine its value thus:
       *
       *   if e32 = 0 and m32 = 0, then zero:        (-1)^s * 0                                (10)
       *   if e32 = 0 and m32 != 0, then subnormal:  (-1)^s * 2^(e32 - 126) * (m32 / 2^23)     (11)
       *   if 0 < e32 < 255, then normal:            (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12)
       *   if e32 = 255 and m32 = 0, then infinite:  (-1)^s * inf                              (13)
       *   if e32 = 255 and m32 != 0, then NaN                                                 (14)
       *
       * where 0 <= m32 < 2^23.
       *
       * Calculation
       * -----------
       * Our task is to compute s32, e32, m32 given f16. Since this function
       * ignores the sign bit, assume that s32 = s16 = 0. There are several
       * cases to consider.
       */

      factory.emit(

         /* Case 1) f16 is zero or subnormal.
          *
          * The simplest method of calculating f32 in this case is
          *
          *   f32 = f16                      (20)
          *       = 2^(-14) * (m16 / 2^10)   (21)
          *       = m16 / 2^24               (22)
          */

         /* if (e16 == 0) { */
         if_tree(equal(e, constant(0u)),

            /* u32 = bitcast_f2u(float(m) / float(1 << 24)); */
            assign(u32, expr(ir_unop_bitcast_f2u,
                             div(u2f(m), constant((float)(1 << 24))))),

         /* Case 2) f16 is normal.
          *
          * The equation
          *
          *   f32 = f16                                (30)
          *   2^(e32 - 127) * (1 + m32 / 2^23) =       (31)
          *     2^(e16 - 15) * (1 + m16 / 2^10)
          *
          * can be decomposed into two
          *
          *   2^(e32 - 127) = 2^(e16 - 15)             (32)
          *   1 + m32 / 2^23 = 1 + m16 / 2^10          (33)
          *
          * which solve to
          *
          *   e32 = e16 + 112                          (34)
          *   m32 = m16 * 2^13                         (35)
          */

         /* } else if (e16 < 31) { */
         if_tree(less(e, constant(31u << 10u)),

            /* u32 = ((e + (112 << 10)) | m) << 13;
             */
            assign(u32, lshift(bit_or(add(e, constant(112u << 10u)), m),
                               constant(13u))),


         /* Case 3) f16 is infinite. */
         if_tree(equal(m, constant(0u)),

            assign(u32, constant(255u << 23u)),

         /* Case 4) f16 is NaN.
          */
         /* } else { */

            assign(u32, constant(0x7fffffffu))))));

      /* } */

      return deref(u32).val;
   }

   /**
    * \brief Lower an unpackHalf2x16 expression.
    *
    * \param uint_rval is unpackHalf2x16's input
    * \return unpackHalf2x16's output as a vec2 rvalue
    */
   ir_rvalue*
   lower_unpack_half_2x16(ir_rvalue *uint_rval)
   {
      /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec:
       *
       *    mediump vec2 unpackHalf2x16 (highp uint v)
       *    ------------------------------------------
       *    Returns a two-component floating-point vector with components
       *    obtained by unpacking a 32-bit unsigned integer into a pair of 16-bit
       *    values, interpreting those values as 16-bit floating-point numbers
       *    according to the OpenGL ES Specification, and converting them to
       *    32-bit floating-point values.
       *
       *    The first component of the vector is obtained from the
       *    16 least-significant bits of v; the second component is obtained
       *    from the 16 most-significant bits of v.
       */
      assert(uint_rval->type == glsl_type::uint_type);

      /* uint u = RVALUE;
       * uvec2 f16 = uvec2(u & 0xffffu, u >> 16u);
       */
      ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type,
                                           "tmp_unpack_half_2x16_f16");
      factory.emit(assign(f16, unpack_uint_to_uvec2(uint_rval)));

      /* uvec2 f32; */
      ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type,
                                           "tmp_unpack_half_2x16_f32");

      /* Get f16's unshifted exponent bits.
       *
       *    uvec2 e = f16 & 0x7c00u;
       */
      ir_variable *e = factory.make_temp(glsl_type::uvec2_type,
                                         "tmp_unpack_half_2x16_e");
      factory.emit(assign(e, bit_and(f16, constant(0x7c00u))));

      /* Get f16's unshifted mantissa bits.
       *
       *    uvec2 m = f16 & 0x03ffu;
       */
      ir_variable *m = factory.make_temp(glsl_type::uvec2_type,
                                         "tmp_unpack_half_2x16_m");
      factory.emit(assign(m, bit_and(f16, constant(0x03ffu))));

      /* Set f32's exponent and mantissa bits.
       *
       *   f32.x = unpack_half_1x16_nosign(e.x, m.x);
       *   f32.y = unpack_half_1x16_nosign(e.y, m.y);
       */
      factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_x(e),
                                                       swizzle_x(m)),
                          WRITEMASK_X));
      factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_y(e),
                                                       swizzle_y(m)),
                          WRITEMASK_Y));

      /* Set f32's sign bit.
       *
       *    f32 |= (f16 & 0x8000u) << 16u;
       */
      factory.emit(assign(f32, bit_or(f32,
                                      lshift(bit_and(f16,
                                                     constant(0x8000u)),
                                             constant(16u)))));

      /* return bitcast_u2f(f32); */
      ir_rvalue *result = expr(ir_unop_bitcast_u2f, f32);
      assert(result->type == glsl_type::vec2_type);
      return result;
   }
};

} // namespace anonymous

/**
 * \brief Lower the builtin packing functions.
 *
 * \param op_mask is a bitmask of `enum lower_packing_builtins_op`.
 */
bool
lower_packing_builtins(exec_list *instructions, int op_mask)
{
   lower_packing_builtins_visitor v(op_mask);
   visit_list_elements(&v, instructions, true);
   return v.get_progress();
}