/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "ir.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "ir_rvalue_visitor.h"

namespace {

using namespace ir_builder;

/**
 * A visitor that lowers built-in floating-point pack/unpack expressions
 * such as packSnorm2x16.
3601e04c3fSmrg */ 3701e04c3fSmrgclass lower_packing_builtins_visitor : public ir_rvalue_visitor { 3801e04c3fSmrgpublic: 3901e04c3fSmrg /** 4001e04c3fSmrg * \param op_mask is a bitmask of `enum lower_packing_builtins_op` 4101e04c3fSmrg */ 4201e04c3fSmrg explicit lower_packing_builtins_visitor(int op_mask) 4301e04c3fSmrg : op_mask(op_mask), 4401e04c3fSmrg progress(false) 4501e04c3fSmrg { 4601e04c3fSmrg factory.instructions = &factory_instructions; 4701e04c3fSmrg } 4801e04c3fSmrg 4901e04c3fSmrg virtual ~lower_packing_builtins_visitor() 5001e04c3fSmrg { 5101e04c3fSmrg assert(factory_instructions.is_empty()); 5201e04c3fSmrg } 5301e04c3fSmrg 5401e04c3fSmrg bool get_progress() { return progress; } 5501e04c3fSmrg 5601e04c3fSmrg void handle_rvalue(ir_rvalue **rvalue) 5701e04c3fSmrg { 5801e04c3fSmrg if (!*rvalue) 5901e04c3fSmrg return; 6001e04c3fSmrg 6101e04c3fSmrg ir_expression *expr = (*rvalue)->as_expression(); 6201e04c3fSmrg if (!expr) 6301e04c3fSmrg return; 6401e04c3fSmrg 6501e04c3fSmrg enum lower_packing_builtins_op lowering_op = 6601e04c3fSmrg choose_lowering_op(expr->operation); 6701e04c3fSmrg 6801e04c3fSmrg if (lowering_op == LOWER_PACK_UNPACK_NONE) 6901e04c3fSmrg return; 7001e04c3fSmrg 7101e04c3fSmrg setup_factory(ralloc_parent(expr)); 7201e04c3fSmrg 7301e04c3fSmrg ir_rvalue *op0 = expr->operands[0]; 7401e04c3fSmrg ralloc_steal(factory.mem_ctx, op0); 7501e04c3fSmrg 7601e04c3fSmrg switch (lowering_op) { 7701e04c3fSmrg case LOWER_PACK_SNORM_2x16: 7801e04c3fSmrg *rvalue = lower_pack_snorm_2x16(op0); 7901e04c3fSmrg break; 8001e04c3fSmrg case LOWER_PACK_SNORM_4x8: 8101e04c3fSmrg *rvalue = lower_pack_snorm_4x8(op0); 8201e04c3fSmrg break; 8301e04c3fSmrg case LOWER_PACK_UNORM_2x16: 8401e04c3fSmrg *rvalue = lower_pack_unorm_2x16(op0); 8501e04c3fSmrg break; 8601e04c3fSmrg case LOWER_PACK_UNORM_4x8: 8701e04c3fSmrg *rvalue = lower_pack_unorm_4x8(op0); 8801e04c3fSmrg break; 8901e04c3fSmrg case LOWER_PACK_HALF_2x16: 9001e04c3fSmrg *rvalue = lower_pack_half_2x16(op0); 
9101e04c3fSmrg break; 9201e04c3fSmrg case LOWER_UNPACK_SNORM_2x16: 9301e04c3fSmrg *rvalue = lower_unpack_snorm_2x16(op0); 9401e04c3fSmrg break; 9501e04c3fSmrg case LOWER_UNPACK_SNORM_4x8: 9601e04c3fSmrg *rvalue = lower_unpack_snorm_4x8(op0); 9701e04c3fSmrg break; 9801e04c3fSmrg case LOWER_UNPACK_UNORM_2x16: 9901e04c3fSmrg *rvalue = lower_unpack_unorm_2x16(op0); 10001e04c3fSmrg break; 10101e04c3fSmrg case LOWER_UNPACK_UNORM_4x8: 10201e04c3fSmrg *rvalue = lower_unpack_unorm_4x8(op0); 10301e04c3fSmrg break; 10401e04c3fSmrg case LOWER_UNPACK_HALF_2x16: 10501e04c3fSmrg *rvalue = lower_unpack_half_2x16(op0); 10601e04c3fSmrg break; 10701e04c3fSmrg case LOWER_PACK_UNPACK_NONE: 10801e04c3fSmrg case LOWER_PACK_USE_BFI: 10901e04c3fSmrg case LOWER_PACK_USE_BFE: 11001e04c3fSmrg assert(!"not reached"); 11101e04c3fSmrg break; 11201e04c3fSmrg } 11301e04c3fSmrg 11401e04c3fSmrg teardown_factory(); 11501e04c3fSmrg progress = true; 11601e04c3fSmrg } 11701e04c3fSmrg 11801e04c3fSmrgprivate: 11901e04c3fSmrg const int op_mask; 12001e04c3fSmrg bool progress; 12101e04c3fSmrg ir_factory factory; 12201e04c3fSmrg exec_list factory_instructions; 12301e04c3fSmrg 12401e04c3fSmrg /** 12501e04c3fSmrg * Determine the needed lowering operation by filtering \a expr_op 12601e04c3fSmrg * through \ref op_mask. 12701e04c3fSmrg */ 12801e04c3fSmrg enum lower_packing_builtins_op 12901e04c3fSmrg choose_lowering_op(ir_expression_operation expr_op) 13001e04c3fSmrg { 13101e04c3fSmrg /* C++ regards int and enum as fundamentally different types. 13201e04c3fSmrg * So, we can't simply return from each case; we must cast the return 13301e04c3fSmrg * value. 
13401e04c3fSmrg */ 13501e04c3fSmrg int result; 13601e04c3fSmrg 13701e04c3fSmrg switch (expr_op) { 13801e04c3fSmrg case ir_unop_pack_snorm_2x16: 13901e04c3fSmrg result = op_mask & LOWER_PACK_SNORM_2x16; 14001e04c3fSmrg break; 14101e04c3fSmrg case ir_unop_pack_snorm_4x8: 14201e04c3fSmrg result = op_mask & LOWER_PACK_SNORM_4x8; 14301e04c3fSmrg break; 14401e04c3fSmrg case ir_unop_pack_unorm_2x16: 14501e04c3fSmrg result = op_mask & LOWER_PACK_UNORM_2x16; 14601e04c3fSmrg break; 14701e04c3fSmrg case ir_unop_pack_unorm_4x8: 14801e04c3fSmrg result = op_mask & LOWER_PACK_UNORM_4x8; 14901e04c3fSmrg break; 15001e04c3fSmrg case ir_unop_pack_half_2x16: 15101e04c3fSmrg result = op_mask & LOWER_PACK_HALF_2x16; 15201e04c3fSmrg break; 15301e04c3fSmrg case ir_unop_unpack_snorm_2x16: 15401e04c3fSmrg result = op_mask & LOWER_UNPACK_SNORM_2x16; 15501e04c3fSmrg break; 15601e04c3fSmrg case ir_unop_unpack_snorm_4x8: 15701e04c3fSmrg result = op_mask & LOWER_UNPACK_SNORM_4x8; 15801e04c3fSmrg break; 15901e04c3fSmrg case ir_unop_unpack_unorm_2x16: 16001e04c3fSmrg result = op_mask & LOWER_UNPACK_UNORM_2x16; 16101e04c3fSmrg break; 16201e04c3fSmrg case ir_unop_unpack_unorm_4x8: 16301e04c3fSmrg result = op_mask & LOWER_UNPACK_UNORM_4x8; 16401e04c3fSmrg break; 16501e04c3fSmrg case ir_unop_unpack_half_2x16: 16601e04c3fSmrg result = op_mask & LOWER_UNPACK_HALF_2x16; 16701e04c3fSmrg break; 16801e04c3fSmrg default: 16901e04c3fSmrg result = LOWER_PACK_UNPACK_NONE; 17001e04c3fSmrg break; 17101e04c3fSmrg } 17201e04c3fSmrg 17301e04c3fSmrg return static_cast<enum lower_packing_builtins_op>(result); 17401e04c3fSmrg } 17501e04c3fSmrg 17601e04c3fSmrg void 17701e04c3fSmrg setup_factory(void *mem_ctx) 17801e04c3fSmrg { 17901e04c3fSmrg assert(factory.mem_ctx == NULL); 18001e04c3fSmrg assert(factory.instructions->is_empty()); 18101e04c3fSmrg 18201e04c3fSmrg factory.mem_ctx = mem_ctx; 18301e04c3fSmrg } 18401e04c3fSmrg 18501e04c3fSmrg void 18601e04c3fSmrg teardown_factory() 18701e04c3fSmrg { 18801e04c3fSmrg 
base_ir->insert_before(factory.instructions); 18901e04c3fSmrg assert(factory.instructions->is_empty()); 19001e04c3fSmrg factory.mem_ctx = NULL; 19101e04c3fSmrg } 19201e04c3fSmrg 19301e04c3fSmrg template <typename T> 19401e04c3fSmrg ir_constant* 19501e04c3fSmrg constant(T x) 19601e04c3fSmrg { 19701e04c3fSmrg return factory.constant(x); 19801e04c3fSmrg } 19901e04c3fSmrg 20001e04c3fSmrg /** 20101e04c3fSmrg * \brief Pack two uint16's into a single uint32. 20201e04c3fSmrg * 20301e04c3fSmrg * Interpret the given uvec2 as a uint16 pair. Pack the pair into a uint32 20401e04c3fSmrg * where the least significant bits specify the first element of the pair. 20501e04c3fSmrg * Return the uint32. 20601e04c3fSmrg */ 20701e04c3fSmrg ir_rvalue* 20801e04c3fSmrg pack_uvec2_to_uint(ir_rvalue *uvec2_rval) 20901e04c3fSmrg { 21001e04c3fSmrg assert(uvec2_rval->type == glsl_type::uvec2_type); 21101e04c3fSmrg 21201e04c3fSmrg /* uvec2 u = UVEC2_RVAL; */ 21301e04c3fSmrg ir_variable *u = factory.make_temp(glsl_type::uvec2_type, 21401e04c3fSmrg "tmp_pack_uvec2_to_uint"); 21501e04c3fSmrg factory.emit(assign(u, uvec2_rval)); 21601e04c3fSmrg 21701e04c3fSmrg if (op_mask & LOWER_PACK_USE_BFI) { 21801e04c3fSmrg return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)), 21901e04c3fSmrg swizzle_y(u), 22001e04c3fSmrg constant(16u), 22101e04c3fSmrg constant(16u)); 22201e04c3fSmrg } 22301e04c3fSmrg 22401e04c3fSmrg /* return (u.y << 16) | (u.x & 0xffff); */ 22501e04c3fSmrg return bit_or(lshift(swizzle_y(u), constant(16u)), 22601e04c3fSmrg bit_and(swizzle_x(u), constant(0xffffu))); 22701e04c3fSmrg } 22801e04c3fSmrg 22901e04c3fSmrg /** 23001e04c3fSmrg * \brief Pack four uint8's into a single uint32. 23101e04c3fSmrg * 23201e04c3fSmrg * Interpret the given uvec4 as a uint32 4-typle. Pack the 4-tuple into a 23301e04c3fSmrg * uint32 where the least significant bits specify the first element of the 23401e04c3fSmrg * 4-tuple. Return the uint32. 
23501e04c3fSmrg */ 23601e04c3fSmrg ir_rvalue* 23701e04c3fSmrg pack_uvec4_to_uint(ir_rvalue *uvec4_rval) 23801e04c3fSmrg { 23901e04c3fSmrg assert(uvec4_rval->type == glsl_type::uvec4_type); 24001e04c3fSmrg 24101e04c3fSmrg ir_variable *u = factory.make_temp(glsl_type::uvec4_type, 24201e04c3fSmrg "tmp_pack_uvec4_to_uint"); 24301e04c3fSmrg 24401e04c3fSmrg if (op_mask & LOWER_PACK_USE_BFI) { 24501e04c3fSmrg /* uvec4 u = UVEC4_RVAL; */ 24601e04c3fSmrg factory.emit(assign(u, uvec4_rval)); 24701e04c3fSmrg 24801e04c3fSmrg return bitfield_insert(bitfield_insert( 24901e04c3fSmrg bitfield_insert( 25001e04c3fSmrg bit_and(swizzle_x(u), constant(0xffu)), 25101e04c3fSmrg swizzle_y(u), constant(8u), constant(8u)), 25201e04c3fSmrg swizzle_z(u), constant(16u), constant(8u)), 25301e04c3fSmrg swizzle_w(u), constant(24u), constant(8u)); 25401e04c3fSmrg } 25501e04c3fSmrg 25601e04c3fSmrg /* uvec4 u = UVEC4_RVAL & 0xff */ 25701e04c3fSmrg factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu)))); 25801e04c3fSmrg 25901e04c3fSmrg /* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */ 26001e04c3fSmrg return bit_or(bit_or(lshift(swizzle_w(u), constant(24u)), 26101e04c3fSmrg lshift(swizzle_z(u), constant(16u))), 26201e04c3fSmrg bit_or(lshift(swizzle_y(u), constant(8u)), 26301e04c3fSmrg swizzle_x(u))); 26401e04c3fSmrg } 26501e04c3fSmrg 26601e04c3fSmrg /** 26701e04c3fSmrg * \brief Unpack a uint32 into two uint16's. 26801e04c3fSmrg * 26901e04c3fSmrg * Interpret the given uint32 as a uint16 pair where the uint32's least 27001e04c3fSmrg * significant bits specify the pair's first element. Return the uint16 27101e04c3fSmrg * pair as a uvec2. 
27201e04c3fSmrg */ 27301e04c3fSmrg ir_rvalue* 27401e04c3fSmrg unpack_uint_to_uvec2(ir_rvalue *uint_rval) 27501e04c3fSmrg { 27601e04c3fSmrg assert(uint_rval->type == glsl_type::uint_type); 27701e04c3fSmrg 27801e04c3fSmrg /* uint u = UINT_RVAL; */ 27901e04c3fSmrg ir_variable *u = factory.make_temp(glsl_type::uint_type, 28001e04c3fSmrg "tmp_unpack_uint_to_uvec2_u"); 28101e04c3fSmrg factory.emit(assign(u, uint_rval)); 28201e04c3fSmrg 28301e04c3fSmrg /* uvec2 u2; */ 28401e04c3fSmrg ir_variable *u2 = factory.make_temp(glsl_type::uvec2_type, 28501e04c3fSmrg "tmp_unpack_uint_to_uvec2_u2"); 28601e04c3fSmrg 28701e04c3fSmrg /* u2.x = u & 0xffffu; */ 28801e04c3fSmrg factory.emit(assign(u2, bit_and(u, constant(0xffffu)), WRITEMASK_X)); 28901e04c3fSmrg 29001e04c3fSmrg /* u2.y = u >> 16u; */ 29101e04c3fSmrg factory.emit(assign(u2, rshift(u, constant(16u)), WRITEMASK_Y)); 29201e04c3fSmrg 29301e04c3fSmrg return deref(u2).val; 29401e04c3fSmrg } 29501e04c3fSmrg 29601e04c3fSmrg /** 29701e04c3fSmrg * \brief Unpack a uint32 into two int16's. 29801e04c3fSmrg * 29901e04c3fSmrg * Specifically each 16-bit value is sign-extended to the full width of an 30001e04c3fSmrg * int32 on return. 
30101e04c3fSmrg */ 30201e04c3fSmrg ir_rvalue * 30301e04c3fSmrg unpack_uint_to_ivec2(ir_rvalue *uint_rval) 30401e04c3fSmrg { 30501e04c3fSmrg assert(uint_rval->type == glsl_type::uint_type); 30601e04c3fSmrg 30701e04c3fSmrg if (!(op_mask & LOWER_PACK_USE_BFE)) { 30801e04c3fSmrg return rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)), 30901e04c3fSmrg constant(16u)), 31001e04c3fSmrg constant(16u)); 31101e04c3fSmrg } 31201e04c3fSmrg 31301e04c3fSmrg ir_variable *i = factory.make_temp(glsl_type::int_type, 31401e04c3fSmrg "tmp_unpack_uint_to_ivec2_i"); 31501e04c3fSmrg factory.emit(assign(i, u2i(uint_rval))); 31601e04c3fSmrg 31701e04c3fSmrg /* ivec2 i2; */ 31801e04c3fSmrg ir_variable *i2 = factory.make_temp(glsl_type::ivec2_type, 31901e04c3fSmrg "tmp_unpack_uint_to_ivec2_i2"); 32001e04c3fSmrg 32101e04c3fSmrg factory.emit(assign(i2, bitfield_extract(i, constant(0), constant(16)), 32201e04c3fSmrg WRITEMASK_X)); 32301e04c3fSmrg factory.emit(assign(i2, bitfield_extract(i, constant(16), constant(16)), 32401e04c3fSmrg WRITEMASK_Y)); 32501e04c3fSmrg 32601e04c3fSmrg return deref(i2).val; 32701e04c3fSmrg } 32801e04c3fSmrg 32901e04c3fSmrg /** 33001e04c3fSmrg * \brief Unpack a uint32 into four uint8's. 33101e04c3fSmrg * 33201e04c3fSmrg * Interpret the given uint32 as a uint8 4-tuple where the uint32's least 33301e04c3fSmrg * significant bits specify the 4-tuple's first element. Return the uint8 33401e04c3fSmrg * 4-tuple as a uvec4. 
33501e04c3fSmrg */ 33601e04c3fSmrg ir_rvalue* 33701e04c3fSmrg unpack_uint_to_uvec4(ir_rvalue *uint_rval) 33801e04c3fSmrg { 33901e04c3fSmrg assert(uint_rval->type == glsl_type::uint_type); 34001e04c3fSmrg 34101e04c3fSmrg /* uint u = UINT_RVAL; */ 34201e04c3fSmrg ir_variable *u = factory.make_temp(glsl_type::uint_type, 34301e04c3fSmrg "tmp_unpack_uint_to_uvec4_u"); 34401e04c3fSmrg factory.emit(assign(u, uint_rval)); 34501e04c3fSmrg 34601e04c3fSmrg /* uvec4 u4; */ 34701e04c3fSmrg ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type, 34801e04c3fSmrg "tmp_unpack_uint_to_uvec4_u4"); 34901e04c3fSmrg 35001e04c3fSmrg /* u4.x = u & 0xffu; */ 35101e04c3fSmrg factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X)); 35201e04c3fSmrg 35301e04c3fSmrg if (op_mask & LOWER_PACK_USE_BFE) { 35401e04c3fSmrg /* u4.y = bitfield_extract(u, 8, 8); */ 35501e04c3fSmrg factory.emit(assign(u4, bitfield_extract(u, constant(8u), constant(8u)), 35601e04c3fSmrg WRITEMASK_Y)); 35701e04c3fSmrg 35801e04c3fSmrg /* u4.z = bitfield_extract(u, 16, 8); */ 35901e04c3fSmrg factory.emit(assign(u4, bitfield_extract(u, constant(16u), constant(8u)), 36001e04c3fSmrg WRITEMASK_Z)); 36101e04c3fSmrg } else { 36201e04c3fSmrg /* u4.y = (u >> 8u) & 0xffu; */ 36301e04c3fSmrg factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), 36401e04c3fSmrg constant(0xffu)), WRITEMASK_Y)); 36501e04c3fSmrg 36601e04c3fSmrg /* u4.z = (u >> 16u) & 0xffu; */ 36701e04c3fSmrg factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), 36801e04c3fSmrg constant(0xffu)), WRITEMASK_Z)); 36901e04c3fSmrg } 37001e04c3fSmrg 37101e04c3fSmrg /* u4.w = (u >> 24u) */ 37201e04c3fSmrg factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W)); 37301e04c3fSmrg 37401e04c3fSmrg return deref(u4).val; 37501e04c3fSmrg } 37601e04c3fSmrg 37701e04c3fSmrg /** 37801e04c3fSmrg * \brief Unpack a uint32 into four int8's. 
37901e04c3fSmrg * 38001e04c3fSmrg * Specifically each 8-bit value is sign-extended to the full width of an 38101e04c3fSmrg * int32 on return. 38201e04c3fSmrg */ 38301e04c3fSmrg ir_rvalue * 38401e04c3fSmrg unpack_uint_to_ivec4(ir_rvalue *uint_rval) 38501e04c3fSmrg { 38601e04c3fSmrg assert(uint_rval->type == glsl_type::uint_type); 38701e04c3fSmrg 38801e04c3fSmrg if (!(op_mask & LOWER_PACK_USE_BFE)) { 38901e04c3fSmrg return rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), 39001e04c3fSmrg constant(24u)), 39101e04c3fSmrg constant(24u)); 39201e04c3fSmrg } 39301e04c3fSmrg 39401e04c3fSmrg ir_variable *i = factory.make_temp(glsl_type::int_type, 39501e04c3fSmrg "tmp_unpack_uint_to_ivec4_i"); 39601e04c3fSmrg factory.emit(assign(i, u2i(uint_rval))); 39701e04c3fSmrg 39801e04c3fSmrg /* ivec4 i4; */ 39901e04c3fSmrg ir_variable *i4 = factory.make_temp(glsl_type::ivec4_type, 40001e04c3fSmrg "tmp_unpack_uint_to_ivec4_i4"); 40101e04c3fSmrg 40201e04c3fSmrg factory.emit(assign(i4, bitfield_extract(i, constant(0), constant(8)), 40301e04c3fSmrg WRITEMASK_X)); 40401e04c3fSmrg factory.emit(assign(i4, bitfield_extract(i, constant(8), constant(8)), 40501e04c3fSmrg WRITEMASK_Y)); 40601e04c3fSmrg factory.emit(assign(i4, bitfield_extract(i, constant(16), constant(8)), 40701e04c3fSmrg WRITEMASK_Z)); 40801e04c3fSmrg factory.emit(assign(i4, bitfield_extract(i, constant(24), constant(8)), 40901e04c3fSmrg WRITEMASK_W)); 41001e04c3fSmrg 41101e04c3fSmrg return deref(i4).val; 41201e04c3fSmrg } 41301e04c3fSmrg 41401e04c3fSmrg /** 41501e04c3fSmrg * \brief Lower a packSnorm2x16 expression. 
41601e04c3fSmrg * 41701e04c3fSmrg * \param vec2_rval is packSnorm2x16's input 41801e04c3fSmrg * \return packSnorm2x16's output as a uint rvalue 41901e04c3fSmrg */ 42001e04c3fSmrg ir_rvalue* 42101e04c3fSmrg lower_pack_snorm_2x16(ir_rvalue *vec2_rval) 42201e04c3fSmrg { 42301e04c3fSmrg /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: 42401e04c3fSmrg * 42501e04c3fSmrg * highp uint packSnorm2x16(vec2 v) 42601e04c3fSmrg * -------------------------------- 42701e04c3fSmrg * First, converts each component of the normalized floating-point value 42801e04c3fSmrg * v into 16-bit integer values. Then, the results are packed into the 42901e04c3fSmrg * returned 32-bit unsigned integer. 43001e04c3fSmrg * 43101e04c3fSmrg * The conversion for component c of v to fixed point is done as 43201e04c3fSmrg * follows: 43301e04c3fSmrg * 43401e04c3fSmrg * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) 43501e04c3fSmrg * 43601e04c3fSmrg * The first component of the vector will be written to the least 43701e04c3fSmrg * significant bits of the output; the last component will be written to 43801e04c3fSmrg * the most significant bits. 43901e04c3fSmrg * 44001e04c3fSmrg * This function generates IR that approximates the following pseudo-GLSL: 44101e04c3fSmrg * 44201e04c3fSmrg * return pack_uvec2_to_uint( 44301e04c3fSmrg * uvec2(ivec2( 44401e04c3fSmrg * round(clamp(VEC2_RVALUE, -1.0f, 1.0f) * 32767.0f)))); 44501e04c3fSmrg * 44601e04c3fSmrg * It is necessary to first convert the vec2 to ivec2 rather than directly 44701e04c3fSmrg * converting vec2 to uvec2 because the latter conversion is undefined. 44801e04c3fSmrg * From page 56 (62 of pdf) of the GLSL ES 3.00 spec: "It is undefined to 44901e04c3fSmrg * convert a negative floating point value to an uint". 
45001e04c3fSmrg */ 45101e04c3fSmrg assert(vec2_rval->type == glsl_type::vec2_type); 45201e04c3fSmrg 45301e04c3fSmrg ir_rvalue *result = pack_uvec2_to_uint( 45401e04c3fSmrg i2u(f2i(round_even(mul(clamp(vec2_rval, 45501e04c3fSmrg constant(-1.0f), 45601e04c3fSmrg constant(1.0f)), 45701e04c3fSmrg constant(32767.0f)))))); 45801e04c3fSmrg 45901e04c3fSmrg assert(result->type == glsl_type::uint_type); 46001e04c3fSmrg return result; 46101e04c3fSmrg } 46201e04c3fSmrg 46301e04c3fSmrg /** 46401e04c3fSmrg * \brief Lower a packSnorm4x8 expression. 46501e04c3fSmrg * 46601e04c3fSmrg * \param vec4_rval is packSnorm4x8's input 46701e04c3fSmrg * \return packSnorm4x8's output as a uint rvalue 46801e04c3fSmrg */ 46901e04c3fSmrg ir_rvalue* 47001e04c3fSmrg lower_pack_snorm_4x8(ir_rvalue *vec4_rval) 47101e04c3fSmrg { 47201e04c3fSmrg /* From page 137 (143 of pdf) of the GLSL 4.30 spec: 47301e04c3fSmrg * 47401e04c3fSmrg * highp uint packSnorm4x8(vec4 v) 47501e04c3fSmrg * ------------------------------- 47601e04c3fSmrg * First, converts each component of the normalized floating-point value 47701e04c3fSmrg * v into 8-bit integer values. Then, the results are packed into the 47801e04c3fSmrg * returned 32-bit unsigned integer. 47901e04c3fSmrg * 48001e04c3fSmrg * The conversion for component c of v to fixed point is done as 48101e04c3fSmrg * follows: 48201e04c3fSmrg * 48301e04c3fSmrg * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) 48401e04c3fSmrg * 48501e04c3fSmrg * The first component of the vector will be written to the least 48601e04c3fSmrg * significant bits of the output; the last component will be written to 48701e04c3fSmrg * the most significant bits. 
48801e04c3fSmrg * 48901e04c3fSmrg * This function generates IR that approximates the following pseudo-GLSL: 49001e04c3fSmrg * 49101e04c3fSmrg * return pack_uvec4_to_uint( 49201e04c3fSmrg * uvec4(ivec4( 49301e04c3fSmrg * round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f)))); 49401e04c3fSmrg * 49501e04c3fSmrg * It is necessary to first convert the vec4 to ivec4 rather than directly 49601e04c3fSmrg * converting vec4 to uvec4 because the latter conversion is undefined. 49701e04c3fSmrg * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined to 49801e04c3fSmrg * convert a negative floating point value to an uint". 49901e04c3fSmrg */ 50001e04c3fSmrg assert(vec4_rval->type == glsl_type::vec4_type); 50101e04c3fSmrg 50201e04c3fSmrg ir_rvalue *result = pack_uvec4_to_uint( 50301e04c3fSmrg i2u(f2i(round_even(mul(clamp(vec4_rval, 50401e04c3fSmrg constant(-1.0f), 50501e04c3fSmrg constant(1.0f)), 50601e04c3fSmrg constant(127.0f)))))); 50701e04c3fSmrg 50801e04c3fSmrg assert(result->type == glsl_type::uint_type); 50901e04c3fSmrg return result; 51001e04c3fSmrg } 51101e04c3fSmrg 51201e04c3fSmrg /** 51301e04c3fSmrg * \brief Lower an unpackSnorm2x16 expression. 51401e04c3fSmrg * 51501e04c3fSmrg * \param uint_rval is unpackSnorm2x16's input 51601e04c3fSmrg * \return unpackSnorm2x16's output as a vec2 rvalue 51701e04c3fSmrg */ 51801e04c3fSmrg ir_rvalue* 51901e04c3fSmrg lower_unpack_snorm_2x16(ir_rvalue *uint_rval) 52001e04c3fSmrg { 52101e04c3fSmrg /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: 52201e04c3fSmrg * 52301e04c3fSmrg * highp vec2 unpackSnorm2x16 (highp uint p) 52401e04c3fSmrg * ----------------------------------------- 52501e04c3fSmrg * First, unpacks a single 32-bit unsigned integer p into a pair of 52601e04c3fSmrg * 16-bit unsigned integers. Then, each component is converted to 52701e04c3fSmrg * a normalized floating-point value to generate the returned 52801e04c3fSmrg * two-component vector. 
52901e04c3fSmrg * 53001e04c3fSmrg * The conversion for unpacked fixed-point value f to floating point is 53101e04c3fSmrg * done as follows: 53201e04c3fSmrg * 53301e04c3fSmrg * unpackSnorm2x16: clamp(f / 32767.0, -1,+1) 53401e04c3fSmrg * 53501e04c3fSmrg * The first component of the returned vector will be extracted from the 53601e04c3fSmrg * least significant bits of the input; the last component will be 53701e04c3fSmrg * extracted from the most significant bits. 53801e04c3fSmrg * 53901e04c3fSmrg * This function generates IR that approximates the following pseudo-GLSL: 54001e04c3fSmrg * 54101e04c3fSmrg * return clamp( 54201e04c3fSmrg * ((ivec2(unpack_uint_to_uvec2(UINT_RVALUE)) << 16) >> 16) / 32767.0f, 54301e04c3fSmrg * -1.0f, 1.0f); 54401e04c3fSmrg * 54501e04c3fSmrg * The above IR may appear unnecessarily complex, but the intermediate 54601e04c3fSmrg * conversion to ivec2 and the bit shifts are necessary to correctly unpack 54701e04c3fSmrg * negative floats. 54801e04c3fSmrg * 54901e04c3fSmrg * To see why, consider packing and then unpacking vec2(-1.0, 0.0). 55001e04c3fSmrg * packSnorm2x16 encodes -1.0 as the int16 0xffff. During unpacking, we 55101e04c3fSmrg * place that int16 into an int32, which results in the *positive* integer 55201e04c3fSmrg * 0x0000ffff. The int16's sign bit becomes, in the int32, the rather 55301e04c3fSmrg * unimportant bit 16. We must now extend the int16's sign bit into bits 55401e04c3fSmrg * 17-32, which is accomplished by left-shifting then right-shifting. 
55501e04c3fSmrg */ 55601e04c3fSmrg 55701e04c3fSmrg assert(uint_rval->type == glsl_type::uint_type); 55801e04c3fSmrg 55901e04c3fSmrg ir_rvalue *result = 56001e04c3fSmrg clamp(div(i2f(unpack_uint_to_ivec2(uint_rval)), 56101e04c3fSmrg constant(32767.0f)), 56201e04c3fSmrg constant(-1.0f), 56301e04c3fSmrg constant(1.0f)); 56401e04c3fSmrg 56501e04c3fSmrg assert(result->type == glsl_type::vec2_type); 56601e04c3fSmrg return result; 56701e04c3fSmrg } 56801e04c3fSmrg 56901e04c3fSmrg /** 57001e04c3fSmrg * \brief Lower an unpackSnorm4x8 expression. 57101e04c3fSmrg * 57201e04c3fSmrg * \param uint_rval is unpackSnorm4x8's input 57301e04c3fSmrg * \return unpackSnorm4x8's output as a vec4 rvalue 57401e04c3fSmrg */ 57501e04c3fSmrg ir_rvalue* 57601e04c3fSmrg lower_unpack_snorm_4x8(ir_rvalue *uint_rval) 57701e04c3fSmrg { 57801e04c3fSmrg /* From page 137 (143 of pdf) of the GLSL 4.30 spec: 57901e04c3fSmrg * 58001e04c3fSmrg * highp vec4 unpackSnorm4x8 (highp uint p) 58101e04c3fSmrg * ---------------------------------------- 58201e04c3fSmrg * First, unpacks a single 32-bit unsigned integer p into four 58301e04c3fSmrg * 8-bit unsigned integers. Then, each component is converted to 58401e04c3fSmrg * a normalized floating-point value to generate the returned 58501e04c3fSmrg * four-component vector. 58601e04c3fSmrg * 58701e04c3fSmrg * The conversion for unpacked fixed-point value f to floating point is 58801e04c3fSmrg * done as follows: 58901e04c3fSmrg * 59001e04c3fSmrg * unpackSnorm4x8: clamp(f / 127.0, -1, +1) 59101e04c3fSmrg * 59201e04c3fSmrg * The first component of the returned vector will be extracted from the 59301e04c3fSmrg * least significant bits of the input; the last component will be 59401e04c3fSmrg * extracted from the most significant bits. 
59501e04c3fSmrg * 59601e04c3fSmrg * This function generates IR that approximates the following pseudo-GLSL: 59701e04c3fSmrg * 59801e04c3fSmrg * return clamp( 59901e04c3fSmrg * ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) / 127.0f, 60001e04c3fSmrg * -1.0f, 1.0f); 60101e04c3fSmrg * 60201e04c3fSmrg * The above IR may appear unnecessarily complex, but the intermediate 60301e04c3fSmrg * conversion to ivec4 and the bit shifts are necessary to correctly unpack 60401e04c3fSmrg * negative floats. 60501e04c3fSmrg * 60601e04c3fSmrg * To see why, consider packing and then unpacking vec4(-1.0, 0.0, 0.0, 60701e04c3fSmrg * 0.0). packSnorm4x8 encodes -1.0 as the int8 0xff. During unpacking, we 60801e04c3fSmrg * place that int8 into an int32, which results in the *positive* integer 60901e04c3fSmrg * 0x000000ff. The int8's sign bit becomes, in the int32, the rather 61001e04c3fSmrg * unimportant bit 8. We must now extend the int8's sign bit into bits 61101e04c3fSmrg * 9-32, which is accomplished by left-shifting then right-shifting. 61201e04c3fSmrg */ 61301e04c3fSmrg 61401e04c3fSmrg assert(uint_rval->type == glsl_type::uint_type); 61501e04c3fSmrg 61601e04c3fSmrg ir_rvalue *result = 61701e04c3fSmrg clamp(div(i2f(unpack_uint_to_ivec4(uint_rval)), 61801e04c3fSmrg constant(127.0f)), 61901e04c3fSmrg constant(-1.0f), 62001e04c3fSmrg constant(1.0f)); 62101e04c3fSmrg 62201e04c3fSmrg assert(result->type == glsl_type::vec4_type); 62301e04c3fSmrg return result; 62401e04c3fSmrg } 62501e04c3fSmrg 62601e04c3fSmrg /** 62701e04c3fSmrg * \brief Lower a packUnorm2x16 expression. 
62801e04c3fSmrg * 62901e04c3fSmrg * \param vec2_rval is packUnorm2x16's input 63001e04c3fSmrg * \return packUnorm2x16's output as a uint rvalue 63101e04c3fSmrg */ 63201e04c3fSmrg ir_rvalue* 63301e04c3fSmrg lower_pack_unorm_2x16(ir_rvalue *vec2_rval) 63401e04c3fSmrg { 63501e04c3fSmrg /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: 63601e04c3fSmrg * 63701e04c3fSmrg * highp uint packUnorm2x16 (vec2 v) 63801e04c3fSmrg * --------------------------------- 63901e04c3fSmrg * First, converts each component of the normalized floating-point value 64001e04c3fSmrg * v into 16-bit integer values. Then, the results are packed into the 64101e04c3fSmrg * returned 32-bit unsigned integer. 64201e04c3fSmrg * 64301e04c3fSmrg * The conversion for component c of v to fixed point is done as 64401e04c3fSmrg * follows: 64501e04c3fSmrg * 64601e04c3fSmrg * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) 64701e04c3fSmrg * 64801e04c3fSmrg * The first component of the vector will be written to the least 64901e04c3fSmrg * significant bits of the output; the last component will be written to 65001e04c3fSmrg * the most significant bits. 65101e04c3fSmrg * 65201e04c3fSmrg * This function generates IR that approximates the following pseudo-GLSL: 65301e04c3fSmrg * 65401e04c3fSmrg * return pack_uvec2_to_uint(uvec2( 65501e04c3fSmrg * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 65535.0f))); 65601e04c3fSmrg * 65701e04c3fSmrg * Here it is safe to directly convert the vec2 to uvec2 because the vec2 65801e04c3fSmrg * has been clamped to a non-negative range. 
65901e04c3fSmrg */ 66001e04c3fSmrg 66101e04c3fSmrg assert(vec2_rval->type == glsl_type::vec2_type); 66201e04c3fSmrg 66301e04c3fSmrg ir_rvalue *result = pack_uvec2_to_uint( 66401e04c3fSmrg f2u(round_even(mul(saturate(vec2_rval), constant(65535.0f))))); 66501e04c3fSmrg 66601e04c3fSmrg assert(result->type == glsl_type::uint_type); 66701e04c3fSmrg return result; 66801e04c3fSmrg } 66901e04c3fSmrg 67001e04c3fSmrg /** 67101e04c3fSmrg * \brief Lower a packUnorm4x8 expression. 67201e04c3fSmrg * 67301e04c3fSmrg * \param vec4_rval is packUnorm4x8's input 67401e04c3fSmrg * \return packUnorm4x8's output as a uint rvalue 67501e04c3fSmrg */ 67601e04c3fSmrg ir_rvalue* 67701e04c3fSmrg lower_pack_unorm_4x8(ir_rvalue *vec4_rval) 67801e04c3fSmrg { 67901e04c3fSmrg /* From page 137 (143 of pdf) of the GLSL 4.30 spec: 68001e04c3fSmrg * 68101e04c3fSmrg * highp uint packUnorm4x8 (vec4 v) 68201e04c3fSmrg * -------------------------------- 68301e04c3fSmrg * First, converts each component of the normalized floating-point value 68401e04c3fSmrg * v into 8-bit integer values. Then, the results are packed into the 68501e04c3fSmrg * returned 32-bit unsigned integer. 68601e04c3fSmrg * 68701e04c3fSmrg * The conversion for component c of v to fixed point is done as 68801e04c3fSmrg * follows: 68901e04c3fSmrg * 69001e04c3fSmrg * packUnorm4x8: round(clamp(c, 0, +1) * 255.0) 69101e04c3fSmrg * 69201e04c3fSmrg * The first component of the vector will be written to the least 69301e04c3fSmrg * significant bits of the output; the last component will be written to 69401e04c3fSmrg * the most significant bits. 
69501e04c3fSmrg * 69601e04c3fSmrg * This function generates IR that approximates the following pseudo-GLSL: 69701e04c3fSmrg * 69801e04c3fSmrg * return pack_uvec4_to_uint(uvec4( 69901e04c3fSmrg * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 255.0f))); 70001e04c3fSmrg * 70101e04c3fSmrg * Here it is safe to directly convert the vec4 to uvec4 because the vec4 70201e04c3fSmrg * has been clamped to a non-negative range. 70301e04c3fSmrg */ 70401e04c3fSmrg 70501e04c3fSmrg assert(vec4_rval->type == glsl_type::vec4_type); 70601e04c3fSmrg 70701e04c3fSmrg ir_rvalue *result = pack_uvec4_to_uint( 70801e04c3fSmrg f2u(round_even(mul(saturate(vec4_rval), constant(255.0f))))); 70901e04c3fSmrg 71001e04c3fSmrg assert(result->type == glsl_type::uint_type); 71101e04c3fSmrg return result; 71201e04c3fSmrg } 71301e04c3fSmrg 71401e04c3fSmrg /** 71501e04c3fSmrg * \brief Lower an unpackUnorm2x16 expression. 71601e04c3fSmrg * 71701e04c3fSmrg * \param uint_rval is unpackUnorm2x16's input 71801e04c3fSmrg * \return unpackUnorm2x16's output as a vec2 rvalue 71901e04c3fSmrg */ 72001e04c3fSmrg ir_rvalue* 72101e04c3fSmrg lower_unpack_unorm_2x16(ir_rvalue *uint_rval) 72201e04c3fSmrg { 72301e04c3fSmrg /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: 72401e04c3fSmrg * 72501e04c3fSmrg * highp vec2 unpackUnorm2x16 (highp uint p) 72601e04c3fSmrg * ----------------------------------------- 72701e04c3fSmrg * First, unpacks a single 32-bit unsigned integer p into a pair of 72801e04c3fSmrg * 16-bit unsigned integers. Then, each component is converted to 72901e04c3fSmrg * a normalized floating-point value to generate the returned 73001e04c3fSmrg * two-component vector. 
73101e04c3fSmrg * 73201e04c3fSmrg * The conversion for unpacked fixed-point value f to floating point is 73301e04c3fSmrg * done as follows: 73401e04c3fSmrg * 73501e04c3fSmrg * unpackUnorm2x16: f / 65535.0 73601e04c3fSmrg * 73701e04c3fSmrg * The first component of the returned vector will be extracted from the 73801e04c3fSmrg * least significant bits of the input; the last component will be 73901e04c3fSmrg * extracted from the most significant bits. 74001e04c3fSmrg * 74101e04c3fSmrg * This function generates IR that approximates the following pseudo-GLSL: 74201e04c3fSmrg * 74301e04c3fSmrg * return vec2(unpack_uint_to_uvec2(UINT_RVALUE)) / 65535.0; 74401e04c3fSmrg */ 74501e04c3fSmrg 74601e04c3fSmrg assert(uint_rval->type == glsl_type::uint_type); 74701e04c3fSmrg 74801e04c3fSmrg ir_rvalue *result = div(u2f(unpack_uint_to_uvec2(uint_rval)), 74901e04c3fSmrg constant(65535.0f)); 75001e04c3fSmrg 75101e04c3fSmrg assert(result->type == glsl_type::vec2_type); 75201e04c3fSmrg return result; 75301e04c3fSmrg } 75401e04c3fSmrg 75501e04c3fSmrg /** 75601e04c3fSmrg * \brief Lower an unpackUnorm4x8 expression. 75701e04c3fSmrg * 75801e04c3fSmrg * \param uint_rval is unpackUnorm4x8's input 75901e04c3fSmrg * \return unpackUnorm4x8's output as a vec4 rvalue 76001e04c3fSmrg */ 76101e04c3fSmrg ir_rvalue* 76201e04c3fSmrg lower_unpack_unorm_4x8(ir_rvalue *uint_rval) 76301e04c3fSmrg { 76401e04c3fSmrg /* From page 137 (143 of pdf) of the GLSL 4.30 spec: 76501e04c3fSmrg * 76601e04c3fSmrg * highp vec4 unpackUnorm4x8 (highp uint p) 76701e04c3fSmrg * ---------------------------------------- 76801e04c3fSmrg * First, unpacks a single 32-bit unsigned integer p into four 76901e04c3fSmrg * 8-bit unsigned integers. Then, each component is converted to 77001e04c3fSmrg * a normalized floating-point value to generate the returned 77101e04c3fSmrg * two-component vector. 
77201e04c3fSmrg * 77301e04c3fSmrg * The conversion for unpacked fixed-point value f to floating point is 77401e04c3fSmrg * done as follows: 77501e04c3fSmrg * 77601e04c3fSmrg * unpackUnorm4x8: f / 255.0 77701e04c3fSmrg * 77801e04c3fSmrg * The first component of the returned vector will be extracted from the 77901e04c3fSmrg * least significant bits of the input; the last component will be 78001e04c3fSmrg * extracted from the most significant bits. 78101e04c3fSmrg * 78201e04c3fSmrg * This function generates IR that approximates the following pseudo-GLSL: 78301e04c3fSmrg * 78401e04c3fSmrg * return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0; 78501e04c3fSmrg */ 78601e04c3fSmrg 78701e04c3fSmrg assert(uint_rval->type == glsl_type::uint_type); 78801e04c3fSmrg 78901e04c3fSmrg ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)), 79001e04c3fSmrg constant(255.0f)); 79101e04c3fSmrg 79201e04c3fSmrg assert(result->type == glsl_type::vec4_type); 79301e04c3fSmrg return result; 79401e04c3fSmrg } 79501e04c3fSmrg 79601e04c3fSmrg /** 79701e04c3fSmrg * \brief Lower the component-wise calculation of packHalf2x16. 
    *
    * \param f_rval is one component of packHalf2x16's input
    * \param e_rval is the unshifted exponent bits of f_rval
    * \param m_rval is the unshifted mantissa bits of f_rval
    *
    * \return a uint rvalue that encodes a float16 in its lower 16 bits
    */
   ir_rvalue*
   pack_half_1x16_nosign(ir_rvalue *f_rval,
                         ir_rvalue *e_rval,
                         ir_rvalue *m_rval)
   {
      assert(e_rval->type == glsl_type::uint_type);
      assert(m_rval->type == glsl_type::uint_type);

      /* uint u16; */
      ir_variable *u16 = factory.make_temp(glsl_type::uint_type,
                                           "tmp_pack_half_1x16_u16");

      /* float f = FLOAT_RVAL; */
      ir_variable *f = factory.make_temp(glsl_type::float_type,
                                         "tmp_pack_half_1x16_f");
      factory.emit(assign(f, f_rval));

      /* uint e = E_RVAL; */
      ir_variable *e = factory.make_temp(glsl_type::uint_type,
                                         "tmp_pack_half_1x16_e");
      factory.emit(assign(e, e_rval));

      /* uint m = M_RVAL; */
      ir_variable *m = factory.make_temp(glsl_type::uint_type,
                                         "tmp_pack_half_1x16_m");
      factory.emit(assign(m, m_rval));

      /* Preliminaries
       * -------------
       *
       * For a float16, the bit layout is:
       *
       *   sign:     15
       *   exponent: 10:14
       *   mantissa: 0:9
       *
       * Let f16 be a float16 value. The sign, exponent, and mantissa
       * determine its value thus:
       *
       *   if e16 = 0 and m16 = 0, then zero:       (-1)^s16 * 0                               (1)
       *   if e16 = 0 and m16 != 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10)     (2)
       *   if 0 < e16 < 31, then normal:            (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3)
       *   if e16 = 31 and m16 = 0, then infinite:  (-1)^s16 * inf                             (4)
       *   if e16 = 31 and m16 != 0, then NaN                                                  (5)
       *
       * where 0 <= m16 < 2^10.
       *
       * For a float32, the bit layout is:
       *
       *   sign:     31
       *   exponent: 23:30
       *   mantissa: 0:22
       *
       * Let f32 be a float32 value. The sign, exponent, and mantissa
       * determine its value thus:
       *
       *   if e32 = 0 and m32 = 0, then zero:       (-1)^s * 0                                 (10)
       *   if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23)      (11)
       *   if 0 < e32 < 255, then normal:           (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23)  (12)
       *   if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf                               (13)
       *   if e32 = 255 and m32 != 0, then NaN                                                 (14)
       *
       * where 0 <= m32 < 2^23.
       *
       * The minimum and maximum normal float16 values are
       *
       *   min_norm16 = 2^(1 - 15) * (1 + 0 / 2^10) = 2^(-14)   (20)
       *   max_norm16 = 2^(30 - 15) * (1 + 1023 / 2^10)         (21)
       *
       * The step at max_norm16 is
       *
       *   max_step16 = 2^5                                     (22)
       *
       * Observe that the float16 boundary values in equations 20-21 lie in
       * the range of normal float32 values.
       *
       * Rounding Behavior
       * -----------------
       * Not all float32 values can be exactly represented as a float16. We
       * round all such intermediate float32 values to the nearest float16;
       * if the float32 is exactly between two float16 values, we round to
       * the one with an even mantissa. This rounding behavior has several
       * benefits:
       *
       *   - It has no sign bias.
       *
       *   - It reproduces the behavior of real hardware: opcode F32TO16 in
       *     Intel's GPU ISA.
       *
       *   - By reproducing the behavior of the GPU (at least on Intel
       *     hardware), compile-time evaluation of constant packHalf2x16 GLSL
       *     expressions will result in the same value as if the expression
       *     were executed on the GPU.
       *
       * Calculation
       * -----------
       * Our task is to compute s16, e16, m16 given f32. Since this function
       * ignores the sign bit, assume that s32 = s16 = 0. There are several
       * cases to consider.
       */

      factory.emit(

         /* Case 1) f32 is NaN
          *
          * The resultant f16 will also be NaN.
          */

         /* if (e32 == 255 && m32 != 0) { */
         if_tree(logic_and(equal(e, constant(0xffu << 23u)),
                           logic_not(equal(m, constant(0u)))),

            assign(u16, constant(0x7fffu)),

         /* Case 2) f32 lies in the range [0, min_norm16).
          *
          * The resultant float16 will be either zero, subnormal, or normal.
          *
          * Solving
          *
          *   f32 = min_norm16                                  (30)
          *
          * gives
          *
          *   e32 = 113 and m32 = 0                             (31)
          *
          * Therefore this case occurs if and only if
          *
          *   e32 < 113                                         (32)
          */

         /* } else if (e32 < 113) { */
         if_tree(less(e, constant(113u << 23u)),

            /* u16 = uint(round_to_even(abs(f32) * float(1u << 24u))); */
            assign(u16, f2u(round_even(mul(expr(ir_unop_abs, f),
                                           constant((float) (1 << 24)))))),

         /* Case 3) f32 lies in the range
          * [min_norm16, max_norm16 + max_step16).
          *
          * The resultant float16 will be either normal or infinite.
          *
          * Solving
          *
          *   f32 = max_norm16 + max_step16                     (40)
          *       = 2^15 * (1 + 1023 / 2^10) + 2^5              (41)
          *       = 2^16                                        (42)
          *
          * gives
          *
          *   e32 = 143 and m32 = 0                             (43)
          *
          * We already solved the boundary condition f32 = min_norm16 above
          * in equation 31. Therefore this case occurs if and only if
          *
          *   113 <= e32 and e32 < 143
          */

         /* } else if (e32 < 143) { */
         if_tree(less(e, constant(143u << 23u)),

            /* The addition below handles the case where the mantissa rounds
             * up to 1024 and bumps the exponent.
             *
             * u16 = ((e - (112u << 23u)) >> 13u)
             *     + round_to_even(float(m) / (1u << 13u));
             */
            assign(u16, add(rshift(sub(e, constant(112u << 23u)),
                                   constant(13u)),
                            f2u(round_even(
                                  div(u2f(m), constant((float) (1 << 13))))))),

         /* Case 4) f32 lies in the range [max_norm16 + max_step16, inf].
          *
          * The resultant float16 will be infinite.
          *
          * The cases above caught all float32 values in the range
          * [0, max_norm16 + max_step16), so this is the fall-through case.
          */

         /* } else { */

            assign(u16, constant(31u << 10u))))));

         /* } */

      return deref(u16).val;
   }

   /**
    * \brief Lower a packHalf2x16 expression.
    *
    * \param vec2_rval is packHalf2x16's input
    * \return packHalf2x16's output as a uint rvalue
    */
   ir_rvalue*
   lower_pack_half_2x16(ir_rvalue *vec2_rval)
   {
      /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec:
       *
       *    highp uint packHalf2x16 (mediump vec2 v)
       *    ----------------------------------------
       *    Returns an unsigned integer obtained by converting the components
       *    of a two-component floating-point vector to the 16-bit
       *    floating-point representation found in the OpenGL ES
       *    Specification, and then packing these two 16-bit integers into a
       *    32-bit unsigned integer.
       *
       *    The first vector component specifies the 16 least-significant
       *    bits of the result; the second component specifies the 16
       *    most-significant bits.
       */

      assert(vec2_rval->type == glsl_type::vec2_type);

      /* vec2 f = VEC2_RVAL; */
      ir_variable *f = factory.make_temp(glsl_type::vec2_type,
                                         "tmp_pack_half_2x16_f");
      factory.emit(assign(f, vec2_rval));

      /* uvec2 f32 = bitcast_f2u(f); */
      ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type,
                                           "tmp_pack_half_2x16_f32");
      factory.emit(assign(f32, expr(ir_unop_bitcast_f2u, f)));

      /* uvec2 f16; */
      ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type,
                                           "tmp_pack_half_2x16_f16");

      /* Get f32's unshifted exponent bits.
       *
       *    uvec2 e = f32 & 0x7f800000u;
       */
      ir_variable *e = factory.make_temp(glsl_type::uvec2_type,
                                         "tmp_pack_half_2x16_e");
      factory.emit(assign(e, bit_and(f32, constant(0x7f800000u))));

      /* Get f32's unshifted mantissa bits.
       *
       *    uvec2 m = f32 & 0x007fffffu;
       */
      ir_variable *m = factory.make_temp(glsl_type::uvec2_type,
                                         "tmp_pack_half_2x16_m");
      factory.emit(assign(m, bit_and(f32, constant(0x007fffffu))));

      /* Set f16's exponent and mantissa bits, one component at a time.
       *
       *    f16.x = pack_half_1x16_nosign(f.x, e.x, m.x);
       *    f16.y = pack_half_1x16_nosign(f.y, e.y, m.y);
       */
      factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_x(f),
                                                     swizzle_x(e),
                                                     swizzle_x(m)),
                          WRITEMASK_X));
      factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_y(f),
                                                     swizzle_y(e),
                                                     swizzle_y(m)),
                          WRITEMASK_Y));

      /* Set f16's sign bits.
       *
       *    f16 |= (f32 & (1u << 31u)) >> 16u;
       */
      factory.emit(
         assign(f16, bit_or(f16,
                            rshift(bit_and(f32, constant(1u << 31u)),
                                   constant(16u)))));


      /* return (f16.y << 16u) | f16.x; */
      ir_rvalue *result = bit_or(lshift(swizzle_y(f16),
                                        constant(16u)),
                                 swizzle_x(f16));

      assert(result->type == glsl_type::uint_type);
      return result;
   }

   /**
    * \brief Lower the component-wise calculation of unpackHalf2x16.
    *
    * Given a uint that encodes a float16 in its lower 16 bits, this function
    * returns a uint that encodes a float32 with the same value. The sign bit
    * of the float16 is ignored.
    *
    * \param e_rval is the unshifted exponent bits of a float16
    * \param m_rval is the unshifted mantissa bits of a float16
    *
    * \return a uint rvalue that encodes a float32
    */
   ir_rvalue*
   unpack_half_1x16_nosign(ir_rvalue *e_rval, ir_rvalue *m_rval)
   {
      assert(e_rval->type == glsl_type::uint_type);
      assert(m_rval->type == glsl_type::uint_type);

      /* uint u32; */
      ir_variable *u32 = factory.make_temp(glsl_type::uint_type,
                                           "tmp_unpack_half_1x16_u32");

      /* uint e = E_RVAL; */
      ir_variable *e = factory.make_temp(glsl_type::uint_type,
                                         "tmp_unpack_half_1x16_e");
      factory.emit(assign(e, e_rval));

      /* uint m = M_RVAL; */
      ir_variable *m = factory.make_temp(glsl_type::uint_type,
                                         "tmp_unpack_half_1x16_m");
      factory.emit(assign(m, m_rval));

      /* Preliminaries
       * -------------
       *
       * For a float16, the bit layout is:
       *
       *   sign:     15
       *   exponent: 10:14
       *   mantissa: 0:9
       *
       * Let f16 be a float16 value. The sign, exponent, and mantissa
       * determine its value thus:
       *
       *   if e16 = 0 and m16 = 0, then zero:       (-1)^s16 * 0                               (1)
       *   if e16 = 0 and m16 != 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10)     (2)
       *   if 0 < e16 < 31, then normal:            (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3)
       *   if e16 = 31 and m16 = 0, then infinite:  (-1)^s16 * inf                             (4)
       *   if e16 = 31 and m16 != 0, then NaN                                                  (5)
       *
       * where 0 <= m16 < 2^10.
       *
       * For a float32, the bit layout is:
       *
       *   sign:     31
       *   exponent: 23:30
       *   mantissa: 0:22
       *
       * Let f32 be a float32 value. The sign, exponent, and mantissa
       * determine its value thus:
       *
       *   if e32 = 0 and m32 = 0, then zero:       (-1)^s * 0                                 (10)
       *   if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23)      (11)
       *   if 0 < e32 < 255, then normal:           (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23)  (12)
       *   if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf                               (13)
       *   if e32 = 255 and m32 != 0, then NaN                                                 (14)
       *
       * where 0 <= m32 < 2^23.
       *
       * Calculation
       * -----------
       * Our task is to compute s32, e32, m32 given f16. Since this function
       * ignores the sign bit, assume that s32 = s16 = 0. There are several
       * cases to consider.
       */

      factory.emit(

         /* Case 1) f16 is zero or subnormal.
          *
          * The simplest method of calculating f32 in this case is
          *
          *   f32 = f16                           (20)
          *       = 2^(-14) * (m16 / 2^10)        (21)
          *       = m16 / 2^24                    (22)
          */

         /* if (e16 == 0) { */
         if_tree(equal(e, constant(0u)),

            /* u32 = bitcast_f2u(float(m) / float(1 << 24)); */
            assign(u32, expr(ir_unop_bitcast_f2u,
                             div(u2f(m), constant((float)(1 << 24))))),

         /* Case 2) f16 is normal.
          *
          * The equation
          *
          *   f32 = f16                                        (30)
          *   2^(e32 - 127) * (1 + m32 / 2^23) =               (31)
          *     2^(e16 - 15) * (1 + m16 / 2^10)
          *
          * can be decomposed into two
          *
          *   2^(e32 - 127) = 2^(e16 - 15)                     (32)
          *   1 + m32 / 2^23 = 1 + m16 / 2^10                  (33)
          *
          * which solve to
          *
          *   e32 = e16 + 112                                  (34)
          *   m32 = m16 * 2^13                                 (35)
          */

         /* } else if (e16 < 31) { */
         if_tree(less(e, constant(31u << 10u)),

            /* u32 = ((e + (112 << 10)) | m) << 13;
             */
            assign(u32, lshift(bit_or(add(e, constant(112u << 10u)), m),
                               constant(13u))),


         /* Case 3) f16 is infinite. */
         if_tree(equal(m, constant(0u)),

            assign(u32, constant(255u << 23u)),

         /* Case 4) f16 is NaN.
          */
         /* } else { */

            assign(u32, constant(0x7fffffffu))))));

         /* } */

      return deref(u32).val;
   }

   /**
    * \brief Lower an unpackHalf2x16 expression.
    *
    * \param uint_rval is unpackHalf2x16's input
    * \return unpackHalf2x16's output as a vec2 rvalue
    */
   ir_rvalue*
   lower_unpack_half_2x16(ir_rvalue *uint_rval)
   {
      /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec:
       *
       *    mediump vec2 unpackHalf2x16 (highp uint v)
       *    ------------------------------------------
       *    Returns a two-component floating-point vector with components
       *    obtained by unpacking a 32-bit unsigned integer into a pair of
       *    16-bit values, interpreting those values as 16-bit floating-point
       *    numbers according to the OpenGL ES Specification, and converting
       *    them to 32-bit floating-point values.
       *
       *    The first component of the vector is obtained from the
       *    16 least-significant bits of v; the second component is obtained
       *    from the 16 most-significant bits of v.
       */
      assert(uint_rval->type == glsl_type::uint_type);

      /* uint u = RVALUE;
       * uvec2 f16 = uvec2(u.x & 0xffff, u.y >> 16);
       */
      ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type,
                                           "tmp_unpack_half_2x16_f16");
      factory.emit(assign(f16, unpack_uint_to_uvec2(uint_rval)));

      /* uvec2 f32; */
      ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type,
                                           "tmp_unpack_half_2x16_f32");

      /* Get f16's unshifted exponent bits.
       *
       *    uvec2 e = f16 & 0x7c00u;
       */
      ir_variable *e = factory.make_temp(glsl_type::uvec2_type,
                                         "tmp_unpack_half_2x16_e");
      factory.emit(assign(e, bit_and(f16, constant(0x7c00u))));

      /* Get f16's unshifted mantissa bits.
       *
       *    uvec2 m = f16 & 0x03ffu;
       */
      ir_variable *m = factory.make_temp(glsl_type::uvec2_type,
                                         "tmp_unpack_half_2x16_m");
      factory.emit(assign(m, bit_and(f16, constant(0x03ffu))));

      /* Set f32's exponent and mantissa bits, one component at a time.
       *
       *    f32.x = unpack_half_1x16_nosign(e.x, m.x);
       *    f32.y = unpack_half_1x16_nosign(e.y, m.y);
       */
      factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_x(e),
                                                       swizzle_x(m)),
                          WRITEMASK_X));
      factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_y(e),
                                                       swizzle_y(m)),
                          WRITEMASK_Y));

      /* Set f32's sign bit.
       *
       *    f32 |= (f16 & 0x8000u) << 16u;
       */
      factory.emit(assign(f32, bit_or(f32,
                                      lshift(bit_and(f16,
                                                     constant(0x8000u)),
                                             constant(16u)))));

      /* return bitcast_u2f(f32); */
      ir_rvalue *result = expr(ir_unop_bitcast_u2f, f32);
      assert(result->type == glsl_type::vec2_type);
      return result;
   }
};

} // namespace anonymous

/**
 * \brief Lower the builtin packing functions.
 *
 * \param op_mask is a bitmask of `enum lower_packing_builtins_op`.
 */
bool
lower_packing_builtins(exec_list *instructions, int op_mask)
{
   lower_packing_builtins_visitor v(op_mask);
   visit_list_elements(&v, instructions, true);
   return v.get_progress();
}