half_float.c revision 01e04c3f
101e04c3fSmrg/* 201e04c3fSmrg * Mesa 3-D graphics library 301e04c3fSmrg * 401e04c3fSmrg * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. 501e04c3fSmrg * Copyright 2015 Philip Taylor <philip@zaynar.co.uk> 601e04c3fSmrg * Copyright 2018 Advanced Micro Devices, Inc. 701e04c3fSmrg * 801e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 901e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 1001e04c3fSmrg * to deal in the Software without restriction, including without limitation 1101e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 1201e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 1301e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1401e04c3fSmrg * 1501e04c3fSmrg * The above copyright notice and this permission notice shall be included 1601e04c3fSmrg * in all copies or substantial portions of the Software. 1701e04c3fSmrg * 1801e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 1901e04c3fSmrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 2001e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 2101e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 2201e04c3fSmrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 2301e04c3fSmrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 2401e04c3fSmrg * OTHER DEALINGS IN THE SOFTWARE. 2501e04c3fSmrg */ 2601e04c3fSmrg 2701e04c3fSmrg#include <math.h> 2801e04c3fSmrg#include <assert.h> 2901e04c3fSmrg#include "half_float.h" 3001e04c3fSmrg#include "rounding.h" 3101e04c3fSmrg#include "macros.h" 3201e04c3fSmrg 3301e04c3fSmrgtypedef union { float f; int32_t i; uint32_t u; } fi_type; 3401e04c3fSmrg 3501e04c3fSmrg/** 3601e04c3fSmrg * Convert a 4-byte float to a 2-byte half float. 3701e04c3fSmrg * 3801e04c3fSmrg * Not all float32 values can be represented exactly as a float16 value. We 3901e04c3fSmrg * round such intermediate float32 values to the nearest float16. When the 4001e04c3fSmrg * float32 lies exactly between to float16 values, we round to the one with 4101e04c3fSmrg * an even mantissa. 4201e04c3fSmrg * 4301e04c3fSmrg * This rounding behavior has several benefits: 4401e04c3fSmrg * - It has no sign bias. 4501e04c3fSmrg * 4601e04c3fSmrg * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's 4701e04c3fSmrg * GPU ISA. 4801e04c3fSmrg * 4901e04c3fSmrg * - By reproducing the behavior of the GPU (at least on Intel hardware), 5001e04c3fSmrg * compile-time evaluation of constant packHalf2x16 GLSL expressions will 5101e04c3fSmrg * result in the same value as if the expression were executed on the GPU. 5201e04c3fSmrg */ 5301e04c3fSmrguint16_t 5401e04c3fSmrg_mesa_float_to_half(float val) 5501e04c3fSmrg{ 5601e04c3fSmrg const fi_type fi = {val}; 5701e04c3fSmrg const int flt_m = fi.i & 0x7fffff; 5801e04c3fSmrg const int flt_e = (fi.i >> 23) & 0xff; 5901e04c3fSmrg const int flt_s = (fi.i >> 31) & 0x1; 6001e04c3fSmrg int s, e, m = 0; 6101e04c3fSmrg uint16_t result; 6201e04c3fSmrg 6301e04c3fSmrg /* sign bit */ 6401e04c3fSmrg s = flt_s; 6501e04c3fSmrg 6601e04c3fSmrg /* handle special cases */ 6701e04c3fSmrg if ((flt_e == 0) && (flt_m == 0)) { 6801e04c3fSmrg /* zero */ 6901e04c3fSmrg /* m = 0; - already set */ 7001e04c3fSmrg e = 0; 7101e04c3fSmrg } 7201e04c3fSmrg else if ((flt_e == 0) && (flt_m != 0)) { 7301e04c3fSmrg /* denorm -- denorm float maps to 0 half */ 7401e04c3fSmrg /* m = 0; - already set */ 7501e04c3fSmrg e = 0; 7601e04c3fSmrg } 7701e04c3fSmrg else if ((flt_e == 0xff) && (flt_m == 0)) { 7801e04c3fSmrg /* infinity */ 7901e04c3fSmrg /* m = 0; - already set */ 8001e04c3fSmrg e = 31; 8101e04c3fSmrg } 8201e04c3fSmrg else if ((flt_e == 0xff) && (flt_m != 0)) { 8301e04c3fSmrg /* NaN */ 8401e04c3fSmrg m = 1; 8501e04c3fSmrg e = 31; 8601e04c3fSmrg } 8701e04c3fSmrg else { 8801e04c3fSmrg /* regular number */ 8901e04c3fSmrg const int new_exp = flt_e - 127; 9001e04c3fSmrg if (new_exp < -14) { 9101e04c3fSmrg /* The float32 lies in the range (0.0, min_normal16) and is rounded 9201e04c3fSmrg * to a nearby float16 value. The result will be either zero, subnormal, 9301e04c3fSmrg * or normal. 9401e04c3fSmrg */ 9501e04c3fSmrg e = 0; 9601e04c3fSmrg m = _mesa_lroundevenf((1 << 24) * fabsf(fi.f)); 9701e04c3fSmrg } 9801e04c3fSmrg else if (new_exp > 15) { 9901e04c3fSmrg /* map this value to infinity */ 10001e04c3fSmrg /* m = 0; - already set */ 10101e04c3fSmrg e = 31; 10201e04c3fSmrg } 10301e04c3fSmrg else { 10401e04c3fSmrg /* The float32 lies in the range 10501e04c3fSmrg * [min_normal16, max_normal16 + max_step16) 10601e04c3fSmrg * and is rounded to a nearby float16 value. The result will be 10701e04c3fSmrg * either normal or infinite. 10801e04c3fSmrg */ 10901e04c3fSmrg e = new_exp + 15; 11001e04c3fSmrg m = _mesa_lroundevenf(flt_m / (float) (1 << 13)); 11101e04c3fSmrg } 11201e04c3fSmrg } 11301e04c3fSmrg 11401e04c3fSmrg assert(0 <= m && m <= 1024); 11501e04c3fSmrg if (m == 1024) { 11601e04c3fSmrg /* The float32 was rounded upwards into the range of the next exponent, 11701e04c3fSmrg * so bump the exponent. This correctly handles the case where f32 11801e04c3fSmrg * should be rounded up to float16 infinity. 11901e04c3fSmrg */ 12001e04c3fSmrg ++e; 12101e04c3fSmrg m = 0; 12201e04c3fSmrg } 12301e04c3fSmrg 12401e04c3fSmrg result = (s << 15) | (e << 10) | m; 12501e04c3fSmrg return result; 12601e04c3fSmrg} 12701e04c3fSmrg 12801e04c3fSmrg 12901e04c3fSmrg/** 13001e04c3fSmrg * Convert a 2-byte half float to a 4-byte float. 13101e04c3fSmrg * Based on code from: 13201e04c3fSmrg * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html 13301e04c3fSmrg */ 13401e04c3fSmrgfloat 13501e04c3fSmrg_mesa_half_to_float(uint16_t val) 13601e04c3fSmrg{ 13701e04c3fSmrg /* XXX could also use a 64K-entry lookup table */ 13801e04c3fSmrg const int m = val & 0x3ff; 13901e04c3fSmrg const int e = (val >> 10) & 0x1f; 14001e04c3fSmrg const int s = (val >> 15) & 0x1; 14101e04c3fSmrg int flt_m, flt_e, flt_s; 14201e04c3fSmrg fi_type fi; 14301e04c3fSmrg float result; 14401e04c3fSmrg 14501e04c3fSmrg /* sign bit */ 14601e04c3fSmrg flt_s = s; 14701e04c3fSmrg 14801e04c3fSmrg /* handle special cases */ 14901e04c3fSmrg if ((e == 0) && (m == 0)) { 15001e04c3fSmrg /* zero */ 15101e04c3fSmrg flt_m = 0; 15201e04c3fSmrg flt_e = 0; 15301e04c3fSmrg } 15401e04c3fSmrg else if ((e == 0) && (m != 0)) { 15501e04c3fSmrg /* denorm -- denorm half will fit in non-denorm single */ 15601e04c3fSmrg const float half_denorm = 1.0f / 16384.0f; /* 2^-14 */ 15701e04c3fSmrg float mantissa = ((float) (m)) / 1024.0f; 15801e04c3fSmrg float sign = s ? -1.0f : 1.0f; 15901e04c3fSmrg return sign * mantissa * half_denorm; 16001e04c3fSmrg } 16101e04c3fSmrg else if ((e == 31) && (m == 0)) { 16201e04c3fSmrg /* infinity */ 16301e04c3fSmrg flt_e = 0xff; 16401e04c3fSmrg flt_m = 0; 16501e04c3fSmrg } 16601e04c3fSmrg else if ((e == 31) && (m != 0)) { 16701e04c3fSmrg /* NaN */ 16801e04c3fSmrg flt_e = 0xff; 16901e04c3fSmrg flt_m = 1; 17001e04c3fSmrg } 17101e04c3fSmrg else { 17201e04c3fSmrg /* regular */ 17301e04c3fSmrg flt_e = e + 112; 17401e04c3fSmrg flt_m = m << 13; 17501e04c3fSmrg } 17601e04c3fSmrg 17701e04c3fSmrg fi.i = (flt_s << 31) | (flt_e << 23) | flt_m; 17801e04c3fSmrg result = fi.f; 17901e04c3fSmrg return result; 18001e04c3fSmrg} 18101e04c3fSmrg 18201e04c3fSmrg/** 18301e04c3fSmrg * Convert 0.0 to 0x00, 1.0 to 0xff. 18401e04c3fSmrg * Values outside the range [0.0, 1.0] will give undefined results. 18501e04c3fSmrg */ 18601e04c3fSmrguint8_t _mesa_half_to_unorm8(uint16_t val) 18701e04c3fSmrg{ 18801e04c3fSmrg const int m = val & 0x3ff; 18901e04c3fSmrg const int e = (val >> 10) & 0x1f; 19001e04c3fSmrg MAYBE_UNUSED const int s = (val >> 15) & 0x1; 19101e04c3fSmrg 19201e04c3fSmrg /* v = round_to_nearest(1.mmmmmmmmmm * 2^(e-15) * 255) 19301e04c3fSmrg * = round_to_nearest((1.mmmmmmmmmm * 255) * 2^(e-15)) 19401e04c3fSmrg * = round_to_nearest((1mmmmmmmmmm * 255) * 2^(e-25)) 19501e04c3fSmrg * = round_to_zero((1mmmmmmmmmm * 255) * 2^(e-25) + 0.5) 19601e04c3fSmrg * = round_to_zero(((1mmmmmmmmmm * 255) * 2^(e-24) + 1) / 2) 19701e04c3fSmrg * 19801e04c3fSmrg * This happens to give the correct answer for zero/subnormals too 19901e04c3fSmrg */ 20001e04c3fSmrg assert(s == 0 && val <= FP16_ONE); /* check 0 <= this <= 1 */ 20101e04c3fSmrg /* (implies e <= 15, which means the bit-shifts below are safe) */ 20201e04c3fSmrg 20301e04c3fSmrg uint32_t v = ((1 << 10) | m) * 255; 20401e04c3fSmrg v = ((v >> (24 - e)) + 1) >> 1; 20501e04c3fSmrg return v; 20601e04c3fSmrg} 20701e04c3fSmrg 20801e04c3fSmrg/** 20901e04c3fSmrg * Takes a uint16_t, divides by 65536, converts the infinite-precision 21001e04c3fSmrg * result to fp16 with round-to-zero. Used by the ASTC decoder. 21101e04c3fSmrg */ 21201e04c3fSmrguint16_t _mesa_uint16_div_64k_to_half(uint16_t v) 21301e04c3fSmrg{ 21401e04c3fSmrg /* Zero or subnormal. Set the mantissa to (v << 8) and return. */ 21501e04c3fSmrg if (v < 4) 21601e04c3fSmrg return v << 8; 21701e04c3fSmrg 21801e04c3fSmrg /* Count the leading 0s in the uint16_t */ 21901e04c3fSmrg#ifdef HAVE___BUILTIN_CLZ 22001e04c3fSmrg int n = __builtin_clz(v) - 16; 22101e04c3fSmrg#else 22201e04c3fSmrg int n = 16; 22301e04c3fSmrg for (int i = 15; i >= 0; i--) { 22401e04c3fSmrg if (v & (1 << i)) { 22501e04c3fSmrg n = 15 - i; 22601e04c3fSmrg break; 22701e04c3fSmrg } 22801e04c3fSmrg } 22901e04c3fSmrg#endif 23001e04c3fSmrg 23101e04c3fSmrg /* Shift the mantissa up so bit 16 is the hidden 1 bit, 23201e04c3fSmrg * mask it off, then shift back down to 10 bits 23301e04c3fSmrg */ 23401e04c3fSmrg int m = ( ((uint32_t)v << (n + 1)) & 0xffff ) >> 6; 23501e04c3fSmrg 23601e04c3fSmrg /* (0{n} 1 X{15-n}) * 2^-16 23701e04c3fSmrg * = 1.X * 2^(15-n-16) 23801e04c3fSmrg * = 1.X * 2^(14-n - 15) 23901e04c3fSmrg * which is the FP16 form with e = 14 - n 24001e04c3fSmrg */ 24101e04c3fSmrg int e = 14 - n; 24201e04c3fSmrg 24301e04c3fSmrg assert(e >= 1 && e <= 30); 24401e04c3fSmrg assert(m >= 0 && m < 0x400); 24501e04c3fSmrg 24601e04c3fSmrg return (e << 10) | m; 24701e04c3fSmrg} 248