1848b8605Smrg/************************************************************************** 2848b8605Smrg * 3848b8605Smrg * Copyright 2010 Luca Barbieri 4848b8605Smrg * 5848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining 6848b8605Smrg * a copy of this software and associated documentation files (the 7848b8605Smrg * "Software"), to deal in the Software without restriction, including 8848b8605Smrg * without limitation the rights to use, copy, modify, merge, publish, 9848b8605Smrg * distribute, sublicense, and/or sell copies of the Software, and to 10848b8605Smrg * permit persons to whom the Software is furnished to do so, subject to 11848b8605Smrg * the following conditions: 12848b8605Smrg * 13848b8605Smrg * The above copyright notice and this permission notice (including the 14848b8605Smrg * next paragraph) shall be included in all copies or substantial 15848b8605Smrg * portions of the Software. 16848b8605Smrg * 17848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18848b8605Smrg * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19848b8605Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20848b8605Smrg * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 21848b8605Smrg * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 22848b8605Smrg * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23848b8605Smrg * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24848b8605Smrg * 25848b8605Smrg **************************************************************************/ 26848b8605Smrg 27848b8605Smrg 28848b8605Smrg#ifndef U_HALF_H 29848b8605Smrg#define U_HALF_H 30848b8605Smrg 31848b8605Smrg#include "pipe/p_compiler.h" 32848b8605Smrg#include "util/u_math.h" 33848b8605Smrg 34848b8605Smrg#ifdef __cplusplus 35848b8605Smrgextern "C" { 36848b8605Smrg#endif 37848b8605Smrg 38848b8605Smrg/* 39848b8605Smrg * References for float <-> half conversions 40848b8605Smrg * 41848b8605Smrg * http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ 42848b8605Smrg * https://gist.github.com/2156668 43848b8605Smrg * https://gist.github.com/2144712 44848b8605Smrg */ 45848b8605Smrg 46b8e80941Smrgstatic inline uint16_t 47848b8605Smrgutil_float_to_half(float f) 48848b8605Smrg{ 49848b8605Smrg uint32_t sign_mask = 0x80000000; 50848b8605Smrg uint32_t round_mask = ~0xfff; 51848b8605Smrg uint32_t f32inf = 0xff << 23; 52848b8605Smrg uint32_t f16inf = 0x1f << 23; 53848b8605Smrg uint32_t sign; 54848b8605Smrg union fi magic; 55848b8605Smrg union fi f32; 56848b8605Smrg uint16_t f16; 57848b8605Smrg 58848b8605Smrg magic.ui = 0xf << 23; 59848b8605Smrg 60848b8605Smrg f32.f = f; 61848b8605Smrg 62848b8605Smrg /* Sign */ 63848b8605Smrg sign = f32.ui & sign_mask; 64848b8605Smrg f32.ui ^= sign; 65848b8605Smrg 66848b8605Smrg if (f32.ui == f32inf) { 67848b8605Smrg /* Inf */ 68848b8605Smrg f16 = 0x7c00; 69848b8605Smrg } else if (f32.ui > f32inf) { 70848b8605Smrg /* NaN */ 71848b8605Smrg f16 = 0x7e00; 72848b8605Smrg } else { 73848b8605Smrg /* Number */ 74848b8605Smrg f32.ui &= round_mask; 75848b8605Smrg f32.f *= magic.f; 76848b8605Smrg f32.ui -= round_mask; 77b8e80941Smrg /* 78b8e80941Smrg * XXX: The magic mul relies on denorms being available, otherwise 79b8e80941Smrg * all f16 denorms get flushed to zero - hence when this is used 80b8e80941Smrg * for tgsi_exec in softpipe we won't get f16 denorms. 81b8e80941Smrg */ 82848b8605Smrg /* 83848b8605Smrg * Clamp to max finite value if overflowed. 84848b8605Smrg * OpenGL has completely undefined rounding behavior for float to 85848b8605Smrg * half-float conversions, and this matches what is mandated for float 86848b8605Smrg * to fp11/fp10, which recommend round-to-nearest-finite too. 87848b8605Smrg * (d3d10 is deeply unhappy about flushing such values to infinity, and 88848b8605Smrg * while it also mandates round-to-zero it doesn't care nearly as much 89848b8605Smrg * about that.) 90848b8605Smrg */ 91848b8605Smrg if (f32.ui > f16inf) 92848b8605Smrg f32.ui = f16inf - 1; 93848b8605Smrg 94848b8605Smrg f16 = f32.ui >> 13; 95848b8605Smrg } 96848b8605Smrg 97848b8605Smrg /* Sign */ 98848b8605Smrg f16 |= sign >> 16; 99848b8605Smrg 100848b8605Smrg return f16; 101848b8605Smrg} 102848b8605Smrg 103b8e80941Smrgstatic inline float 104848b8605Smrgutil_half_to_float(uint16_t f16) 105848b8605Smrg{ 106848b8605Smrg union fi infnan; 107848b8605Smrg union fi magic; 108848b8605Smrg union fi f32; 109848b8605Smrg 110848b8605Smrg infnan.ui = 0x8f << 23; 111848b8605Smrg infnan.f = 65536.0f; 112848b8605Smrg magic.ui = 0xef << 23; 113848b8605Smrg 114848b8605Smrg /* Exponent / Mantissa */ 115848b8605Smrg f32.ui = (f16 & 0x7fff) << 13; 116848b8605Smrg 117848b8605Smrg /* Adjust */ 118848b8605Smrg f32.f *= magic.f; 119b8e80941Smrg /* XXX: The magic mul relies on denorms being available */ 120848b8605Smrg 121848b8605Smrg /* Inf / NaN */ 122848b8605Smrg if (f32.f >= infnan.f) 123848b8605Smrg f32.ui |= 0xff << 23; 124848b8605Smrg 125848b8605Smrg /* Sign */ 126848b8605Smrg f32.ui |= (f16 & 0x8000) << 16; 127848b8605Smrg 128848b8605Smrg return f32.f; 129848b8605Smrg} 130848b8605Smrg 131848b8605Smrg#ifdef __cplusplus 132848b8605Smrg} 133848b8605Smrg#endif 134848b8605Smrg 135848b8605Smrg#endif /* U_HALF_H */ 136848b8605Smrg 137