1848b8605Smrg/**************************************************************************
2848b8605Smrg *
3848b8605Smrg * Copyright 2010 Luca Barbieri
4848b8605Smrg *
5848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining
6848b8605Smrg * a copy of this software and associated documentation files (the
7848b8605Smrg * "Software"), to deal in the Software without restriction, including
8848b8605Smrg * without limitation the rights to use, copy, modify, merge, publish,
9848b8605Smrg * distribute, sublicense, and/or sell copies of the Software, and to
10848b8605Smrg * permit persons to whom the Software is furnished to do so, subject to
11848b8605Smrg * the following conditions:
12848b8605Smrg *
13848b8605Smrg * The above copyright notice and this permission notice (including the
14848b8605Smrg * next paragraph) shall be included in all copies or substantial
15848b8605Smrg * portions of the Software.
16848b8605Smrg *
17848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18848b8605Smrg * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19848b8605Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20848b8605Smrg * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21848b8605Smrg * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22848b8605Smrg * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23848b8605Smrg * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24848b8605Smrg *
25848b8605Smrg **************************************************************************/
26848b8605Smrg
27848b8605Smrg
28848b8605Smrg#ifndef U_HALF_H
29848b8605Smrg#define U_HALF_H
30848b8605Smrg
31848b8605Smrg#include "pipe/p_compiler.h"
32848b8605Smrg#include "util/u_math.h"
33848b8605Smrg
34848b8605Smrg#ifdef __cplusplus
35848b8605Smrgextern "C" {
36848b8605Smrg#endif
37848b8605Smrg
38848b8605Smrg/*
39848b8605Smrg * References for float <-> half conversions
40848b8605Smrg *
41848b8605Smrg *  http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
42848b8605Smrg *  https://gist.github.com/2156668
43848b8605Smrg *  https://gist.github.com/2144712
44848b8605Smrg */
45848b8605Smrg
46b8e80941Smrgstatic inline uint16_t
47848b8605Smrgutil_float_to_half(float f)
48848b8605Smrg{
49848b8605Smrg   uint32_t sign_mask  = 0x80000000;
50848b8605Smrg   uint32_t round_mask = ~0xfff;
51848b8605Smrg   uint32_t f32inf = 0xff << 23;
52848b8605Smrg   uint32_t f16inf = 0x1f << 23;
53848b8605Smrg   uint32_t sign;
54848b8605Smrg   union fi magic;
55848b8605Smrg   union fi f32;
56848b8605Smrg   uint16_t f16;
57848b8605Smrg
58848b8605Smrg   magic.ui = 0xf << 23;
59848b8605Smrg
60848b8605Smrg   f32.f = f;
61848b8605Smrg
62848b8605Smrg   /* Sign */
63848b8605Smrg   sign = f32.ui & sign_mask;
64848b8605Smrg   f32.ui ^= sign;
65848b8605Smrg
66848b8605Smrg   if (f32.ui == f32inf) {
67848b8605Smrg      /* Inf */
68848b8605Smrg      f16 = 0x7c00;
69848b8605Smrg   } else if (f32.ui > f32inf) {
70848b8605Smrg      /* NaN */
71848b8605Smrg      f16 = 0x7e00;
72848b8605Smrg   } else {
73848b8605Smrg      /* Number */
74848b8605Smrg      f32.ui &= round_mask;
75848b8605Smrg      f32.f  *= magic.f;
76848b8605Smrg      f32.ui -= round_mask;
77b8e80941Smrg      /*
78b8e80941Smrg       * XXX: The magic mul relies on denorms being available, otherwise
79b8e80941Smrg       * all f16 denorms get flushed to zero - hence when this is used
80b8e80941Smrg       * for tgsi_exec in softpipe we won't get f16 denorms.
81b8e80941Smrg       */
82848b8605Smrg      /*
83848b8605Smrg       * Clamp to max finite value if overflowed.
84848b8605Smrg       * OpenGL has completely undefined rounding behavior for float to
85848b8605Smrg       * half-float conversions, and this matches what is mandated for float
86848b8605Smrg       * to fp11/fp10, which recommend round-to-nearest-finite too.
87848b8605Smrg       * (d3d10 is deeply unhappy about flushing such values to infinity, and
88848b8605Smrg       * while it also mandates round-to-zero it doesn't care nearly as much
89848b8605Smrg       * about that.)
90848b8605Smrg       */
91848b8605Smrg      if (f32.ui > f16inf)
92848b8605Smrg         f32.ui = f16inf - 1;
93848b8605Smrg
94848b8605Smrg      f16 = f32.ui >> 13;
95848b8605Smrg   }
96848b8605Smrg
97848b8605Smrg   /* Sign */
98848b8605Smrg   f16 |= sign >> 16;
99848b8605Smrg
100848b8605Smrg   return f16;
101848b8605Smrg}
102848b8605Smrg
103b8e80941Smrgstatic inline float
104848b8605Smrgutil_half_to_float(uint16_t f16)
105848b8605Smrg{
106848b8605Smrg   union fi infnan;
107848b8605Smrg   union fi magic;
108848b8605Smrg   union fi f32;
109848b8605Smrg
110848b8605Smrg   infnan.ui = 0x8f << 23;
111848b8605Smrg   infnan.f = 65536.0f;
112848b8605Smrg   magic.ui  = 0xef << 23;
113848b8605Smrg
114848b8605Smrg   /* Exponent / Mantissa */
115848b8605Smrg   f32.ui = (f16 & 0x7fff) << 13;
116848b8605Smrg
117848b8605Smrg   /* Adjust */
118848b8605Smrg   f32.f *= magic.f;
119b8e80941Smrg   /* XXX: The magic mul relies on denorms being available */
120848b8605Smrg
121848b8605Smrg   /* Inf / NaN */
122848b8605Smrg   if (f32.f >= infnan.f)
123848b8605Smrg      f32.ui |= 0xff << 23;
124848b8605Smrg
125848b8605Smrg   /* Sign */
126848b8605Smrg   f32.ui |= (f16 & 0x8000) << 16;
127848b8605Smrg
128848b8605Smrg   return f32.f;
129848b8605Smrg}
130848b8605Smrg
131848b8605Smrg#ifdef __cplusplus
132848b8605Smrg}
133848b8605Smrg#endif
134848b8605Smrg
135848b8605Smrg#endif /* U_HALF_H */
136848b8605Smrg
137