101e04c3fSmrg/* 201e04c3fSmrg * Mesa 3-D graphics library 301e04c3fSmrg * 401e04c3fSmrg * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. 57ec681f3Smrg * Copyright (C) 2018-2019 Intel Corporation 601e04c3fSmrg * 701e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 801e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 901e04c3fSmrg * to deal in the Software without restriction, including without limitation 1001e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 1101e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 1201e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1301e04c3fSmrg * 1401e04c3fSmrg * The above copyright notice and this permission notice shall be included 1501e04c3fSmrg * in all copies or substantial portions of the Software. 1601e04c3fSmrg * 1701e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 1801e04c3fSmrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1901e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 2001e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 2101e04c3fSmrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 2201e04c3fSmrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 2301e04c3fSmrg * OTHER DEALINGS IN THE SOFTWARE. 2401e04c3fSmrg */ 2501e04c3fSmrg 2601e04c3fSmrg#ifndef _HALF_FLOAT_H_ 2701e04c3fSmrg#define _HALF_FLOAT_H_ 2801e04c3fSmrg 2901e04c3fSmrg#include <stdbool.h> 3001e04c3fSmrg#include <stdint.h> 317ec681f3Smrg#include <string.h> 327ec681f3Smrg#include "util/u_cpu_detect.h" 337ec681f3Smrg 347ec681f3Smrg#if defined(USE_X86_64_ASM) 357ec681f3Smrg#include <immintrin.h> 367ec681f3Smrg#endif 3701e04c3fSmrg 3801e04c3fSmrg#ifdef __cplusplus 3901e04c3fSmrgextern "C" { 4001e04c3fSmrg#endif 4101e04c3fSmrg 427ec681f3Smrg#define FP16_ONE ((uint16_t) 0x3c00) 437ec681f3Smrg#define FP16_ZERO ((uint16_t) 0) 4401e04c3fSmrg 457ec681f3Smrguint16_t _mesa_float_to_half_slow(float val); 467ec681f3Smrgfloat _mesa_half_to_float_slow(uint16_t val); 4701e04c3fSmrguint8_t _mesa_half_to_unorm8(uint16_t v); 4801e04c3fSmrguint16_t _mesa_uint16_div_64k_to_half(uint16_t v); 4901e04c3fSmrg 507ec681f3Smrg/* 517ec681f3Smrg * _mesa_float_to_float16_rtz_slow is no more than a wrapper to the counterpart 527ec681f3Smrg * softfloat.h call. Still, softfloat.h conversion API is meant to be kept 537ec681f3Smrg * private. In other words, only use the API published here, instead of 547ec681f3Smrg * calling directly the softfloat.h one. 557ec681f3Smrg */ 567ec681f3Smrguint16_t _mesa_float_to_float16_rtz_slow(float val); 577ec681f3Smrg 587ec681f3Smrgstatic inline uint16_t 597ec681f3Smrg_mesa_float_to_half(float val) 607ec681f3Smrg{ 617ec681f3Smrg#if defined(USE_X86_64_ASM) 627ec681f3Smrg if (util_get_cpu_caps()->has_f16c) { 637ec681f3Smrg __m128 in = {val}; 647ec681f3Smrg __m128i out; 657ec681f3Smrg 667ec681f3Smrg /* $0 = round to nearest */ 677ec681f3Smrg __asm volatile("vcvtps2ph $0, %1, %0" : "=v"(out) : "v"(in)); 687ec681f3Smrg return out[0]; 697ec681f3Smrg } 707ec681f3Smrg#endif 717ec681f3Smrg return _mesa_float_to_half_slow(val); 727ec681f3Smrg} 737ec681f3Smrg 747ec681f3Smrgstatic inline float 757ec681f3Smrg_mesa_half_to_float(uint16_t val) 767ec681f3Smrg{ 777ec681f3Smrg#if defined(USE_X86_64_ASM) 787ec681f3Smrg if (util_get_cpu_caps()->has_f16c) { 797ec681f3Smrg __m128i in = {val}; 807ec681f3Smrg __m128 out; 817ec681f3Smrg 827ec681f3Smrg __asm volatile("vcvtph2ps %1, %0" : "=v"(out) : "v"(in)); 837ec681f3Smrg return out[0]; 847ec681f3Smrg } 857ec681f3Smrg#endif 867ec681f3Smrg return _mesa_half_to_float_slow(val); 877ec681f3Smrg} 887ec681f3Smrg 897ec681f3Smrgstatic inline uint16_t 907ec681f3Smrg_mesa_float_to_float16_rtz(float val) 917ec681f3Smrg{ 927ec681f3Smrg#if defined(USE_X86_64_ASM) 937ec681f3Smrg if (util_get_cpu_caps()->has_f16c) { 947ec681f3Smrg __m128 in = {val}; 957ec681f3Smrg __m128i out; 967ec681f3Smrg 977ec681f3Smrg /* $3 = round towards zero (truncate) */ 987ec681f3Smrg __asm volatile("vcvtps2ph $3, %1, %0" : "=v"(out) : "v"(in)); 997ec681f3Smrg return out[0]; 1007ec681f3Smrg } 1017ec681f3Smrg#endif 1027ec681f3Smrg return _mesa_float_to_float16_rtz_slow(val); 1037ec681f3Smrg} 1047ec681f3Smrg 1057ec681f3Smrgstatic inline uint16_t 1067ec681f3Smrg_mesa_float_to_float16_rtne(float val) 1077ec681f3Smrg{ 1087ec681f3Smrg return _mesa_float_to_half(val); 1097ec681f3Smrg} 1107ec681f3Smrg 11101e04c3fSmrgstatic inline bool 11201e04c3fSmrg_mesa_half_is_negative(uint16_t h) 11301e04c3fSmrg{ 11401e04c3fSmrg return !!(h & 0x8000); 11501e04c3fSmrg} 11601e04c3fSmrg 11701e04c3fSmrg 1187ec681f3Smrg#ifdef __cplusplus 1197ec681f3Smrg 1207ec681f3Smrg/* Helper class for disambiguating fp16 from uint16_t in C++ overloads */ 1217ec681f3Smrg 1227ec681f3Smrgstruct float16_t { 1237ec681f3Smrg uint16_t bits; 1247ec681f3Smrg float16_t(float f) : bits(_mesa_float_to_half(f)) {} 1257ec681f3Smrg float16_t(double d) : bits(_mesa_float_to_half(d)) {} 1267ec681f3Smrg float16_t(uint16_t raw_bits) : bits(raw_bits) {} 1277ec681f3Smrg static float16_t one() { return float16_t(FP16_ONE); } 1287ec681f3Smrg static float16_t zero() { return float16_t(FP16_ZERO); } 1297ec681f3Smrg}; 1307ec681f3Smrg 1317ec681f3Smrg#endif 1327ec681f3Smrg 1337ec681f3Smrg 13401e04c3fSmrg#ifdef __cplusplus 13501e04c3fSmrg} /* extern C */ 13601e04c3fSmrg#endif 13701e04c3fSmrg 13801e04c3fSmrg#endif /* _HALF_FLOAT_H_ */ 139