101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2015 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#ifndef _ROUNDING_H 2501e04c3fSmrg#define _ROUNDING_H 2601e04c3fSmrg 2701e04c3fSmrg#include "c99_math.h" 2801e04c3fSmrg 2901e04c3fSmrg#include <limits.h> 3001e04c3fSmrg#include <stdint.h> 3101e04c3fSmrg 327ec681f3Smrg#if defined(__SSE__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1)) || (defined(_M_X64) && !defined(_M_ARM64EC)) 3301e04c3fSmrg#include <xmmintrin.h> 3401e04c3fSmrg#include <emmintrin.h> 3501e04c3fSmrg#endif 3601e04c3fSmrg 3701e04c3fSmrg#ifdef __SSE4_1__ 3801e04c3fSmrg#include <smmintrin.h> 3901e04c3fSmrg#endif 4001e04c3fSmrg 4101e04c3fSmrg/* The C standard library has functions round()/rint()/nearbyint() that round 4201e04c3fSmrg * their arguments according to the rounding mode set in the floating-point 4301e04c3fSmrg * control register. While there are trunc()/ceil()/floor() functions that do 4401e04c3fSmrg * a specific operation without modifying the rounding mode, there is no 4501e04c3fSmrg * roundeven() in any version of C. 4601e04c3fSmrg * 4701e04c3fSmrg * Technical Specification 18661 (ISO/IEC TS 18661-1:2014) adds roundeven(), 4801e04c3fSmrg * but it's unfortunately not implemented by glibc. 4901e04c3fSmrg * 5001e04c3fSmrg * This implementation differs in that it does not raise the inexact exception. 5101e04c3fSmrg * 5201e04c3fSmrg * We use rint() to implement these functions, with the assumption that the 5301e04c3fSmrg * floating-point rounding mode has not been changed from the default Round 5401e04c3fSmrg * to Nearest. 5501e04c3fSmrg */ 5601e04c3fSmrg 5701e04c3fSmrg/** 5801e04c3fSmrg * \brief Rounds \c x to the nearest integer, with ties to the even integer. 5901e04c3fSmrg */ 6001e04c3fSmrgstatic inline float 6101e04c3fSmrg_mesa_roundevenf(float x) 6201e04c3fSmrg{ 6301e04c3fSmrg#ifdef __SSE4_1__ 6401e04c3fSmrg float ret; 6501e04c3fSmrg __m128 m = _mm_load_ss(&x); 6601e04c3fSmrg m = _mm_round_ss(m, m, _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC); 6701e04c3fSmrg _mm_store_ss(&ret, m); 6801e04c3fSmrg return ret; 6901e04c3fSmrg#else 7001e04c3fSmrg return rintf(x); 7101e04c3fSmrg#endif 7201e04c3fSmrg} 7301e04c3fSmrg 7401e04c3fSmrg/** 7501e04c3fSmrg * \brief Rounds \c x to the nearest integer, with ties to the even integer. 7601e04c3fSmrg */ 7701e04c3fSmrgstatic inline double 7801e04c3fSmrg_mesa_roundeven(double x) 7901e04c3fSmrg{ 8001e04c3fSmrg#ifdef __SSE4_1__ 8101e04c3fSmrg double ret; 8201e04c3fSmrg __m128d m = _mm_load_sd(&x); 8301e04c3fSmrg m = _mm_round_sd(m, m, _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC); 8401e04c3fSmrg _mm_store_sd(&ret, m); 8501e04c3fSmrg return ret; 8601e04c3fSmrg#else 8701e04c3fSmrg return rint(x); 8801e04c3fSmrg#endif 8901e04c3fSmrg} 9001e04c3fSmrg 9101e04c3fSmrg/** 9201e04c3fSmrg * \brief Rounds \c x to the nearest integer, with ties to the even integer, 9301e04c3fSmrg * and returns the value as a long int. 9401e04c3fSmrg */ 9501e04c3fSmrgstatic inline long 9601e04c3fSmrg_mesa_lroundevenf(float x) 9701e04c3fSmrg{ 987ec681f3Smrg#if defined(__SSE__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1)) || (defined(_M_X64) && !defined(_M_ARM64EC)) 9901e04c3fSmrg#if LONG_MAX == INT64_MAX 10001e04c3fSmrg return _mm_cvtss_si64(_mm_load_ss(&x)); 10101e04c3fSmrg#elif LONG_MAX == INT32_MAX 10201e04c3fSmrg return _mm_cvtss_si32(_mm_load_ss(&x)); 10301e04c3fSmrg#else 10401e04c3fSmrg#error "Unsupported long size" 10501e04c3fSmrg#endif 10601e04c3fSmrg#else 10701e04c3fSmrg return lrintf(x); 10801e04c3fSmrg#endif 10901e04c3fSmrg} 11001e04c3fSmrg 11101e04c3fSmrg/** 11201e04c3fSmrg * \brief Rounds \c x to the nearest integer, with ties to the even integer, 11301e04c3fSmrg * and returns the value as a long int. 11401e04c3fSmrg */ 11501e04c3fSmrgstatic inline long 11601e04c3fSmrg_mesa_lroundeven(double x) 11701e04c3fSmrg{ 1187ec681f3Smrg#if defined(__SSE2__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || (defined(_M_X64) && !defined(_M_ARM64EC)) 11901e04c3fSmrg#if LONG_MAX == INT64_MAX 12001e04c3fSmrg return _mm_cvtsd_si64(_mm_load_sd(&x)); 12101e04c3fSmrg#elif LONG_MAX == INT32_MAX 12201e04c3fSmrg return _mm_cvtsd_si32(_mm_load_sd(&x)); 12301e04c3fSmrg#else 12401e04c3fSmrg#error "Unsupported long size" 12501e04c3fSmrg#endif 12601e04c3fSmrg#else 12701e04c3fSmrg return lrint(x); 12801e04c3fSmrg#endif 12901e04c3fSmrg} 13001e04c3fSmrg 1318a1362adSmaya/** 1328a1362adSmaya * \brief Rounds \c x to the nearest integer, with ties to the even integer, 1338a1362adSmaya * and returns the value as an int64_t. 1348a1362adSmaya */ 1358a1362adSmayastatic inline int64_t 1368a1362adSmaya_mesa_i64roundevenf(float x) 1378a1362adSmaya{ 1388a1362adSmaya#if LONG_MAX == INT64_MAX 1398a1362adSmaya return _mesa_lroundevenf(x); 1408a1362adSmaya#elif LONG_MAX == INT32_MAX 1418a1362adSmaya return llrintf(x); 1428a1362adSmaya#else 1438a1362adSmaya#error "Unsupported long size" 1448a1362adSmaya#endif 1458a1362adSmaya} 1468a1362adSmaya 14701e04c3fSmrg#endif 148