101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2015 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg#ifndef _ROUNDING_H
2501e04c3fSmrg#define _ROUNDING_H
2601e04c3fSmrg
2701e04c3fSmrg#include "c99_math.h"
2801e04c3fSmrg
2901e04c3fSmrg#include <limits.h>
3001e04c3fSmrg#include <stdint.h>
3101e04c3fSmrg
327ec681f3Smrg#if defined(__SSE__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1)) || (defined(_M_X64) && !defined(_M_ARM64EC))
3301e04c3fSmrg#include <xmmintrin.h>
3401e04c3fSmrg#include <emmintrin.h>
3501e04c3fSmrg#endif
3601e04c3fSmrg
3701e04c3fSmrg#ifdef __SSE4_1__
3801e04c3fSmrg#include <smmintrin.h>
3901e04c3fSmrg#endif
4001e04c3fSmrg
4101e04c3fSmrg/* The C standard library has functions round()/rint()/nearbyint() that round
4201e04c3fSmrg * their arguments according to the rounding mode set in the floating-point
4301e04c3fSmrg * control register. While there are trunc()/ceil()/floor() functions that do
4401e04c3fSmrg * a specific operation without modifying the rounding mode, there is no
4501e04c3fSmrg * roundeven() in any version of C.
4601e04c3fSmrg *
4701e04c3fSmrg * Technical Specification 18661 (ISO/IEC TS 18661-1:2014) adds roundeven(),
4801e04c3fSmrg * but it's unfortunately not implemented by glibc.
4901e04c3fSmrg *
5001e04c3fSmrg * This implementation differs in that it does not raise the inexact exception.
5101e04c3fSmrg *
5201e04c3fSmrg * We use rint() to implement these functions, with the assumption that the
5301e04c3fSmrg * floating-point rounding mode has not been changed from the default Round
5401e04c3fSmrg * to Nearest.
5501e04c3fSmrg */
5601e04c3fSmrg
5701e04c3fSmrg/**
5801e04c3fSmrg * \brief Rounds \c x to the nearest integer, with ties to the even integer.
5901e04c3fSmrg */
6001e04c3fSmrgstatic inline float
6101e04c3fSmrg_mesa_roundevenf(float x)
6201e04c3fSmrg{
6301e04c3fSmrg#ifdef __SSE4_1__
6401e04c3fSmrg   float ret;
6501e04c3fSmrg   __m128 m = _mm_load_ss(&x);
6601e04c3fSmrg   m = _mm_round_ss(m, m, _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC);
6701e04c3fSmrg   _mm_store_ss(&ret, m);
6801e04c3fSmrg   return ret;
6901e04c3fSmrg#else
7001e04c3fSmrg   return rintf(x);
7101e04c3fSmrg#endif
7201e04c3fSmrg}
7301e04c3fSmrg
7401e04c3fSmrg/**
7501e04c3fSmrg * \brief Rounds \c x to the nearest integer, with ties to the even integer.
7601e04c3fSmrg */
7701e04c3fSmrgstatic inline double
7801e04c3fSmrg_mesa_roundeven(double x)
7901e04c3fSmrg{
8001e04c3fSmrg#ifdef __SSE4_1__
8101e04c3fSmrg   double ret;
8201e04c3fSmrg   __m128d m = _mm_load_sd(&x);
8301e04c3fSmrg   m = _mm_round_sd(m, m, _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC);
8401e04c3fSmrg   _mm_store_sd(&ret, m);
8501e04c3fSmrg   return ret;
8601e04c3fSmrg#else
8701e04c3fSmrg   return rint(x);
8801e04c3fSmrg#endif
8901e04c3fSmrg}
9001e04c3fSmrg
9101e04c3fSmrg/**
9201e04c3fSmrg * \brief Rounds \c x to the nearest integer, with ties to the even integer,
9301e04c3fSmrg * and returns the value as a long int.
9401e04c3fSmrg */
9501e04c3fSmrgstatic inline long
9601e04c3fSmrg_mesa_lroundevenf(float x)
9701e04c3fSmrg{
987ec681f3Smrg#if defined(__SSE__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1)) || (defined(_M_X64) && !defined(_M_ARM64EC))
9901e04c3fSmrg#if LONG_MAX == INT64_MAX
10001e04c3fSmrg   return _mm_cvtss_si64(_mm_load_ss(&x));
10101e04c3fSmrg#elif LONG_MAX == INT32_MAX
10201e04c3fSmrg   return _mm_cvtss_si32(_mm_load_ss(&x));
10301e04c3fSmrg#else
10401e04c3fSmrg#error "Unsupported long size"
10501e04c3fSmrg#endif
10601e04c3fSmrg#else
10701e04c3fSmrg   return lrintf(x);
10801e04c3fSmrg#endif
10901e04c3fSmrg}
11001e04c3fSmrg
11101e04c3fSmrg/**
11201e04c3fSmrg * \brief Rounds \c x to the nearest integer, with ties to the even integer,
11301e04c3fSmrg * and returns the value as a long int.
11401e04c3fSmrg */
11501e04c3fSmrgstatic inline long
11601e04c3fSmrg_mesa_lroundeven(double x)
11701e04c3fSmrg{
1187ec681f3Smrg#if defined(__SSE2__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || (defined(_M_X64) && !defined(_M_ARM64EC))
11901e04c3fSmrg#if LONG_MAX == INT64_MAX
12001e04c3fSmrg   return _mm_cvtsd_si64(_mm_load_sd(&x));
12101e04c3fSmrg#elif LONG_MAX == INT32_MAX
12201e04c3fSmrg   return _mm_cvtsd_si32(_mm_load_sd(&x));
12301e04c3fSmrg#else
12401e04c3fSmrg#error "Unsupported long size"
12501e04c3fSmrg#endif
12601e04c3fSmrg#else
12701e04c3fSmrg   return lrint(x);
12801e04c3fSmrg#endif
12901e04c3fSmrg}
13001e04c3fSmrg
1318a1362adSmaya/**
1328a1362adSmaya * \brief Rounds \c x to the nearest integer, with ties to the even integer,
1338a1362adSmaya * and returns the value as an int64_t.
1348a1362adSmaya */
1358a1362adSmayastatic inline int64_t
1368a1362adSmaya_mesa_i64roundevenf(float x)
1378a1362adSmaya{
1388a1362adSmaya#if LONG_MAX == INT64_MAX
1398a1362adSmaya   return _mesa_lroundevenf(x);
1408a1362adSmaya#elif LONG_MAX == INT32_MAX
1418a1362adSmaya   return llrintf(x);
1428a1362adSmaya#else
1438a1362adSmaya#error "Unsupported long size"
1448a1362adSmaya#endif
1458a1362adSmaya}
1468a1362adSmaya
14701e04c3fSmrg#endif
148