rounding.h revision b8e80941
1b8e80941Smrg/*
2b8e80941Smrg * Copyright © 2015 Intel Corporation
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21b8e80941Smrg * IN THE SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg#ifndef _ROUNDING_H
25b8e80941Smrg#define _ROUNDING_H
26b8e80941Smrg
27b8e80941Smrg#include "c99_math.h"
28b8e80941Smrg
29b8e80941Smrg#include <limits.h>
30b8e80941Smrg#include <stdint.h>
31b8e80941Smrg
32b8e80941Smrg#if defined(__SSE__) || defined(_MSC_VER)
33b8e80941Smrg/* MSVC always has SSE nowadays */
34b8e80941Smrg#include <xmmintrin.h>
35b8e80941Smrg#include <emmintrin.h>
36b8e80941Smrg#endif
37b8e80941Smrg
38b8e80941Smrg#ifdef __SSE4_1__
39b8e80941Smrg#include <smmintrin.h>
40b8e80941Smrg#endif
41b8e80941Smrg
/* The C standard library has functions rint()/nearbyint() that round their
 * arguments according to the rounding mode set in the floating-point control
 * register, and round(), which always rounds halfway cases away from zero.
 * While there are trunc()/ceil()/floor() functions that do a specific
 * operation without modifying the rounding mode, there is no roundeven() in
 * C99 or C11.
 *
 * Technical Specification 18661 (ISO/IEC TS 18661-1:2014) adds roundeven(),
 * but it's unfortunately not implemented by older glibc releases.
 *
 * This implementation differs in that it does not raise the inexact exception.
 *
 * We use rint() to implement these functions, with the assumption that the
 * floating-point rounding mode has not been changed from the default Round
 * to Nearest.
 */
57b8e80941Smrg
58b8e80941Smrg/**
59b8e80941Smrg * \brief Rounds \c x to the nearest integer, with ties to the even integer.
60b8e80941Smrg */
61b8e80941Smrgstatic inline float
62b8e80941Smrg_mesa_roundevenf(float x)
63b8e80941Smrg{
64b8e80941Smrg#ifdef __SSE4_1__
65b8e80941Smrg   float ret;
66b8e80941Smrg   __m128 m = _mm_load_ss(&x);
67b8e80941Smrg   m = _mm_round_ss(m, m, _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC);
68b8e80941Smrg   _mm_store_ss(&ret, m);
69b8e80941Smrg   return ret;
70b8e80941Smrg#else
71b8e80941Smrg   return rintf(x);
72b8e80941Smrg#endif
73b8e80941Smrg}
74b8e80941Smrg
75b8e80941Smrg/**
76b8e80941Smrg * \brief Rounds \c x to the nearest integer, with ties to the even integer.
77b8e80941Smrg */
78b8e80941Smrgstatic inline double
79b8e80941Smrg_mesa_roundeven(double x)
80b8e80941Smrg{
81b8e80941Smrg#ifdef __SSE4_1__
82b8e80941Smrg   double ret;
83b8e80941Smrg   __m128d m = _mm_load_sd(&x);
84b8e80941Smrg   m = _mm_round_sd(m, m, _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC);
85b8e80941Smrg   _mm_store_sd(&ret, m);
86b8e80941Smrg   return ret;
87b8e80941Smrg#else
88b8e80941Smrg   return rint(x);
89b8e80941Smrg#endif
90b8e80941Smrg}
91b8e80941Smrg
/**
 * \brief Rounds \c x to the nearest integer, with ties to the even integer,
 * and returns the value as a long int.
 *
 * With SSE (or when building with MSVC) the conversion is done with
 * _mm_cvtss_si64() when \c long is 64 bits wide and _mm_cvtss_si32() when it
 * is 32 bits wide; any other width is a compile-time error.  Without SSE
 * the function falls back to lrintf().
 *
 * The ties-to-even result assumes the floating-point rounding mode is still
 * the default round-to-nearest.
 *
 * \param x  value to round
 * \return   \c x rounded to an integer, as a long
 */
static inline long
_mesa_lroundevenf(float x)
{
#if defined(__SSE__) || defined(_MSC_VER)
#if LONG_MAX == INT64_MAX
   return _mm_cvtss_si64(_mm_load_ss(&x));
#elif LONG_MAX == INT32_MAX
   return _mm_cvtss_si32(_mm_load_ss(&x));
#else
#error "Unsupported long size"
#endif
#else
   return lrintf(x);
#endif
}
111b8e80941Smrg
/**
 * \brief Rounds \c x to the nearest integer, with ties to the even integer,
 * and returns the value as a long int.
 *
 * Double-precision variant.  With SSE2 (or when building with MSVC) the
 * conversion is done with _mm_cvtsd_si64() when \c long is 64 bits wide and
 * _mm_cvtsd_si32() when it is 32 bits wide; any other width is a
 * compile-time error.  Without SSE2 the function falls back to lrint().
 *
 * The ties-to-even result assumes the floating-point rounding mode is still
 * the default round-to-nearest.
 *
 * \param x  value to round
 * \return   \c x rounded to an integer, as a long
 */
static inline long
_mesa_lroundeven(double x)
{
#if defined(__SSE2__) || defined(_MSC_VER)
#if LONG_MAX == INT64_MAX
   return _mm_cvtsd_si64(_mm_load_sd(&x));
#elif LONG_MAX == INT32_MAX
   return _mm_cvtsd_si32(_mm_load_sd(&x));
#else
#error "Unsupported long size"
#endif
#else
   return lrint(x);
#endif
}
131b8e80941Smrg
/**
 * \brief Rounds \c x to the nearest integer, with ties to the even integer,
 * and returns the value as an int64_t.
 *
 * When \c long is 64 bits wide this forwards to _mesa_lroundevenf(), whose
 * result already covers the int64_t range.  When \c long is 32 bits wide it
 * uses llrintf() instead so the result is not limited to 32 bits.  Any
 * other width of \c long is a compile-time error.
 *
 * The ties-to-even result assumes the floating-point rounding mode is still
 * the default round-to-nearest.
 *
 * \param x  value to round
 * \return   \c x rounded to an integer, as an int64_t
 */
static inline int64_t
_mesa_i64roundevenf(float x)
{
#if LONG_MAX == INT64_MAX
   return _mesa_lroundevenf(x);
#elif LONG_MAX == INT32_MAX
   return llrintf(x);
#else
#error "Unsupported long size"
#endif
}
147b8e80941Smrg
148b8e80941Smrg#endif
149