/* FPU-related code for x86 and x86_64 processors.
2 1.1.1.4 mrg Copyright (C) 2005-2024 Free Software Foundation, Inc.
3 1.1 mrg Contributed by Francois-Xavier Coudert <coudert (at) clipper.ens.fr>
4 1.1 mrg
5 1.1 mrg This file is part of the GNU Fortran 95 runtime library (libgfortran).
6 1.1 mrg
7 1.1 mrg Libgfortran is free software; you can redistribute it and/or
8 1.1 mrg modify it under the terms of the GNU General Public
9 1.1 mrg License as published by the Free Software Foundation; either
10 1.1 mrg version 3 of the License, or (at your option) any later version.
11 1.1 mrg
12 1.1 mrg Libgfortran is distributed in the hope that it will be useful,
13 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
14 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 1.1 mrg GNU General Public License for more details.
16 1.1 mrg
17 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
18 1.1 mrg permissions described in the GCC Runtime Library Exception, version
19 1.1 mrg 3.1, as published by the Free Software Foundation.
20 1.1 mrg
21 1.1 mrg You should have received a copy of the GNU General Public License and
22 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
23 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 1.1 mrg <http://www.gnu.org/licenses/>. */
25 1.1 mrg
26 1.1 mrg #ifndef __SSE_MATH__
27 1.1 mrg #include "cpuid.h"
28 1.1 mrg #endif
29 1.1 mrg
/* Return nonzero if the CPU supports SSE instructions.  When the file
   is compiled with SSE math enabled (__SSE_MATH__ defined), SSE support
   is implied and no runtime check is needed.  */

static int
has_sse (void)
{
#ifndef __SSE_MATH__
  unsigned int eax, ebx, ecx, edx;

  /* CPUID leaf 1: feature flags.  __get_cpuid returns 0 if the leaf
     is unsupported, in which case we conservatively report no SSE.  */
  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 0;

  /* SSE support is advertised in EDX (bit_SSE from cpuid.h).  */
  return edx & bit_SSE;
#else
  return 1;
#endif
}
44 1.1 mrg
/* i387 exceptions -- see linux <fpu_control.h> header file for details.
   These are both the control-word mask bits (a set bit masks, i.e.
   disables trapping of, the exception) and the status-word flag bits.  */
#define _FPU_MASK_IM  0x01	/* invalid operation */
#define _FPU_MASK_DM  0x02	/* denormalized operand */
#define _FPU_MASK_ZM  0x04	/* zero divide */
#define _FPU_MASK_OM  0x08	/* overflow */
#define _FPU_MASK_UM  0x10	/* underflow */
#define _FPU_MASK_PM  0x20	/* precision (inexact) */
#define _FPU_MASK_ALL 0x3f	/* all of the above */

/* All six exception flag bits in the x87 status word / MXCSR.  */
#define _FPU_EX_ALL   0x3f

/* i387 rounding modes, values of the 2-bit RC field.  */

#define _FPU_RC_NEAREST 0x0
#define _FPU_RC_DOWN    0x1
#define _FPU_RC_UP      0x2
#define _FPU_RC_ZERO    0x3

#define _FPU_RC_MASK    0x3

/* MXCSR flush-to-zero bit: enables abrupt underflow for SSE math.  */

#define MXCSR_FTZ (1 << 15)
68 1.1 mrg
69 1.1 mrg
/* This structure corresponds to the layout of the block
   written by FSTENV (28 bytes of x87 environment), with the
   MXCSR register appended so the whole SSE+x87 state fits in
   one buffer.  The field order and sizes must not change.  */
struct fenv
{
  unsigned short int __control_word;	/* x87 control word */
  unsigned short int __unused1;
  unsigned short int __status_word;	/* x87 status word (exception flags) */
  unsigned short int __unused2;
  unsigned short int __tags;		/* x87 tag word */
  unsigned short int __unused3;
  unsigned int __eip;			/* last instruction pointer */
  unsigned short int __cs_selector;
  unsigned int __opcode:11;		/* last opcode, packed with selector */
  unsigned int __unused4:5;
  unsigned int __data_offset;		/* last operand pointer */
  unsigned short int __data_selector;
  unsigned short int __unused5;
  unsigned int __mxcsr;			/* SSE control/status (not part of FSTENV) */
} __attribute__ ((gcc_struct));		/* force the i386 layout on all ABIs */

/* Check we can actually store the FPU state in the allocated size.  */
_Static_assert (sizeof(struct fenv) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
		"GFC_FPE_STATE_BUFFER_SIZE is too small");
93 1.1 mrg
/* Force the division x / y to be evaluated at runtime, so that any
   floating-point exception it raises is actually signalled.  The first
   asm makes x opaque to the optimizer (so the division cannot be
   constant-folded); the second consumes the quotient so it cannot be
   dead-code eliminated.  The register constraints differ by unit:
   "x" keeps the operand in an SSE register, "t"/"f" on the x87 stack,
   so the exception is raised on the unit the compiler uses for math.  */
#ifdef __SSE_MATH__
# define __math_force_eval_div(x, y) \
  do { \
    __asm__ ("" : "+x" (x)); __asm__ __volatile__ ("" : : "x" (x / y)); \
  } while (0)
#else
# define __math_force_eval_div(x, y) \
  do { \
    __asm__ ("" : "+t" (x)); __asm__ __volatile__ ("" : : "f" (x / y)); \
  } while (0)
#endif
105 1.1 mrg
/* Raise the supported floating-point exceptions from EXCEPTS.  Other
   bits in EXCEPTS are ignored.  Code originally borrowed from
   libatomic/config/x86/fenv.c.

   Invalid, zero-divide and inexact are raised by performing an actual
   division that triggers them; denormal, overflow and underflow have no
   such cheap arithmetic trigger, so they are raised by setting the flag
   in a stored x87 environment, reloading it, and letting FWAIT deliver
   the now-pending exception.  */

static void
local_feraiseexcept (int excepts)
{
  struct fenv temp;

  if (excepts & _FPU_MASK_IM)
    {
      /* 0.0f / 0.0f raises invalid.  */
      float f = 0.0f;
      __math_force_eval_div (f, f);
    }
  if (excepts & _FPU_MASK_DM)
    {
      /* Set the denormal flag in the saved environment and reload it;
	 FWAIT then services the unmasked pending exception.  */
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_DM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_ZM)
    {
      /* 1.0f / 0.0f raises divide-by-zero.  */
      float f = 1.0f, g = 0.0f;
      __math_force_eval_div (f, g);
    }
  if (excepts & _FPU_MASK_OM)
    {
      /* Same status-word technique as denormal, for overflow.  */
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_OM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_UM)
    {
      /* Same status-word technique as denormal, for underflow.  */
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_UM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_PM)
    {
      /* 1.0f / 3.0f is not exactly representable: raises inexact.  */
      float f = 1.0f, g = 3.0f;
      __math_force_eval_div (f, g);
    }
}
152 1.1 mrg
153 1.1 mrg
154 1.1 mrg void
155 1.1 mrg set_fpu_trap_exceptions (int trap, int notrap)
156 1.1 mrg {
157 1.1 mrg int exc_set = 0, exc_clr = 0;
158 1.1 mrg unsigned short cw;
159 1.1 mrg
160 1.1 mrg if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
161 1.1 mrg if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
162 1.1 mrg if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
163 1.1 mrg if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
164 1.1 mrg if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
165 1.1 mrg if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;
166 1.1 mrg
167 1.1 mrg if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
168 1.1 mrg if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
169 1.1 mrg if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
170 1.1 mrg if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
171 1.1 mrg if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
172 1.1 mrg if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
173 1.1 mrg
174 1.1 mrg __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
175 1.1 mrg
176 1.1 mrg cw |= exc_clr;
177 1.1 mrg cw &= ~exc_set;
178 1.1 mrg
179 1.1 mrg __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
180 1.1 mrg
181 1.1 mrg if (has_sse())
182 1.1 mrg {
183 1.1 mrg unsigned int cw_sse;
184 1.1 mrg
185 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
186 1.1 mrg
187 1.1 mrg /* The SSE exception masks are shifted by 7 bits. */
188 1.1 mrg cw_sse |= (exc_clr << 7);
189 1.1 mrg cw_sse &= ~(exc_set << 7);
190 1.1 mrg
191 1.1 mrg /* Clear stalled exception flags. */
192 1.1 mrg cw_sse &= ~_FPU_EX_ALL;
193 1.1 mrg
194 1.1 mrg __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
195 1.1 mrg }
196 1.1 mrg }
197 1.1 mrg
/* Set up FPU trapping according to the runtime options selected at
   program startup (options.fpe holds the GFC_FPE_* flags to trap).  */
void
set_fpu (void)
{
  set_fpu_trap_exceptions (options.fpe, 0);
}
203 1.1 mrg
204 1.1 mrg int
205 1.1 mrg get_fpu_trap_exceptions (void)
206 1.1 mrg {
207 1.1 mrg unsigned short cw;
208 1.1 mrg int mask;
209 1.1 mrg int res = 0;
210 1.1 mrg
211 1.1 mrg __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
212 1.1 mrg mask = cw;
213 1.1 mrg
214 1.1 mrg if (has_sse())
215 1.1 mrg {
216 1.1 mrg unsigned int cw_sse;
217 1.1 mrg
218 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
219 1.1 mrg
220 1.1 mrg /* The SSE exception masks are shifted by 7 bits. */
221 1.1 mrg mask |= (cw_sse >> 7);
222 1.1 mrg }
223 1.1 mrg
224 1.1 mrg mask = ~mask & _FPU_MASK_ALL;
225 1.1 mrg
226 1.1 mrg if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
227 1.1 mrg if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
228 1.1 mrg if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
229 1.1 mrg if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
230 1.1 mrg if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
231 1.1 mrg if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
232 1.1 mrg
233 1.1 mrg return res;
234 1.1 mrg }
235 1.1 mrg
/* Report whether trapping of exception FLAG is supported.  All six
   GFC_FPE_* exceptions can be trapped on x86, so always return 1.  */
int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}
241 1.1 mrg
242 1.1 mrg int
243 1.1 mrg get_fpu_except_flags (void)
244 1.1 mrg {
245 1.1 mrg unsigned short cw;
246 1.1 mrg int excepts;
247 1.1 mrg int res = 0;
248 1.1 mrg
249 1.1 mrg __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
250 1.1 mrg excepts = cw;
251 1.1 mrg
252 1.1 mrg if (has_sse())
253 1.1 mrg {
254 1.1 mrg unsigned int cw_sse;
255 1.1 mrg
256 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
257 1.1 mrg excepts |= cw_sse;
258 1.1 mrg }
259 1.1 mrg
260 1.1 mrg excepts &= _FPU_EX_ALL;
261 1.1 mrg
262 1.1 mrg if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
263 1.1 mrg if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
264 1.1 mrg if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
265 1.1 mrg if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
266 1.1 mrg if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
267 1.1 mrg if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
268 1.1 mrg
269 1.1 mrg return res;
270 1.1 mrg }
271 1.1 mrg
272 1.1 mrg void
273 1.1 mrg set_fpu_except_flags (int set, int clear)
274 1.1 mrg {
275 1.1.1.3 mrg struct fenv temp;
276 1.1 mrg int exc_set = 0, exc_clr = 0;
277 1.1 mrg
278 1.1 mrg /* Translate from GFC_PE_* values to _FPU_MASK_* values. */
279 1.1 mrg if (set & GFC_FPE_INVALID)
280 1.1 mrg exc_set |= _FPU_MASK_IM;
281 1.1 mrg if (clear & GFC_FPE_INVALID)
282 1.1 mrg exc_clr |= _FPU_MASK_IM;
283 1.1 mrg
284 1.1 mrg if (set & GFC_FPE_DENORMAL)
285 1.1 mrg exc_set |= _FPU_MASK_DM;
286 1.1 mrg if (clear & GFC_FPE_DENORMAL)
287 1.1 mrg exc_clr |= _FPU_MASK_DM;
288 1.1 mrg
289 1.1 mrg if (set & GFC_FPE_ZERO)
290 1.1 mrg exc_set |= _FPU_MASK_ZM;
291 1.1 mrg if (clear & GFC_FPE_ZERO)
292 1.1 mrg exc_clr |= _FPU_MASK_ZM;
293 1.1 mrg
294 1.1 mrg if (set & GFC_FPE_OVERFLOW)
295 1.1 mrg exc_set |= _FPU_MASK_OM;
296 1.1 mrg if (clear & GFC_FPE_OVERFLOW)
297 1.1 mrg exc_clr |= _FPU_MASK_OM;
298 1.1 mrg
299 1.1 mrg if (set & GFC_FPE_UNDERFLOW)
300 1.1 mrg exc_set |= _FPU_MASK_UM;
301 1.1 mrg if (clear & GFC_FPE_UNDERFLOW)
302 1.1 mrg exc_clr |= _FPU_MASK_UM;
303 1.1 mrg
304 1.1 mrg if (set & GFC_FPE_INEXACT)
305 1.1 mrg exc_set |= _FPU_MASK_PM;
306 1.1 mrg if (clear & GFC_FPE_INEXACT)
307 1.1 mrg exc_clr |= _FPU_MASK_PM;
308 1.1 mrg
309 1.1 mrg
310 1.1 mrg /* Change the flags. This is tricky on 387 (unlike SSE), because we have
311 1.1 mrg FNSTSW but no FLDSW instruction. */
312 1.1 mrg __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
313 1.1 mrg temp.__status_word &= ~exc_clr;
314 1.1 mrg __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
315 1.1 mrg
316 1.1 mrg /* Change the flags on SSE. */
317 1.1 mrg
318 1.1 mrg if (has_sse())
319 1.1 mrg {
320 1.1 mrg unsigned int cw_sse;
321 1.1 mrg
322 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
323 1.1 mrg cw_sse &= ~exc_clr;
324 1.1 mrg __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
325 1.1 mrg }
326 1.1 mrg
327 1.1 mrg local_feraiseexcept (exc_set);
328 1.1 mrg }
329 1.1 mrg
/* Report whether exception flag FLAG can be queried/set.  All six
   GFC_FPE_* flags are supported on x86, so always return 1.  */
int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}
335 1.1 mrg
336 1.1 mrg void
337 1.1 mrg set_fpu_rounding_mode (int round)
338 1.1 mrg {
339 1.1 mrg int round_mode;
340 1.1 mrg unsigned short cw;
341 1.1 mrg
342 1.1 mrg switch (round)
343 1.1 mrg {
344 1.1 mrg case GFC_FPE_TONEAREST:
345 1.1 mrg round_mode = _FPU_RC_NEAREST;
346 1.1 mrg break;
347 1.1 mrg case GFC_FPE_UPWARD:
348 1.1 mrg round_mode = _FPU_RC_UP;
349 1.1 mrg break;
350 1.1 mrg case GFC_FPE_DOWNWARD:
351 1.1 mrg round_mode = _FPU_RC_DOWN;
352 1.1 mrg break;
353 1.1 mrg case GFC_FPE_TOWARDZERO:
354 1.1 mrg round_mode = _FPU_RC_ZERO;
355 1.1 mrg break;
356 1.1 mrg default:
357 1.1 mrg return; /* Should be unreachable. */
358 1.1 mrg }
359 1.1 mrg
360 1.1 mrg __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
361 1.1 mrg
362 1.1 mrg /* The x87 round control bits are shifted by 10 bits. */
363 1.1 mrg cw &= ~(_FPU_RC_MASK << 10);
364 1.1 mrg cw |= round_mode << 10;
365 1.1 mrg
366 1.1 mrg __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
367 1.1 mrg
368 1.1 mrg if (has_sse())
369 1.1 mrg {
370 1.1 mrg unsigned int cw_sse;
371 1.1 mrg
372 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
373 1.1 mrg
374 1.1 mrg /* The SSE round control bits are shifted by 13 bits. */
375 1.1 mrg cw_sse &= ~(_FPU_RC_MASK << 13);
376 1.1 mrg cw_sse |= round_mode << 13;
377 1.1 mrg
378 1.1 mrg __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
379 1.1 mrg }
380 1.1 mrg }
381 1.1 mrg
382 1.1 mrg int
383 1.1 mrg get_fpu_rounding_mode (void)
384 1.1 mrg {
385 1.1 mrg int round_mode;
386 1.1 mrg
387 1.1 mrg #ifdef __SSE_MATH__
388 1.1 mrg unsigned int cw;
389 1.1 mrg
390 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
391 1.1 mrg
392 1.1 mrg /* The SSE round control bits are shifted by 13 bits. */
393 1.1 mrg round_mode = cw >> 13;
394 1.1 mrg #else
395 1.1 mrg unsigned short cw;
396 1.1 mrg
397 1.1 mrg __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
398 1.1 mrg
399 1.1 mrg /* The x87 round control bits are shifted by 10 bits. */
400 1.1 mrg round_mode = cw >> 10;
401 1.1 mrg #endif
402 1.1 mrg
403 1.1 mrg round_mode &= _FPU_RC_MASK;
404 1.1 mrg
405 1.1 mrg switch (round_mode)
406 1.1 mrg {
407 1.1 mrg case _FPU_RC_NEAREST:
408 1.1 mrg return GFC_FPE_TONEAREST;
409 1.1 mrg case _FPU_RC_UP:
410 1.1 mrg return GFC_FPE_UPWARD;
411 1.1 mrg case _FPU_RC_DOWN:
412 1.1 mrg return GFC_FPE_DOWNWARD;
413 1.1 mrg case _FPU_RC_ZERO:
414 1.1 mrg return GFC_FPE_TOWARDZERO;
415 1.1 mrg default:
416 1.1 mrg return 0; /* Should be unreachable. */
417 1.1 mrg }
418 1.1 mrg }
419 1.1 mrg
420 1.1 mrg int
421 1.1.1.4 mrg support_fpu_rounding_mode (int mode)
422 1.1 mrg {
423 1.1.1.4 mrg if (mode == GFC_FPE_AWAY)
424 1.1.1.4 mrg return 0;
425 1.1.1.4 mrg else
426 1.1.1.4 mrg return 1;
427 1.1 mrg }
428 1.1 mrg
/* Save the current FPU state (x87 environment plus MXCSR when SSE is
   available) into the caller-provided buffer STATE, which must be at
   least GFC_FPE_STATE_BUFFER_SIZE bytes.  */
void
get_fpu_state (void *state)
{
  struct fenv *envp = state;

  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));

  /* fnstenv has the side effect of masking all exceptions, so we need
     to restore the control word after that.  */
  __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));

  if (has_sse())
    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
}
443 1.1 mrg
/* Restore an FPU state previously saved by get_fpu_state from the
   buffer STATE (x87 environment plus MXCSR when SSE is available).  */
void
set_fpu_state (void *state)
{
  struct fenv *envp = state;

  /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     complex than this, but I think it suffices in our case.  */
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));

  if (has_sse())
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
}
456 1.1 mrg
457 1.1 mrg
/* Report whether abrupt/gradual underflow control is available for real
   kind KIND.  Only the SSE unit has a flush-to-zero bit, and it affects
   only single and double precision (kinds 4 and 8).  */
int
support_fpu_underflow_control (int kind)
{
  return has_sse () && (kind == 4 || kind == 8);
}
466 1.1 mrg
467 1.1 mrg
468 1.1 mrg int
469 1.1 mrg get_fpu_underflow_mode (void)
470 1.1 mrg {
471 1.1 mrg unsigned int cw_sse;
472 1.1 mrg
473 1.1 mrg if (!has_sse())
474 1.1 mrg return 1;
475 1.1 mrg
476 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
477 1.1 mrg
478 1.1 mrg /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow. */
479 1.1 mrg return (cw_sse & MXCSR_FTZ) ? 0 : 1;
480 1.1 mrg }
481 1.1 mrg
482 1.1 mrg
483 1.1 mrg void
484 1.1 mrg set_fpu_underflow_mode (int gradual)
485 1.1 mrg {
486 1.1 mrg unsigned int cw_sse;
487 1.1 mrg
488 1.1 mrg if (!has_sse())
489 1.1 mrg return;
490 1.1 mrg
491 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
492 1.1 mrg
493 1.1 mrg if (gradual)
494 1.1 mrg cw_sse &= ~MXCSR_FTZ;
495 1.1 mrg else
496 1.1 mrg cw_sse |= MXCSR_FTZ;
497 1.1 mrg
498 1.1 mrg __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
499 1.1 mrg }
500 1.1 mrg
501