/* FPU-related code for x86 and x86_64 processors.
   Copyright (C) 2005-2019 Free Software Foundation, Inc.
   Contributed by Francois-Xavier Coudert <coudert (at) clipper.ens.fr>

This file is part of the GNU Fortran 95 runtime library (libgfortran).

Libgfortran is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

Libgfortran is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
25 1.1 mrg
26 1.1 mrg #ifndef __SSE_MATH__
27 1.1 mrg #include "cpuid.h"
28 1.1 mrg #endif
29 1.1 mrg
/* Query whether the processor supports SSE.  When the compiler already
   targets SSE math the answer is known at compile time; otherwise ask
   the CPU via CPUID.  */
static int
has_sse (void)
{
#ifdef __SSE_MATH__
  /* Compiled with SSE math enabled: SSE is guaranteed present.  */
  return 1;
#else
  unsigned int eax, ebx, ecx, edx;

  /* CPUID leaf 1 reports the feature flags in EDX; a zero return
     means the leaf is unsupported, so assume no SSE.  */
  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 0;

  return edx & bit_SSE;
#endif
}
44 1.1 mrg
/* i387 exceptions -- see linux <fpu_control.h> header file for details.
   These bit positions are shared by the control-word mask bits and the
   status-word flag bits.  */
#define _FPU_MASK_IM 0x01   /* Invalid operation.  */
#define _FPU_MASK_DM 0x02   /* Denormalized operand.  */
#define _FPU_MASK_ZM 0x04   /* Division by zero.  */
#define _FPU_MASK_OM 0x08   /* Overflow.  */
#define _FPU_MASK_UM 0x10   /* Underflow.  */
#define _FPU_MASK_PM 0x20   /* Precision (inexact result).  */
#define _FPU_MASK_ALL 0x3f  /* All six exception mask bits.  */

/* All six exception flag bits (same positions as the masks above).  */
#define _FPU_EX_ALL 0x3f

/* i387 rounding modes.  */

#define _FPU_RC_NEAREST 0x0  /* Round to nearest (even).  */
#define _FPU_RC_DOWN 0x1     /* Round toward -infinity.  */
#define _FPU_RC_UP 0x2       /* Round toward +infinity.  */
#define _FPU_RC_ZERO 0x3     /* Round toward zero (truncate).  */

#define _FPU_RC_MASK 0x3     /* Mask covering the two RC bits.  */

/* Enable flush to zero mode (MXCSR bit 15).  */

#define MXCSR_FTZ (1 << 15)
68 1.1 mrg
69 1.1 mrg
/* This structure corresponds to the layout of the block
   written by FSTENV, with the SSE control/status register
   appended at the end.  */
typedef struct
{
  unsigned short int __control_word;  /* x87 control word: exception masks, RC bits.  */
  unsigned short int __unused1;
  unsigned short int __status_word;   /* x87 status word: sticky exception flags.  */
  unsigned short int __unused2;
  unsigned short int __tags;          /* x87 register tag word.  */
  unsigned short int __unused3;
  unsigned int __eip;                 /* Last FPU instruction pointer.  */
  unsigned short int __cs_selector;
  unsigned short int __opcode;
  unsigned int __data_offset;         /* Last FPU operand pointer.  */
  unsigned short int __data_selector;
  unsigned short int __unused5;
  unsigned int __mxcsr;               /* Not written by FSTENV: saved SSE control/status.  */
}
my_fenv_t;

/* Check we can actually store the FPU state in the allocated size.  */
_Static_assert (sizeof(my_fenv_t) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
		"GFC_FPE_STATE_BUFFER_SIZE is too small");
93 1.1 mrg
94 1.1 mrg
/* Raise the supported floating-point exceptions from EXCEPTS.  Other
   bits in EXCEPTS are ignored.  Code originally borrowed from
   libatomic/config/x86/fenv.c.  Exceptions with a natural arithmetic
   trigger (invalid, zero-divide, inexact) are raised by performing the
   offending operation; denormal, overflow and underflow are raised by
   setting the flag bit directly in a saved x87 environment and
   reloading it (the status-word flag bits share positions with the
   _FPU_MASK_* values).  */

static void
local_feraiseexcept (int excepts)
{
  /* Invalid operation: 0.0 / 0.0.  */
  if (excepts & _FPU_MASK_IM)
    {
      float f = 0.0f;
#ifdef __SSE_MATH__
      volatile float r __attribute__ ((unused));
      __asm__ __volatile__ ("%vdivss\t{%0, %d0|%d0, %0}" : "+x" (f));
      r = f; /* Needed to trigger exception.   */
#else
      __asm__ __volatile__ ("fdiv\t{%y0, %0|%0, %y0}" : "+t" (f));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
  /* Denormal: no convenient arithmetic trigger, so set the status-word
     flag via fnstenv/fldenv; fwait makes the exception pending.  */
  if (excepts & _FPU_MASK_DM)
    {
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_DM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  /* Division by zero: 1.0 / 0.0.  */
  if (excepts & _FPU_MASK_ZM)
    {
      float f = 1.0f, g = 0.0f;
#ifdef __SSE_MATH__
      volatile float r __attribute__ ((unused));
      __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
      r = f; /* Needed to trigger exception.   */
#else
      __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
  /* Overflow: set the status-word flag directly, as for denormal.  */
  if (excepts & _FPU_MASK_OM)
    {
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_OM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  /* Underflow: set the status-word flag directly, as for denormal.  */
  if (excepts & _FPU_MASK_UM)
    {
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_UM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  /* Inexact: 1.0 / 3.0 has no exact binary representation.  */
  if (excepts & _FPU_MASK_PM)
    {
      float f = 1.0f, g = 3.0f;
#ifdef __SSE_MATH__
      volatile float r __attribute__ ((unused));
      __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
      r = f; /* Needed to trigger exception.   */
#else
      __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
}
163 1.1 mrg
164 1.1 mrg
165 1.1 mrg void
166 1.1 mrg set_fpu_trap_exceptions (int trap, int notrap)
167 1.1 mrg {
168 1.1 mrg int exc_set = 0, exc_clr = 0;
169 1.1 mrg unsigned short cw;
170 1.1 mrg
171 1.1 mrg if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
172 1.1 mrg if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
173 1.1 mrg if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
174 1.1 mrg if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
175 1.1 mrg if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
176 1.1 mrg if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;
177 1.1 mrg
178 1.1 mrg if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
179 1.1 mrg if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
180 1.1 mrg if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
181 1.1 mrg if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
182 1.1 mrg if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
183 1.1 mrg if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
184 1.1 mrg
185 1.1 mrg __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
186 1.1 mrg
187 1.1 mrg cw |= exc_clr;
188 1.1 mrg cw &= ~exc_set;
189 1.1 mrg
190 1.1 mrg __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
191 1.1 mrg
192 1.1 mrg if (has_sse())
193 1.1 mrg {
194 1.1 mrg unsigned int cw_sse;
195 1.1 mrg
196 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
197 1.1 mrg
198 1.1 mrg /* The SSE exception masks are shifted by 7 bits. */
199 1.1 mrg cw_sse |= (exc_clr << 7);
200 1.1 mrg cw_sse &= ~(exc_set << 7);
201 1.1 mrg
202 1.1 mrg /* Clear stalled exception flags. */
203 1.1 mrg cw_sse &= ~_FPU_EX_ALL;
204 1.1 mrg
205 1.1 mrg __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
206 1.1 mrg }
207 1.1 mrg }
208 1.1 mrg
/* Apply the FPE trapping requested on the command line: enable the
   traps selected in options.fpe, disable none.  */
void
set_fpu (void)
{
  set_fpu_trap_exceptions (options.fpe, 0);
}
214 1.1 mrg
215 1.1 mrg int
216 1.1 mrg get_fpu_trap_exceptions (void)
217 1.1 mrg {
218 1.1 mrg unsigned short cw;
219 1.1 mrg int mask;
220 1.1 mrg int res = 0;
221 1.1 mrg
222 1.1 mrg __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
223 1.1 mrg mask = cw;
224 1.1 mrg
225 1.1 mrg if (has_sse())
226 1.1 mrg {
227 1.1 mrg unsigned int cw_sse;
228 1.1 mrg
229 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
230 1.1 mrg
231 1.1 mrg /* The SSE exception masks are shifted by 7 bits. */
232 1.1 mrg mask |= (cw_sse >> 7);
233 1.1 mrg }
234 1.1 mrg
235 1.1 mrg mask = ~mask & _FPU_MASK_ALL;
236 1.1 mrg
237 1.1 mrg if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
238 1.1 mrg if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
239 1.1 mrg if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
240 1.1 mrg if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
241 1.1 mrg if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
242 1.1 mrg if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
243 1.1 mrg
244 1.1 mrg return res;
245 1.1 mrg }
246 1.1 mrg
/* Trapping is supported for every exception FLAG on this target.  */
int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}
252 1.1 mrg
253 1.1 mrg int
254 1.1 mrg get_fpu_except_flags (void)
255 1.1 mrg {
256 1.1 mrg unsigned short cw;
257 1.1 mrg int excepts;
258 1.1 mrg int res = 0;
259 1.1 mrg
260 1.1 mrg __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
261 1.1 mrg excepts = cw;
262 1.1 mrg
263 1.1 mrg if (has_sse())
264 1.1 mrg {
265 1.1 mrg unsigned int cw_sse;
266 1.1 mrg
267 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
268 1.1 mrg excepts |= cw_sse;
269 1.1 mrg }
270 1.1 mrg
271 1.1 mrg excepts &= _FPU_EX_ALL;
272 1.1 mrg
273 1.1 mrg if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
274 1.1 mrg if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
275 1.1 mrg if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
276 1.1 mrg if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
277 1.1 mrg if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
278 1.1 mrg if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
279 1.1 mrg
280 1.1 mrg return res;
281 1.1 mrg }
282 1.1 mrg
283 1.1 mrg void
284 1.1 mrg set_fpu_except_flags (int set, int clear)
285 1.1 mrg {
286 1.1 mrg my_fenv_t temp;
287 1.1 mrg int exc_set = 0, exc_clr = 0;
288 1.1 mrg
289 1.1 mrg /* Translate from GFC_PE_* values to _FPU_MASK_* values. */
290 1.1 mrg if (set & GFC_FPE_INVALID)
291 1.1 mrg exc_set |= _FPU_MASK_IM;
292 1.1 mrg if (clear & GFC_FPE_INVALID)
293 1.1 mrg exc_clr |= _FPU_MASK_IM;
294 1.1 mrg
295 1.1 mrg if (set & GFC_FPE_DENORMAL)
296 1.1 mrg exc_set |= _FPU_MASK_DM;
297 1.1 mrg if (clear & GFC_FPE_DENORMAL)
298 1.1 mrg exc_clr |= _FPU_MASK_DM;
299 1.1 mrg
300 1.1 mrg if (set & GFC_FPE_ZERO)
301 1.1 mrg exc_set |= _FPU_MASK_ZM;
302 1.1 mrg if (clear & GFC_FPE_ZERO)
303 1.1 mrg exc_clr |= _FPU_MASK_ZM;
304 1.1 mrg
305 1.1 mrg if (set & GFC_FPE_OVERFLOW)
306 1.1 mrg exc_set |= _FPU_MASK_OM;
307 1.1 mrg if (clear & GFC_FPE_OVERFLOW)
308 1.1 mrg exc_clr |= _FPU_MASK_OM;
309 1.1 mrg
310 1.1 mrg if (set & GFC_FPE_UNDERFLOW)
311 1.1 mrg exc_set |= _FPU_MASK_UM;
312 1.1 mrg if (clear & GFC_FPE_UNDERFLOW)
313 1.1 mrg exc_clr |= _FPU_MASK_UM;
314 1.1 mrg
315 1.1 mrg if (set & GFC_FPE_INEXACT)
316 1.1 mrg exc_set |= _FPU_MASK_PM;
317 1.1 mrg if (clear & GFC_FPE_INEXACT)
318 1.1 mrg exc_clr |= _FPU_MASK_PM;
319 1.1 mrg
320 1.1 mrg
321 1.1 mrg /* Change the flags. This is tricky on 387 (unlike SSE), because we have
322 1.1 mrg FNSTSW but no FLDSW instruction. */
323 1.1 mrg __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
324 1.1 mrg temp.__status_word &= ~exc_clr;
325 1.1 mrg __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
326 1.1 mrg
327 1.1 mrg /* Change the flags on SSE. */
328 1.1 mrg
329 1.1 mrg if (has_sse())
330 1.1 mrg {
331 1.1 mrg unsigned int cw_sse;
332 1.1 mrg
333 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
334 1.1 mrg cw_sse &= ~exc_clr;
335 1.1 mrg __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
336 1.1 mrg }
337 1.1 mrg
338 1.1 mrg local_feraiseexcept (exc_set);
339 1.1 mrg }
340 1.1 mrg
/* Every exception FLAG can be queried and set on this target.  */
int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}
346 1.1 mrg
347 1.1 mrg void
348 1.1 mrg set_fpu_rounding_mode (int round)
349 1.1 mrg {
350 1.1 mrg int round_mode;
351 1.1 mrg unsigned short cw;
352 1.1 mrg
353 1.1 mrg switch (round)
354 1.1 mrg {
355 1.1 mrg case GFC_FPE_TONEAREST:
356 1.1 mrg round_mode = _FPU_RC_NEAREST;
357 1.1 mrg break;
358 1.1 mrg case GFC_FPE_UPWARD:
359 1.1 mrg round_mode = _FPU_RC_UP;
360 1.1 mrg break;
361 1.1 mrg case GFC_FPE_DOWNWARD:
362 1.1 mrg round_mode = _FPU_RC_DOWN;
363 1.1 mrg break;
364 1.1 mrg case GFC_FPE_TOWARDZERO:
365 1.1 mrg round_mode = _FPU_RC_ZERO;
366 1.1 mrg break;
367 1.1 mrg default:
368 1.1 mrg return; /* Should be unreachable. */
369 1.1 mrg }
370 1.1 mrg
371 1.1 mrg __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
372 1.1 mrg
373 1.1 mrg /* The x87 round control bits are shifted by 10 bits. */
374 1.1 mrg cw &= ~(_FPU_RC_MASK << 10);
375 1.1 mrg cw |= round_mode << 10;
376 1.1 mrg
377 1.1 mrg __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
378 1.1 mrg
379 1.1 mrg if (has_sse())
380 1.1 mrg {
381 1.1 mrg unsigned int cw_sse;
382 1.1 mrg
383 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
384 1.1 mrg
385 1.1 mrg /* The SSE round control bits are shifted by 13 bits. */
386 1.1 mrg cw_sse &= ~(_FPU_RC_MASK << 13);
387 1.1 mrg cw_sse |= round_mode << 13;
388 1.1 mrg
389 1.1 mrg __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
390 1.1 mrg }
391 1.1 mrg }
392 1.1 mrg
393 1.1 mrg int
394 1.1 mrg get_fpu_rounding_mode (void)
395 1.1 mrg {
396 1.1 mrg int round_mode;
397 1.1 mrg
398 1.1 mrg #ifdef __SSE_MATH__
399 1.1 mrg unsigned int cw;
400 1.1 mrg
401 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
402 1.1 mrg
403 1.1 mrg /* The SSE round control bits are shifted by 13 bits. */
404 1.1 mrg round_mode = cw >> 13;
405 1.1 mrg #else
406 1.1 mrg unsigned short cw;
407 1.1 mrg
408 1.1 mrg __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
409 1.1 mrg
410 1.1 mrg /* The x87 round control bits are shifted by 10 bits. */
411 1.1 mrg round_mode = cw >> 10;
412 1.1 mrg #endif
413 1.1 mrg
414 1.1 mrg round_mode &= _FPU_RC_MASK;
415 1.1 mrg
416 1.1 mrg switch (round_mode)
417 1.1 mrg {
418 1.1 mrg case _FPU_RC_NEAREST:
419 1.1 mrg return GFC_FPE_TONEAREST;
420 1.1 mrg case _FPU_RC_UP:
421 1.1 mrg return GFC_FPE_UPWARD;
422 1.1 mrg case _FPU_RC_DOWN:
423 1.1 mrg return GFC_FPE_DOWNWARD;
424 1.1 mrg case _FPU_RC_ZERO:
425 1.1 mrg return GFC_FPE_TOWARDZERO;
426 1.1 mrg default:
427 1.1 mrg return 0; /* Should be unreachable. */
428 1.1 mrg }
429 1.1 mrg }
430 1.1 mrg
/* Every rounding MODE defined by the front end is available here.  */
int
support_fpu_rounding_mode (int mode __attribute__((unused)))
{
  return 1;
}
436 1.1 mrg
/* Save the complete FPU state (x87 environment plus, when SSE is
   available, MXCSR) into STATE, which must point to a buffer of at
   least sizeof (my_fenv_t) bytes (checked by the _Static_assert
   above against GFC_FPE_STATE_BUFFER_SIZE).  */
void
get_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));

  /* fnstenv has the side effect of masking all exceptions, so we need
     to restore the control word after that.  */
  __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));

  if (has_sse())
    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
}
451 1.1 mrg
/* Restore an FPU state previously captured by get_fpu_state from the
   buffer STATE: reload the x87 environment and, when SSE is
   available, MXCSR.  */
void
set_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     complex than this, but I think it suffices in our case.  */
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));

  if (has_sse())
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
}
464 1.1 mrg
465 1.1 mrg
/* Underflow-mode control is implemented via the MXCSR FTZ bit, so it
   requires SSE and only covers the real kinds handled in SSE
   registers (4 and 8).  Returns 1 if controllable for KIND, else 0.  */

int
support_fpu_underflow_control (int kind)
{
  return has_sse () && (kind == 4 || kind == 8);
}
474 1.1 mrg
475 1.1 mrg
476 1.1 mrg int
477 1.1 mrg get_fpu_underflow_mode (void)
478 1.1 mrg {
479 1.1 mrg unsigned int cw_sse;
480 1.1 mrg
481 1.1 mrg if (!has_sse())
482 1.1 mrg return 1;
483 1.1 mrg
484 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
485 1.1 mrg
486 1.1 mrg /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow. */
487 1.1 mrg return (cw_sse & MXCSR_FTZ) ? 0 : 1;
488 1.1 mrg }
489 1.1 mrg
490 1.1 mrg
491 1.1 mrg void
492 1.1 mrg set_fpu_underflow_mode (int gradual)
493 1.1 mrg {
494 1.1 mrg unsigned int cw_sse;
495 1.1 mrg
496 1.1 mrg if (!has_sse())
497 1.1 mrg return;
498 1.1 mrg
499 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
500 1.1 mrg
501 1.1 mrg if (gradual)
502 1.1 mrg cw_sse &= ~MXCSR_FTZ;
503 1.1 mrg else
504 1.1 mrg cw_sse |= MXCSR_FTZ;
505 1.1 mrg
506 1.1 mrg __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
507 1.1 mrg }
508 1.1 mrg
509