ieee754-sf.S revision 1.1.1.13 1 1.1 mrg /* IEEE-754 single-precision functions for Xtensa
2 1.1.1.13 mrg Copyright (C) 2006-2024 Free Software Foundation, Inc.
3 1.1 mrg Contributed by Bob Wilson (bwilson (at) tensilica.com) at Tensilica.
4 1.1 mrg
5 1.1 mrg This file is part of GCC.
6 1.1 mrg
7 1.1 mrg GCC is free software; you can redistribute it and/or modify it
8 1.1 mrg under the terms of the GNU General Public License as published by
9 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
10 1.1 mrg any later version.
11 1.1 mrg
12 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT
13 1.1 mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 1.1 mrg License for more details.
16 1.1 mrg
17 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
18 1.1 mrg permissions described in the GCC Runtime Library Exception, version
19 1.1 mrg 3.1, as published by the Free Software Foundation.
20 1.1 mrg
21 1.1 mrg You should have received a copy of the GNU General Public License and
22 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
23 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 1.1 mrg <http://www.gnu.org/licenses/>. */
25 1.1 mrg
26 1.1 mrg #ifdef __XTENSA_EB__
27 1.1 mrg #define xh a2
28 1.1 mrg #define xl a3
29 1.1 mrg #define yh a4
30 1.1 mrg #define yl a5
31 1.1 mrg #else
32 1.1 mrg #define xh a3
33 1.1 mrg #define xl a2
34 1.1 mrg #define yh a5
35 1.1 mrg #define yl a4
36 1.1 mrg #endif
37 1.1 mrg
38 1.1 mrg /* Warning! The branch displacements for some Xtensa branch instructions
39 1.1 mrg are quite small, and this code has been carefully laid out to keep
40 1.1 mrg branch targets in range. If you change anything, be sure to check that
41 1.1 mrg the assembler is not relaxing anything to branch over a jump. */
42 1.1 mrg
43 1.1 mrg #ifdef L_negsf2
44 1.1 mrg
/* __negsf2(float x in a2) -> a2: single-precision negation.
   Flips the sign bit (bit 31) of a2; a4 is a scratch register for
   the mask.  Works unchanged on NaN/Inf/zero since only the sign
   bit is touched.  */
45 1.1 mrg 	.align	4
46 1.1 mrg 	.global	__negsf2
47 1.1 mrg 	.type	__negsf2, @function
48 1.1 mrg __negsf2:
49 1.1 mrg 	leaf_entry sp, 16
50 1.1 mrg 	movi	a4, 0x80000000
51 1.1 mrg 	xor	a2, a2, a4
52 1.1 mrg 	leaf_return
53 1.1 mrg
54 1.1 mrg #endif /* L_negsf2 */
55 1.1 mrg
56 1.1 mrg #ifdef L_addsubsf3
57 1.1 mrg
58 1.1.1.5 mrg 	.literal_position
59 1.1 mrg /* Addition */
60 1.1 mrg __addsf3_aux:
61 1.1 mrg 
62 1.1 mrg 	/* Handle NaNs and Infinities.  (This code is placed before the
63 1.1 mrg 	   start of the function just to keep it in range of the limited
64 1.1 mrg 	   branch displacements.)  */
65 1.1 mrg 
66 1.1 mrg .Ladd_xnan_or_inf:
67 1.1 mrg 	/* If y is neither Infinity nor NaN, return x.  */
68 1.1.1.5 mrg 	bnall	a3, a6, .Ladd_return_nan_or_inf
69 1.1 mrg 	/* If x is a NaN, return it.  Otherwise, return y.  */
70 1.1 mrg 	slli	a7, a2, 9
71 1.1.1.5 mrg 	bnez	a7, .Ladd_return_nan
72 1.1 mrg 
73 1.1 mrg .Ladd_ynan_or_inf:
74 1.1 mrg 	/* Return y.  */
75 1.1 mrg 	mov	a2, a3
76 1.1.1.5 mrg 
77 1.1.1.5 mrg .Ladd_return_nan_or_inf:
78 1.1.1.5 mrg 	slli	a7, a2, 9
79 1.1.1.5 mrg 	bnez	a7, .Ladd_return_nan
80 1.1.1.5 mrg 	leaf_return
81 1.1.1.5 mrg 
82 1.1.1.5 mrg .Ladd_return_nan:
83 1.1.1.5 mrg 	movi	a6, 0x400000	/* make it a quiet NaN */
84 1.1.1.5 mrg 	or	a2, a2, a6
85 1.1 mrg 	leaf_return
86 1.1 mrg 
87 1.1 mrg .Ladd_opposite_signs:
88 1.1 mrg 	/* Operand signs differ.  Do a subtraction.  */
89 1.1 mrg 	slli	a7, a6, 8
90 1.1 mrg 	xor	a3, a3, a7
91 1.1 mrg 	j	.Lsub_same_sign
92 1.1 mrg 
/* __addsf3(x=a2, y=a3) -> a2.  Register roles throughout:
   a6 = 0x7f800000 (exponent-field mask), a7/a8 = (sign+)exponent
   fields of x/y, a9 = guard bits shifted out of the smaller operand
   (used for round-to-nearest-even), a10 = exponent difference.  */
93 1.1 mrg 	.align	4
94 1.1 mrg 	.global	__addsf3
95 1.1 mrg 	.type	__addsf3, @function
96 1.1 mrg __addsf3:
97 1.1 mrg 	leaf_entry sp, 16
98 1.1 mrg 	movi	a6, 0x7f800000
99 1.1 mrg 
100 1.1 mrg 	/* Check if the two operands have the same sign.  */
101 1.1 mrg 	xor	a7, a2, a3
102 1.1 mrg 	bltz	a7, .Ladd_opposite_signs
103 1.1 mrg 
104 1.1 mrg .Ladd_same_sign:	
105 1.1 mrg 	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
106 1.1 mrg 	ball	a2, a6, .Ladd_xnan_or_inf
107 1.1 mrg 	ball	a3, a6, .Ladd_ynan_or_inf
108 1.1 mrg 
109 1.1 mrg 	/* Compare the exponents.  The smaller operand will be shifted
110 1.1 mrg 	   right by the exponent difference and added to the larger
111 1.1 mrg 	   one.  */
112 1.1 mrg 	extui	a7, a2, 23, 9
113 1.1 mrg 	extui	a8, a3, 23, 9
114 1.1 mrg 	bltu	a7, a8, .Ladd_shiftx
115 1.1 mrg 
116 1.1 mrg .Ladd_shifty:
117 1.1 mrg 	/* Check if the smaller (or equal) exponent is zero.  */
118 1.1 mrg 	bnone	a3, a6, .Ladd_yexpzero
119 1.1 mrg 
120 1.1 mrg 	/* Replace y sign/exponent with 0x008.  */
121 1.1 mrg 	or	a3, a3, a6
122 1.1 mrg 	slli	a3, a3, 8
123 1.1 mrg 	srli	a3, a3, 8
124 1.1 mrg 
125 1.1 mrg .Ladd_yexpdiff:
126 1.1 mrg 	/* Compute the exponent difference.  */
127 1.1 mrg 	sub	a10, a7, a8
128 1.1 mrg 
129 1.1 mrg 	/* Exponent difference > 32 -- just return the bigger value.  */
130 1.1 mrg 	bgeui	a10, 32, 1f
131 1.1 mrg 
132 1.1 mrg 	/* Shift y right by the exponent difference.  Any bits that are
133 1.1 mrg 	   shifted out of y are saved in a9 for rounding the result.  */
134 1.1 mrg 	ssr	a10
135 1.1 mrg 	movi	a9, 0
136 1.1 mrg 	src	a9, a3, a9
137 1.1 mrg 	srl	a3, a3
138 1.1 mrg 
139 1.1 mrg 	/* Do the addition.  */
140 1.1 mrg 	add	a2, a2, a3
141 1.1 mrg 
142 1.1 mrg 	/* Check if the add overflowed into the exponent.  */
143 1.1 mrg 	extui	a10, a2, 23, 9
144 1.1 mrg 	beq	a10, a7, .Ladd_round
145 1.1 mrg 	mov	a8, a7
146 1.1 mrg 	j	.Ladd_carry
147 1.1 mrg 
148 1.1 mrg .Ladd_yexpzero:
149 1.1 mrg 	/* y is a subnormal value.  Replace its sign/exponent with zero,
150 1.1 mrg 	   i.e., no implicit "1.0", and increment the apparent exponent
151 1.1 mrg 	   because subnormals behave as if they had the minimum (nonzero)
152 1.1 mrg 	   exponent.  Test for the case when both exponents are zero.  */
153 1.1 mrg 	slli	a3, a3, 9
154 1.1 mrg 	srli	a3, a3, 9
155 1.1 mrg 	bnone	a2, a6, .Ladd_bothexpzero
156 1.1 mrg 	addi	a8, a8, 1
157 1.1 mrg 	j	.Ladd_yexpdiff
158 1.1 mrg 
159 1.1 mrg .Ladd_bothexpzero:
160 1.1 mrg 	/* Both exponents are zero.  Handle this as a special case.  There
161 1.1 mrg 	   is no need to shift or round, and the normal code for handling
162 1.1 mrg 	   a carry into the exponent field will not work because it
163 1.1 mrg 	   assumes there is an implicit "1.0" that needs to be added.  */
164 1.1 mrg 	add	a2, a2, a3
165 1.1 mrg 1:	leaf_return
166 1.1 mrg 
167 1.1 mrg .Ladd_xexpzero:
168 1.1 mrg 	/* Same as "yexpzero" except skip handling the case when both
169 1.1 mrg 	   exponents are zero.  */
170 1.1 mrg 	slli	a2, a2, 9
171 1.1 mrg 	srli	a2, a2, 9
172 1.1 mrg 	addi	a7, a7, 1
173 1.1 mrg 	j	.Ladd_xexpdiff
174 1.1 mrg 
175 1.1 mrg .Ladd_shiftx:
176 1.1 mrg 	/* Same thing as the "shifty" code, but with x and y swapped.  Also,
177 1.1 mrg 	   because the exponent difference is always nonzero in this version,
178 1.1 mrg 	   the shift sequence can use SLL and skip loading a constant zero.  */
179 1.1 mrg 	bnone	a2, a6, .Ladd_xexpzero
180 1.1 mrg 
181 1.1 mrg 	or	a2, a2, a6
182 1.1 mrg 	slli	a2, a2, 8
183 1.1 mrg 	srli	a2, a2, 8
184 1.1 mrg 
185 1.1 mrg .Ladd_xexpdiff:
186 1.1 mrg 	sub	a10, a8, a7
187 1.1 mrg 	bgeui	a10, 32, .Ladd_returny
188 1.1 mrg 
189 1.1 mrg 	ssr	a10
190 1.1 mrg 	sll	a9, a2
191 1.1 mrg 	srl	a2, a2
192 1.1 mrg 
193 1.1 mrg 	add	a2, a2, a3
194 1.1 mrg 
195 1.1 mrg 	/* Check if the add overflowed into the exponent.  */
196 1.1 mrg 	extui	a10, a2, 23, 9
197 1.1 mrg 	bne	a10, a8, .Ladd_carry
198 1.1 mrg 
199 1.1 mrg .Ladd_round:
200 1.1 mrg 	/* Round up if the leftover fraction is >= 1/2.  */
201 1.1 mrg 	bgez	a9, 1f
202 1.1 mrg 	addi	a2, a2, 1
203 1.1 mrg 
204 1.1 mrg 	/* Check if the leftover fraction is exactly 1/2.  */
205 1.1 mrg 	slli	a9, a9, 1
206 1.1 mrg 	beqz	a9, .Ladd_exactlyhalf
207 1.1 mrg 1:	leaf_return
208 1.1 mrg 
209 1.1 mrg .Ladd_returny:
210 1.1 mrg 	mov	a2, a3
211 1.1 mrg 	leaf_return
212 1.1 mrg 
213 1.1 mrg .Ladd_carry:
214 1.1 mrg 	/* The addition has overflowed into the exponent field, so the
215 1.1 mrg 	   value needs to be renormalized.  The mantissa of the result
216 1.1 mrg 	   can be recovered by subtracting the original exponent and
217 1.1 mrg 	   adding 0x800000 (which is the explicit "1.0" for the
218 1.1 mrg 	   mantissa of the non-shifted operand -- the "1.0" for the
219 1.1 mrg 	   shifted operand was already added).  The mantissa can then
220 1.1 mrg 	   be shifted right by one bit.  The explicit "1.0" of the
221 1.1 mrg 	   shifted mantissa then needs to be replaced by the exponent,
222 1.1 mrg 	   incremented by one to account for the normalizing shift.
223 1.1 mrg 	   It is faster to combine these operations: do the shift first
224 1.1 mrg 	   and combine the additions and subtractions.  If x is the
225 1.1 mrg 	   original exponent, the result is:
226 1.1 mrg 	       shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
227 1.1 mrg 	   or:
228 1.1 mrg 	       shifted mantissa + ((x + 1) << 22)
229 1.1 mrg 	   Note that the exponent is incremented here by leaving the
230 1.1 mrg 	   explicit "1.0" of the mantissa in the exponent field.  */
231 1.1 mrg 
232 1.1 mrg 	/* Shift x right by one bit.  Save the lsb.  */
233 1.1 mrg 	mov	a10, a2
234 1.1 mrg 	srli	a2, a2, 1
235 1.1 mrg 
236 1.1 mrg 	/* See explanation above.  The original exponent is in a8.  */
237 1.1 mrg 	addi	a8, a8, 1
238 1.1 mrg 	slli	a8, a8, 22
239 1.1 mrg 	add	a2, a2, a8
240 1.1 mrg 
241 1.1 mrg 	/* Return an Infinity if the exponent overflowed.  */
242 1.1 mrg 	ball	a2, a6, .Ladd_infinity
243 1.1 mrg 
244 1.1 mrg 	/* Same thing as the "round" code except the msb of the leftover
245 1.1 mrg 	   fraction is bit 0 of a10, with the rest of the fraction in a9.  */
246 1.1 mrg 	bbci.l	a10, 0, 1f
247 1.1 mrg 	addi	a2, a2, 1
248 1.1 mrg 	beqz	a9, .Ladd_exactlyhalf
249 1.1 mrg 1:	leaf_return
250 1.1 mrg 
251 1.1 mrg .Ladd_infinity:
252 1.1 mrg 	/* Clear the mantissa.  */
253 1.1 mrg 	srli	a2, a2, 23
254 1.1 mrg 	slli	a2, a2, 23
255 1.1 mrg 
256 1.1 mrg 	/* The sign bit may have been lost in a carry-out.  Put it back.  */
257 1.1 mrg 	slli	a8, a8, 1
258 1.1 mrg 	or	a2, a2, a8
259 1.1 mrg 	leaf_return
260 1.1 mrg 
261 1.1 mrg .Ladd_exactlyhalf:
262 1.1 mrg 	/* Round down to the nearest even value.  */
263 1.1 mrg 	srli	a2, a2, 1
264 1.1 mrg 	slli	a2, a2, 1
265 1.1 mrg 	leaf_return
266 1.1 mrg
267 1.1 mrg
268 1.1 mrg /* Subtraction */
269 1.1 mrg __subsf3_aux:
270 1.1 mrg 
271 1.1 mrg 	/* Handle NaNs and Infinities.  (This code is placed before the
272 1.1 mrg 	   start of the function just to keep it in range of the limited
273 1.1 mrg 	   branch displacements.)  */
274 1.1 mrg 
275 1.1 mrg .Lsub_xnan_or_inf:
276 1.1 mrg 	/* If y is neither Infinity nor NaN, return x.  */
277 1.1.1.5 mrg 	bnall	a3, a6, .Lsub_return_nan_or_inf
278 1.1 mrg 	/* Both x and y are either NaN or Inf, so the result is NaN.  */
279 1.1.1.5 mrg 
280 1.1.1.5 mrg .Lsub_return_nan:
281 1.1 mrg 	movi	a4, 0x400000	/* make it a quiet NaN */
282 1.1 mrg 	or	a2, a2, a4
283 1.1.1.5 mrg 	leaf_return
284 1.1 mrg 
285 1.1 mrg .Lsub_ynan_or_inf:
286 1.1 mrg 	/* Negate y and return it.  */
287 1.1 mrg 	slli	a7, a6, 8
288 1.1 mrg 	xor	a2, a3, a7
289 1.1.1.5 mrg 
290 1.1.1.5 mrg .Lsub_return_nan_or_inf:
291 1.1.1.5 mrg 	slli	a7, a2, 9
292 1.1.1.5 mrg 	bnez	a7, .Lsub_return_nan
293 1.1 mrg 	leaf_return
294 1.1 mrg 
295 1.1 mrg .Lsub_opposite_signs:
296 1.1 mrg 	/* Operand signs differ.  Do an addition.  */
297 1.1 mrg 	slli	a7, a6, 8
298 1.1 mrg 	xor	a3, a3, a7
299 1.1 mrg 	j	.Ladd_same_sign
300 1.1 mrg 
/* __subsf3(x=a2, y=a3) -> a2.  Register roles mirror __addsf3:
   a6 = 0x7f800000, a7/a8 = exponent fields of x/y, a9 = guard bits
   shifted out of the smaller operand (negated to propagate the
   borrow), a10 = exponent difference / normalization shift count.  */
301 1.1 mrg 	.align	4
302 1.1 mrg 	.global	__subsf3
303 1.1 mrg 	.type	__subsf3, @function
304 1.1 mrg __subsf3:
305 1.1 mrg 	leaf_entry sp, 16
306 1.1 mrg 	movi	a6, 0x7f800000
307 1.1 mrg 
308 1.1 mrg 	/* Check if the two operands have the same sign.  */
309 1.1 mrg 	xor	a7, a2, a3
310 1.1 mrg 	bltz	a7, .Lsub_opposite_signs
311 1.1 mrg 
312 1.1 mrg .Lsub_same_sign:	
313 1.1 mrg 	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
314 1.1 mrg 	ball	a2, a6, .Lsub_xnan_or_inf
315 1.1 mrg 	ball	a3, a6, .Lsub_ynan_or_inf
316 1.1 mrg 
317 1.1 mrg 	/* Compare the operands.  In contrast to addition, the entire
318 1.1 mrg 	   value matters here.  */
319 1.1 mrg 	extui	a7, a2, 23, 8
320 1.1 mrg 	extui	a8, a3, 23, 8
321 1.1 mrg 	bltu	a2, a3, .Lsub_xsmaller
322 1.1 mrg 
323 1.1 mrg .Lsub_ysmaller:
324 1.1 mrg 	/* Check if the smaller (or equal) exponent is zero.  */
325 1.1 mrg 	bnone	a3, a6, .Lsub_yexpzero
326 1.1 mrg 
327 1.1 mrg 	/* Replace y sign/exponent with 0x008.  */
328 1.1 mrg 	or	a3, a3, a6
329 1.1 mrg 	slli	a3, a3, 8
330 1.1 mrg 	srli	a3, a3, 8
331 1.1 mrg 
332 1.1 mrg .Lsub_yexpdiff:
333 1.1 mrg 	/* Compute the exponent difference.  */
334 1.1 mrg 	sub	a10, a7, a8
335 1.1 mrg 
336 1.1 mrg 	/* Exponent difference > 32 -- just return the bigger value.  */
337 1.1 mrg 	bgeui	a10, 32, 1f
338 1.1 mrg 
339 1.1 mrg 	/* Shift y right by the exponent difference.  Any bits that are
340 1.1 mrg 	   shifted out of y are saved in a9 for rounding the result.  */
341 1.1 mrg 	ssr	a10
342 1.1 mrg 	movi	a9, 0
343 1.1 mrg 	src	a9, a3, a9
344 1.1 mrg 	srl	a3, a3
345 1.1 mrg 
346 1.1 mrg 	sub	a2, a2, a3
347 1.1 mrg 
348 1.1 mrg 	/* Subtract the leftover bits in a9 from zero and propagate any
349 1.1 mrg 	   borrow from a2.  */
350 1.1 mrg 	neg	a9, a9
351 1.1 mrg 	addi	a10, a2, -1
352 1.1 mrg 	movnez	a2, a10, a9
353 1.1 mrg 
354 1.1 mrg 	/* Check if the subtract underflowed into the exponent.  */
355 1.1 mrg 	extui	a10, a2, 23, 8
356 1.1 mrg 	beq	a10, a7, .Lsub_round
357 1.1 mrg 	j	.Lsub_borrow
358 1.1 mrg 
359 1.1 mrg .Lsub_yexpzero:
360 1.1 mrg 	/* Return zero if the inputs are equal.  (For the non-subnormal
361 1.1 mrg 	   case, subtracting the "1.0" will cause a borrow from the exponent
362 1.1 mrg 	   and this case can be detected when handling the borrow.)  */
363 1.1 mrg 	beq	a2, a3, .Lsub_return_zero
364 1.1 mrg 
365 1.1 mrg 	/* y is a subnormal value.  Replace its sign/exponent with zero,
366 1.1 mrg 	   i.e., no implicit "1.0".  Unless x is also a subnormal, increment
367 1.1 mrg 	   y's apparent exponent because subnormals behave as if they had
368 1.1 mrg 	   the minimum (nonzero) exponent.  */
369 1.1 mrg 	slli	a3, a3, 9
370 1.1 mrg 	srli	a3, a3, 9
371 1.1 mrg 	bnone	a2, a6, .Lsub_yexpdiff
372 1.1 mrg 	addi	a8, a8, 1
373 1.1 mrg 	j	.Lsub_yexpdiff
374 1.1 mrg 
375 1.1 mrg .Lsub_returny:
376 1.1 mrg 	/* Negate and return y.  */
377 1.1 mrg 	slli	a7, a6, 8
378 1.1 mrg 	xor	a2, a3, a7
379 1.1 mrg 1:	leaf_return
380 1.1 mrg 
381 1.1 mrg .Lsub_xsmaller:
382 1.1 mrg 	/* Same thing as the "ysmaller" code, but with x and y swapped and
383 1.1 mrg 	   with y negated.  */
384 1.1 mrg 	bnone	a2, a6, .Lsub_xexpzero
385 1.1 mrg 
386 1.1 mrg 	or	a2, a2, a6
387 1.1 mrg 	slli	a2, a2, 8
388 1.1 mrg 	srli	a2, a2, 8
389 1.1 mrg 
390 1.1 mrg .Lsub_xexpdiff:
391 1.1 mrg 	sub	a10, a8, a7
392 1.1 mrg 	bgeui	a10, 32, .Lsub_returny
393 1.1 mrg 
394 1.1 mrg 	ssr	a10
395 1.1 mrg 	movi	a9, 0
396 1.1 mrg 	src	a9, a2, a9
397 1.1 mrg 	srl	a2, a2
398 1.1 mrg 
399 1.1 mrg 	/* Negate y.  */
400 1.1 mrg 	slli	a11, a6, 8
401 1.1 mrg 	xor	a3, a3, a11
402 1.1 mrg 
403 1.1 mrg 	sub	a2, a3, a2
404 1.1 mrg 
405 1.1 mrg 	neg	a9, a9
406 1.1 mrg 	addi	a10, a2, -1
407 1.1 mrg 	movnez	a2, a10, a9
408 1.1 mrg 
409 1.1 mrg 	/* Check if the subtract underflowed into the exponent.  */
410 1.1 mrg 	extui	a10, a2, 23, 8
411 1.1 mrg 	bne	a10, a8, .Lsub_borrow
412 1.1 mrg 
413 1.1 mrg .Lsub_round:
414 1.1 mrg 	/* Round up if the leftover fraction is >= 1/2.  */
415 1.1 mrg 	bgez	a9, 1f
416 1.1 mrg 	addi	a2, a2, 1
417 1.1 mrg 
418 1.1 mrg 	/* Check if the leftover fraction is exactly 1/2.  */
419 1.1 mrg 	slli	a9, a9, 1
420 1.1 mrg 	beqz	a9, .Lsub_exactlyhalf
421 1.1 mrg 1:	leaf_return
422 1.1 mrg 
423 1.1 mrg .Lsub_xexpzero:
424 1.1 mrg 	/* Same as "yexpzero".  */
425 1.1 mrg 	beq	a2, a3, .Lsub_return_zero
426 1.1 mrg 	slli	a2, a2, 9
427 1.1 mrg 	srli	a2, a2, 9
428 1.1 mrg 	bnone	a3, a6, .Lsub_xexpdiff
429 1.1 mrg 	addi	a7, a7, 1
430 1.1 mrg 	j	.Lsub_xexpdiff
431 1.1 mrg 
432 1.1 mrg .Lsub_return_zero:
433 1.1 mrg 	movi	a2, 0
434 1.1 mrg 	leaf_return
435 1.1 mrg 
436 1.1 mrg .Lsub_borrow:
437 1.1 mrg 	/* The subtraction has underflowed into the exponent field, so the
438 1.1 mrg 	   value needs to be renormalized.  Shift the mantissa left as
439 1.1 mrg 	   needed to remove any leading zeros and adjust the exponent
440 1.1 mrg 	   accordingly.  If the exponent is not large enough to remove
441 1.1 mrg 	   all the leading zeros, the result will be a subnormal value.  */
442 1.1 mrg 
443 1.1 mrg 	slli	a8, a2, 9
444 1.1 mrg 	beqz	a8, .Lsub_xzero
445 1.1 mrg 	do_nsau	a6, a8, a7, a11
446 1.1 mrg 	srli	a8, a8, 9
447 1.1 mrg 	bge	a6, a10, .Lsub_subnormal
448 1.1 mrg 	addi	a6, a6, 1
449 1.1 mrg 
450 1.1 mrg .Lsub_normalize_shift:
451 1.1 mrg 	/* Shift the mantissa (a8/a9) left by a6.  */
452 1.1 mrg 	ssl	a6
453 1.1 mrg 	src	a8, a8, a9
454 1.1 mrg 	sll	a9, a9
455 1.1 mrg 
456 1.1 mrg 	/* Combine the shifted mantissa with the sign and exponent,
457 1.1 mrg 	   decrementing the exponent by a6.  (The exponent has already
458 1.1 mrg 	   been decremented by one due to the borrow from the subtraction,
459 1.1 mrg 	   but adding the mantissa will increment the exponent by one.)  */
460 1.1 mrg 	srli	a2, a2, 23
461 1.1 mrg 	sub	a2, a2, a6
462 1.1 mrg 	slli	a2, a2, 23
463 1.1 mrg 	add	a2, a2, a8
464 1.1 mrg 	j	.Lsub_round
465 1.1 mrg 
466 1.1 mrg .Lsub_exactlyhalf:
467 1.1 mrg 	/* Round down to the nearest even value.  */
468 1.1 mrg 	srli	a2, a2, 1
469 1.1 mrg 	slli	a2, a2, 1
470 1.1 mrg 	leaf_return
471 1.1 mrg 
472 1.1 mrg .Lsub_xzero:
473 1.1 mrg 	/* If there was a borrow from the exponent, and the mantissa and
474 1.1 mrg 	   guard digits are all zero, then the inputs were equal and the
475 1.1 mrg 	   result should be zero.  */
476 1.1 mrg 	beqz	a9, .Lsub_return_zero
477 1.1 mrg 
478 1.1 mrg 	/* Only the guard digit is nonzero.  Shift by min(24, a10).  */
479 1.1 mrg 	addi	a11, a10, -24
480 1.1 mrg 	movi	a6, 24
481 1.1 mrg 	movltz	a6, a10, a11
482 1.1 mrg 	j	.Lsub_normalize_shift
483 1.1 mrg 
484 1.1 mrg .Lsub_subnormal:
485 1.1 mrg 	/* The exponent is too small to shift away all the leading zeros.
486 1.1 mrg 	   Set a6 to the current exponent (which has already been
487 1.1 mrg 	   decremented by the borrow) so that the exponent of the result
488 1.1 mrg 	   will be zero.  Do not add 1 to a6 in this case, because: (1)
489 1.1 mrg 	   adding the mantissa will not increment the exponent, so there is
490 1.1 mrg 	   no need to subtract anything extra from the exponent to
491 1.1 mrg 	   compensate, and (2) the effective exponent of a subnormal is 1
492 1.1 mrg 	   not 0 so the shift amount must be 1 smaller than normal.  */
493 1.1 mrg 	mov	a6, a10
494 1.1 mrg 	j	.Lsub_normalize_shift
495 1.1 mrg
496 1.1 mrg #endif /* L_addsubsf3 */
497 1.1 mrg
498 1.1 mrg #ifdef L_mulsf3
499 1.1 mrg
500 1.1 mrg /* Multiplication */
501 1.1 mrg #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
502 1.1 mrg #define XCHAL_NO_MUL 1
503 1.1 mrg #endif
504 1.1 mrg
505 1.1.1.3 mrg 	.literal_position	
/* Special-case handlers for __mulsf3.  On entry here, a6 holds the
   exponent mask 0x7f800000 and a7 holds x XOR y (sign of the result),
   both set up at the __mulsf3 entry point below.  */
506 1.1 mrg __mulsf3_aux:
507 1.1 mrg 
508 1.1 mrg 	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
509 1.1 mrg 	   (This code is placed before the start of the function just to
510 1.1 mrg 	   keep it in range of the limited branch displacements.)  */
511 1.1 mrg 
512 1.1 mrg .Lmul_xexpzero:
513 1.1 mrg 	/* Clear the sign bit of x.  */
514 1.1 mrg 	slli	a2, a2, 1
515 1.1 mrg 	srli	a2, a2, 1
516 1.1 mrg 
517 1.1 mrg 	/* If x is zero, return zero.  */
518 1.1 mrg 	beqz	a2, .Lmul_return_zero
519 1.1 mrg 
520 1.1 mrg 	/* Normalize x.  Adjust the exponent in a8.  */
521 1.1 mrg 	do_nsau	a10, a2, a11, a12
522 1.1 mrg 	addi	a10, a10, -8
523 1.1 mrg 	ssl	a10
524 1.1 mrg 	sll	a2, a2
525 1.1 mrg 	movi	a8, 1
526 1.1 mrg 	sub	a8, a8, a10
527 1.1 mrg 	j	.Lmul_xnormalized	
528 1.1 mrg 
529 1.1 mrg .Lmul_yexpzero:
530 1.1 mrg 	/* Clear the sign bit of y.  */
531 1.1 mrg 	slli	a3, a3, 1
532 1.1 mrg 	srli	a3, a3, 1
533 1.1 mrg 
534 1.1 mrg 	/* If y is zero, return zero.  */
535 1.1 mrg 	beqz	a3, .Lmul_return_zero
536 1.1 mrg 
537 1.1 mrg 	/* Normalize y.  Adjust the exponent in a9.  */
538 1.1 mrg 	do_nsau	a10, a3, a11, a12
539 1.1 mrg 	addi	a10, a10, -8
540 1.1 mrg 	ssl	a10
541 1.1 mrg 	sll	a3, a3
542 1.1 mrg 	movi	a9, 1
543 1.1 mrg 	sub	a9, a9, a10
544 1.1 mrg 	j	.Lmul_ynormalized	
545 1.1 mrg 
546 1.1 mrg .Lmul_return_zero:
547 1.1 mrg 	/* Return zero with the appropriate sign bit.  */
548 1.1 mrg 	srli	a2, a7, 31
549 1.1 mrg 	slli	a2, a2, 31
550 1.1 mrg 	j	.Lmul_done
551 1.1 mrg 
552 1.1 mrg .Lmul_xnan_or_inf:
553 1.1 mrg 	/* If y is zero, return NaN.  */
554 1.1 mrg 	slli	a8, a3, 1
555 1.1.1.5 mrg 	beqz	a8, .Lmul_return_nan
556 1.1 mrg 	/* If y is NaN, return y.  */
557 1.1 mrg 	bnall	a3, a6, .Lmul_returnx
558 1.1 mrg 	slli	a8, a3, 9
559 1.1 mrg 	beqz	a8, .Lmul_returnx
560 1.1 mrg 
561 1.1 mrg .Lmul_returny:
562 1.1 mrg 	mov	a2, a3
563 1.1 mrg 
564 1.1 mrg .Lmul_returnx:
565 1.1.1.5 mrg 	slli	a8, a2, 9
566 1.1.1.5 mrg 	bnez	a8, .Lmul_return_nan
567 1.1 mrg 	/* Set the sign bit and return.  */
568 1.1 mrg 	extui	a7, a7, 31, 1
569 1.1 mrg 	slli	a2, a2, 1
570 1.1 mrg 	ssai	1
571 1.1 mrg 	src	a2, a7, a2
572 1.1 mrg 	j	.Lmul_done
573 1.1 mrg 
574 1.1 mrg .Lmul_ynan_or_inf:
575 1.1 mrg 	/* If x is zero, return NaN.  */
576 1.1 mrg 	slli	a8, a2, 1
577 1.1 mrg 	bnez	a8, .Lmul_returny
578 1.1.1.5 mrg 	mov	a2, a3
579 1.1.1.5 mrg 
580 1.1.1.5 mrg .Lmul_return_nan:
581 1.1.1.5 mrg 	movi	a4, 0x400000	/* make it a quiet NaN */
582 1.1.1.5 mrg 	or	a2, a2, a4
583 1.1 mrg 	j	.Lmul_done
584 1.1 mrg
/* __mulsf3(x=a2, y=a3) -> a2.  Register roles: a6 = 0x7f800000 mask,
   then low word of the 48-bit product (guard bits for rounding);
   a7 = x XOR y (sign of result); a8/a9 = exponents of x/y, then a8 =
   exponent sum minus bias; a2 = high word of the product.  */
585 1.1 mrg 	.align	4
586 1.1 mrg 	.global	__mulsf3
587 1.1 mrg 	.type	__mulsf3, @function
588 1.1 mrg __mulsf3:
589 1.1 mrg #if __XTENSA_CALL0_ABI__
590 1.1 mrg 	leaf_entry sp, 32
591 1.1 mrg 	addi	sp, sp, -32
592 1.1 mrg 	s32i	a12, sp, 16
593 1.1 mrg 	s32i	a13, sp, 20
594 1.1 mrg 	s32i	a14, sp, 24
595 1.1 mrg 	s32i	a15, sp, 28
596 1.1 mrg #elif XCHAL_NO_MUL
597 1.1 mrg 	/* This is not really a leaf function; allocate enough stack space
598 1.1 mrg 	   to allow CALL12s to a helper function.  */
599 1.1 mrg 	leaf_entry sp, 64
600 1.1 mrg #else
601 1.1 mrg 	leaf_entry sp, 32
602 1.1 mrg #endif
603 1.1 mrg 	movi	a6, 0x7f800000
604 1.1 mrg 
605 1.1 mrg 	/* Get the sign of the result.  */
606 1.1 mrg 	xor	a7, a2, a3
607 1.1 mrg 
608 1.1 mrg 	/* Check for NaN and infinity.  */
609 1.1 mrg 	ball	a2, a6, .Lmul_xnan_or_inf
610 1.1 mrg 	ball	a3, a6, .Lmul_ynan_or_inf
611 1.1 mrg 
612 1.1 mrg 	/* Extract the exponents.  */
613 1.1 mrg 	extui	a8, a2, 23, 8
614 1.1 mrg 	extui	a9, a3, 23, 8
615 1.1 mrg 
616 1.1 mrg 	beqz	a8, .Lmul_xexpzero
617 1.1 mrg .Lmul_xnormalized:	
618 1.1 mrg 	beqz	a9, .Lmul_yexpzero
619 1.1 mrg .Lmul_ynormalized:	
620 1.1 mrg 
621 1.1 mrg 	/* Add the exponents.  */
622 1.1 mrg 	add	a8, a8, a9
623 1.1 mrg 
624 1.1 mrg 	/* Replace sign/exponent fields with explicit "1.0".  */
625 1.1 mrg 	movi	a10, 0xffffff
626 1.1 mrg 	or	a2, a2, a6
627 1.1 mrg 	and	a2, a2, a10
628 1.1 mrg 	or	a3, a3, a6
629 1.1 mrg 	and	a3, a3, a10
630 1.1 mrg 
631 1.1 mrg 	/* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */
632 1.1 mrg 
633 1.1 mrg #if XCHAL_HAVE_MUL32_HIGH
634 1.1 mrg 
635 1.1 mrg 	mull	a6, a2, a3
636 1.1 mrg 	muluh	a2, a2, a3
637 1.1 mrg 
638 1.1 mrg #else
639 1.1 mrg 
640 1.1 mrg 	/* Break the inputs into 16-bit chunks and compute 4 32-bit partial
641 1.1 mrg 	   products.  These partial products are:
642 1.1 mrg 
643 1.1 mrg 		0 xl * yl
644 1.1 mrg 
645 1.1 mrg 		1 xl * yh
646 1.1 mrg 		2 xh * yl
647 1.1 mrg 
648 1.1 mrg 		3 xh * yh
649 1.1 mrg 
650 1.1 mrg 	   If using the Mul16 or Mul32 multiplier options, these input
651 1.1 mrg 	   chunks must be stored in separate registers.  For Mac16, the
652 1.1 mrg 	   UMUL.AA.* opcodes can specify that the inputs come from either
653 1.1 mrg 	   half of the registers, so there is no need to shift them out
654 1.1 mrg 	   ahead of time.  If there is no multiply hardware, the 16-bit
655 1.1 mrg 	   chunks can be extracted when setting up the arguments to the
656 1.1 mrg 	   separate multiply function.  */
657 1.1 mrg 
658 1.1 mrg #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
659 1.1 mrg 	/* Calling a separate multiply function will clobber a0 and requires
660 1.1 mrg 	   use of a8 as a temporary, so save those values now.  (The function
661 1.1 mrg 	   uses a custom ABI so nothing else needs to be saved.)  */
662 1.1 mrg 	s32i	a0, sp, 0
663 1.1 mrg 	s32i	a8, sp, 4
664 1.1 mrg #endif
665 1.1 mrg 
666 1.1 mrg #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
667 1.1 mrg 
668 1.1 mrg #define a2h a4
669 1.1 mrg #define a3h a5
670 1.1 mrg 
671 1.1 mrg 	/* Get the high halves of the inputs into registers.  */
672 1.1 mrg 	srli	a2h, a2, 16
673 1.1 mrg 	srli	a3h, a3, 16
674 1.1 mrg 
675 1.1 mrg #define a2l a2
676 1.1 mrg #define a3l a3
677 1.1 mrg 
678 1.1 mrg #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
679 1.1 mrg 	/* Clear the high halves of the inputs.  This does not matter
680 1.1 mrg 	   for MUL16 because the high bits are ignored.  */
681 1.1 mrg 	extui	a2, a2, 0, 16
682 1.1 mrg 	extui	a3, a3, 0, 16
683 1.1 mrg #endif
684 1.1 mrg #endif /* MUL16 || MUL32 */
685 1.1 mrg 
686 1.1 mrg 
687 1.1 mrg #if XCHAL_HAVE_MUL16
688 1.1 mrg 
689 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
690 1.1 mrg 	mul16u	dst, xreg ## xhalf, yreg ## yhalf
691 1.1 mrg 
692 1.1 mrg #elif XCHAL_HAVE_MUL32
693 1.1 mrg 
694 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
695 1.1 mrg 	mull	dst, xreg ## xhalf, yreg ## yhalf
696 1.1 mrg 
697 1.1 mrg #elif XCHAL_HAVE_MAC16
698 1.1 mrg 
699 1.1 mrg /* The preprocessor insists on inserting a space when concatenating after
700 1.1 mrg    a period in the definition of do_mul below.  These macros are a workaround
701 1.1 mrg    using underscores instead of periods when doing the concatenation.  */
702 1.1 mrg #define umul_aa_ll umul.aa.ll
703 1.1 mrg #define umul_aa_lh umul.aa.lh
704 1.1 mrg #define umul_aa_hl umul.aa.hl
705 1.1 mrg #define umul_aa_hh umul.aa.hh
706 1.1 mrg 
707 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
708 1.1 mrg 	umul_aa_ ## xhalf ## yhalf xreg, yreg; \
709 1.1 mrg 	rsr	dst, ACCLO
710 1.1 mrg 
711 1.1 mrg #else /* no multiply hardware */
712 1.1 mrg 	
713 1.1 mrg #define set_arg_l(dst, src) \
714 1.1 mrg 	extui	dst, src, 0, 16
715 1.1 mrg #define set_arg_h(dst, src) \
716 1.1 mrg 	srli	dst, src, 16
717 1.1 mrg 
718 1.1 mrg #if __XTENSA_CALL0_ABI__
719 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
720 1.1 mrg 	set_arg_ ## xhalf (a13, xreg); \
721 1.1 mrg 	set_arg_ ## yhalf (a14, yreg); \
722 1.1 mrg 	call0	.Lmul_mulsi3; \
723 1.1 mrg 	mov	dst, a12
724 1.1 mrg #else
725 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
726 1.1 mrg 	set_arg_ ## xhalf (a14, xreg); \
727 1.1 mrg 	set_arg_ ## yhalf (a15, yreg); \
728 1.1 mrg 	call12	.Lmul_mulsi3; \
729 1.1 mrg 	mov	dst, a14
730 1.1 mrg #endif /* __XTENSA_CALL0_ABI__ */
731 1.1 mrg 
732 1.1 mrg #endif /* no multiply hardware */
733 1.1 mrg 
734 1.1 mrg 	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
735 1.1 mrg 	do_mul(a6, a2, l, a3, h)	/* pp 1 */
736 1.1 mrg 	do_mul(a11, a2, h, a3, l)	/* pp 2 */
737 1.1 mrg 	movi	a9, 0
738 1.1 mrg 	add	a6, a6, a11
739 1.1 mrg 	bgeu	a6, a11, 1f
740 1.1 mrg 	addi	a9, a9, 1
741 1.1 mrg 1:
742 1.1 mrg 	/* Shift the high half of a9/a6 into position in a9.  Note that
743 1.1 mrg 	   this value can be safely incremented without any carry-outs.  */
744 1.1 mrg 	ssai	16
745 1.1 mrg 	src	a9, a9, a6
746 1.1 mrg 
747 1.1 mrg 	/* Compute the low word into a6.  */
748 1.1 mrg 	do_mul(a11, a2, l, a3, l)	/* pp 0 */
749 1.1 mrg 	sll	a6, a6
750 1.1 mrg 	add	a6, a6, a11
751 1.1 mrg 	bgeu	a6, a11, 1f
752 1.1 mrg 	addi	a9, a9, 1
753 1.1 mrg 1:
754 1.1 mrg 	/* Compute the high word into a2.  */
755 1.1 mrg 	do_mul(a2, a2, h, a3, h)	/* pp 3 */
756 1.1 mrg 	add	a2, a2, a9
757 1.1 mrg 
758 1.1 mrg #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
759 1.1 mrg 	/* Restore values saved on the stack during the multiplication.  */
760 1.1 mrg 	l32i	a0, sp, 0
761 1.1 mrg 	l32i	a8, sp, 4
762 1.1 mrg #endif
763 1.1 mrg #endif /* ! XCHAL_HAVE_MUL32_HIGH */
764 1.1 mrg 
765 1.1 mrg 	/* Shift left by 9 bits, unless there was a carry-out from the
766 1.1 mrg 	   multiply, in which case, shift by 8 bits and increment the
767 1.1 mrg 	   exponent.  */
768 1.1 mrg 	movi	a4, 9
769 1.1 mrg 	srli	a5, a2, 24 - 9
770 1.1 mrg 	beqz	a5, 1f
771 1.1 mrg 	addi	a4, a4, -1
772 1.1 mrg 	addi	a8, a8, 1
773 1.1 mrg 1:	ssl	a4
774 1.1 mrg 	src	a2, a2, a6
775 1.1 mrg 	sll	a6, a6
776 1.1 mrg 
777 1.1 mrg 	/* Subtract the extra bias from the exponent sum (plus one to account
778 1.1 mrg 	   for the explicit "1.0" of the mantissa that will be added to the
779 1.1 mrg 	   exponent in the final result).  */
780 1.1 mrg 	movi	a4, 0x80
781 1.1 mrg 	sub	a8, a8, a4
782 1.1 mrg 	
783 1.1 mrg 	/* Check for over/underflow.  The value in a8 is one less than the
784 1.1 mrg 	   final exponent, so values in the range 0..fd are OK here.  */
785 1.1 mrg 	movi	a4, 0xfe
786 1.1 mrg 	bgeu	a8, a4, .Lmul_overflow
787 1.1 mrg 	
788 1.1 mrg .Lmul_round:
789 1.1 mrg 	/* Round.  */
790 1.1 mrg 	bgez	a6, .Lmul_rounded
791 1.1 mrg 	addi	a2, a2, 1
792 1.1 mrg 	slli	a6, a6, 1
793 1.1 mrg 	beqz	a6, .Lmul_exactlyhalf
794 1.1 mrg 
795 1.1 mrg .Lmul_rounded:
796 1.1 mrg 	/* Add the exponent to the mantissa.  */
797 1.1 mrg 	slli	a8, a8, 23
798 1.1 mrg 	add	a2, a2, a8
799 1.1 mrg 
800 1.1 mrg .Lmul_addsign:
801 1.1 mrg 	/* Add the sign bit.  */
802 1.1 mrg 	srli	a7, a7, 31
803 1.1 mrg 	slli	a7, a7, 31
804 1.1 mrg 	or	a2, a2, a7
805 1.1 mrg 
806 1.1 mrg .Lmul_done:	
807 1.1 mrg #if __XTENSA_CALL0_ABI__
808 1.1 mrg 	l32i	a12, sp, 16
809 1.1 mrg 	l32i	a13, sp, 20
810 1.1 mrg 	l32i	a14, sp, 24
811 1.1 mrg 	l32i	a15, sp, 28
812 1.1 mrg 	addi	sp, sp, 32
813 1.1 mrg #endif
814 1.1 mrg 	leaf_return
815 1.1 mrg 
816 1.1 mrg .Lmul_exactlyhalf:
817 1.1 mrg 	/* Round down to the nearest even value.  */
818 1.1 mrg 	srli	a2, a2, 1
819 1.1 mrg 	slli	a2, a2, 1
820 1.1 mrg 	j	.Lmul_rounded
821 1.1 mrg 
822 1.1 mrg .Lmul_overflow:	
823 1.1 mrg 	bltz	a8, .Lmul_underflow
824 1.1 mrg 	/* Return +/- Infinity.  */
825 1.1 mrg 	movi	a8, 0xff
826 1.1 mrg 	slli	a2, a8, 23
827 1.1 mrg 	j	.Lmul_addsign
828 1.1 mrg 
829 1.1 mrg .Lmul_underflow:
830 1.1 mrg 	/* Create a subnormal value, where the exponent field contains zero,
831 1.1 mrg 	   but the effective exponent is 1.  The value of a8 is one less than
832 1.1 mrg 	   the actual exponent, so just negate it to get the shift amount.  */
833 1.1 mrg 	neg	a8, a8
834 1.1 mrg 	mov	a9, a6
835 1.1 mrg 	ssr	a8
836 1.1 mrg 	bgeui	a8, 32, .Lmul_flush_to_zero
837 1.1 mrg 	
838 1.1 mrg 	/* Shift a2 right.  Any bits that are shifted out of a2 are saved
839 1.1 mrg 	   in a6 (combined with the shifted-out bits currently in a6) for
840 1.1 mrg 	   rounding the result.  */
841 1.1 mrg 	sll	a6, a2
842 1.1 mrg 	srl	a2, a2
843 1.1 mrg 
844 1.1 mrg 	/* Set the exponent to zero.  */
845 1.1 mrg 	movi	a8, 0
846 1.1 mrg 
847 1.1 mrg 	/* Pack any nonzero bits shifted out into a6.  */
848 1.1 mrg 	beqz	a9, .Lmul_round
849 1.1 mrg 	movi	a9, 1
850 1.1 mrg 	or	a6, a6, a9
851 1.1 mrg 	j	.Lmul_round
852 1.1 mrg 	
853 1.1 mrg .Lmul_flush_to_zero:
854 1.1 mrg 	/* Return zero with the appropriate sign bit.  */
855 1.1 mrg 	srli	a2, a7, 31
856 1.1 mrg 	slli	a2, a2, 31
857 1.1 mrg 	j	.Lmul_done
858 1.1 mrg
859 1.1 mrg #if XCHAL_NO_MUL
860 1.1 mrg 	
861 1.1 mrg 	/* For Xtensa processors with no multiply hardware, this simplified
862 1.1 mrg 	   version of _mulsi3 is used for multiplying 16-bit chunks of
863 1.1 mrg 	   the floating-point mantissas.  When using CALL0, this function
864 1.1 mrg 	   uses a custom ABI: the inputs are passed in a13 and a14, the
865 1.1 mrg 	   result is returned in a12, and a8 and a15 are clobbered.  */
866 1.1 mrg 	.align	4
867 1.1 mrg .Lmul_mulsi3:
868 1.1 mrg 	leaf_entry sp, 16
/* Classic shift-and-add multiply, unrolled to process four multiplier
   bits per loop iteration: each bit of src1 conditionally adds
   src2 * (1, 2, 4, 8) into dst via movnez, then src1 is shifted right
   by 4 and src2 left by 4.  Terminates when src1 is exhausted.  */
869 1.1 mrg 	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
870 1.1 mrg 	movi	\dst, 0
871 1.1 mrg 1:	add	\tmp1, \src2, \dst
872 1.1 mrg 	extui	\tmp2, \src1, 0, 1
873 1.1 mrg 	movnez	\dst, \tmp1, \tmp2
874 1.1 mrg 
875 1.1 mrg 	do_addx2 \tmp1, \src2, \dst, \tmp1
876 1.1 mrg 	extui	\tmp2, \src1, 1, 1
877 1.1 mrg 	movnez	\dst, \tmp1, \tmp2
878 1.1 mrg 
879 1.1 mrg 	do_addx4 \tmp1, \src2, \dst, \tmp1
880 1.1 mrg 	extui	\tmp2, \src1, 2, 1
881 1.1 mrg 	movnez	\dst, \tmp1, \tmp2
882 1.1 mrg 
883 1.1 mrg 	do_addx8 \tmp1, \src2, \dst, \tmp1
884 1.1 mrg 	extui	\tmp2, \src1, 3, 1
885 1.1 mrg 	movnez	\dst, \tmp1, \tmp2
886 1.1 mrg 
887 1.1 mrg 	srli	\src1, \src1, 4
888 1.1 mrg 	slli	\src2, \src2, 4
889 1.1 mrg 	bnez	\src1, 1b
890 1.1 mrg 	.endm
891 1.1 mrg #if __XTENSA_CALL0_ABI__
892 1.1 mrg 	mul_mulsi3_body a12, a13, a14, a15, a8
893 1.1 mrg #else
894 1.1 mrg 	/* The result will be written into a2, so save that argument in a4.  */
895 1.1 mrg 	mov	a4, a2
896 1.1 mrg 	mul_mulsi3_body a2, a4, a3, a5, a6
897 1.1 mrg #endif
898 1.1 mrg 	leaf_return
899 1.1 mrg #endif /* XCHAL_NO_MUL */
900 1.1 mrg #endif /* L_mulsf3 */
901 1.1 mrg
902 1.1 mrg #ifdef L_divsf3
903 1.1 mrg
904 1.1 mrg /* Division */
905 1.1.1.6 mrg
906 1.1.1.6 mrg #if XCHAL_HAVE_FP_DIV
907 1.1.1.6 mrg
	/* float __divsf3(float x, float y)
	   Hardware path: single-precision division using the Xtensa FPU
	   divide-assist instructions (div0.s seed, maddn.s refinement
	   steps, divn.s final step).  Inputs: a2 = dividend, a3 = divisor.
	   Result: a2.  Clobbers FP registers f0-f8.
	   NOTE(review): the exact semantics of div0.s/nexp01.s/mkdadj.s/
	   addexp(m).s/divn.s are defined by the Xtensa ISA's FP divide
	   sequence; this is the vendor-recommended instruction sequence
	   and must not be reordered.  */
908 1.1.1.6 mrg 	.align	4
909 1.1.1.6 mrg 	.global	__divsf3
910 1.1.1.6 mrg 	.type	__divsf3, @function
911 1.1.1.6 mrg __divsf3:
912 1.1.1.6 mrg 	leaf_entry	sp, 16
913 1.1.1.6 mrg 
914 1.1.1.6 mrg 	wfr	f1, a2	/* dividend */
915 1.1.1.6 mrg 	wfr	f2, a3	/* divisor */
916 1.1.1.6 mrg 
917 1.1.1.6 mrg 	div0.s	f3, f2
918 1.1.1.6 mrg 	nexp01.s	f4, f2
919 1.1.1.6 mrg 	const.s	f5, 1
920 1.1.1.6 mrg 	maddn.s	f5, f4, f3
921 1.1.1.6 mrg 	mov.s	f6, f3
922 1.1.1.6 mrg 	mov.s	f7, f2
923 1.1.1.6 mrg 	nexp01.s	f2, f1
924 1.1.1.6 mrg 	maddn.s	f6, f5, f6
925 1.1.1.6 mrg 	const.s	f5, 1
926 1.1.1.6 mrg 	const.s	f0, 0
927 1.1.1.6 mrg 	neg.s	f8, f2
928 1.1.1.6 mrg 	maddn.s	f5, f4, f6
929 1.1.1.6 mrg 	maddn.s	f0, f8, f3
930 1.1.1.6 mrg 	mkdadj.s	f7, f1
931 1.1.1.6 mrg 	maddn.s	f6, f5, f6
932 1.1.1.6 mrg 	maddn.s	f8, f4, f0
933 1.1.1.6 mrg 	const.s	f3, 1
934 1.1.1.6 mrg 	maddn.s	f3, f4, f6
935 1.1.1.6 mrg 	maddn.s	f0, f8, f6
936 1.1.1.6 mrg 	neg.s	f2, f2
937 1.1.1.6 mrg 	maddn.s	f6, f3, f6
938 1.1.1.6 mrg 	maddn.s	f2, f4, f0
939 1.1.1.6 mrg 	addexpm.s	f0, f7
940 1.1.1.6 mrg 	addexp.s	f6, f7
941 1.1.1.6 mrg 	divn.s	f0, f2, f6
942 1.1.1.6 mrg 
943 1.1.1.6 mrg 	rfr	a2, f0
944 1.1.1.6 mrg 
945 1.1.1.6 mrg 	leaf_return
946 1.1.1.6 mrg
947 1.1.1.6 mrg #else
948 1.1.1.6 mrg
949 1.1.1.6 mrg 	.literal_position
	/* Register conventions shared with the __divsf3 body below:
	   a2 = x (dividend), a3 = y (divisor), a6 = 0x7f800000 (exponent
	   mask), a7 = x ^ y (sign of the result in bit 31), a8/a9 =
	   exponents of x/y once extracted.  */
950 1.1 mrg __divsf3_aux:
951 1.1 mrg 
952 1.1 mrg 	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
953 1.1 mrg 	   (This code is placed before the start of the function just to
954 1.1 mrg 	   keep it in range of the limited branch displacements.)  */
955 1.1 mrg 
956 1.1 mrg .Ldiv_yexpzero:
957 1.1 mrg 	/* Clear the sign bit of y.  */
958 1.1 mrg 	slli	a3, a3, 1
959 1.1 mrg 	srli	a3, a3, 1
960 1.1 mrg 
961 1.1 mrg 	/* Check for division by zero.  */
962 1.1 mrg 	beqz	a3, .Ldiv_yzero
963 1.1 mrg 
964 1.1 mrg 	/* Normalize y.  Adjust the exponent in a9.  */
	/* nsau counts leading zeros; subtracting 8 gives the shift that
	   brings the first set bit of the subnormal mantissa to the
	   implicit-1 position (bit 23).  The effective exponent becomes
	   1 - shift, matching the subnormal exponent convention.  */
965 1.1 mrg 	do_nsau	a10, a3, a4, a5
966 1.1 mrg 	addi	a10, a10, -8
967 1.1 mrg 	ssl	a10
968 1.1 mrg 	sll	a3, a3
969 1.1 mrg 	movi	a9, 1
970 1.1 mrg 	sub	a9, a9, a10
971 1.1 mrg 	j	.Ldiv_ynormalized
972 1.1 mrg 
973 1.1 mrg .Ldiv_yzero:
974 1.1 mrg 	/* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
975 1.1 mrg 	slli	a4, a2, 1
976 1.1 mrg 	srli	a4, a4, 1
977 1.1 mrg 	srli	a2, a7, 31
978 1.1 mrg 	slli	a2, a2, 31
979 1.1 mrg 	or	a2, a2, a6
980 1.1 mrg 	bnez	a4, 1f
981 1.1 mrg 	movi	a4, 0x400000	/* make it a quiet NaN */
982 1.1 mrg 	or	a2, a2, a4
983 1.1 mrg 1:	leaf_return
984 1.1 mrg 
985 1.1 mrg .Ldiv_xexpzero:
986 1.1 mrg 	/* Clear the sign bit of x.  */
987 1.1 mrg 	slli	a2, a2, 1
988 1.1 mrg 	srli	a2, a2, 1
989 1.1 mrg 
990 1.1 mrg 	/* If x is zero, return zero.  */
991 1.1 mrg 	beqz	a2, .Ldiv_return_zero
992 1.1 mrg 
993 1.1 mrg 	/* Normalize x.  Adjust the exponent in a8.  */
994 1.1 mrg 	do_nsau	a10, a2, a4, a5
995 1.1 mrg 	addi	a10, a10, -8
996 1.1 mrg 	ssl	a10
997 1.1 mrg 	sll	a2, a2
998 1.1 mrg 	movi	a8, 1
999 1.1 mrg 	sub	a8, a8, a10
1000 1.1 mrg 	j	.Ldiv_xnormalized
1001 1.1 mrg 
1002 1.1 mrg .Ldiv_return_zero:
1003 1.1 mrg 	/* Return zero with the appropriate sign bit.  */
1004 1.1 mrg 	srli	a2, a7, 31
1005 1.1 mrg 	slli	a2, a2, 31
1006 1.1 mrg 	leaf_return
1007 1.1 mrg 
1008 1.1 mrg .Ldiv_xnan_or_inf:
1009 1.1 mrg 	/* Set the sign bit of the result.  */
1010 1.1 mrg 	srli	a7, a3, 31
1011 1.1 mrg 	slli	a7, a7, 31
1012 1.1 mrg 	xor	a2, a2, a7
1013 1.1 mrg 	/* If y is NaN or Inf, return NaN.  */
1014 1.1.1.5 mrg 	ball	a3, a6, .Ldiv_return_nan
	/* If x itself is NaN (nonzero mantissa), quiet it and return it;
	   otherwise x is Inf and Inf/finite = Inf, already in a2.  */
1015 1.1.1.5 mrg 	slli	a7, a2, 9
1016 1.1.1.5 mrg 	bnez	a7, .Ldiv_return_nan
1017 1.1.1.5 mrg 	leaf_return
1018 1.1 mrg 
1019 1.1 mrg .Ldiv_ynan_or_inf:
1020 1.1 mrg 	/* If y is Infinity, return zero.  */
1021 1.1 mrg 	slli	a8, a3, 9
1022 1.1 mrg 	beqz	a8, .Ldiv_return_zero
1023 1.1 mrg 	/* y is NaN; return it.  */
1024 1.1 mrg 	mov	a2, a3
1025 1.1.1.5 mrg 
1026 1.1.1.5 mrg .Ldiv_return_nan:
1027 1.1.1.5 mrg 	movi	a4, 0x400000	/* make it a quiet NaN */
1028 1.1.1.5 mrg 	or	a2, a2, a4
1029 1.1 mrg 	leaf_return
1030 1.1 mrg
	/* float __divsf3(float x, float y)
	   Software path: restoring long division on the mantissas, one
	   quotient bit per iteration, with round-to-nearest-even.
	   Inputs: a2 = dividend x, a3 = divisor y.  Result: a2.
	   Scratch: a4-a10.  Special cases (zero, subnormal, NaN, Inf)
	   branch to the __divsf3_aux handlers above.  */
1031 1.1 mrg 	.align	4
1032 1.1 mrg 	.global	__divsf3
1033 1.1 mrg 	.type	__divsf3, @function
1034 1.1 mrg __divsf3:
1035 1.1 mrg 	leaf_entry	sp, 16
1036 1.1 mrg 	movi	a6, 0x7f800000
1037 1.1 mrg 
1038 1.1 mrg 	/* Get the sign of the result.  */
1039 1.1 mrg 	xor	a7, a2, a3
1040 1.1 mrg 
1041 1.1 mrg 	/* Check for NaN and infinity.  */
1042 1.1 mrg 	ball	a2, a6, .Ldiv_xnan_or_inf
1043 1.1 mrg 	ball	a3, a6, .Ldiv_ynan_or_inf
1044 1.1 mrg 
1045 1.1 mrg 	/* Extract the exponents.  */
1046 1.1 mrg 	extui	a8, a2, 23, 8
1047 1.1 mrg 	extui	a9, a3, 23, 8
1048 1.1 mrg 
1049 1.1 mrg 	beqz	a9, .Ldiv_yexpzero
1050 1.1 mrg .Ldiv_ynormalized:	
1051 1.1 mrg 	beqz	a8, .Ldiv_xexpzero
1052 1.1 mrg .Ldiv_xnormalized:	
1053 1.1 mrg 
1054 1.1 mrg 	/* Subtract the exponents.  */
1055 1.1 mrg 	sub	a8, a8, a9
1056 1.1 mrg 
1057 1.1 mrg 	/* Replace sign/exponent fields with explicit "1.0".  */
1058 1.1 mrg 	movi	a10, 0xffffff
1059 1.1 mrg 	or	a2, a2, a6
1060 1.1 mrg 	and	a2, a2, a10
1061 1.1 mrg 	or	a3, a3, a6
1062 1.1 mrg 	and	a3, a3, a10
1063 1.1 mrg 
1064 1.1 mrg 	/* The first digit of the mantissa division must be a one.
1065 1.1 mrg 	   Shift x (and adjust the exponent) as needed to make this true.  */
1066 1.1 mrg 	bltu	a3, a2, 1f
1067 1.1 mrg 	slli	a2, a2, 1
1068 1.1 mrg 	addi	a8, a8, -1
1069 1.1 mrg 1:	
1070 1.1 mrg 	/* Do the first subtraction and shift.  */
1071 1.1 mrg 	sub	a2, a2, a3
1072 1.1 mrg 	slli	a2, a2, 1
1073 1.1 mrg 
1074 1.1 mrg 	/* Put the quotient into a10.  */
1075 1.1 mrg 	movi	a10, 1
1076 1.1 mrg 
1077 1.1 mrg 	/* Divide one bit at a time for 23 bits.  */
1078 1.1 mrg 	movi	a9, 23
1079 1.1 mrg #if XCHAL_HAVE_LOOPS
1080 1.1 mrg 	loop	a9, .Ldiv_loopend
1081 1.1 mrg #endif
1082 1.1 mrg .Ldiv_loop:
1083 1.1 mrg 	/* Shift the quotient << 1.  */
1084 1.1 mrg 	slli	a10, a10, 1
1085 1.1 mrg 
1086 1.1 mrg 	/* Is this digit a 0 or 1?  */
1087 1.1 mrg 	bltu	a2, a3, 1f
1088 1.1 mrg 
1089 1.1 mrg 	/* Output a 1 and subtract.  */
1090 1.1 mrg 	addi	a10, a10, 1
1091 1.1 mrg 	sub	a2, a2, a3
1092 1.1 mrg 
1093 1.1 mrg 	/* Shift the dividend << 1.  */
1094 1.1 mrg 1:	slli	a2, a2, 1
1095 1.1 mrg 
1096 1.1 mrg #if !XCHAL_HAVE_LOOPS
1097 1.1 mrg 	addi	a9, a9, -1
1098 1.1 mrg 	bnez	a9, .Ldiv_loop
1099 1.1 mrg #endif
1100 1.1 mrg .Ldiv_loopend:
1101 1.1 mrg 
1102 1.1 mrg 	/* Add the exponent bias (less one to account for the explicit "1.0"
1103 1.1 mrg 	   of the mantissa that will be added to the exponent in the final
1104 1.1 mrg 	   result).  */
1105 1.1 mrg 	addi	a8, a8, 0x7e
1106 1.1 mrg 	
1107 1.1 mrg 	/* Check for over/underflow.  The value in a8 is one less than the
1108 1.1 mrg 	   final exponent, so values in the range 0..fd are OK here.  */
1109 1.1 mrg 	movi	a4, 0xfe
1110 1.1 mrg 	bgeu	a8, a4, .Ldiv_overflow
1111 1.1 mrg 	
1112 1.1 mrg .Ldiv_round:
1113 1.1 mrg 	/* Round.  The remainder (<< 1) is in a2.  */
	/* remainder*2 > divisor -> round up; == divisor -> halfway, round
	   to even; < divisor -> round down (truncate).  */
1114 1.1 mrg 	bltu	a2, a3, .Ldiv_rounded
1115 1.1 mrg 	addi	a10, a10, 1
1116 1.1 mrg 	beq	a2, a3, .Ldiv_exactlyhalf
1117 1.1 mrg 
1118 1.1 mrg .Ldiv_rounded:
1119 1.1 mrg 	/* Add the exponent to the mantissa.  */
1120 1.1 mrg 	slli	a8, a8, 23
1121 1.1 mrg 	add	a2, a10, a8
1122 1.1 mrg 
1123 1.1 mrg .Ldiv_addsign:
1124 1.1 mrg 	/* Add the sign bit.  */
1125 1.1 mrg 	srli	a7, a7, 31
1126 1.1 mrg 	slli	a7, a7, 31
1127 1.1 mrg 	or	a2, a2, a7
1128 1.1 mrg 	leaf_return
1129 1.1 mrg 
1130 1.1 mrg .Ldiv_overflow:
1131 1.1 mrg 	bltz	a8, .Ldiv_underflow
1132 1.1 mrg 	/* Return +/- Infinity.  */
1133 1.1 mrg 	addi	a8, a4, 1	/* 0xff */
1134 1.1 mrg 	slli	a2, a8, 23
1135 1.1 mrg 	j	.Ldiv_addsign
1136 1.1 mrg 
1137 1.1 mrg .Ldiv_exactlyhalf:
1138 1.1 mrg 	/* Remainder is exactly half the divisor.  Round even.  */
1139 1.1 mrg 	srli	a10, a10, 1
1140 1.1 mrg 	slli	a10, a10, 1
1141 1.1 mrg 	j	.Ldiv_rounded
1142 1.1 mrg 
1143 1.1 mrg .Ldiv_underflow:
1144 1.1 mrg 	/* Create a subnormal value, where the exponent field contains zero,
1145 1.1 mrg 	   but the effective exponent is 1.  The value of a8 is one less than
1146 1.1 mrg 	   the actual exponent, so just negate it to get the shift amount.  */
1147 1.1 mrg 	neg	a8, a8
1148 1.1 mrg 	ssr	a8
1149 1.1 mrg 	bgeui	a8, 32, .Ldiv_flush_to_zero
1150 1.1 mrg 	
1151 1.1 mrg 	/* Shift a10 right.  Any bits that are shifted out of a10 are
1152 1.1 mrg 	   saved in a6 for rounding the result.  */
1153 1.1 mrg 	sll	a6, a10
1154 1.1 mrg 	srl	a10, a10
1155 1.1 mrg 
1156 1.1 mrg 	/* Set the exponent to zero.  */
1157 1.1 mrg 	movi	a8, 0
1158 1.1 mrg 
1159 1.1 mrg 	/* Pack any nonzero remainder (in a2) into a6.  */
	/* Setting the low bit marks the result as inexact ("sticky" bit)
	   so the halfway test below cannot falsely round to even.  */
1160 1.1 mrg 	beqz	a2, 1f
1161 1.1 mrg 	movi	a9, 1
1162 1.1 mrg 	or	a6, a6, a9
1163 1.1 mrg 	
1164 1.1 mrg 	/* Round a10 based on the bits shifted out into a6.  */
1165 1.1 mrg 1:	bgez	a6, .Ldiv_rounded
1166 1.1 mrg 	addi	a10, a10, 1
1167 1.1 mrg 	slli	a6, a6, 1
1168 1.1 mrg 	bnez	a6, .Ldiv_rounded
1169 1.1 mrg 	srli	a10, a10, 1
1170 1.1 mrg 	slli	a10, a10, 1
1171 1.1 mrg 	j	.Ldiv_rounded
1172 1.1 mrg 
1173 1.1 mrg .Ldiv_flush_to_zero:
1174 1.1 mrg 	/* Return zero with the appropriate sign bit.  */
1175 1.1 mrg 	srli	a2, a7, 31
1176 1.1 mrg 	slli	a2, a2, 31
1177 1.1 mrg 	leaf_return
1178 1.1 mrg
1179 1.1.1.6 mrg #endif /* XCHAL_HAVE_FP_DIV */
1180 1.1.1.6 mrg
1181 1.1 mrg #endif /* L_divsf3 */
1182 1.1 mrg
1183 1.1 mrg #ifdef L_cmpsf2
1184 1.1 mrg
1185 1.1 mrg /* Equal and Not Equal */
1186 1.1 mrg
	/* int __eqsf2(float x, float y)  (also aliased as __nesf2)
	   Inputs: a2 = x, a3 = y.  Result in a2: 0 if x == y, nonzero
	   otherwise.  NaN compares unequal to everything, including
	   itself; +0 and -0 compare equal.  */
1187 1.1 mrg 	.align	4
1188 1.1 mrg 	.global	__eqsf2
1189 1.1 mrg 	.global	__nesf2
1190 1.1 mrg 	.set	__nesf2, __eqsf2
1191 1.1 mrg 	.type	__eqsf2, @function
1192 1.1 mrg __eqsf2:
1193 1.1 mrg 	leaf_entry sp, 16
1194 1.1 mrg 	bne	a2, a3, 4f
1195 1.1 mrg 
1196 1.1 mrg 	/* The values are equal but NaN != NaN.  Check the exponent.  */
1197 1.1 mrg 	movi	a6, 0x7f800000
1198 1.1 mrg 	ball	a2, a6, 3f
1199 1.1 mrg 
1200 1.1 mrg 	/* Equal.  */
1201 1.1 mrg 	movi	a2, 0
1202 1.1 mrg 	leaf_return
1203 1.1 mrg 
1204 1.1 mrg 	/* Not equal.  */
1205 1.1 mrg 2:	movi	a2, 1
1206 1.1 mrg 	leaf_return
1207 1.1 mrg 
1208 1.1 mrg 	/* Check if the mantissas are nonzero.  */
1209 1.1 mrg 3:	slli	a7, a2, 9
1210 1.1 mrg 	j	5f
1211 1.1 mrg 
1212 1.1 mrg 	/* Check if x and y are zero with different signs.  */
1213 1.1 mrg 4:	or	a7, a2, a3
1214 1.1 mrg 	slli	a7, a7, 1
1215 1.1 mrg 
1216 1.1 mrg 	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1217 1.1 mrg 	   of x when exponent(x) is all ones (Inf/NaN) and x == y.  */
1218 1.1 mrg 5:	movi	a2, 0
1219 1.1 mrg 	movi	a3, 1
1220 1.1 mrg 	movnez	a2, a3, a7
1221 1.1 mrg 	leaf_return
1222 1.1 mrg
1223 1.1 mrg
1224 1.1 mrg /* Greater Than */
1225 1.1 mrg
	/* int __gtsf2(float x, float y)
	   Inputs: a2 = x, a3 = y.  Result in a2: 1 if x > y, else 0.
	   If either operand is NaN the result is 0 (i.e. "not greater"),
	   which satisfies the libgcc convention that __gtsf2 returns a
	   value <= 0 for unordered operands.  The ordered comparison is
	   shared with __lesf2 via .Lle_cmp below.  */
1226 1.1 mrg 	.align	4
1227 1.1 mrg 	.global	__gtsf2
1228 1.1 mrg 	.type	__gtsf2, @function
1229 1.1 mrg __gtsf2:
1230 1.1 mrg 	leaf_entry sp, 16
1231 1.1 mrg 	movi	a6, 0x7f800000
1232 1.1 mrg 	ball	a2, a6, 2f
1233 1.1 mrg 1:	bnall	a3, a6, .Lle_cmp
1234 1.1 mrg 
1235 1.1 mrg 	/* Check if y is a NaN.  */
1236 1.1 mrg 	slli	a7, a3, 9
1237 1.1 mrg 	beqz	a7, .Lle_cmp
1238 1.1 mrg 	movi	a2, 0
1239 1.1 mrg 	leaf_return
1240 1.1 mrg 
1241 1.1 mrg 	/* Check if x is a NaN.  */
1242 1.1 mrg 2:	slli	a7, a2, 9
1243 1.1 mrg 	beqz	a7, 1b
1244 1.1 mrg 	movi	a2, 0
1245 1.1 mrg 	leaf_return
1246 1.1 mrg
1247 1.1 mrg
1248 1.1 mrg /* Less Than or Equal */
1249 1.1 mrg
	/* int __lesf2(float x, float y)
	   Inputs: a2 = x, a3 = y.  Result in a2: 0 if x <= y, else 1.
	   If either operand is NaN the result is 1 ("not less-or-equal").
	   .Lle_cmp is the shared ordered-compare tail, also reached from
	   __gtsf2 above: it returns 0 when x <= y and 1 when x > y,
	   comparing the raw bit patterns (flipping the sense when x is
	   negative, and treating +0 == -0 specially).  */
1250 1.1 mrg 	.align	4
1251 1.1 mrg 	.global	__lesf2
1252 1.1 mrg 	.type	__lesf2, @function
1253 1.1 mrg __lesf2:
1254 1.1 mrg 	leaf_entry sp, 16
1255 1.1 mrg 	movi	a6, 0x7f800000
1256 1.1 mrg 	ball	a2, a6, 2f
1257 1.1 mrg 1:	bnall	a3, a6, .Lle_cmp
1258 1.1 mrg 
1259 1.1 mrg 	/* Check if y is a NaN.  */
1260 1.1 mrg 	slli	a7, a3, 9
1261 1.1 mrg 	beqz	a7, .Lle_cmp
1262 1.1 mrg 	movi	a2, 1
1263 1.1 mrg 	leaf_return
1264 1.1 mrg 
1265 1.1 mrg 	/* Check if x is a NaN.  */
1266 1.1 mrg 2:	slli	a7, a2, 9
1267 1.1 mrg 	beqz	a7, 1b
1268 1.1 mrg 	movi	a2, 1
1269 1.1 mrg 	leaf_return
1270 1.1 mrg 
1271 1.1 mrg .Lle_cmp:
1272 1.1 mrg 	/* Check if x and y have different signs.  */
1273 1.1 mrg 	xor	a7, a2, a3
1274 1.1 mrg 	bltz	a7, .Lle_diff_signs
1275 1.1 mrg 
1276 1.1 mrg 	/* Check if x is negative.  */
1277 1.1 mrg 	bltz	a2, .Lle_xneg
1278 1.1 mrg 
1279 1.1 mrg 	/* Check if x <= y.  */
1280 1.1 mrg 	bltu	a3, a2, 5f
1281 1.1 mrg 4:	movi	a2, 0
1282 1.1 mrg 	leaf_return
1283 1.1 mrg 
1284 1.1 mrg .Lle_xneg:
	/* Both negative: larger bit pattern means smaller value.  */
1285 1.1 mrg 	/* Check if y <= x.  */
1286 1.1 mrg 	bgeu	a2, a3, 4b
1287 1.1 mrg 5:	movi	a2, 1
1288 1.1 mrg 	leaf_return
1289 1.1 mrg 
1290 1.1 mrg .Lle_diff_signs:
1291 1.1 mrg 	bltz	a2, 4b
1292 1.1 mrg 
1293 1.1 mrg 	/* Check if both x and y are zero.  */
	/* x is +0/positive and y negative: x <= y only if both are zero
	   (+0 <= -0).  Shifting out the sign bits leaves a7 == 0 exactly
	   when both magnitudes are zero.  */
1294 1.1 mrg 	or	a7, a2, a3
1295 1.1 mrg 	slli	a7, a7, 1
1296 1.1 mrg 	movi	a2, 1
1297 1.1 mrg 	movi	a3, 0
1298 1.1 mrg 	moveqz	a2, a3, a7
1299 1.1 mrg 	leaf_return
1300 1.1 mrg
1301 1.1 mrg
1302 1.1 mrg /* Greater Than or Equal */
1303 1.1 mrg
	/* int __gesf2(float x, float y)
	   Inputs: a2 = x, a3 = y.  Result in a2: -1 if x < y, else >= 0.
	   If either operand is NaN the result is -1, satisfying the
	   libgcc convention that __gesf2 returns a value < 0 for
	   unordered operands.  The ordered comparison is shared with
	   __ltsf2 via .Llt_cmp below.  */
1304 1.1 mrg 	.align	4
1305 1.1 mrg 	.global	__gesf2
1306 1.1 mrg 	.type	__gesf2, @function
1307 1.1 mrg __gesf2:
1308 1.1 mrg 	leaf_entry sp, 16
1309 1.1 mrg 	movi	a6, 0x7f800000
1310 1.1 mrg 	ball	a2, a6, 2f
1311 1.1 mrg 1:	bnall	a3, a6, .Llt_cmp
1312 1.1 mrg 
1313 1.1 mrg 	/* Check if y is a NaN.  */
1314 1.1 mrg 	slli	a7, a3, 9
1315 1.1 mrg 	beqz	a7, .Llt_cmp
1316 1.1 mrg 	movi	a2, -1
1317 1.1 mrg 	leaf_return
1318 1.1 mrg 
1319 1.1 mrg 	/* Check if x is a NaN.  */
1320 1.1 mrg 2:	slli	a7, a2, 9
1321 1.1 mrg 	beqz	a7, 1b
1322 1.1 mrg 	movi	a2, -1
1323 1.1 mrg 	leaf_return
1324 1.1 mrg
1325 1.1 mrg
1326 1.1 mrg /* Less Than */
1327 1.1 mrg
	/* int __ltsf2(float x, float y)
	   Inputs: a2 = x, a3 = y.  Result in a2: -1 if x < y, else 0.
	   If either operand is NaN the result is 0 ("not less than").
	   .Llt_cmp is the shared ordered-compare tail, also reached from
	   __gesf2 above: it returns -1 when x < y and 0 otherwise,
	   comparing raw bit patterns with the sense flipped for negative
	   values and +0 == -0 handled specially.  */
1328 1.1 mrg 	.align	4
1329 1.1 mrg 	.global	__ltsf2
1330 1.1 mrg 	.type	__ltsf2, @function
1331 1.1 mrg __ltsf2:
1332 1.1 mrg 	leaf_entry sp, 16
1333 1.1 mrg 	movi	a6, 0x7f800000
1334 1.1 mrg 	ball	a2, a6, 2f
1335 1.1 mrg 1:	bnall	a3, a6, .Llt_cmp
1336 1.1 mrg 
1337 1.1 mrg 	/* Check if y is a NaN.  */
1338 1.1 mrg 	slli	a7, a3, 9
1339 1.1 mrg 	beqz	a7, .Llt_cmp
1340 1.1 mrg 	movi	a2, 0
1341 1.1 mrg 	leaf_return
1342 1.1 mrg 
1343 1.1 mrg 	/* Check if x is a NaN.  */
1344 1.1 mrg 2:	slli	a7, a2, 9
1345 1.1 mrg 	beqz	a7, 1b
1346 1.1 mrg 	movi	a2, 0
1347 1.1 mrg 	leaf_return
1348 1.1 mrg 
1349 1.1 mrg .Llt_cmp:
1350 1.1 mrg 	/* Check if x and y have different signs.  */
1351 1.1 mrg 	xor	a7, a2, a3
1352 1.1 mrg 	bltz	a7, .Llt_diff_signs
1353 1.1 mrg 
1354 1.1 mrg 	/* Check if x is negative.  */
1355 1.1 mrg 	bltz	a2, .Llt_xneg
1356 1.1 mrg 
1357 1.1 mrg 	/* Check if x < y.  */
1358 1.1 mrg 	bgeu	a2, a3, 5f
1359 1.1 mrg 4:	movi	a2, -1
1360 1.1 mrg 	leaf_return
1361 1.1 mrg 
1362 1.1 mrg .Llt_xneg:
	/* Both negative: larger bit pattern means smaller value.  */
1363 1.1 mrg 	/* Check if y < x.  */
1364 1.1 mrg 	bltu	a3, a2, 4b
1365 1.1 mrg 5:	movi	a2, 0
1366 1.1 mrg 	leaf_return
1367 1.1 mrg 
1368 1.1 mrg .Llt_diff_signs:
1369 1.1 mrg 	bgez	a2, 5b
1370 1.1 mrg 
1371 1.1 mrg 	/* Check if both x and y are nonzero.  */
	/* x negative, y positive: x < y unless both are zero (-0 < +0 is
	   false).  Shifting out the sign bits leaves a7 == 0 exactly when
	   both magnitudes are zero.  */
1372 1.1 mrg 	or	a7, a2, a3
1373 1.1 mrg 	slli	a7, a7, 1
1374 1.1 mrg 	movi	a2, 0
1375 1.1 mrg 	movi	a3, -1
1376 1.1 mrg 	movnez	a2, a3, a7
1377 1.1 mrg 	leaf_return
1378 1.1 mrg
1379 1.1 mrg
1380 1.1 mrg /* Unordered */
1381 1.1 mrg
	/* int __unordsf2(float x, float y)
	   Inputs: a2 = x, a3 = y.  Result in a2: 1 if either operand is
	   a NaN (exponent all ones, nonzero mantissa), else 0.  */
1382 1.1 mrg 	.align	4
1383 1.1 mrg 	.global	__unordsf2
1384 1.1 mrg 	.type	__unordsf2, @function
1385 1.1 mrg __unordsf2:
1386 1.1 mrg 	leaf_entry sp, 16
1387 1.1 mrg 	movi	a6, 0x7f800000
1388 1.1 mrg 	ball	a2, a6, 3f
1389 1.1 mrg 1:	ball	a3, a6, 4f
1390 1.1 mrg 2:	movi	a2, 0
1391 1.1 mrg 	leaf_return
1392 1.1 mrg 
	/* x has an all-ones exponent: NaN if its mantissa is nonzero.  */
1393 1.1 mrg 3:	slli	a7, a2, 9
1394 1.1 mrg 	beqz	a7, 1b
1395 1.1 mrg 	movi	a2, 1
1396 1.1 mrg 	leaf_return
1397 1.1 mrg 
	/* y has an all-ones exponent: NaN if its mantissa is nonzero.  */
1398 1.1 mrg 4:	slli	a7, a3, 9
1399 1.1 mrg 	beqz	a7, 2b
1400 1.1 mrg 	movi	a2, 1
1401 1.1 mrg 	leaf_return
1402 1.1 mrg
1403 1.1 mrg #endif /* L_cmpsf2 */
1404 1.1 mrg
1405 1.1 mrg #ifdef L_fixsfsi
1406 1.1 mrg
	/* int __fixsfsi(float x)
	   Convert single-precision float to signed 32-bit int, truncating
	   toward zero.  Input: a2 = x.  Result: a2.  Out-of-range values
	   saturate to 0x7fffffff / 0x80000000; NaN is mapped to
	   0x7fffffff (treated as +maxint); values with magnitude < 1
	   (including subnormals) return 0.  */
1407 1.1 mrg 	.align	4
1408 1.1 mrg 	.global	__fixsfsi
1409 1.1 mrg 	.type	__fixsfsi, @function
1410 1.1 mrg __fixsfsi:
1411 1.1 mrg 	leaf_entry sp, 16
1412 1.1 mrg 
1413 1.1 mrg 	/* Check for NaN and Infinity.  */
1414 1.1 mrg 	movi	a6, 0x7f800000
1415 1.1 mrg 	ball	a2, a6, .Lfixsfsi_nan_or_inf
1416 1.1 mrg 
1417 1.1 mrg 	/* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
1418 1.1 mrg 	extui	a4, a2, 23, 8
1419 1.1 mrg 	addi	a4, a4, -0x7e
1420 1.1 mrg 	bgei	a4, 32, .Lfixsfsi_maxint
1421 1.1 mrg 	blti	a4, 1, .Lfixsfsi_zero
1422 1.1 mrg 
1423 1.1 mrg 	/* Add explicit "1.0" and shift << 8.  */
1424 1.1 mrg 	or	a7, a2, a6
1425 1.1 mrg 	slli	a5, a7, 8
1426 1.1 mrg 
1427 1.1 mrg 	/* Shift back to the right, based on the exponent.  */
1428 1.1 mrg 	ssl	a4		/* shift by 32 - a4 */
1429 1.1 mrg 	srl	a5, a5
1430 1.1 mrg 
1431 1.1 mrg 	/* Negate the result if sign != 0.  */
	/* a7 still has x's sign in bit 31, so movgez keeps the positive
	   value when x >= 0 and leaves the negated value otherwise.  */
1432 1.1 mrg 	neg	a2, a5
1433 1.1 mrg 	movgez	a2, a5, a7
1434 1.1 mrg 	leaf_return
1435 1.1 mrg 
1436 1.1 mrg .Lfixsfsi_nan_or_inf:
1437 1.1 mrg 	/* Handle Infinity and NaN.  */
1438 1.1 mrg 	slli	a4, a2, 9
1439 1.1 mrg 	beqz	a4, .Lfixsfsi_maxint
1440 1.1 mrg 
1441 1.1 mrg 	/* Translate NaN to +maxint.  */
1442 1.1 mrg 	movi	a2, 0
1443 1.1 mrg 
1444 1.1 mrg .Lfixsfsi_maxint:
	/* Saturate: 0x7fffffff if x >= 0 (or NaN), 0x80000000 if x < 0.  */
1445 1.1 mrg 	slli	a4, a6, 8	/* 0x80000000 */
1446 1.1 mrg 	addi	a5, a4, -1	/* 0x7fffffff */
1447 1.1 mrg 	movgez	a4, a5, a2
1448 1.1 mrg 	mov	a2, a4
1449 1.1 mrg 	leaf_return
1450 1.1 mrg 
1451 1.1 mrg .Lfixsfsi_zero:
1452 1.1 mrg 	movi	a2, 0
1453 1.1 mrg 	leaf_return
1454 1.1 mrg
1455 1.1 mrg #endif /* L_fixsfsi */
1456 1.1 mrg
1457 1.1 mrg #ifdef L_fixsfdi
1458 1.1 mrg
	/* long long __fixsfdi(float x)
	   Convert single-precision float to signed 64-bit int, truncating
	   toward zero.  Input: a2 = x.  Result in the xh/xl register pair
	   (a2/a3, order depends on endianness — see the #defines at the
	   top of the file).  Out-of-range values saturate to
	   0x7fffffffffffffff / 0x8000000000000000; NaN maps to +maxint;
	   |x| < 1 returns 0.  */
1459 1.1 mrg 	.align	4
1460 1.1 mrg 	.global	__fixsfdi
1461 1.1 mrg 	.type	__fixsfdi, @function
1462 1.1 mrg __fixsfdi:
1463 1.1 mrg 	leaf_entry sp, 16
1464 1.1 mrg 
1465 1.1 mrg 	/* Check for NaN and Infinity.  */
1466 1.1 mrg 	movi	a6, 0x7f800000
1467 1.1 mrg 	ball	a2, a6, .Lfixsfdi_nan_or_inf
1468 1.1 mrg 
1469 1.1 mrg 	/* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
1470 1.1 mrg 	extui	a4, a2, 23, 8
1471 1.1 mrg 	addi	a4, a4, -0x7e
1472 1.1 mrg 	bgei	a4, 64, .Lfixsfdi_maxint
1473 1.1 mrg 	blti	a4, 1, .Lfixsfdi_zero
1474 1.1 mrg 
1475 1.1 mrg 	/* Add explicit "1.0" and shift << 8.  */
1476 1.1 mrg 	or	a7, a2, a6
1477 1.1 mrg 	slli	xh, a7, 8
1478 1.1 mrg 
1479 1.1 mrg 	/* Shift back to the right, based on the exponent.  */
1480 1.1 mrg 	ssl	a4		/* shift by 64 - a4 */
1481 1.1 mrg 	bgei	a4, 32, .Lfixsfdi_smallshift
1482 1.1 mrg 	srl	xl, xh
1483 1.1 mrg 	movi	xh, 0
1484 1.1 mrg 
1485 1.1 mrg .Lfixsfdi_shifted:
1486 1.1 mrg 	/* Negate the result if sign != 0.  */
	/* 64-bit two's complement negate: negate both halves, then
	   borrow from the high half unless the low half was zero.  */
1487 1.1 mrg 	bgez	a7, 1f
1488 1.1 mrg 	neg	xl, xl
1489 1.1 mrg 	neg	xh, xh
1490 1.1 mrg 	beqz	xl, 1f
1491 1.1 mrg 	addi	xh, xh, -1
1492 1.1 mrg 1:	leaf_return
1493 1.1 mrg 
1494 1.1 mrg .Lfixsfdi_smallshift:
	/* Exponent >= 32: the integer spans both words.  sll/srl use the
	   SAR amount set by "ssl a4" above.  */
1495 1.1 mrg 	movi	xl, 0
1496 1.1 mrg 	sll	xl, xh
1497 1.1 mrg 	srl	xh, xh
1498 1.1 mrg 	j	.Lfixsfdi_shifted
1499 1.1 mrg 
1500 1.1 mrg .Lfixsfdi_nan_or_inf:
1501 1.1 mrg 	/* Handle Infinity and NaN.  */
1502 1.1 mrg 	slli	a4, a2, 9
1503 1.1 mrg 	beqz	a4, .Lfixsfdi_maxint
1504 1.1 mrg 
1505 1.1 mrg 	/* Translate NaN to +maxint.  */
1506 1.1 mrg 	movi	a2, 0
1507 1.1 mrg 
1508 1.1 mrg .Lfixsfdi_maxint:
1509 1.1 mrg 	slli	a7, a6, 8	/* 0x80000000 */
1510 1.1 mrg 	bgez	a2, 1f
1511 1.1 mrg 	mov	xh, a7
1512 1.1 mrg 	movi	xl, 0
1513 1.1 mrg 	leaf_return
1514 1.1 mrg 
1515 1.1 mrg 1:	addi	xh, a7, -1	/* 0x7fffffff */
1516 1.1 mrg 	movi	xl, -1
1517 1.1 mrg 	leaf_return
1518 1.1 mrg 
1519 1.1 mrg .Lfixsfdi_zero:
1520 1.1 mrg 	movi	xh, 0
1521 1.1 mrg 	movi	xl, 0
1522 1.1 mrg 	leaf_return
1523 1.1 mrg
1524 1.1 mrg #endif /* L_fixsfdi */
1525 1.1 mrg
1526 1.1 mrg #ifdef L_fixunssfsi
1527 1.1 mrg
	/* unsigned __fixunssfsi(float x)
	   Convert single-precision float to unsigned 32-bit int,
	   truncating toward zero.  Input: a2 = x.  Result: a2.
	   Values >= 2^32 and +Inf return 0xffffffff; NaN returns
	   0xffffffff; negative overflow returns 0x80000000 (the movgez
	   below selects by the sign of the original value); |x| < 1
	   returns 0.  */
1528 1.1 mrg 	.align	4
1529 1.1 mrg 	.global	__fixunssfsi
1530 1.1 mrg 	.type	__fixunssfsi, @function
1531 1.1 mrg __fixunssfsi:
1532 1.1 mrg 	leaf_entry sp, 16
1533 1.1 mrg 
1534 1.1 mrg 	/* Check for NaN and Infinity.  */
1535 1.1 mrg 	movi	a6, 0x7f800000
1536 1.1 mrg 	ball	a2, a6, .Lfixunssfsi_nan_or_inf
1537 1.1 mrg 
1538 1.1 mrg 	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
1539 1.1 mrg 	extui	a4, a2, 23, 8
1540 1.1 mrg 	addi	a4, a4, -0x7f
1541 1.1 mrg 	bgei	a4, 32, .Lfixunssfsi_maxint
1542 1.1 mrg 	bltz	a4, .Lfixunssfsi_zero
1543 1.1 mrg 
1544 1.1 mrg 	/* Add explicit "1.0" and shift << 8.  */
1545 1.1 mrg 	or	a7, a2, a6
1546 1.1 mrg 	slli	a5, a7, 8
1547 1.1 mrg 
1548 1.1 mrg 	/* Shift back to the right, based on the exponent.  */
	/* a4 == 32 after the increment means no shift at all; ssl/srl
	   cannot express a zero shift, so handle it separately.  */
1549 1.1 mrg 	addi	a4, a4, 1
1550 1.1 mrg 	beqi	a4, 32, .Lfixunssfsi_bigexp
1551 1.1 mrg 	ssl	a4		/* shift by 32 - a4 */
1552 1.1 mrg 	srl	a5, a5
1553 1.1 mrg 
1554 1.1 mrg 	/* Negate the result if sign != 0.  */
1555 1.1 mrg 	neg	a2, a5
1556 1.1 mrg 	movgez	a2, a5, a7
1557 1.1 mrg 	leaf_return
1558 1.1 mrg 
1559 1.1 mrg .Lfixunssfsi_nan_or_inf:
1560 1.1 mrg 	/* Handle Infinity and NaN.  */
1561 1.1 mrg 	slli	a4, a2, 9
1562 1.1 mrg 	beqz	a4, .Lfixunssfsi_maxint
1563 1.1 mrg 
1564 1.1 mrg 	/* Translate NaN to 0xffffffff.  */
1565 1.1 mrg 	movi	a2, -1
1566 1.1 mrg 	leaf_return
1567 1.1 mrg 
1568 1.1 mrg .Lfixunssfsi_maxint:
	/* Saturate: 0xffffffff if x >= 0, 0x80000000 if x < 0.  */
1569 1.1 mrg 	slli	a4, a6, 8	/* 0x80000000 */
1570 1.1 mrg 	movi	a5, -1		/* 0xffffffff */
1571 1.1 mrg 	movgez	a4, a5, a2
1572 1.1 mrg 	mov	a2, a4
1573 1.1 mrg 	leaf_return
1574 1.1 mrg 
1575 1.1 mrg .Lfixunssfsi_zero:
1576 1.1 mrg 	movi	a2, 0
1577 1.1 mrg 	leaf_return
1578 1.1 mrg 
1579 1.1 mrg .Lfixunssfsi_bigexp:
1580 1.1 mrg 	/* Handle unsigned maximum exponent case.  */
1581 1.1 mrg 	bltz	a2, 1f
1582 1.1 mrg 	mov	a2, a5	/* no shift needed */
1583 1.1 mrg 	leaf_return
1584 1.1 mrg 
1585 1.1 mrg 	/* Return 0x80000000 if negative.  */
1586 1.1 mrg 1:	slli	a2, a6, 8
1587 1.1 mrg 	leaf_return
1588 1.1 mrg
1589 1.1 mrg #endif /* L_fixunssfsi */
1590 1.1 mrg
1591 1.1 mrg #ifdef L_fixunssfdi
1592 1.1 mrg
	/* unsigned long long __fixunssfdi(float x)
	   Convert single-precision float to unsigned 64-bit int,
	   truncating toward zero.  Input: a2 = x.  Result in xh/xl
	   (endian-dependent a2/a3 pair).  Values >= 2^64, +Inf and NaN
	   return 0xffffffffffffffff; negative overflow returns
	   0x8000000000000000; |x| < 1 returns 0.  */
1593 1.1 mrg 	.align	4
1594 1.1 mrg 	.global	__fixunssfdi
1595 1.1 mrg 	.type	__fixunssfdi, @function
1596 1.1 mrg __fixunssfdi:
1597 1.1 mrg 	leaf_entry sp, 16
1598 1.1 mrg 
1599 1.1 mrg 	/* Check for NaN and Infinity.  */
1600 1.1 mrg 	movi	a6, 0x7f800000
1601 1.1 mrg 	ball	a2, a6, .Lfixunssfdi_nan_or_inf
1602 1.1 mrg 
1603 1.1 mrg 	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
1604 1.1 mrg 	extui	a4, a2, 23, 8
1605 1.1 mrg 	addi	a4, a4, -0x7f
1606 1.1 mrg 	bgei	a4, 64, .Lfixunssfdi_maxint
1607 1.1 mrg 	bltz	a4, .Lfixunssfdi_zero
1608 1.1 mrg 
1609 1.1 mrg 	/* Add explicit "1.0" and shift << 8.  */
1610 1.1 mrg 	or	a7, a2, a6
1611 1.1 mrg 	slli	xh, a7, 8
1612 1.1 mrg 
1613 1.1 mrg 	/* Shift back to the right, based on the exponent.  */
	/* a4 == 64 after the increment means no shift at all, which the
	   funnel shifter cannot express; handle it separately.  */
1614 1.1 mrg 	addi	a4, a4, 1
1615 1.1 mrg 	beqi	a4, 64, .Lfixunssfdi_bigexp
1616 1.1 mrg 	ssl	a4		/* shift by 64 - a4 */
1617 1.1 mrg 	bgei	a4, 32, .Lfixunssfdi_smallshift
1618 1.1 mrg 	srl	xl, xh
1619 1.1 mrg 	movi	xh, 0
1620 1.1 mrg 
1621 1.1 mrg .Lfixunssfdi_shifted:
1622 1.1 mrg 	/* Negate the result if sign != 0.  */
	/* 64-bit two's complement negate: negate both halves, then
	   borrow from the high half unless the low half was zero.  */
1623 1.1 mrg 	bgez	a7, 1f
1624 1.1 mrg 	neg	xl, xl
1625 1.1 mrg 	neg	xh, xh
1626 1.1 mrg 	beqz	xl, 1f
1627 1.1 mrg 	addi	xh, xh, -1
1628 1.1 mrg 1:	leaf_return
1629 1.1 mrg 
1630 1.1 mrg .Lfixunssfdi_smallshift:
	/* Exponent >= 32: the integer spans both words; src funnels the
	   low bits of xh into xl using the SAR amount set above.  */
1631 1.1 mrg 	movi	xl, 0
1632 1.1 mrg 	src	xl, xh, xl
1633 1.1 mrg 	srl	xh, xh
1634 1.1 mrg 	j	.Lfixunssfdi_shifted
1635 1.1 mrg 
1636 1.1 mrg .Lfixunssfdi_nan_or_inf:
1637 1.1 mrg 	/* Handle Infinity and NaN.  */
1638 1.1 mrg 	slli	a4, a2, 9
1639 1.1 mrg 	beqz	a4, .Lfixunssfdi_maxint
1640 1.1 mrg 
1641 1.1 mrg 	/* Translate NaN to 0xffffffff....  */
1642 1.1 mrg 1:	movi	xh, -1
1643 1.1 mrg 	movi	xl, -1
1644 1.1 mrg 	leaf_return
1645 1.1 mrg 
1646 1.1 mrg .Lfixunssfdi_maxint:
1647 1.1 mrg 	bgez	a2, 1b
1648 1.1 mrg 2:	slli	xh, a6, 8	/* 0x80000000 */
1649 1.1 mrg 	movi	xl, 0
1650 1.1 mrg 	leaf_return
1651 1.1 mrg 
1652 1.1 mrg .Lfixunssfdi_zero:
1653 1.1 mrg 	movi	xh, 0
1654 1.1 mrg 	movi	xl, 0
1655 1.1 mrg 	leaf_return
1656 1.1 mrg 
1657 1.1 mrg .Lfixunssfdi_bigexp:
1658 1.1 mrg 	/* Handle unsigned maximum exponent case.  */
1659 1.1 mrg 	bltz	a7, 2b
1660 1.1 mrg 	movi	xl, 0
1661 1.1 mrg 	leaf_return	/* no shift needed */
1662 1.1 mrg
1663 1.1 mrg #endif /* L_fixunssfdi */
1664 1.1 mrg
1665 1.1 mrg #ifdef L_floatsisf
1666 1.1 mrg
	/* float __floatunsisf(unsigned x) / float __floatsisf(int x)
	   Convert a 32-bit integer to single-precision float with
	   round-to-nearest-even.  Input: a2.  Result: a2.
	   __floatunsisf forces the sign to zero and shares the
	   normalization/rounding tail with __floatsisf.
	   Register roles in the shared tail: a7 = sign bit, a4 = leading
	   zero count, a5 = scratch/exponent, a6 = rounding bits shifted
	   out of the 24-bit mantissa.  */
1667 1.1 mrg 	.align	4
1668 1.1 mrg 	.global	__floatunsisf
1669 1.1 mrg 	.type	__floatunsisf, @function
1670 1.1 mrg __floatunsisf:
1671 1.1 mrg 	leaf_entry sp, 16
1672 1.1 mrg 	beqz	a2, .Lfloatsisf_return
1673 1.1 mrg 
1674 1.1 mrg 	/* Set the sign to zero and jump to the floatsisf code.  */
1675 1.1 mrg 	movi	a7, 0
1676 1.1 mrg 	j	.Lfloatsisf_normalize
1677 1.1 mrg 
1678 1.1 mrg 	.align	4
1679 1.1 mrg 	.global	__floatsisf
1680 1.1 mrg 	.type	__floatsisf, @function
1681 1.1 mrg __floatsisf:
1682 1.1 mrg 	leaf_entry sp, 16
1683 1.1 mrg 
1684 1.1 mrg 	/* Check for zero.  */
1685 1.1 mrg 	beqz	a2, .Lfloatsisf_return
1686 1.1 mrg 
1687 1.1 mrg 	/* Save the sign.  */
1688 1.1 mrg 	extui	a7, a2, 31, 1
1689 1.1 mrg 
1690 1.1 mrg 	/* Get the absolute value.  */
	/* Note: abs(INT_MIN) wraps to 0x80000000, which is exactly the
	   correct unsigned magnitude here.  */
1691 1.1 mrg #if XCHAL_HAVE_ABS
1692 1.1 mrg 	abs	a2, a2
1693 1.1 mrg #else
1694 1.1 mrg 	neg	a4, a2
1695 1.1 mrg 	movltz	a2, a4, a2
1696 1.1 mrg #endif
1697 1.1 mrg 
1698 1.1 mrg .Lfloatsisf_normalize:
1699 1.1 mrg 	/* Normalize with the first 1 bit in the msb.  */
1700 1.1 mrg 	do_nsau	a4, a2, a5, a6
1701 1.1 mrg 	ssl	a4
1702 1.1 mrg 	sll	a5, a2
1703 1.1 mrg 
1704 1.1 mrg 	/* Shift the mantissa into position, with rounding bits in a6.  */
1705 1.1 mrg 	srli	a2, a5, 8
1706 1.1 mrg 	slli	a6, a5, (32 - 8)
1707 1.1 mrg 
1708 1.1 mrg 	/* Set the exponent.  */
1709 1.1 mrg 	movi	a5, 0x9d	/* 0x7e + 31 */
1710 1.1 mrg 	sub	a5, a5, a4
1711 1.1 mrg 	slli	a5, a5, 23
1712 1.1 mrg 	add	a2, a2, a5
1713 1.1 mrg 
1714 1.1 mrg 	/* Add the sign.  */
1715 1.1 mrg 	slli	a7, a7, 31
1716 1.1 mrg 	or	a2, a2, a7
1717 1.1 mrg 
1718 1.1 mrg 	/* Round up if the leftover fraction is >= 1/2.  */
	/* a6's sign bit is the rounding bit (the first bit shifted out).  */
1719 1.1 mrg 	bgez	a6, .Lfloatsisf_return
1720 1.1 mrg 	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */
1721 1.1 mrg 
1722 1.1 mrg 	/* Check if the leftover fraction is exactly 1/2.  */
1723 1.1 mrg 	slli	a6, a6, 1
1724 1.1 mrg 	beqz	a6, .Lfloatsisf_exactlyhalf
1725 1.1 mrg 
1726 1.1 mrg .Lfloatsisf_return:
1727 1.1 mrg 	leaf_return
1728 1.1 mrg 
1729 1.1 mrg .Lfloatsisf_exactlyhalf:
1730 1.1 mrg 	/* Round down to the nearest even value.  */
1731 1.1 mrg 	srli	a2, a2, 1
1732 1.1 mrg 	slli	a2, a2, 1
1733 1.1 mrg 	leaf_return
1734 1.1 mrg
1735 1.1 mrg #endif /* L_floatsisf */
1736 1.1 mrg
1737 1.1 mrg #ifdef L_floatdisf
1738 1.1 mrg
	/* float __floatundisf(unsigned long long x)
	   float __floatdisf(long long x)
	   Convert a 64-bit integer (in the endian-dependent xh/xl pair)
	   to single-precision float with round-to-nearest-even.
	   Result: a2.  __floatundisf forces the sign to zero and shares
	   the normalization/rounding tail with __floatdisf.
	   Register roles: a7 = sign bit, a4 = total left-shift used for
	   normalization (also feeds the exponent computation), a5 =
	   scratch, a6 = guard/sticky rounding bits.  */
1739 1.1 mrg 	.align	4
1740 1.1 mrg 	.global	__floatundisf
1741 1.1 mrg 	.type	__floatundisf, @function
1742 1.1 mrg __floatundisf:
1743 1.1 mrg 	leaf_entry sp, 16
1744 1.1 mrg 
1745 1.1 mrg 	/* Check for zero.  */
1746 1.1 mrg 	or	a4, xh, xl
1747 1.1 mrg 	beqz	a4, 2f
1748 1.1 mrg 
1749 1.1 mrg 	/* Set the sign to zero and jump to the floatdisf code.  */
1750 1.1 mrg 	movi	a7, 0
1751 1.1 mrg 	j	.Lfloatdisf_normalize
1752 1.1 mrg 
1753 1.1 mrg 	.align	4
1754 1.1 mrg 	.global	__floatdisf
1755 1.1 mrg 	.type	__floatdisf, @function
1756 1.1 mrg __floatdisf:
1757 1.1 mrg 	leaf_entry sp, 16
1758 1.1 mrg 
1759 1.1 mrg 	/* Check for zero.  */
1760 1.1 mrg 	or	a4, xh, xl
1761 1.1 mrg 	beqz	a4, 2f
1762 1.1 mrg 
1763 1.1 mrg 	/* Save the sign.  */
1764 1.1 mrg 	extui	a7, xh, 31, 1
1765 1.1 mrg 
1766 1.1 mrg 	/* Get the absolute value.  */
	/* 64-bit two's complement negate of xh:xl when negative.  */
1767 1.1 mrg 	bgez	xh, .Lfloatdisf_normalize
1768 1.1 mrg 	neg	xl, xl
1769 1.1 mrg 	neg	xh, xh
1770 1.1 mrg 	beqz	xl, .Lfloatdisf_normalize
1771 1.1 mrg 	addi	xh, xh, -1
1772 1.1 mrg 
1773 1.1 mrg .Lfloatdisf_normalize:
1774 1.1 mrg 	/* Normalize with the first 1 bit in the msb of xh.  */
1775 1.1 mrg 	beqz	xh, .Lfloatdisf_bigshift
1776 1.1 mrg 	do_nsau	a4, xh, a5, a6
1777 1.1 mrg 	ssl	a4
1778 1.1 mrg 	src	xh, xh, xl
1779 1.1 mrg 	sll	xl, xl
1780 1.1 mrg 
1781 1.1 mrg .Lfloatdisf_shifted:
1782 1.1 mrg 	/* Shift the mantissa into position, with rounding bits in a6.  */
	/* a5 collects the bits shifted out below a6; fold them into a6's
	   low bit ("sticky") so an exact-half test is only taken when the
	   discarded fraction is exactly one half.  */
1783 1.1 mrg 	ssai	8
1784 1.1 mrg 	sll	a5, xl
1785 1.1 mrg 	src	a6, xh, xl
1786 1.1 mrg 	srl	xh, xh
1787 1.1 mrg 	beqz	a5, 1f
1788 1.1 mrg 	movi	a5, 1
1789 1.1 mrg 	or	a6, a6, a5
1790 1.1 mrg 1:	
1791 1.1 mrg 	/* Set the exponent.  */
1792 1.1 mrg 	movi	a5, 0xbd	/* 0x7e + 63 */
1793 1.1 mrg 	sub	a5, a5, a4
1794 1.1 mrg 	slli	a5, a5, 23
1795 1.1 mrg 	add	a2, xh, a5
1796 1.1 mrg 
1797 1.1 mrg 	/* Add the sign.  */
1798 1.1 mrg 	slli	a7, a7, 31
1799 1.1 mrg 	or	a2, a2, a7
1800 1.1 mrg 
1801 1.1 mrg 	/* Round up if the leftover fraction is >= 1/2.  */
1802 1.1 mrg 	bgez	a6, 2f
1803 1.1 mrg 	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */
1804 1.1 mrg 
1805 1.1 mrg 	/* Check if the leftover fraction is exactly 1/2.  */
1806 1.1 mrg 	slli	a6, a6, 1
1807 1.1 mrg 	beqz	a6, .Lfloatdisf_exactlyhalf
1808 1.1 mrg 2:	leaf_return
1809 1.1 mrg 
1810 1.1 mrg .Lfloatdisf_bigshift:
1811 1.1 mrg 	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
1812 1.1 mrg 	do_nsau	a4, xl, a5, a6
1813 1.1 mrg 	ssl	a4
1814 1.1 mrg 	sll	xh, xl
1815 1.1 mrg 	movi	xl, 0
1816 1.1 mrg 	addi	a4, a4, 32
1817 1.1 mrg 	j	.Lfloatdisf_shifted
1818 1.1 mrg 
1819 1.1 mrg .Lfloatdisf_exactlyhalf:
1820 1.1 mrg 	/* Round down to the nearest even value.  */
1821 1.1 mrg 	srli	a2, a2, 1
1822 1.1 mrg 	slli	a2, a2, 1
1823 1.1 mrg 	leaf_return
1824 1.1 mrg
1825 1.1 mrg #endif /* L_floatdisf */
1826 1.1.1.6 mrg
1827 1.1.1.6 mrg #if XCHAL_HAVE_FP_SQRT
1828 1.1.1.6 mrg #ifdef L_sqrtf
1829 1.1.1.6 mrg /* Square root */
1830 1.1.1.6 mrg
	/* float __ieee754_sqrtf(float x)
	   Square root using the Xtensa FPU sqrt-assist instructions
	   (sqrt0.s seed plus maddn.s refinement and divn.s final step).
	   Input: a2 = x.  Result: a2.  Clobbers f0-f7.
	   NOTE(review): vendor-defined instruction sequence for
	   XCHAL_HAVE_FP_SQRT configurations; must not be reordered.  */
1831 1.1.1.6 mrg 	.align	4
1832 1.1.1.6 mrg 	.global	__ieee754_sqrtf
1833 1.1.1.6 mrg 	.type	__ieee754_sqrtf, @function
1834 1.1.1.6 mrg __ieee754_sqrtf:
1835 1.1.1.6 mrg 	leaf_entry	sp, 16
1836 1.1.1.6 mrg 
1837 1.1.1.6 mrg 	wfr	f1, a2
1838 1.1.1.6 mrg 
1839 1.1.1.6 mrg 	sqrt0.s	f2, f1
1840 1.1.1.6 mrg 	const.s	f3, 0
1841 1.1.1.6 mrg 	maddn.s	f3, f2, f2
1842 1.1.1.6 mrg 	nexp01.s	f4, f1
1843 1.1.1.6 mrg 	const.s	f0, 3
1844 1.1.1.6 mrg 	addexp.s	f4, f0
1845 1.1.1.6 mrg 	maddn.s	f0, f3, f4
1846 1.1.1.6 mrg 	nexp01.s	f3, f1
1847 1.1.1.6 mrg 	neg.s	f5, f3
1848 1.1.1.6 mrg 	maddn.s	f2, f0, f2
1849 1.1.1.6 mrg 	const.s	f0, 0
1850 1.1.1.6 mrg 	const.s	f6, 0
1851 1.1.1.6 mrg 	const.s	f7, 0
1852 1.1.1.6 mrg 	maddn.s	f0, f5, f2
1853 1.1.1.6 mrg 	maddn.s	f6, f2, f4
1854 1.1.1.6 mrg 	const.s	f4, 3
1855 1.1.1.6 mrg 	maddn.s	f7, f4, f2
1856 1.1.1.6 mrg 	maddn.s	f3, f0, f0
1857 1.1.1.6 mrg 	maddn.s	f4, f6, f2
1858 1.1.1.6 mrg 	neg.s	f2, f7
1859 1.1.1.6 mrg 	maddn.s	f0, f3, f2
1860 1.1.1.6 mrg 	maddn.s	f7, f4, f7
1861 1.1.1.6 mrg 	mksadj.s	f2, f1
1862 1.1.1.6 mrg 	nexp01.s	f1, f1
1863 1.1.1.6 mrg 	maddn.s	f1, f0, f0
1864 1.1.1.6 mrg 	neg.s	f3, f7
1865 1.1.1.6 mrg 	addexpm.s	f0, f2
1866 1.1.1.6 mrg 	addexp.s	f3, f2
1867 1.1.1.6 mrg 	divn.s	f0, f1, f3
1868 1.1.1.6 mrg 
1869 1.1.1.6 mrg 	rfr	a2, f0
1870 1.1.1.6 mrg 
1871 1.1.1.6 mrg 	leaf_return
1872 1.1.1.6 mrg
1873 1.1.1.6 mrg #endif /* L_sqrtf */
1874 1.1.1.6 mrg #endif /* XCHAL_HAVE_FP_SQRT */
1875 1.1.1.6 mrg
1876 1.1.1.6 mrg #if XCHAL_HAVE_FP_RECIP
1877 1.1.1.6 mrg #ifdef L_recipsf2
1878 1.1.1.6 mrg /* Reciprocal */
1879 1.1.1.6 mrg
	/* float __recipsf2(float x)
	   Approximate reciprocal 1/x using the FPU recip0.s seed followed
	   by two Newton-Raphson refinement steps (each step: e = 1 - x*r;
	   r = r + r*e).  Input: a2 = x.  Result: a2.  Clobbers f0-f2.
	   Only assembled for XCHAL_HAVE_FP_RECIP configurations.  */
1880 1.1.1.6 mrg 	.align	4
1881 1.1.1.6 mrg 	.global	__recipsf2
1882 1.1.1.6 mrg 	.type	__recipsf2, @function
1883 1.1.1.6 mrg __recipsf2:
1884 1.1.1.6 mrg 	leaf_entry	sp, 16
1885 1.1.1.6 mrg 
1886 1.1.1.6 mrg 	wfr	f1, a2
1887 1.1.1.6 mrg 
1888 1.1.1.6 mrg 	recip0.s	f0, f1
1889 1.1.1.6 mrg 	const.s	f2, 1
1890 1.1.1.6 mrg 	msub.s	f2, f1, f0
1891 1.1.1.6 mrg 	maddn.s	f0, f0, f2
1892 1.1.1.6 mrg 	const.s	f2, 1
1893 1.1.1.6 mrg 	msub.s	f2, f1, f0
1894 1.1.1.6 mrg 	maddn.s	f0, f0, f2
1895 1.1.1.6 mrg 
1896 1.1.1.6 mrg 	rfr	a2, f0
1897 1.1.1.6 mrg 
1898 1.1.1.6 mrg 	leaf_return
1899 1.1.1.6 mrg
1900 1.1.1.6 mrg #endif /* L_recipsf2 */
1901 1.1.1.6 mrg #endif /* XCHAL_HAVE_FP_RECIP */
1902 1.1.1.6 mrg
1903 1.1.1.6 mrg #if XCHAL_HAVE_FP_RSQRT
1904 1.1.1.6 mrg #ifdef L_rsqrtsf2
1905 1.1.1.6 mrg /* Reciprocal square root */
1906 1.1.1.6 mrg
	/* float __rsqrtsf2(float x)
	   Approximate reciprocal square root 1/sqrt(x) using the FPU
	   rsqrt0.s seed followed by two Newton-Raphson refinement steps
	   of the form r = r + (3*r)*(1 - x*r*r) (the 1/2 factor is folded
	   into the hardware seed/step semantics).  Input: a2 = x.
	   Result: a2.  Clobbers f0-f5.  Only assembled for
	   XCHAL_HAVE_FP_RSQRT configurations.  */
1907 1.1.1.6 mrg 	.align	4
1908 1.1.1.6 mrg 	.global	__rsqrtsf2
1909 1.1.1.6 mrg 	.type	__rsqrtsf2, @function
1910 1.1.1.6 mrg __rsqrtsf2:
1911 1.1.1.6 mrg 	leaf_entry	sp, 16
1912 1.1.1.6 mrg 
1913 1.1.1.6 mrg 	wfr	f1, a2
1914 1.1.1.6 mrg 
1915 1.1.1.6 mrg 	rsqrt0.s	f0, f1
1916 1.1.1.6 mrg 	mul.s	f2, f1, f0
1917 1.1.1.6 mrg 	const.s	f3, 3;
1918 1.1.1.6 mrg 	mul.s	f4, f3, f0
1919 1.1.1.6 mrg 	const.s	f5, 1
1920 1.1.1.6 mrg 	msub.s	f5, f2, f0
1921 1.1.1.6 mrg 	maddn.s	f0, f4, f5
1922 1.1.1.6 mrg 	mul.s	f2, f1, f0
1923 1.1.1.6 mrg 	mul.s	f1, f3, f0
1924 1.1.1.6 mrg 	const.s	f3, 1
1925 1.1.1.6 mrg 	msub.s	f3, f2, f0
1926 1.1.1.6 mrg 	maddn.s	f0, f1, f3
1927 1.1.1.6 mrg 
1928 1.1.1.6 mrg 	rfr	a2, f0
1929 1.1.1.6 mrg 
1930 1.1.1.6 mrg 	leaf_return
1931 1.1.1.6 mrg
1932 1.1.1.6 mrg #endif /* L_rsqrtsf2 */
1933 1.1.1.6 mrg #endif /* XCHAL_HAVE_FP_RSQRT */
1934