ieee754-sf.S revision 1.1.1.3 1 1.1 mrg /* IEEE-754 single-precision functions for Xtensa
2 1.1.1.2 mrg Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 1.1 mrg Contributed by Bob Wilson (bwilson (at) tensilica.com) at Tensilica.
4 1.1 mrg
5 1.1 mrg This file is part of GCC.
6 1.1 mrg
7 1.1 mrg GCC is free software; you can redistribute it and/or modify it
8 1.1 mrg under the terms of the GNU General Public License as published by
9 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
10 1.1 mrg any later version.
11 1.1 mrg
12 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT
13 1.1 mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 1.1 mrg License for more details.
16 1.1 mrg
17 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
18 1.1 mrg permissions described in the GCC Runtime Library Exception, version
19 1.1 mrg 3.1, as published by the Free Software Foundation.
20 1.1 mrg
21 1.1 mrg You should have received a copy of the GNU General Public License and
22 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
23 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 1.1 mrg <http://www.gnu.org/licenses/>. */
25 1.1 mrg
26 1.1 mrg #ifdef __XTENSA_EB__ /* Register aliases xh, xl, yh, yl: the a2/a3 and a4/a5 assignments are swapped between the two branches. */
27 1.1 mrg #define xh a2
28 1.1 mrg #define xl a3
29 1.1 mrg #define yh a4
30 1.1 mrg #define yl a5
31 1.1 mrg #else /* NOTE(review): none of these aliases is referenced by the routines visible in this part of the file, which name a2/a3 directly -- confirm against the remainder of the file. */
32 1.1 mrg #define xh a3
33 1.1 mrg #define xl a2
34 1.1 mrg #define yh a5
35 1.1 mrg #define yl a4
36 1.1 mrg #endif
37 1.1 mrg
38 1.1 mrg /* Warning! The branch displacements for some Xtensa branch instructions
39 1.1 mrg are quite small, and this code has been carefully laid out to keep
40 1.1 mrg branch targets in range. If you change anything, be sure to check that
41 1.1 mrg the assembler is not relaxing anything to branch over a jump. */
42 1.1 mrg
43 1.1 mrg #ifdef L_negsf2
44 1.1 mrg
45 1.1 mrg .align 4
46 1.1 mrg .global __negsf2
47 1.1 mrg .type __negsf2, @function
48 1.1 mrg __negsf2: /* Negate the single-precision value passed in a2 and return it in a2. */
49 1.1 mrg leaf_entry sp, 16 /* leaf_entry and leaf_return are macros defined outside the visible source */
50 1.1 mrg movi a4, 0x80000000 /* a4 = mask with only bit 31 (the sign bit) set */
51 1.1 mrg xor a2, a2, a4 /* invert the sign bit only. No special casing: zero, Infinity and NaN inputs just get their sign flipped */
52 1.1 mrg leaf_return
53 1.1 mrg
54 1.1 mrg #endif /* L_negsf2 */
55 1.1 mrg
56 1.1 mrg #ifdef L_addsubsf3
57 1.1 mrg
58 1.1 mrg /* Addition */
59 1.1 mrg __addsf3_aux: /* Out-of-line special cases for __addsf3. On entry a2 = x, a3 = y, a6 = 0x7f800000 (exponent mask). */
60 1.1 mrg
61 1.1 mrg /* Handle NaNs and Infinities. (This code is placed before the
62 1.1 mrg start of the function just to keep it in range of the limited
63 1.1 mrg branch displacements.) */
64 1.1 mrg
65 1.1 mrg .Ladd_xnan_or_inf: /* Reached when every exponent bit of x is set. */
66 1.1 mrg /* If y is neither Infinity nor NaN, return x. */
67 1.1 mrg bnall a3, a6, 1f /* some exponent bit of y is clear: y is finite, a2 is returned unchanged */
68 1.1 mrg /* If x is a NaN, return it. Otherwise, return y. */
69 1.1 mrg slli a7, a2, 9 /* drop sign and exponent of x. a7 == 0 means the mantissa is zero, i.e. x is an Infinity */
70 1.1 mrg beqz a7, .Ladd_ynan_or_inf
71 1.1 mrg 1: leaf_return /* NOTE(review): unlike .Lsub_xnan_or_inf, no 0x400000 quiet bit is ORed in here, so a NaN operand is returned bit for bit */
72 1.1 mrg
73 1.1 mrg .Ladd_ynan_or_inf: /* Reached when every exponent bit of y is set (and x is finite or an Infinity). */
74 1.1 mrg /* Return y. */
75 1.1 mrg mov a2, a3
76 1.1 mrg leaf_return
77 1.1 mrg
78 1.1 mrg .Ladd_opposite_signs: /* x + y with differing signs is computed as x - (-y). */
79 1.1 mrg /* Operand signs differ. Do a subtraction. */
80 1.1 mrg slli a7, a6, 8 /* 0x7f800000 << 8 truncates to 0x80000000, the sign-bit mask */
81 1.1 mrg xor a3, a3, a7 /* flip the sign of y so both operands now have the same sign */
82 1.1 mrg j .Lsub_same_sign /* a6 still holds the exponent mask that the subtract path expects */
83 1.1 mrg
84 1.1 mrg .align 4
85 1.1 mrg .global __addsf3
86 1.1 mrg .type __addsf3, @function
87 1.1 mrg __addsf3: /* Single-precision add. In: a2 = x, a3 = y. Out: a2 = x + y, rounded to nearest even. Scratch: a6 to a10. */
88 1.1 mrg leaf_entry sp, 16
89 1.1 mrg movi a6, 0x7f800000 /* a6 = exponent-field mask, kept live for every special-case test below */
90 1.1 mrg
91 1.1 mrg /* Check if the two operands have the same sign. */
92 1.1 mrg xor a7, a2, a3 /* bit 31 of a7 is set exactly when the sign bits differ */
93 1.1 mrg bltz a7, .Ladd_opposite_signs
94 1.1 mrg
95 1.1 mrg .Ladd_same_sign: /* Also entered from .Lsub_opposite_signs after the sign of y has been flipped. */
96 1.1 mrg /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
97 1.1 mrg ball a2, a6, .Ladd_xnan_or_inf /* taken when all exponent bits of x are set */
98 1.1 mrg ball a3, a6, .Ladd_ynan_or_inf /* taken when all exponent bits of y are set */
99 1.1 mrg
100 1.1 mrg /* Compare the exponents. The smaller operand will be shifted
101 1.1 mrg right by the exponent difference and added to the larger
102 1.1 mrg one. */
103 1.1 mrg extui a7, a2, 23, 9 /* a7 = top 9 bits of x, i.e. sign and exponent together */
104 1.1 mrg extui a8, a3, 23, 9 /* a8 = the same field of y. The signs are equal here, so comparing these orders by exponent */
105 1.1 mrg bltu a7, a8, .Ladd_shiftx /* x has the smaller exponent: shift x instead of y */
106 1.1 mrg
107 1.1 mrg .Ladd_shifty:
108 1.1 mrg /* Check if the smaller (or equal) exponent is zero. */
109 1.1 mrg bnone a3, a6, .Ladd_yexpzero /* no exponent bit set in y: y is zero or subnormal */
110 1.1 mrg
111 1.1 mrg /* Replace y sign/exponent with 0x008. */
112 1.1 mrg or a3, a3, a6 /* force bits 30..23 to one */
113 1.1 mrg slli a3, a3, 8
114 1.1 mrg srli a3, a3, 8 /* clear bits 31..24. Bit 23 stays set and acts as the explicit 1.0 */
115 1.1 mrg
116 1.1 mrg .Ladd_yexpdiff:
117 1.1 mrg /* Compute the exponent difference. */
118 1.1 mrg sub a10, a7, a8
119 1.1 mrg
120 1.1 mrg /* Exponent difference > 32 -- just return the bigger value. */
121 1.1 mrg bgeui a10, 32, 1f /* the test is actually a10 >= 32. Target is the leaf_return after .Ladd_bothexpzero, so a2 = x is returned as is */
122 1.1 mrg
123 1.1 mrg /* Shift y right by the exponent difference. Any bits that are
124 1.1 mrg shifted out of y are saved in a9 for rounding the result. */
125 1.1 mrg ssr a10 /* SAR = right-shift count */
126 1.1 mrg movi a9, 0
127 1.1 mrg src a9, a3, a9 /* funnel shift of the a3:a9 pair. a9 receives the bits leaving a3, left-aligned */
128 1.1 mrg srl a3, a3
129 1.1 mrg
130 1.1 mrg /* Do the addition. */
131 1.1 mrg add a2, a2, a3 /* x still carries its sign and exponent. y contributes mantissa bits only */
132 1.1 mrg
133 1.1 mrg /* Check if the add overflowed into the exponent. */
134 1.1 mrg extui a10, a2, 23, 9
135 1.1 mrg beq a10, a7, .Ladd_round /* sign/exponent field unchanged: no carry, go round */
136 1.1 mrg mov a8, a7 /* .Ladd_carry expects the original sign/exponent field in a8 */
137 1.1 mrg j .Ladd_carry
138 1.1 mrg
139 1.1 mrg .Ladd_yexpzero:
140 1.1 mrg /* y is a subnormal value. Replace its sign/exponent with zero,
141 1.1 mrg i.e., no implicit "1.0", and increment the apparent exponent
142 1.1 mrg because subnormals behave as if they had the minimum (nonzero)
143 1.1 mrg exponent. Test for the case when both exponents are zero. */
144 1.1 mrg slli a3, a3, 9
145 1.1 mrg srli a3, a3, 9 /* clear bits 31..23 of y, leaving the 23 mantissa bits */
146 1.1 mrg bnone a2, a6, .Ladd_bothexpzero
147 1.1 mrg addi a8, a8, 1
148 1.1 mrg j .Ladd_yexpdiff
149 1.1 mrg
150 1.1 mrg .Ladd_bothexpzero:
151 1.1 mrg /* Both exponents are zero. Handle this as a special case. There
152 1.1 mrg is no need to shift or round, and the normal code for handling
153 1.1 mrg a carry into the exponent field will not work because it
154 1.1 mrg assumes there is an implicit "1.0" that needs to be added. */
155 1.1 mrg add a2, a2, a3 /* a3 had its sign cleared above, so the sign bit of a2 is preserved */
156 1.1 mrg 1: leaf_return /* shared return, also the target of the bgeui in .Ladd_yexpdiff */
157 1.1 mrg
158 1.1 mrg .Ladd_xexpzero:
159 1.1 mrg /* Same as "yexpzero" except skip handling the case when both
160 1.1 mrg exponents are zero. */
161 1.1 mrg slli a2, a2, 9
162 1.1 mrg srli a2, a2, 9 /* clear bits 31..23 of x */
163 1.1 mrg addi a7, a7, 1
164 1.1 mrg j .Ladd_xexpdiff
165 1.1 mrg
166 1.1 mrg .Ladd_shiftx:
167 1.1 mrg /* Same thing as the "shifty" code, but with x and y swapped. Also,
168 1.1 mrg because the exponent difference is always nonzero in this version,
169 1.1 mrg the shift sequence can use SLL and skip loading a constant zero. */
170 1.1 mrg bnone a2, a6, .Ladd_xexpzero
171 1.1 mrg
172 1.1 mrg or a2, a2, a6
173 1.1 mrg slli a2, a2, 8
174 1.1 mrg srli a2, a2, 8 /* x now has bits 31..24 clear and bit 23 set as the explicit 1.0 */
175 1.1 mrg
176 1.1 mrg .Ladd_xexpdiff:
177 1.1 mrg sub a10, a8, a7 /* a10 = exponent difference, y minus x */
178 1.1 mrg bgeui a10, 32, .Ladd_returny /* x is too small to affect the sum */
179 1.1 mrg
180 1.1 mrg ssr a10
181 1.1 mrg sll a9, a2 /* with SAR set by ssr, sll shifts left by 32 - a10: a9 = the bits about to leave x, left-aligned */
182 1.1 mrg srl a2, a2
183 1.1 mrg
184 1.1 mrg add a2, a2, a3 /* y supplies the sign and exponent of the result here */
185 1.1 mrg
186 1.1 mrg /* Check if the add overflowed into the exponent. */
187 1.1 mrg extui a10, a2, 23, 9
188 1.1 mrg bne a10, a8, .Ladd_carry /* a8 already holds the original sign/exponent of y */
189 1.1 mrg
190 1.1 mrg .Ladd_round: /* a2 = unrounded result, a9 = discarded fraction bits, left-aligned. */
191 1.1 mrg /* Round up if the leftover fraction is >= 1/2. */
192 1.1 mrg bgez a9, 1f /* top bit of a9 clear: fraction below 1/2, keep a2 */
193 1.1 mrg addi a2, a2, 1
194 1.1 mrg
195 1.1 mrg /* Check if the leftover fraction is exactly 1/2. */
196 1.1 mrg slli a9, a9, 1 /* discard the 1/2 bit. Zero remainder means an exact tie */
197 1.1 mrg beqz a9, .Ladd_exactlyhalf
198 1.1 mrg 1: leaf_return
199 1.1 mrg
200 1.1 mrg .Ladd_returny:
201 1.1 mrg mov a2, a3
202 1.1 mrg leaf_return
203 1.1 mrg
204 1.1 mrg .Ladd_carry: /* In: a2 = raw sum, a8 = original sign/exponent field, a9 = discarded fraction bits. */
205 1.1 mrg /* The addition has overflowed into the exponent field, so the
206 1.1 mrg value needs to be renormalized. The mantissa of the result
207 1.1 mrg can be recovered by subtracting the original exponent and
208 1.1 mrg adding 0x800000 (which is the explicit "1.0" for the
209 1.1 mrg mantissa of the non-shifted operand -- the "1.0" for the
210 1.1 mrg shifted operand was already added). The mantissa can then
211 1.1 mrg be shifted right by one bit. The explicit "1.0" of the
212 1.1 mrg shifted mantissa then needs to be replaced by the exponent,
213 1.1 mrg incremented by one to account for the normalizing shift.
214 1.1 mrg It is faster to combine these operations: do the shift first
215 1.1 mrg and combine the additions and subtractions. If x is the
216 1.1 mrg original exponent, the result is:
217 1.1 mrg shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
218 1.1 mrg or:
219 1.1 mrg shifted mantissa + ((x + 1) << 22)
220 1.1 mrg Note that the exponent is incremented here by leaving the
221 1.1 mrg explicit "1.0" of the mantissa in the exponent field. */
222 1.1 mrg
223 1.1 mrg /* Shift x right by one bit. Save the lsb. */
224 1.1 mrg mov a10, a2 /* bit 0 of a10 is the bit that the srli below discards */
225 1.1 mrg srli a2, a2, 1
226 1.1 mrg
227 1.1 mrg /* See explanation above. The original exponent is in a8. */
228 1.1 mrg addi a8, a8, 1
229 1.1 mrg slli a8, a8, 22 /* a8 = (sign/exponent + 1) << 22. Reused by .Ladd_infinity */
230 1.1 mrg add a2, a2, a8
231 1.1 mrg
232 1.1 mrg /* Return an Infinity if the exponent overflowed. */
233 1.1 mrg ball a2, a6, .Ladd_infinity /* all exponent bits set after renormalizing */
234 1.1 mrg
235 1.1 mrg /* Same thing as the "round" code except the msb of the leftover
236 1.1 mrg fraction is bit 0 of a10, with the rest of the fraction in a9. */
237 1.1 mrg bbci.l a10, 0, 1f /* discarded bit clear: fraction below 1/2 */
238 1.1 mrg addi a2, a2, 1
239 1.1 mrg beqz a9, .Ladd_exactlyhalf /* nothing below the 1/2 bit: exact tie */
240 1.1 mrg 1: leaf_return
241 1.1 mrg
242 1.1 mrg .Ladd_infinity:
243 1.1 mrg /* Clear the mantissa. */
244 1.1 mrg srli a2, a2, 23
245 1.1 mrg slli a2, a2, 23
246 1.1 mrg
247 1.1 mrg /* The sign bit may have been lost in a carry-out. Put it back. */
248 1.1 mrg slli a8, a8, 1 /* a8 becomes (sign/exponent + 1) << 23, which lines its sign bit up with bit 31 */
249 1.1 mrg or a2, a2, a8
250 1.1 mrg leaf_return
251 1.1 mrg
252 1.1 mrg .Ladd_exactlyhalf: /* Reached after a2 has already been incremented on an exact tie. */
253 1.1 mrg /* Round down to the nearest even value. */
254 1.1 mrg srli a2, a2, 1
255 1.1 mrg slli a2, a2, 1 /* clear bit 0 so the result is even */
256 1.1 mrg leaf_return
257 1.1 mrg
258 1.1 mrg
259 1.1 mrg /* Subtraction */
260 1.1 mrg __subsf3_aux: /* Out-of-line special cases for __subsf3. On entry a2 = x, a3 = y, a6 = 0x7f800000 (exponent mask). */
261 1.1 mrg
262 1.1 mrg /* Handle NaNs and Infinities. (This code is placed before the
263 1.1 mrg start of the function just to keep it in range of the limited
264 1.1 mrg branch displacements.) */
265 1.1 mrg
266 1.1 mrg .Lsub_xnan_or_inf: /* Reached when every exponent bit of x is set. */
267 1.1 mrg /* If y is neither Infinity nor NaN, return x. */
268 1.1 mrg bnall a3, a6, 1f
269 1.1 mrg /* Both x and y are either NaN or Inf, so the result is NaN. */
270 1.1 mrg movi a4, 0x400000 /* make it a quiet NaN */
271 1.1 mrg or a2, a2, a4 /* sets mantissa bit 22 in x. This also turns an Infinity into a NaN. a4 is used as scratch */
272 1.1 mrg 1: leaf_return
273 1.1 mrg
274 1.1 mrg .Lsub_ynan_or_inf: /* Reached when every exponent bit of y is set and x is finite. */
275 1.1 mrg /* Negate y and return it. */
276 1.1 mrg slli a7, a6, 8 /* 0x7f800000 << 8 truncates to 0x80000000, the sign-bit mask */
277 1.1 mrg xor a2, a3, a7 /* result = y with its sign flipped. A NaN y keeps its mantissa bits unchanged */
278 1.1 mrg leaf_return
279 1.1 mrg
280 1.1 mrg .Lsub_opposite_signs: /* x - y with differing signs is computed as x + (-y). */
281 1.1 mrg /* Operand signs differ. Do an addition. */
282 1.1 mrg slli a7, a6, 8 /* sign-bit mask, derived from the exponent mask as above */
283 1.1 mrg xor a3, a3, a7 /* flip the sign of y so both operands now have the same sign */
284 1.1 mrg j .Ladd_same_sign /* a6 still holds the exponent mask that the add path expects */
285 1.1 mrg
286 1.1 mrg .align 4
287 1.1 mrg .global __subsf3
288 1.1 mrg .type __subsf3, @function
289 1.1 mrg __subsf3: /* Single-precision subtract. In: a2 = x, a3 = y. Out: a2 = x - y, rounded to nearest even. Scratch: a6 to a11. */
290 1.1 mrg leaf_entry sp, 16
291 1.1 mrg movi a6, 0x7f800000 /* a6 = exponent-field mask. Note that .Lsub_borrow later reuses a6 as a shift count */
292 1.1 mrg
293 1.1 mrg /* Check if the two operands have the same sign. */
294 1.1 mrg xor a7, a2, a3 /* bit 31 of a7 is set exactly when the sign bits differ */
295 1.1 mrg bltz a7, .Lsub_opposite_signs
296 1.1 mrg
297 1.1 mrg .Lsub_same_sign: /* Also entered from .Ladd_opposite_signs after the sign of y has been flipped. */
298 1.1 mrg /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
299 1.1 mrg ball a2, a6, .Lsub_xnan_or_inf
300 1.1 mrg ball a3, a6, .Lsub_ynan_or_inf
301 1.1 mrg
302 1.1 mrg /* Compare the operands. In contrast to addition, the entire
303 1.1 mrg value matters here. */
304 1.1 mrg extui a7, a2, 23, 8 /* a7 = 8-bit exponent of x (no sign bit, unlike the add path) */
305 1.1 mrg extui a8, a3, 23, 8 /* a8 = 8-bit exponent of y */
306 1.1 mrg bltu a2, a3, .Lsub_xsmaller /* raw words compared as unsigned. Signs are equal here, so this orders by magnitude */
307 1.1 mrg
308 1.1 mrg .Lsub_ysmaller:
309 1.1 mrg /* Check if the smaller (or equal) exponent is zero. */
310 1.1 mrg bnone a3, a6, .Lsub_yexpzero
311 1.1 mrg
312 1.1 mrg /* Replace y sign/exponent with 0x008. */
313 1.1 mrg or a3, a3, a6
314 1.1 mrg slli a3, a3, 8
315 1.1 mrg srli a3, a3, 8 /* bits 31..24 cleared, bit 23 left set as the explicit 1.0 */
316 1.1 mrg
317 1.1 mrg .Lsub_yexpdiff:
318 1.1 mrg /* Compute the exponent difference. */
319 1.1 mrg sub a10, a7, a8
320 1.1 mrg
321 1.1 mrg /* Exponent difference > 32 -- just return the bigger value. */
322 1.1 mrg bgeui a10, 32, 1f /* the test is actually a10 >= 32. Target is the leaf_return at the end of .Lsub_returny, so a2 = x is returned as is */
323 1.1 mrg
324 1.1 mrg /* Shift y right by the exponent difference. Any bits that are
325 1.1 mrg shifted out of y are saved in a9 for rounding the result. */
326 1.1 mrg ssr a10 /* SAR = right-shift count */
327 1.1 mrg movi a9, 0
328 1.1 mrg src a9, a3, a9 /* a9 receives the bits leaving a3, left-aligned */
329 1.1 mrg srl a3, a3
330 1.1 mrg
331 1.1 mrg sub a2, a2, a3 /* high word of the two-word subtraction */
332 1.1 mrg
333 1.1 mrg /* Subtract the leftover bits in a9 from zero and propagate any
334 1.1 mrg borrow from a2. */
335 1.1 mrg neg a9, a9 /* low word: 0 - leftover bits */
336 1.1 mrg addi a10, a2, -1
337 1.1 mrg movnez a2, a10, a9 /* a borrow occurs exactly when the leftover bits are nonzero */
338 1.1 mrg
339 1.1 mrg /* Check if the subtract underflowed into the exponent. */
340 1.1 mrg extui a10, a2, 23, 8 /* a10 = exponent field of the raw difference. .Lsub_borrow reads it */
341 1.1 mrg beq a10, a7, .Lsub_round
342 1.1 mrg j .Lsub_borrow
343 1.1 mrg
344 1.1 mrg .Lsub_yexpzero:
345 1.1 mrg /* Return zero if the inputs are equal. (For the non-subnormal
346 1.1 mrg case, subtracting the "1.0" will cause a borrow from the exponent
347 1.1 mrg and this case can be detected when handling the borrow.) */
348 1.1 mrg beq a2, a3, .Lsub_return_zero
349 1.1 mrg
350 1.1 mrg /* y is a subnormal value. Replace its sign/exponent with zero,
351 1.1 mrg i.e., no implicit "1.0". Unless x is also a subnormal, increment
352 1.1 mrg y's apparent exponent because subnormals behave as if they had
353 1.1 mrg the minimum (nonzero) exponent. */
354 1.1 mrg slli a3, a3, 9
355 1.1 mrg srli a3, a3, 9 /* clear bits 31..23 of y */
356 1.1 mrg bnone a2, a6, .Lsub_yexpdiff /* x is subnormal too: both exponents stay 0, so the difference is 0 */
357 1.1 mrg addi a8, a8, 1
358 1.1 mrg j .Lsub_yexpdiff
359 1.1 mrg
360 1.1 mrg .Lsub_returny:
361 1.1 mrg /* Negate and return y. */
362 1.1 mrg slli a7, a6, 8 /* 0x7f800000 << 8 truncates to 0x80000000, the sign-bit mask */
363 1.1 mrg xor a2, a3, a7
364 1.1 mrg 1: leaf_return /* shared return, also the target of the bgeui in .Lsub_yexpdiff */
365 1.1 mrg
366 1.1 mrg .Lsub_xsmaller:
367 1.1 mrg /* Same thing as the "ysmaller" code, but with x and y swapped and
368 1.1 mrg with y negated. */
369 1.1 mrg bnone a2, a6, .Lsub_xexpzero
370 1.1 mrg
371 1.1 mrg or a2, a2, a6
372 1.1 mrg slli a2, a2, 8
373 1.1 mrg srli a2, a2, 8 /* x now has bits 31..24 clear and bit 23 set as the explicit 1.0 */
374 1.1 mrg
375 1.1 mrg .Lsub_xexpdiff:
376 1.1 mrg sub a10, a8, a7 /* a10 = exponent difference, y minus x */
377 1.1 mrg bgeui a10, 32, .Lsub_returny /* x is too small to matter: the result is -y */
378 1.1 mrg
379 1.1 mrg ssr a10
380 1.1 mrg movi a9, 0
381 1.1 mrg src a9, a2, a9 /* a9 receives the bits leaving a2, left-aligned */
382 1.1 mrg srl a2, a2
383 1.1 mrg
384 1.1 mrg /* Negate y. */
385 1.1 mrg slli a11, a6, 8 /* sign-bit mask in a11 */
386 1.1 mrg xor a3, a3, a11
387 1.1 mrg
388 1.1 mrg sub a2, a3, a2 /* -y supplies the sign and exponent. The shifted x mantissa is subtracted from it */
389 1.1 mrg
390 1.1 mrg neg a9, a9
391 1.1 mrg addi a10, a2, -1
392 1.1 mrg movnez a2, a10, a9 /* propagate the borrow when the leftover bits are nonzero */
393 1.1 mrg
394 1.1 mrg /* Check if the subtract underflowed into the exponent. */
395 1.1 mrg extui a10, a2, 23, 8
396 1.1 mrg bne a10, a8, .Lsub_borrow
397 1.1 mrg
398 1.1 mrg .Lsub_round: /* a2 = unrounded result, a9 = discarded fraction bits, left-aligned. */
399 1.1 mrg /* Round up if the leftover fraction is >= 1/2. */
400 1.1 mrg bgez a9, 1f /* top bit of a9 clear: fraction below 1/2, keep a2 */
401 1.1 mrg addi a2, a2, 1
402 1.1 mrg
403 1.1 mrg /* Check if the leftover fraction is exactly 1/2. */
404 1.1 mrg slli a9, a9, 1
405 1.1 mrg beqz a9, .Lsub_exactlyhalf
406 1.1 mrg 1: leaf_return
407 1.1 mrg
408 1.1 mrg .Lsub_xexpzero:
409 1.1 mrg /* Same as "yexpzero". */
410 1.1 mrg beq a2, a3, .Lsub_return_zero
411 1.1 mrg slli a2, a2, 9
412 1.1 mrg srli a2, a2, 9 /* clear bits 31..23 of x */
413 1.1 mrg bnone a3, a6, .Lsub_xexpdiff /* y is subnormal too: leave both exponents at 0 */
414 1.1 mrg addi a7, a7, 1
415 1.1 mrg j .Lsub_xexpdiff
416 1.1 mrg
417 1.1 mrg .Lsub_return_zero:
418 1.1 mrg movi a2, 0 /* all bits clear, so the zero returned here is always positive */
419 1.1 mrg leaf_return
420 1.1 mrg
421 1.1 mrg .Lsub_borrow: /* In: a2 = raw difference, a9 = low fraction word, a10 = exponent field of a2. */
422 1.1 mrg /* The subtraction has underflowed into the exponent field, so the
423 1.1 mrg value needs to be renormalized. Shift the mantissa left as
424 1.1 mrg needed to remove any leading zeros and adjust the exponent
425 1.1 mrg accordingly. If the exponent is not large enough to remove
426 1.1 mrg all the leading zeros, the result will be a subnormal value. */
427 1.1 mrg
428 1.1 mrg slli a8, a2, 9 /* a8 = mantissa bits of a2, left-aligned */
429 1.1 mrg beqz a8, .Lsub_xzero
430 1.1 mrg do_nsau a6, a8, a7, a11 /* macro defined outside the visible source. Presumably a6 = leading-zero count of a8 with a7 and a11 as temporaries -- TODO confirm. The exponent mask in a6 is lost from here on */
431 1.1 mrg srli a8, a8, 9 /* mantissa back in bits 22..0 */
432 1.1 mrg bge a6, a10, .Lsub_subnormal
433 1.1 mrg addi a6, a6, 1
434 1.1 mrg
435 1.1 mrg .Lsub_normalize_shift:
436 1.1 mrg /* Shift the mantissa (a8/a9) left by a6. */
437 1.1 mrg ssl a6 /* SAR set up for a left shift by a6 */
438 1.1 mrg src a8, a8, a9 /* high word of the a8:a9 pair after the left shift */
439 1.1 mrg sll a9, a9
440 1.1 mrg
441 1.1 mrg /* Combine the shifted mantissa with the sign and exponent,
442 1.1 mrg decrementing the exponent by a6. (The exponent has already
443 1.1 mrg been decremented by one due to the borrow from the subtraction,
444 1.1 mrg but adding the mantissa will increment the exponent by one.) */
445 1.1 mrg srli a2, a2, 23 /* keep only the sign and exponent, right-aligned */
446 1.1 mrg sub a2, a2, a6
447 1.1 mrg slli a2, a2, 23
448 1.1 mrg add a2, a2, a8
449 1.1 mrg j .Lsub_round
450 1.1 mrg
451 1.1 mrg .Lsub_exactlyhalf: /* Reached after a2 has already been incremented on an exact tie. */
452 1.1 mrg /* Round down to the nearest even value. */
453 1.1 mrg srli a2, a2, 1
454 1.1 mrg slli a2, a2, 1 /* clear bit 0 so the result is even */
455 1.1 mrg leaf_return
456 1.1 mrg
457 1.1 mrg .Lsub_xzero: /* Mantissa bits of a2 are all zero (a8 == 0). */
458 1.1 mrg /* If there was a borrow from the exponent, and the mantissa and
459 1.1 mrg guard digits are all zero, then the inputs were equal and the
460 1.1 mrg result should be zero. */
461 1.1 mrg beqz a9, .Lsub_return_zero
462 1.1 mrg
463 1.1 mrg /* Only the guard digit is nonzero. Shift by min(24, a10). */
464 1.1 mrg addi a11, a10, -24
465 1.1 mrg movi a6, 24
466 1.1 mrg movltz a6, a10, a11 /* a10 - 24 < 0 selects a10, otherwise a6 stays 24 */
467 1.1 mrg j .Lsub_normalize_shift
468 1.1 mrg
469 1.1 mrg .Lsub_subnormal:
470 1.1 mrg /* The exponent is too small to shift away all the leading zeros.
471 1.1 mrg Set a6 to the current exponent (which has already been
472 1.1 mrg decremented by the borrow) so that the exponent of the result
473 1.1 mrg will be zero. Do not add 1 to a6 in this case, because: (1)
474 1.1 mrg adding the mantissa will not increment the exponent, so there is
475 1.1 mrg no need to subtract anything extra from the exponent to
476 1.1 mrg compensate, and (2) the effective exponent of a subnormal is 1
477 1.1 mrg not 0 so the shift amount must be 1 smaller than normal. */
478 1.1 mrg mov a6, a10
479 1.1 mrg j .Lsub_normalize_shift
480 1.1 mrg
481 1.1 mrg #endif /* L_addsubsf3 */
482 1.1 mrg
483 1.1 mrg #ifdef L_mulsf3
484 1.1 mrg
485 1.1 mrg /* Multiplication */
486 1.1 mrg #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
487 1.1 mrg #define XCHAL_NO_MUL 1
488 1.1 mrg #endif
489 1.1 mrg
490 1.1.1.3 mrg .literal_position
491 1.1 mrg __mulsf3_aux: /* Out-of-line special cases for __mulsf3. On entry a2 = x, a3 = y, a6 = 0x7f800000, a7 = x xor y (bit 31 = result sign), a8 and a9 = exponents of x and y. */
492 1.1 mrg
493 1.1 mrg /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
494 1.1 mrg (This code is placed before the start of the function just to
495 1.1 mrg keep it in range of the limited branch displacements.) */
496 1.1 mrg
497 1.1 mrg .Lmul_xexpzero: /* Exponent field of x is zero: x is a zero or a subnormal. */
498 1.1 mrg /* Clear the sign bit of x. */
499 1.1 mrg slli a2, a2, 1
500 1.1 mrg srli a2, a2, 1
501 1.1 mrg
502 1.1 mrg /* If x is zero, return zero. */
503 1.1 mrg beqz a2, .Lmul_return_zero
504 1.1 mrg
505 1.1 mrg /* Normalize x. Adjust the exponent in a8. */
506 1.1 mrg do_nsau a10, a2, a11, a12 /* macro defined outside the visible source. Presumably a10 = leading-zero count of a2 with a11 and a12 as temporaries -- TODO confirm */
507 1.1 mrg addi a10, a10, -8 /* shift count that moves the leading one to bit 23 */
508 1.1 mrg ssl a10
509 1.1 mrg sll a2, a2
510 1.1 mrg movi a8, 1
511 1.1 mrg sub a8, a8, a10 /* a8 = 1 - shift count, which may be zero or negative */
512 1.1 mrg j .Lmul_xnormalized
513 1.1 mrg
514 1.1 mrg .Lmul_yexpzero: /* Exponent field of y is zero: y is a zero or a subnormal. */
515 1.1 mrg /* Clear the sign bit of y. */
516 1.1 mrg slli a3, a3, 1
517 1.1 mrg srli a3, a3, 1
518 1.1 mrg
519 1.1 mrg /* If y is zero, return zero. */
520 1.1 mrg beqz a3, .Lmul_return_zero
521 1.1 mrg
522 1.1 mrg /* Normalize y. Adjust the exponent in a9. */
523 1.1 mrg do_nsau a10, a3, a11, a12
524 1.1 mrg addi a10, a10, -8 /* shift count that moves the leading one to bit 23 */
525 1.1 mrg ssl a10
526 1.1 mrg sll a3, a3
527 1.1 mrg movi a9, 1
528 1.1 mrg sub a9, a9, a10 /* a9 = 1 - shift count */
529 1.1 mrg j .Lmul_ynormalized
530 1.1 mrg
531 1.1 mrg .Lmul_return_zero:
532 1.1 mrg /* Return zero with the appropriate sign bit. */
533 1.1 mrg srli a2, a7, 31
534 1.1 mrg slli a2, a2, 31 /* a2 = bit 31 of a7 only, i.e. a signed zero */
535 1.1 mrg j .Lmul_done
536 1.1 mrg
537 1.1 mrg .Lmul_xnan_or_inf: /* Reached when every exponent bit of x is set. */
538 1.1 mrg /* If y is zero, return NaN. */
539 1.1 mrg slli a8, a3, 1 /* drop the sign of y. Zero means y is a positive or negative zero */
540 1.1 mrg bnez a8, 1f
541 1.1 mrg movi a4, 0x400000 /* make it a quiet NaN */
542 1.1 mrg or a2, a2, a4 /* x with mantissa bit 22 set. The sign of x is kept as is */
543 1.1 mrg j .Lmul_done
544 1.1 mrg 1:
545 1.1 mrg /* If y is NaN, return y. */
546 1.1 mrg bnall a3, a6, .Lmul_returnx /* y is finite: return x with the product sign */
547 1.1 mrg slli a8, a3, 9 /* mantissa bits of y. Zero means y is an Infinity */
548 1.1 mrg beqz a8, .Lmul_returnx
549 1.1 mrg
550 1.1 mrg .Lmul_returny:
551 1.1 mrg mov a2, a3
552 1.1 mrg
553 1.1 mrg .Lmul_returnx: /* Return a2 with its sign bit replaced by the product sign. This applies to NaN results as well. */
554 1.1 mrg /* Set the sign bit and return. */
555 1.1 mrg extui a7, a7, 31, 1 /* a7 = result sign as 0 or 1 */
556 1.1 mrg slli a2, a2, 1 /* drop the old sign bit */
557 1.1 mrg ssai 1
558 1.1 mrg src a2, a7, a2 /* shift the a7:a2 pair right by one so bit 0 of a7 lands in bit 31 */
559 1.1 mrg j .Lmul_done
560 1.1 mrg
561 1.1 mrg .Lmul_ynan_or_inf: /* Reached when every exponent bit of y is set and x is finite. */
562 1.1 mrg /* If x is zero, return NaN. */
563 1.1 mrg slli a8, a2, 1 /* drop the sign of x. Zero means x is a positive or negative zero */
564 1.1 mrg bnez a8, .Lmul_returny
565 1.1 mrg movi a7, 0x400000 /* make it a quiet NaN */
566 1.1 mrg or a2, a3, a7 /* y with mantissa bit 22 set. a7 no longer holds the sign, which is fine because .Lmul_done does not read it */
567 1.1 mrg j .Lmul_done
568 1.1 mrg
569 1.1 mrg .align 4
570 1.1 mrg .global __mulsf3
571 1.1 mrg .type __mulsf3, @function
572 1.1 mrg __mulsf3: /* Single-precision multiply. In: a2 = x, a3 = y. Out: a2 = x * y, rounded to nearest even. */
573 1.1 mrg #if __XTENSA_CALL0_ABI__
574 1.1 mrg leaf_entry sp, 32
575 1.1 mrg addi sp, sp, -32 /* 32-byte frame: a12..a15 saved at offsets 16..28. Offsets 0 and 4 are used for a0 and a8 further down when there is no multiply hardware */
576 1.1 mrg s32i a12, sp, 16
577 1.1 mrg s32i a13, sp, 20
578 1.1 mrg s32i a14, sp, 24
579 1.1 mrg s32i a15, sp, 28
580 1.1 mrg #elif XCHAL_NO_MUL
581 1.1 mrg /* This is not really a leaf function; allocate enough stack space
582 1.1 mrg to allow CALL12s to a helper function. */
583 1.1 mrg leaf_entry sp, 64
584 1.1 mrg #else
585 1.1 mrg leaf_entry sp, 32
586 1.1 mrg #endif
587 1.1 mrg movi a6, 0x7f800000 /* a6 = exponent-field mask. It is overwritten by the low product word after the multiply */
588 1.1 mrg
589 1.1 mrg /* Get the sign of the result. */
590 1.1 mrg xor a7, a2, a3 /* only bit 31 of a7 is used later */
591 1.1 mrg
592 1.1 mrg /* Check for NaN and infinity. */
593 1.1 mrg ball a2, a6, .Lmul_xnan_or_inf
594 1.1 mrg ball a3, a6, .Lmul_ynan_or_inf
595 1.1 mrg
596 1.1 mrg /* Extract the exponents. */
597 1.1 mrg extui a8, a2, 23, 8
598 1.1 mrg extui a9, a3, 23, 8
599 1.1 mrg
600 1.1 mrg beqz a8, .Lmul_xexpzero /* zero or subnormal x. The handler returns to .Lmul_xnormalized with a8 adjusted */
601 1.1 mrg .Lmul_xnormalized:
602 1.1 mrg beqz a9, .Lmul_yexpzero /* zero or subnormal y. The handler returns to .Lmul_ynormalized with a9 adjusted */
603 1.1 mrg .Lmul_ynormalized:
604 1.1 mrg
605 1.1 mrg /* Add the exponents. */
606 1.1 mrg add a8, a8, a9 /* sum of two biased exponents. The extra bias is removed after the multiply */
607 1.1 mrg
608 1.1 mrg /* Replace sign/exponent fields with explicit "1.0". */
609 1.1 mrg movi a10, 0xffffff
610 1.1 mrg or a2, a2, a6 /* set bits 30..23 */
611 1.1 mrg and a2, a2, a10 /* keep bits 23..0: a 24-bit mantissa with bit 23 set */
612 1.1 mrg or a3, a3, a6
613 1.1 mrg and a3, a3, a10
614 1.1 mrg
615 1.1 mrg /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */
616 1.1 mrg
617 1.1 mrg #if XCHAL_HAVE_MUL32_HIGH
618 1.1 mrg
619 1.1 mrg mull a6, a2, a3 /* low 32 bits of the product */
620 1.1 mrg muluh a2, a2, a3 /* high 32 bits of the unsigned product */
621 1.1 mrg
622 1.1 mrg #else
623 1.1 mrg
624 1.1 mrg /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
625 1.1 mrg products. These partial products are:
626 1.1 mrg
627 1.1 mrg 0 xl * yl
628 1.1 mrg
629 1.1 mrg 1 xl * yh
630 1.1 mrg 2 xh * yl
631 1.1 mrg
632 1.1 mrg 3 xh * yh
633 1.1 mrg
634 1.1 mrg If using the Mul16 or Mul32 multiplier options, these input
635 1.1 mrg chunks must be stored in separate registers. For Mac16, the
636 1.1 mrg UMUL.AA.* opcodes can specify that the inputs come from either
637 1.1 mrg half of the registers, so there is no need to shift them out
638 1.1 mrg ahead of time. If there is no multiply hardware, the 16-bit
639 1.1 mrg chunks can be extracted when setting up the arguments to the
640 1.1 mrg separate multiply function. */
641 1.1 mrg
642 1.1 mrg #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
643 1.1 mrg /* Calling a separate multiply function will clobber a0 and requires
644 1.1 mrg use of a8 as a temporary, so save those values now. (The function
645 1.1 mrg uses a custom ABI so nothing else needs to be saved.) */
646 1.1 mrg s32i a0, sp, 0
647 1.1 mrg s32i a8, sp, 4 /* a8 holds the exponent sum at this point */
648 1.1 mrg #endif
649 1.1 mrg
650 1.1 mrg #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
651 1.1 mrg
652 1.1 mrg #define a2h a4
653 1.1 mrg #define a3h a5
654 1.1 mrg
655 1.1 mrg /* Get the high halves of the inputs into registers. */
656 1.1 mrg srli a2h, a2, 16
657 1.1 mrg srli a3h, a3, 16
658 1.1 mrg
659 1.1 mrg #define a2l a2
660 1.1 mrg #define a3l a3
661 1.1 mrg
662 1.1 mrg #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
663 1.1 mrg /* Clear the high halves of the inputs. This does not matter
664 1.1 mrg for MUL16 because the high bits are ignored. */
665 1.1 mrg extui a2, a2, 0, 16
666 1.1 mrg extui a3, a3, 0, 16
667 1.1 mrg #endif
668 1.1 mrg #endif /* MUL16 || MUL32 */
669 1.1 mrg
670 1.1 mrg
671 1.1 mrg #if XCHAL_HAVE_MUL16
672 1.1 mrg
673 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
674 1.1 mrg mul16u dst, xreg ## xhalf, yreg ## yhalf
675 1.1 mrg
676 1.1 mrg #elif XCHAL_HAVE_MUL32
677 1.1 mrg
678 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
679 1.1 mrg mull dst, xreg ## xhalf, yreg ## yhalf
680 1.1 mrg
681 1.1 mrg #elif XCHAL_HAVE_MAC16
682 1.1 mrg
683 1.1 mrg /* The preprocessor insists on inserting a space when concatenating after
684 1.1 mrg a period in the definition of do_mul below. These macros are a workaround
685 1.1 mrg using underscores instead of periods when doing the concatenation. */
686 1.1 mrg #define umul_aa_ll umul.aa.ll
687 1.1 mrg #define umul_aa_lh umul.aa.lh
688 1.1 mrg #define umul_aa_hl umul.aa.hl
689 1.1 mrg #define umul_aa_hh umul.aa.hh
690 1.1 mrg
691 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
692 1.1 mrg umul_aa_ ## xhalf ## yhalf xreg, yreg; \
693 1.1 mrg rsr dst, ACCLO
694 1.1 mrg
695 1.1 mrg #else /* no multiply hardware */
696 1.1 mrg
697 1.1 mrg #define set_arg_l(dst, src) \
698 1.1 mrg extui dst, src, 0, 16
699 1.1 mrg #define set_arg_h(dst, src) \
700 1.1 mrg srli dst, src, 16
701 1.1 mrg
702 1.1 mrg #if __XTENSA_CALL0_ABI__
703 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
704 1.1 mrg set_arg_ ## xhalf (a13, xreg); \
705 1.1 mrg set_arg_ ## yhalf (a14, yreg); \
706 1.1 mrg call0 .Lmul_mulsi3; \
707 1.1 mrg mov dst, a12
708 1.1 mrg #else
709 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
710 1.1 mrg set_arg_ ## xhalf (a14, xreg); \
711 1.1 mrg set_arg_ ## yhalf (a15, yreg); \
712 1.1 mrg call12 .Lmul_mulsi3; \
713 1.1 mrg mov dst, a14
714 1.1 mrg #endif /* __XTENSA_CALL0_ABI__ */
715 1.1 mrg
716 1.1 mrg #endif /* no multiply hardware */
717 1.1 mrg
718 1.1 mrg /* Add pp1 and pp2 into a6 with carry-out in a9. */
719 1.1 mrg do_mul(a6, a2, l, a3, h) /* pp 1 */
720 1.1 mrg do_mul(a11, a2, h, a3, l) /* pp 2 */
721 1.1 mrg movi a9, 0
722 1.1 mrg add a6, a6, a11
723 1.1 mrg bgeu a6, a11, 1f /* sum not below an addend: no unsigned wrap, so no carry */
724 1.1 mrg addi a9, a9, 1
725 1.1 mrg 1:
726 1.1 mrg /* Shift the high half of a9/a6 into position in a9. Note that
727 1.1 mrg this value can be safely incremented without any carry-outs. */
728 1.1 mrg ssai 16 /* SAR = 16. The sll below relies on this value still being in SAR */
729 1.1 mrg src a9, a9, a6 /* a9 = (carry << 16) | (a6 >> 16) */
730 1.1 mrg
731 1.1 mrg /* Compute the low word into a6. */
732 1.1 mrg do_mul(a11, a2, l, a3, l) /* pp 0 */
733 1.1 mrg sll a6, a6 /* with SAR = 16 this shifts left by 32 - 16 = 16 bits */
734 1.1 mrg add a6, a6, a11
735 1.1 mrg bgeu a6, a11, 1f /* same unsigned carry test as above */
736 1.1 mrg addi a9, a9, 1
737 1.1 mrg 1:
738 1.1 mrg /* Compute the high word into a2. */
739 1.1 mrg do_mul(a2, a2, h, a3, h) /* pp 3 */
740 1.1 mrg add a2, a2, a9
741 1.1 mrg
742 1.1 mrg #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
743 1.1 mrg /* Restore values saved on the stack during the multiplication. */
744 1.1 mrg l32i a0, sp, 0
745 1.1 mrg l32i a8, sp, 4
746 1.1 mrg #endif
747 1.1 mrg #endif /* ! XCHAL_HAVE_MUL32_HIGH */
748 1.1 mrg
749 1.1 mrg /* Shift left by 9 bits, unless there was a carry-out from the
750 1.1 mrg multiply, in which case, shift by 8 bits and increment the
751 1.1 mrg exponent. */
752 1.1 mrg movi a4, 9
753 1.1 mrg srli a5, a2, 24 - 9 /* nonzero when bit 15 or higher of the high word is set, i.e. product bit 47 */
754 1.1 mrg beqz a5, 1f
755 1.1 mrg addi a4, a4, -1
756 1.1 mrg addi a8, a8, 1
757 1.1 mrg 1: ssl a4
758 1.1 mrg src a2, a2, a6 /* a2 = high word of the a2:a6 pair after the left shift: leading one at bit 23 */
759 1.1 mrg sll a6, a6 /* a6 = remaining low bits, left-aligned, used for rounding */
760 1.1 mrg
761 1.1 mrg /* Subtract the extra bias from the exponent sum (plus one to account
762 1.1 mrg for the explicit "1.0" of the mantissa that will be added to the
763 1.1 mrg exponent in the final result). */
764 1.1 mrg movi a4, 0x80
765 1.1 mrg sub a8, a8, a4
766 1.1 mrg
767 1.1 mrg /* Check for over/underflow. The value in a8 is one less than the
768 1.1 mrg final exponent, so values in the range 0..fd are OK here. */
769 1.1 mrg movi a4, 0xfe
770 1.1 mrg bgeu a8, a4, .Lmul_overflow /* unsigned compare, so negative a8 (underflow) is caught here as well */
771 1.1 mrg
772 1.1 mrg .Lmul_round: /* a2 = mantissa with explicit 1.0, a6 = discarded fraction bits, a8 = exponent minus one. */
773 1.1 mrg /* Round. */
774 1.1 mrg bgez a6, .Lmul_rounded /* top bit of a6 clear: fraction below 1/2 */
775 1.1 mrg addi a2, a2, 1
776 1.1 mrg slli a6, a6, 1
777 1.1 mrg beqz a6, .Lmul_exactlyhalf /* nothing below the 1/2 bit: exact tie */
778 1.1 mrg
779 1.1 mrg .Lmul_rounded:
780 1.1 mrg /* Add the exponent to the mantissa. */
781 1.1 mrg slli a8, a8, 23
782 1.1 mrg add a2, a2, a8 /* bit 23 of the mantissa adds the final one to the exponent field */
783 1.1 mrg
784 1.1 mrg .Lmul_addsign:
785 1.1 mrg /* Add the sign bit. */
786 1.1 mrg srli a7, a7, 31
787 1.1 mrg slli a7, a7, 31 /* keep only bit 31 of x xor y */
788 1.1 mrg or a2, a2, a7
789 1.1 mrg
790 1.1 mrg .Lmul_done: /* Common exit. The special-case handlers jump here with the final result already in a2. */
791 1.1 mrg #if __XTENSA_CALL0_ABI__
792 1.1 mrg l32i a12, sp, 16
793 1.1 mrg l32i a13, sp, 20
794 1.1 mrg l32i a14, sp, 24
795 1.1 mrg l32i a15, sp, 28
796 1.1 mrg addi sp, sp, 32 /* release the frame allocated at entry */
797 1.1 mrg #endif
798 1.1 mrg leaf_return
799 1.1 mrg
800 1.1 mrg .Lmul_exactlyhalf: /* Reached after a2 has already been incremented on an exact tie. */
801 1.1 mrg /* Round down to the nearest even value. */
802 1.1 mrg srli a2, a2, 1
803 1.1 mrg slli a2, a2, 1 /* clear bit 0 so the result is even */
804 1.1 mrg j .Lmul_rounded
805 1.1 mrg
806 1.1 mrg .Lmul_overflow: /* a8 is outside 0..0xfd when viewed as unsigned. */
807 1.1 mrg bltz a8, .Lmul_underflow /* negative as signed means the exponent is too small, not too large */
808 1.1 mrg /* Return +/- Infinity. */
809 1.1 mrg movi a8, 0xff
810 1.1 mrg slli a2, a8, 23 /* a2 = 0x7f800000. The sign is ORed in at .Lmul_addsign */
811 1.1 mrg j .Lmul_addsign
812 1.1 mrg
813 1.1 mrg .Lmul_underflow:
814 1.1 mrg /* Create a subnormal value, where the exponent field contains zero,
815 1.1 mrg but the effective exponent is 1. The value of a8 is one less than
816 1.1 mrg the actual exponent, so just negate it to get the shift amount. */
817 1.1 mrg neg a8, a8 /* a8 was negative, so the shift count is at least 1 */
818 1.1 mrg mov a9, a6 /* remember the old fraction bits for the sticky test below */
819 1.1 mrg ssr a8
820 1.1 mrg bgeui a8, 32, .Lmul_flush_to_zero
821 1.1 mrg
822 1.1 mrg /* Shift a2 right. Any bits that are shifted out of a2 are saved
823 1.1 mrg in a6 (combined with the shifted-out bits currently in a6) for
824 1.1 mrg rounding the result. */
825 1.1 mrg sll a6, a2 /* with SAR set by ssr this is a2 << (32 - a8): the bits leaving a2, left-aligned */
826 1.1 mrg srl a2, a2
827 1.1 mrg
828 1.1 mrg /* Set the exponent to zero. */
829 1.1 mrg movi a8, 0
830 1.1 mrg
831 1.1 mrg /* Pack any nonzero bits shifted out into a6. */
832 1.1 mrg beqz a9, .Lmul_round
833 1.1 mrg movi a9, 1
834 1.1 mrg or a6, a6, a9 /* sticky bit: keeps a tie from being detected when lower bits were nonzero */
835 1.1 mrg j .Lmul_round
836 1.1 mrg
837 1.1 mrg .Lmul_flush_to_zero:
838 1.1 mrg /* Return zero with the appropriate sign bit. */
839 1.1 mrg srli a2, a7, 31
840 1.1 mrg slli a2, a2, 31
841 1.1 mrg j .Lmul_done
842 1.1 mrg
843 1.1 mrg #if XCHAL_NO_MUL
844 1.1 mrg
845 1.1 mrg /* For Xtensa processors with no multiply hardware, this simplified
846 1.1 mrg version of _mulsi3 is used for multiplying 16-bit chunks of
847 1.1 mrg the floating-point mantissas. When using CALL0, this function
848 1.1 mrg uses a custom ABI: the inputs are passed in a13 and a14, the
849 1.1 mrg result is returned in a12, and a8 and a15 are clobbered. */
850 1.1 mrg .align 4
851 1.1 mrg .Lmul_mulsi3: /* Local helper, only called through the do_mul macro in __mulsf3. */
852 1.1 mrg leaf_entry sp, 16
853 1.1 mrg .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
854 1.1 mrg movi \dst, 0 /* running product */
855 1.1 mrg 1: add \tmp1, \src2, \dst /* loop head: each pass consumes the low four bits of src1 */
856 1.1 mrg extui \tmp2, \src1, 0, 1
857 1.1 mrg movnez \dst, \tmp1, \tmp2 /* bit 0 of src1 set: dst = dst + src2 */
858 1.1 mrg
859 1.1 mrg do_addx2 \tmp1, \src2, \dst, \tmp1 /* do_addx2, do_addx4 and do_addx8 are macros defined outside the visible source. Presumably tmp1 = src2 * 2 + dst here -- TODO confirm */
860 1.1 mrg extui \tmp2, \src1, 1, 1
861 1.1 mrg movnez \dst, \tmp1, \tmp2
862 1.1 mrg
863 1.1 mrg do_addx4 \tmp1, \src2, \dst, \tmp1
864 1.1 mrg extui \tmp2, \src1, 2, 1
865 1.1 mrg movnez \dst, \tmp1, \tmp2
866 1.1 mrg
867 1.1 mrg do_addx8 \tmp1, \src2, \dst, \tmp1
868 1.1 mrg extui \tmp2, \src1, 3, 1
869 1.1 mrg movnez \dst, \tmp1, \tmp2
870 1.1 mrg
871 1.1 mrg srli \src1, \src1, 4 /* drop the four multiplier bits just handled */
872 1.1 mrg slli \src2, \src2, 4 /* scale the multiplicand to match */
873 1.1 mrg bnez \src1, 1b /* stop as soon as no multiplier bits remain */
874 1.1 mrg .endm
875 1.1 mrg #if __XTENSA_CALL0_ABI__
876 1.1 mrg mul_mulsi3_body a12, a13, a14, a15, a8
877 1.1 mrg #else
878 1.1 mrg /* The result will be written into a2, so save that argument in a4. */
879 1.1 mrg mov a4, a2 /* the caller placed its arguments in a14 and a15 before call12. They are addressed as a2 and a3 here, and the caller reads the result back from a14 */
880 1.1 mrg mul_mulsi3_body a2, a4, a3, a5, a6
881 1.1 mrg #endif
882 1.1 mrg leaf_return
883 1.1 mrg #endif /* XCHAL_NO_MUL */
884 1.1 mrg #endif /* L_mulsf3 */
885 1.1 mrg
886 1.1 mrg #ifdef L_divsf3
887 1.1 mrg
888 1.1.1.3 mrg .literal_position
889 1.1 mrg /* Division */
890 1.1 mrg __divsf3_aux:
891 1.1 mrg
892 1.1 mrg /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
893 1.1 mrg (This code is placed before the start of the function just to
894 1.1 mrg keep it in range of the limited branch displacements.) */
895 1.1 mrg
/* Register state set up by __divsf3 before it branches to the handlers
   below: a2 = x, a3 = y, a6 = 0x7f800000, a7 = x ^ y (result sign in
   bit 31), a8 = exponent field of x, a9 = exponent field of y.
   NaN and Infinity operands have already been diverted elsewhere. */
896 1.1 mrg .Ldiv_yexpzero:
897 1.1 mrg /* Clear the sign bit of y. */
898 1.1 mrg slli a3, a3, 1
899 1.1 mrg srli a3, a3, 1
900 1.1 mrg
901 1.1 mrg /* Check for division by zero. */
902 1.1 mrg beqz a3, .Ldiv_yzero
903 1.1 mrg
904 1.1 mrg /* Normalize y. Adjust the exponent in a9. */
905 1.1 mrg do_nsau a10, a3, a4, a5 /* macro defined elsewhere; presumably a10 = leading-zero count of a3, with a4/a5 as scratch */
906 1.1 mrg addi a10, a10, -8 /* shift count that moves the leading 1 up to bit 23 */
907 1.1 mrg ssl a10
908 1.1 mrg sll a3, a3 /* a3 <<= a10 */
909 1.1 mrg movi a9, 1
910 1.1 mrg sub a9, a9, a10 /* a9 = 1 - shift count */
911 1.1 mrg j .Ldiv_ynormalized
912 1.1 mrg
913 1.1 mrg .Ldiv_yzero:
914 1.1 mrg /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
915 1.1 mrg slli a4, a2, 1
916 1.1 mrg srli a4, a4, 1 /* a4 = x without its sign bit */
917 1.1 mrg srli a2, a7, 31
918 1.1 mrg slli a2, a2, 31 /* a2 = sign of the result only */
919 1.1 mrg or a2, a2, a6 /* a2 = signed Infinity */
920 1.1 mrg bnez a4, 1f /* x is nonzero: return the Infinity */
921 1.1 mrg movi a4, 0x400000 /* make it a quiet NaN */
922 1.1 mrg or a2, a2, a4
923 1.1 mrg 1: leaf_return
924 1.1 mrg
925 1.1 mrg .Ldiv_xexpzero:
926 1.1 mrg /* Clear the sign bit of x. */
927 1.1 mrg slli a2, a2, 1
928 1.1 mrg srli a2, a2, 1
929 1.1 mrg
930 1.1 mrg /* If x is zero, return zero. */
931 1.1 mrg beqz a2, .Ldiv_return_zero
932 1.1 mrg
933 1.1 mrg /* Normalize x. Adjust the exponent in a8. */
934 1.1 mrg do_nsau a10, a2, a4, a5 /* macro defined elsewhere; presumably a10 = leading-zero count of a2, with a4/a5 as scratch */
935 1.1 mrg addi a10, a10, -8 /* shift count that moves the leading 1 up to bit 23 */
936 1.1 mrg ssl a10
937 1.1 mrg sll a2, a2 /* a2 <<= a10 */
938 1.1 mrg movi a8, 1
939 1.1 mrg sub a8, a8, a10 /* a8 = 1 - shift count */
940 1.1 mrg j .Ldiv_xnormalized
941 1.1 mrg
942 1.1 mrg .Ldiv_return_zero:
943 1.1 mrg /* Return zero with the appropriate sign bit. */
944 1.1 mrg srli a2, a7, 31
945 1.1 mrg slli a2, a2, 31 /* keep only bit 31 of a7 */
946 1.1 mrg leaf_return
947 1.1 mrg
948 1.1 mrg .Ldiv_xnan_or_inf:
/* Reached from __divsf3 when every exponent bit of x (a2) is set, i.e.
   x is NaN or Infinity. On entry a3 = y and a6 = 0x7f800000.
   Returns in a2:
     - a quiet NaN when y is NaN or Infinity, or when x itself is a NaN
       (a signaling NaN operand is quieted rather than passed through);
     - x with its sign XORed with the sign of y when x is Infinity and
       y is finite.
   Clobbers a4 and a7. */
949 1.1 mrg /* Set the sign bit of the result. */
950 1.1 mrg srli a7, a3, 31
951 1.1 mrg slli a7, a7, 31
952 1.1 mrg xor a2, a2, a7
953 1.1 mrg /* If y is NaN or Inf, return NaN. */
954 1.1 mrg ball a3, a6, 1f
/* y is finite. Only an Infinity may be returned unchanged; if x has
   any mantissa bit set it is a NaN and must come back quiet. */
slli a7, a2, 9 /* a7 = mantissa of x (the sign is already applied, so a7 is free) */
beqz a7, 2f
955 1.1 mrg 1: movi a4, 0x400000 /* make it a quiet NaN */
956 1.1 mrg or a2, a2, a4
957 1.1 mrg 2: leaf_return
958 1.1 mrg
959 1.1 mrg .Ldiv_ynan_or_inf:
/* Reached from __divsf3 when x is finite and every exponent bit of y
   (a3) is set. a7 still holds x ^ y, which .Ldiv_return_zero uses for
   the sign of the zero result. Clobbers a4 and a8. */
960 1.1 mrg /* If y is Infinity, return zero. */
961 1.1 mrg slli a8, a3, 9
962 1.1 mrg beqz a8, .Ldiv_return_zero
963 1.1 mrg /* y is NaN; return it as a quiet NaN. */
964 1.1 mrg mov a2, a3
movi a4, 0x400000 /* make it a quiet NaN */
or a2, a2, a4
965 1.1 mrg leaf_return
966 1.1 mrg
967 1.1 mrg .align 4
968 1.1 mrg .global __divsf3
969 1.1 mrg .type __divsf3, @function
/* __divsf3: single-precision division.
   In:  a2 = x, a3 = y (raw IEEE-754 single-precision bit patterns).
   Out: a2 = x / y, rounded to nearest with ties going to even.
   Scratch: a4, a6, a7, a8, a9, a10 (the special-case handlers placed
   before this entry point additionally pass a4/a5 to do_nsau).
   The mantissa quotient is produced one bit per iteration by a
   compare-and-subtract loop. */
970 1.1 mrg __divsf3:
971 1.1 mrg leaf_entry sp, 16
972 1.1 mrg movi a6, 0x7f800000 /* mask of the exponent field */
973 1.1 mrg
974 1.1 mrg /* Get the sign of the result. */
975 1.1 mrg xor a7, a2, a3
976 1.1 mrg
977 1.1 mrg /* Check for NaN and infinity. */
978 1.1 mrg ball a2, a6, .Ldiv_xnan_or_inf /* taken when all exponent bits of x are set */
979 1.1 mrg ball a3, a6, .Ldiv_ynan_or_inf /* taken when all exponent bits of y are set */
980 1.1 mrg
981 1.1 mrg /* Extract the exponents. */
982 1.1 mrg extui a8, a2, 23, 8
983 1.1 mrg extui a9, a3, 23, 8
984 1.1 mrg
985 1.1 mrg beqz a9, .Ldiv_yexpzero /* y is zero or subnormal */
986 1.1 mrg .Ldiv_ynormalized:
987 1.1 mrg beqz a8, .Ldiv_xexpzero /* x is zero or subnormal */
988 1.1 mrg .Ldiv_xnormalized:
989 1.1 mrg
990 1.1 mrg /* Subtract the exponents. */
991 1.1 mrg sub a8, a8, a9
992 1.1 mrg
993 1.1 mrg /* Replace sign/exponent fields with explicit "1.0". */
994 1.1 mrg movi a10, 0xffffff
995 1.1 mrg or a2, a2, a6 /* sets bits 23..30, so bit 23 becomes the explicit 1 */
996 1.1 mrg and a2, a2, a10 /* keep bits 0..23 only */
997 1.1 mrg or a3, a3, a6
998 1.1 mrg and a3, a3, a10
999 1.1 mrg
1000 1.1 mrg /* The first digit of the mantissa division must be a one.
1001 1.1 mrg Shift x (and adjust the exponent) as needed to make this true. */
1002 1.1 mrg bltu a3, a2, 1f /* y < x already: no adjustment needed */
1003 1.1 mrg slli a2, a2, 1
1004 1.1 mrg addi a8, a8, -1
1005 1.1 mrg 1:
1006 1.1 mrg /* Do the first subtraction and shift. */
1007 1.1 mrg sub a2, a2, a3
1008 1.1 mrg slli a2, a2, 1
1009 1.1 mrg
1010 1.1 mrg /* Put the quotient into a10. */
1011 1.1 mrg movi a10, 1
1012 1.1 mrg
1013 1.1 mrg /* Divide one bit at a time for 23 bits. */
1014 1.1 mrg movi a9, 23
1015 1.1 mrg #if XCHAL_HAVE_LOOPS
1016 1.1 mrg loop a9, .Ldiv_loopend /* hardware loop: run the body up to .Ldiv_loopend a9 times */
1017 1.1 mrg #endif
1018 1.1 mrg .Ldiv_loop:
1019 1.1 mrg /* Shift the quotient << 1. */
1020 1.1 mrg slli a10, a10, 1
1021 1.1 mrg
1022 1.1 mrg /* Is this digit a 0 or 1? */
1023 1.1 mrg bltu a2, a3, 1f
1024 1.1 mrg
1025 1.1 mrg /* Output a 1 and subtract. */
1026 1.1 mrg addi a10, a10, 1
1027 1.1 mrg sub a2, a2, a3
1028 1.1 mrg
1029 1.1 mrg /* Shift the dividend << 1. */
1030 1.1 mrg 1: slli a2, a2, 1
1031 1.1 mrg
1032 1.1 mrg #if !XCHAL_HAVE_LOOPS
1033 1.1 mrg addi a9, a9, -1 /* software loop counter when the loop instruction is unavailable */
1034 1.1 mrg bnez a9, .Ldiv_loop
1035 1.1 mrg #endif
1036 1.1 mrg .Ldiv_loopend:
1037 1.1 mrg
1038 1.1 mrg /* Add the exponent bias (less one to account for the explicit "1.0"
1039 1.1 mrg of the mantissa that will be added to the exponent in the final
1040 1.1 mrg result). */
1041 1.1 mrg addi a8, a8, 0x7e
1042 1.1 mrg
1043 1.1 mrg /* Check for over/underflow. The value in a8 is one less than the
1044 1.1 mrg final exponent, so values in the range 0..fd are OK here. */
1045 1.1 mrg movi a4, 0xfe
1046 1.1 mrg bgeu a8, a4, .Ldiv_overflow /* unsigned compare, so a negative a8 is caught here too */
1047 1.1 mrg
1048 1.1 mrg .Ldiv_round:
1049 1.1 mrg /* Round. The remainder (<< 1) is in a2. */
1050 1.1 mrg bltu a2, a3, .Ldiv_rounded /* remainder below half the divisor: round down */
1051 1.1 mrg addi a10, a10, 1
1052 1.1 mrg beq a2, a3, .Ldiv_exactlyhalf
1053 1.1 mrg
1054 1.1 mrg .Ldiv_rounded:
1055 1.1 mrg /* Add the exponent to the mantissa. */
1056 1.1 mrg slli a8, a8, 23
1057 1.1 mrg add a2, a10, a8 /* bit 23 of a10 carries into the exponent field */
1058 1.1 mrg
1059 1.1 mrg .Ldiv_addsign:
1060 1.1 mrg /* Add the sign bit. */
1061 1.1 mrg srli a7, a7, 31
1062 1.1 mrg slli a7, a7, 31
1063 1.1 mrg or a2, a2, a7
1064 1.1 mrg leaf_return
1065 1.1 mrg
1066 1.1 mrg .Ldiv_overflow:
1067 1.1 mrg bltz a8, .Ldiv_underflow /* negative a8 means underflow, not overflow */
1068 1.1 mrg /* Return +/- Infinity. */
1069 1.1 mrg addi a8, a4, 1 /* 0xff */
1070 1.1 mrg slli a2, a8, 23
1071 1.1 mrg j .Ldiv_addsign
1072 1.1 mrg
1073 1.1 mrg .Ldiv_exactlyhalf:
1074 1.1 mrg /* Remainder is exactly half the divisor. Round even. */
1075 1.1 mrg srli a10, a10, 1
1076 1.1 mrg slli a10, a10, 1 /* clear bit 0 of the already-incremented quotient */
1077 1.1 mrg j .Ldiv_rounded
1078 1.1 mrg
1079 1.1 mrg .Ldiv_underflow:
1080 1.1 mrg /* Create a subnormal value, where the exponent field contains zero,
1081 1.1 mrg but the effective exponent is 1. The value of a8 is one less than
1082 1.1 mrg the actual exponent, so just negate it to get the shift amount. */
1083 1.1 mrg neg a8, a8
1084 1.1 mrg ssr a8
1085 1.1 mrg bgeui a8, 32, .Ldiv_flush_to_zero /* shifting by 32 or more leaves nothing */
1086 1.1 mrg
1087 1.1 mrg /* Shift a10 right. Any bits that are shifted out of a10 are
1088 1.1 mrg saved in a6 for rounding the result. */
1089 1.1 mrg sll a6, a10 /* a6 = a10 << (32 - a8): the shifted-out bits, msb-aligned */
1090 1.1 mrg srl a10, a10 /* a10 >>= a8 */
1091 1.1 mrg
1092 1.1 mrg /* Set the exponent to zero. */
1093 1.1 mrg movi a8, 0
1094 1.1 mrg
1095 1.1 mrg /* Pack any nonzero remainder (in a2) into a6. */
1096 1.1 mrg beqz a2, 1f
1097 1.1 mrg movi a9, 1
1098 1.1 mrg or a6, a6, a9 /* bit 0 of a6 records that the remainder was nonzero */
1099 1.1 mrg
1100 1.1 mrg /* Round a10 based on the bits shifted out into a6. */
1101 1.1 mrg 1: bgez a6, .Ldiv_rounded /* top shifted-out bit clear: below half, round down */
1102 1.1 mrg addi a10, a10, 1
1103 1.1 mrg slli a6, a6, 1
1104 1.1 mrg bnez a6, .Ldiv_rounded /* something below the half bit: keep the round-up */
1105 1.1 mrg srli a10, a10, 1
1106 1.1 mrg slli a10, a10, 1 /* exactly half: clear bit 0 to round to even */
1107 1.1 mrg j .Ldiv_rounded
1108 1.1 mrg
1109 1.1 mrg .Ldiv_flush_to_zero:
1110 1.1 mrg /* Return zero with the appropriate sign bit. */
1111 1.1 mrg srli a2, a7, 31
1112 1.1 mrg slli a2, a2, 31
1113 1.1 mrg leaf_return
1114 1.1 mrg
1115 1.1 mrg #endif /* L_divsf3 */
1116 1.1 mrg
1117 1.1 mrg #ifdef L_cmpsf2
1118 1.1 mrg
1119 1.1 mrg /* Equal and Not Equal */
1120 1.1 mrg
1121 1.1 mrg .align 4
1122 1.1 mrg .global __eqsf2
1123 1.1 mrg .global __nesf2
1124 1.1 mrg .set __nesf2, __eqsf2
1125 1.1 mrg .type __eqsf2, @function
/* __eqsf2 (also exported as __nesf2): equality test.
   In:  a2 = x, a3 = y.
   Out: a2 = 0 if x and y compare equal, 1 otherwise. A NaN never
   compares equal, and +0 and -0 compare equal.
   Scratch: a3, a6, a7. */
1126 1.1 mrg __eqsf2:
1127 1.1 mrg leaf_entry sp, 16
1128 1.1 mrg bne a2, a3, 4f /* different bit patterns: may still be +0 vs -0 */
1129 1.1 mrg
1130 1.1 mrg /* The values are equal but NaN != NaN. Check the exponent. */
1131 1.1 mrg movi a6, 0x7f800000
1132 1.1 mrg ball a2, a6, 3f
1133 1.1 mrg
1134 1.1 mrg /* Equal. */
1135 1.1 mrg movi a2, 0
1136 1.1 mrg leaf_return
1137 1.1 mrg
1138 1.1 mrg /* Not equal. */
1139 1.1 mrg 2: movi a2, 1 /* no branch in this function targets label 2 */
1140 1.1 mrg leaf_return
1141 1.1 mrg
1142 1.1 mrg /* Check if the mantissas are nonzero. */
1143 1.1 mrg 3: slli a7, a2, 9 /* a7 = mantissa bits of x, msb-aligned */
1144 1.1 mrg j 5f
1145 1.1 mrg
1146 1.1 mrg /* Check if x and y are zero with different signs. */
1147 1.1 mrg 4: or a7, a2, a3
1148 1.1 mrg slli a7, a7, 1 /* drop the sign bit */
1149 1.1 mrg
1150 1.1 mrg /* Equal if a7 == 0, where a7 is either (x | y) with the sign bit
1151 1.1 mrg shifted out, or the mantissa of x when the exponent of x is all ones and x == y. */
1152 1.1 mrg 5: movi a2, 0
1153 1.1 mrg movi a3, 1
1154 1.1 mrg movnez a2, a3, a7 /* a2 = 1 when a7 != 0 */
1155 1.1 mrg leaf_return
1156 1.1 mrg
1157 1.1 mrg
1158 1.1 mrg /* Greater Than */
1159 1.1 mrg
1160 1.1 mrg .align 4
1161 1.1 mrg .global __gtsf2
1162 1.1 mrg .type __gtsf2, @function
/* __gtsf2: greater-than test.
   In:  a2 = x, a3 = y.
   Out: a2 = 0 if either operand is a NaN; otherwise the result of the
   shared .Lle_cmp code in __lesf2 (0 when x <= y, 1 when x > y).
   Scratch: a6, a7 (plus whatever .Lle_cmp uses). */
1163 1.1 mrg __gtsf2:
1164 1.1 mrg leaf_entry sp, 16
1165 1.1 mrg movi a6, 0x7f800000
1166 1.1 mrg ball a2, a6, 2f /* x has all exponent bits set: NaN or Infinity */
1167 1.1 mrg 1: bnall a3, a6, .Lle_cmp /* y is finite: do the ordinary comparison */
1168 1.1 mrg
1169 1.1 mrg /* Check if y is a NaN. */
1170 1.1 mrg slli a7, a3, 9
1171 1.1 mrg beqz a7, .Lle_cmp /* zero mantissa: y is Infinity, which is ordered */
1172 1.1 mrg movi a2, 0
1173 1.1 mrg leaf_return
1174 1.1 mrg
1175 1.1 mrg /* Check if x is a NaN. */
1176 1.1 mrg 2: slli a7, a2, 9
1177 1.1 mrg beqz a7, 1b /* x is Infinity: go on to check y */
1178 1.1 mrg movi a2, 0
1179 1.1 mrg leaf_return
1180 1.1 mrg
1181 1.1 mrg
1182 1.1 mrg /* Less Than or Equal */
1183 1.1 mrg
1184 1.1 mrg .align 4
1185 1.1 mrg .global __lesf2
1186 1.1 mrg .type __lesf2, @function
/* __lesf2: less-than-or-equal test.
   In:  a2 = x, a3 = y.
   Out: a2 = 1 if either operand is a NaN; otherwise 0 when x <= y and
   1 when x > y (+0 and -0 count as equal).
   Scratch: a3, a6, a7.
   The .Lle_cmp tail is also entered from __gtsf2 with the same
   register contents. */
1187 1.1 mrg __lesf2:
1188 1.1 mrg leaf_entry sp, 16
1189 1.1 mrg movi a6, 0x7f800000
1190 1.1 mrg ball a2, a6, 2f /* x has all exponent bits set: NaN or Infinity */
1191 1.1 mrg 1: bnall a3, a6, .Lle_cmp /* y is finite: do the ordinary comparison */
1192 1.1 mrg
1193 1.1 mrg /* Check if y is a NaN. */
1194 1.1 mrg slli a7, a3, 9
1195 1.1 mrg beqz a7, .Lle_cmp /* zero mantissa: y is Infinity, which is ordered */
1196 1.1 mrg movi a2, 1
1197 1.1 mrg leaf_return
1198 1.1 mrg
1199 1.1 mrg /* Check if x is a NaN. */
1200 1.1 mrg 2: slli a7, a2, 9
1201 1.1 mrg beqz a7, 1b /* x is Infinity: go on to check y */
1202 1.1 mrg movi a2, 1
1203 1.1 mrg leaf_return
1204 1.1 mrg
1205 1.1 mrg .Lle_cmp:
1206 1.1 mrg /* Check if x and y have different signs. */
1207 1.1 mrg xor a7, a2, a3
1208 1.1 mrg bltz a7, .Lle_diff_signs
1209 1.1 mrg
1210 1.1 mrg /* Check if x is negative. */
1211 1.1 mrg bltz a2, .Lle_xneg
1212 1.1 mrg
1213 1.1 mrg /* Check if x <= y. */
1214 1.1 mrg bltu a3, a2, 5f /* both non-negative: the bit patterns order like the values */
1215 1.1 mrg 4: movi a2, 0
1216 1.1 mrg leaf_return
1217 1.1 mrg
1218 1.1 mrg .Lle_xneg:
1219 1.1 mrg /* Check if y <= x. */
1220 1.1 mrg bgeu a2, a3, 4b /* both negative: the larger bit pattern is the smaller value */
1221 1.1 mrg 5: movi a2, 1
1222 1.1 mrg leaf_return
1223 1.1 mrg
1224 1.1 mrg .Lle_diff_signs:
1225 1.1 mrg bltz a2, 4b /* x negative, y not: x <= y */
1226 1.1 mrg
1227 1.1 mrg /* Check if both x and y are zero. */
1228 1.1 mrg or a7, a2, a3
1229 1.1 mrg slli a7, a7, 1 /* drop the sign bit */
1230 1.1 mrg movi a2, 1
1231 1.1 mrg movi a3, 0
1232 1.1 mrg moveqz a2, a3, a7 /* a2 = 0 only when both are zero */
1233 1.1 mrg leaf_return
1234 1.1 mrg
1235 1.1 mrg
1236 1.1 mrg /* Greater Than or Equal */
1237 1.1 mrg
1238 1.1 mrg .align 4
1239 1.1 mrg .global __gesf2
1240 1.1 mrg .type __gesf2, @function
/* __gesf2: greater-than-or-equal test.
   In:  a2 = x, a3 = y.
   Out: a2 = -1 if either operand is a NaN; otherwise the result of the
   shared .Llt_cmp code in __ltsf2 (-1 when x < y, 0 when x >= y).
   Scratch: a6, a7 (plus whatever .Llt_cmp uses). */
1241 1.1 mrg __gesf2:
1242 1.1 mrg leaf_entry sp, 16
1243 1.1 mrg movi a6, 0x7f800000
1244 1.1 mrg ball a2, a6, 2f /* x has all exponent bits set: NaN or Infinity */
1245 1.1 mrg 1: bnall a3, a6, .Llt_cmp /* y is finite: do the ordinary comparison */
1246 1.1 mrg
1247 1.1 mrg /* Check if y is a NaN. */
1248 1.1 mrg slli a7, a3, 9
1249 1.1 mrg beqz a7, .Llt_cmp /* zero mantissa: y is Infinity, which is ordered */
1250 1.1 mrg movi a2, -1
1251 1.1 mrg leaf_return
1252 1.1 mrg
1253 1.1 mrg /* Check if x is a NaN. */
1254 1.1 mrg 2: slli a7, a2, 9
1255 1.1 mrg beqz a7, 1b /* x is Infinity: go on to check y */
1256 1.1 mrg movi a2, -1
1257 1.1 mrg leaf_return
1258 1.1 mrg
1259 1.1 mrg
1260 1.1 mrg /* Less Than */
1261 1.1 mrg
1262 1.1 mrg .align 4
1263 1.1 mrg .global __ltsf2
1264 1.1 mrg .type __ltsf2, @function
/* __ltsf2: less-than test.
   In:  a2 = x, a3 = y.
   Out: a2 = 0 if either operand is a NaN; otherwise -1 when x < y and
   0 when x >= y (+0 and -0 count as equal).
   Scratch: a3, a6, a7.
   The .Llt_cmp tail is also entered from __gesf2 with the same
   register contents. */
1265 1.1 mrg __ltsf2:
1266 1.1 mrg leaf_entry sp, 16
1267 1.1 mrg movi a6, 0x7f800000
1268 1.1 mrg ball a2, a6, 2f /* x has all exponent bits set: NaN or Infinity */
1269 1.1 mrg 1: bnall a3, a6, .Llt_cmp /* y is finite: do the ordinary comparison */
1270 1.1 mrg
1271 1.1 mrg /* Check if y is a NaN. */
1272 1.1 mrg slli a7, a3, 9
1273 1.1 mrg beqz a7, .Llt_cmp /* zero mantissa: y is Infinity, which is ordered */
1274 1.1 mrg movi a2, 0
1275 1.1 mrg leaf_return
1276 1.1 mrg
1277 1.1 mrg /* Check if x is a NaN. */
1278 1.1 mrg 2: slli a7, a2, 9
1279 1.1 mrg beqz a7, 1b /* x is Infinity: go on to check y */
1280 1.1 mrg movi a2, 0
1281 1.1 mrg leaf_return
1282 1.1 mrg
1283 1.1 mrg .Llt_cmp:
1284 1.1 mrg /* Check if x and y have different signs. */
1285 1.1 mrg xor a7, a2, a3
1286 1.1 mrg bltz a7, .Llt_diff_signs
1287 1.1 mrg
1288 1.1 mrg /* Check if x is negative. */
1289 1.1 mrg bltz a2, .Llt_xneg
1290 1.1 mrg
1291 1.1 mrg /* Check if x < y. */
1292 1.1 mrg bgeu a2, a3, 5f /* both non-negative: the bit patterns order like the values */
1293 1.1 mrg 4: movi a2, -1
1294 1.1 mrg leaf_return
1295 1.1 mrg
1296 1.1 mrg .Llt_xneg:
1297 1.1 mrg /* Check if y < x. */
1298 1.1 mrg bltu a3, a2, 4b /* both negative: the larger bit pattern is the smaller value */
1299 1.1 mrg 5: movi a2, 0
1300 1.1 mrg leaf_return
1301 1.1 mrg
1302 1.1 mrg .Llt_diff_signs:
1303 1.1 mrg bgez a2, 5b /* x non-negative, y negative: x is not less than y */
1304 1.1 mrg
1305 1.1 mrg /* x is negative and y is not, so x < y unless both are zero. */
1306 1.1 mrg or a7, a2, a3
1307 1.1 mrg slli a7, a7, 1 /* drop the sign bit */
1308 1.1 mrg movi a2, 0
1309 1.1 mrg movi a3, -1
1310 1.1 mrg movnez a2, a3, a7 /* a2 = -1 unless both are zero */
1311 1.1 mrg leaf_return
1312 1.1 mrg
1313 1.1 mrg
1314 1.1 mrg /* Unordered */
1315 1.1 mrg
1316 1.1 mrg .align 4
1317 1.1 mrg .global __unordsf2
1318 1.1 mrg .type __unordsf2, @function
/* __unordsf2: unordered test.
   In:  a2 = x, a3 = y.
   Out: a2 = 1 if x or y is a NaN, 0 otherwise.
   Scratch: a6, a7. */
1319 1.1 mrg __unordsf2:
1320 1.1 mrg leaf_entry sp, 16
1321 1.1 mrg movi a6, 0x7f800000
1322 1.1 mrg ball a2, a6, 3f /* x has all exponent bits set: NaN or Infinity */
1323 1.1 mrg 1: ball a3, a6, 4f /* y has all exponent bits set: NaN or Infinity */
1324 1.1 mrg 2: movi a2, 0 /* neither operand is a NaN */
1325 1.1 mrg leaf_return
1326 1.1 mrg
1327 1.1 mrg 3: slli a7, a2, 9 /* a7 = mantissa bits of x */
1328 1.1 mrg beqz a7, 1b /* x is Infinity: go on to check y */
1329 1.1 mrg movi a2, 1
1330 1.1 mrg leaf_return
1331 1.1 mrg
1332 1.1 mrg 4: slli a7, a3, 9 /* a7 = mantissa bits of y */
1333 1.1 mrg beqz a7, 2b /* y is Infinity: ordered */
1334 1.1 mrg movi a2, 1
1335 1.1 mrg leaf_return
1336 1.1 mrg
1337 1.1 mrg #endif /* L_cmpsf2 */
1338 1.1 mrg
1339 1.1 mrg #ifdef L_fixsfsi
1340 1.1 mrg
1341 1.1 mrg .align 4
1342 1.1 mrg .global __fixsfsi
1343 1.1 mrg .type __fixsfsi, @function
/* __fixsfsi: convert single-precision float to signed 32-bit integer.
   In:  a2 = x.
   Out: a2 = x with the fraction discarded (truncation toward zero).
   Inputs with exponent field <= 0x7e (magnitude below 1) give 0.
   Out-of-range values and Infinities saturate to 0x7fffffff or
   0x80000000 according to sign; NaN gives 0x7fffffff.
   Scratch: a4, a5, a6, a7. */
1344 1.1 mrg __fixsfsi:
1345 1.1 mrg leaf_entry sp, 16
1346 1.1 mrg
1347 1.1 mrg /* Check for NaN and Infinity. */
1348 1.1 mrg movi a6, 0x7f800000
1349 1.1 mrg ball a2, a6, .Lfixsfsi_nan_or_inf
1350 1.1 mrg
1351 1.1 mrg /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */
1352 1.1 mrg extui a4, a2, 23, 8
1353 1.1 mrg addi a4, a4, -0x7e
1354 1.1 mrg bgei a4, 32, .Lfixsfsi_maxint
1355 1.1 mrg blti a4, 1, .Lfixsfsi_zero
1356 1.1 mrg
1357 1.1 mrg /* Add explicit "1.0" and shift << 8. */
1358 1.1 mrg or a7, a2, a6 /* forces bit 23 to 1; bit 31 of a7 is still the sign of x */
1359 1.1 mrg slli a5, a7, 8 /* leading 1 now in bit 31; sign and exponent bits shifted out */
1360 1.1 mrg
1361 1.1 mrg /* Shift back to the right, based on the exponent. */
1362 1.1 mrg ssl a4 /* shift by 32 - a4 */
1363 1.1 mrg srl a5, a5
1364 1.1 mrg
1365 1.1 mrg /* Negate the result if sign != 0. */
1366 1.1 mrg neg a2, a5
1367 1.1 mrg movgez a2, a5, a7 /* keep the un-negated value when x is non-negative */
1368 1.1 mrg leaf_return
1369 1.1 mrg
1370 1.1 mrg .Lfixsfsi_nan_or_inf:
1371 1.1 mrg /* Handle Infinity and NaN. */
1372 1.1 mrg slli a4, a2, 9
1373 1.1 mrg beqz a4, .Lfixsfsi_maxint /* zero mantissa: Infinity, saturate by sign */
1374 1.1 mrg
1375 1.1 mrg /* Translate NaN to +maxint. */
1376 1.1 mrg movi a2, 0 /* a non-negative a2 selects the positive limit below */
1377 1.1 mrg
1378 1.1 mrg .Lfixsfsi_maxint:
1379 1.1 mrg slli a4, a6, 8 /* 0x80000000 */
1380 1.1 mrg addi a5, a4, -1 /* 0x7fffffff */
1381 1.1 mrg movgez a4, a5, a2 /* pick 0x7fffffff when a2 >= 0 */
1382 1.1 mrg mov a2, a4
1383 1.1 mrg leaf_return
1384 1.1 mrg
1385 1.1 mrg .Lfixsfsi_zero:
1386 1.1 mrg movi a2, 0
1387 1.1 mrg leaf_return
1388 1.1 mrg
1389 1.1 mrg #endif /* L_fixsfsi */
1390 1.1 mrg
1391 1.1 mrg #ifdef L_fixsfdi
1392 1.1 mrg
1393 1.1 mrg .align 4
1394 1.1 mrg .global __fixsfdi
1395 1.1 mrg .type __fixsfdi, @function
/* __fixsfdi: convert single-precision float to signed 64-bit integer.
   In:  a2 = x.
   Out: xh:xl = x with the fraction discarded (xh/xl are the
   endian-dependent names for a2/a3 defined at the top of the file).
   Inputs with exponent field <= 0x7e give 0. Out-of-range values and
   Infinities saturate to 0x7fffffffffffffff or 0x8000000000000000
   according to sign; NaN gives 0x7fffffffffffffff.
   Scratch: a4, a6, a7. */
1396 1.1 mrg __fixsfdi:
1397 1.1 mrg leaf_entry sp, 16
1398 1.1 mrg
1399 1.1 mrg /* Check for NaN and Infinity. */
1400 1.1 mrg movi a6, 0x7f800000
1401 1.1 mrg ball a2, a6, .Lfixsfdi_nan_or_inf
1402 1.1 mrg
1403 1.1 mrg /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */
1404 1.1 mrg extui a4, a2, 23, 8
1405 1.1 mrg addi a4, a4, -0x7e
1406 1.1 mrg bgei a4, 64, .Lfixsfdi_maxint
1407 1.1 mrg blti a4, 1, .Lfixsfdi_zero
1408 1.1 mrg
1409 1.1 mrg /* Add explicit "1.0" and shift << 8. */
1410 1.1 mrg or a7, a2, a6 /* forces bit 23 to 1; a7 also preserves the sign, since xh may alias a2 */
1411 1.1 mrg slli xh, a7, 8 /* leading 1 now in bit 31 of xh */
1412 1.1 mrg
1413 1.1 mrg /* Shift back to the right, based on the exponent. */
1414 1.1 mrg ssl a4 /* shift by 64 - a4 */
1415 1.1 mrg bgei a4, 32, .Lfixsfdi_smallshift
1416 1.1 mrg srl xl, xh /* a4 < 32: the whole result fits in the low word */
1417 1.1 mrg movi xh, 0
1418 1.1 mrg
1419 1.1 mrg .Lfixsfdi_shifted:
1420 1.1 mrg /* Negate the result if sign != 0. */
1421 1.1 mrg bgez a7, 1f
1422 1.1 mrg neg xl, xl
1423 1.1 mrg neg xh, xh
1424 1.1 mrg beqz xl, 1f /* no borrow into the high word when the low word is zero */
1425 1.1 mrg addi xh, xh, -1
1426 1.1 mrg 1: leaf_return
1427 1.1 mrg
1428 1.1 mrg .Lfixsfdi_smallshift:
1429 1.1 mrg movi xl, 0 /* overwritten by the sll on the next line */
1430 1.1 mrg sll xl, xh /* low word = bits of xh that the right shift pushes out */
1431 1.1 mrg srl xh, xh
1432 1.1 mrg j .Lfixsfdi_shifted
1433 1.1 mrg
1434 1.1 mrg .Lfixsfdi_nan_or_inf:
1435 1.1 mrg /* Handle Infinity and NaN. */
1436 1.1 mrg slli a4, a2, 9
1437 1.1 mrg beqz a4, .Lfixsfdi_maxint /* zero mantissa: Infinity, saturate by sign */
1438 1.1 mrg
1439 1.1 mrg /* Translate NaN to +maxint. */
1440 1.1 mrg movi a2, 0 /* a non-negative a2 selects the positive limit below */
1441 1.1 mrg
1442 1.1 mrg .Lfixsfdi_maxint:
1443 1.1 mrg slli a7, a6, 8 /* 0x80000000 */
1444 1.1 mrg bgez a2, 1f
1445 1.1 mrg mov xh, a7 /* negative: return 0x80000000:00000000 */
1446 1.1 mrg movi xl, 0
1447 1.1 mrg leaf_return
1448 1.1 mrg
1449 1.1 mrg 1: addi xh, a7, -1 /* 0x7fffffff */
1450 1.1 mrg movi xl, -1
1451 1.1 mrg leaf_return
1452 1.1 mrg
1453 1.1 mrg .Lfixsfdi_zero:
1454 1.1 mrg movi xh, 0
1455 1.1 mrg movi xl, 0
1456 1.1 mrg leaf_return
1457 1.1 mrg
1458 1.1 mrg #endif /* L_fixsfdi */
1459 1.1 mrg
1460 1.1 mrg #ifdef L_fixunssfsi
1461 1.1 mrg
1462 1.1 mrg .align 4
1463 1.1 mrg .global __fixunssfsi
1464 1.1 mrg .type __fixunssfsi, @function
/* __fixunssfsi: convert single-precision float to unsigned 32-bit
   integer.
   In:  a2 = x.
   Out: a2 = x with the fraction discarded. Inputs with exponent field
   below 0x7f give 0. Negative in-range inputs return the two's
   complement negation of the truncated magnitude. Positive overflow
   and +Infinity give 0xffffffff, negative overflow and -Infinity give
   0x80000000, and NaN gives 0xffffffff.
   Scratch: a4, a5, a6, a7. */
1465 1.1 mrg __fixunssfsi:
1466 1.1 mrg leaf_entry sp, 16
1467 1.1 mrg
1468 1.1 mrg /* Check for NaN and Infinity. */
1469 1.1 mrg movi a6, 0x7f800000
1470 1.1 mrg ball a2, a6, .Lfixunssfsi_nan_or_inf
1471 1.1 mrg
1472 1.1 mrg /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */
1473 1.1 mrg extui a4, a2, 23, 8
1474 1.1 mrg addi a4, a4, -0x7f
1475 1.1 mrg bgei a4, 32, .Lfixunssfsi_maxint
1476 1.1 mrg bltz a4, .Lfixunssfsi_zero
1477 1.1 mrg
1478 1.1 mrg /* Add explicit "1.0" and shift << 8. */
1479 1.1 mrg or a7, a2, a6 /* forces bit 23 to 1; bit 31 of a7 is still the sign of x */
1480 1.1 mrg slli a5, a7, 8 /* leading 1 now in bit 31 */
1481 1.1 mrg
1482 1.1 mrg /* Shift back to the right, based on the exponent. */
1483 1.1 mrg addi a4, a4, 1 /* a4 = exp - 0x7e, the number of integer bits */
1484 1.1 mrg beqi a4, 32, .Lfixunssfsi_bigexp /* all 32 bits are integer bits: no right shift needed */
1485 1.1 mrg ssl a4 /* shift by 32 - a4 */
1486 1.1 mrg srl a5, a5
1487 1.1 mrg
1488 1.1 mrg /* Negate the result if sign != 0. */
1489 1.1 mrg neg a2, a5
1490 1.1 mrg movgez a2, a5, a7 /* keep the un-negated value when x is non-negative */
1491 1.1 mrg leaf_return
1492 1.1 mrg
1493 1.1 mrg .Lfixunssfsi_nan_or_inf:
1494 1.1 mrg /* Handle Infinity and NaN. */
1495 1.1 mrg slli a4, a2, 9
1496 1.1 mrg beqz a4, .Lfixunssfsi_maxint /* zero mantissa: Infinity, saturate by sign */
1497 1.1 mrg
1498 1.1 mrg /* Translate NaN to 0xffffffff. */
1499 1.1 mrg movi a2, -1
1500 1.1 mrg leaf_return
1501 1.1 mrg
1502 1.1 mrg .Lfixunssfsi_maxint:
1503 1.1 mrg slli a4, a6, 8 /* 0x80000000 */
1504 1.1 mrg movi a5, -1 /* 0xffffffff */
1505 1.1 mrg movgez a4, a5, a2 /* pick 0xffffffff when a2 >= 0 */
1506 1.1 mrg mov a2, a4
1507 1.1 mrg leaf_return
1508 1.1 mrg
1509 1.1 mrg .Lfixunssfsi_zero:
1510 1.1 mrg movi a2, 0
1511 1.1 mrg leaf_return
1512 1.1 mrg
1513 1.1 mrg .Lfixunssfsi_bigexp:
1514 1.1 mrg /* Handle unsigned maximum exponent case. */
1515 1.1 mrg bltz a2, 1f
1516 1.1 mrg mov a2, a5 /* no shift needed */
1517 1.1 mrg leaf_return
1518 1.1 mrg
1519 1.1 mrg /* Return 0x80000000 if negative. */
1520 1.1 mrg 1: slli a2, a6, 8
1521 1.1 mrg leaf_return
1522 1.1 mrg
1523 1.1 mrg #endif /* L_fixunssfsi */
1524 1.1 mrg
1525 1.1 mrg #ifdef L_fixunssfdi
1526 1.1 mrg
1527 1.1 mrg .align 4
1528 1.1 mrg .global __fixunssfdi
1529 1.1 mrg .type __fixunssfdi, @function
/* __fixunssfdi: convert single-precision float to unsigned 64-bit
   integer.
   In:  a2 = x.
   Out: xh:xl = x with the fraction discarded (xh/xl are the
   endian-dependent names for a2/a3 defined at the top of the file).
   Inputs with exponent field below 0x7f give 0. Negative in-range
   inputs return the two's complement negation of the truncated
   magnitude. Positive overflow, +Infinity and NaN give all ones;
   negative overflow and -Infinity give 0x8000000000000000.
   Scratch: a4, a6, a7. */
1530 1.1 mrg __fixunssfdi:
1531 1.1 mrg leaf_entry sp, 16
1532 1.1 mrg
1533 1.1 mrg /* Check for NaN and Infinity. */
1534 1.1 mrg movi a6, 0x7f800000
1535 1.1 mrg ball a2, a6, .Lfixunssfdi_nan_or_inf
1536 1.1 mrg
1537 1.1 mrg /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */
1538 1.1 mrg extui a4, a2, 23, 8
1539 1.1 mrg addi a4, a4, -0x7f
1540 1.1 mrg bgei a4, 64, .Lfixunssfdi_maxint
1541 1.1 mrg bltz a4, .Lfixunssfdi_zero
1542 1.1 mrg
1543 1.1 mrg /* Add explicit "1.0" and shift << 8. */
1544 1.1 mrg or a7, a2, a6 /* forces bit 23 to 1; a7 also preserves the sign, since xh may alias a2 */
1545 1.1 mrg slli xh, a7, 8 /* leading 1 now in bit 31 of xh */
1546 1.1 mrg
1547 1.1 mrg /* Shift back to the right, based on the exponent. */
1548 1.1 mrg addi a4, a4, 1 /* a4 = exp - 0x7e, the number of integer bits */
1549 1.1 mrg beqi a4, 64, .Lfixunssfdi_bigexp /* all 64 bits are integer bits: no right shift needed */
1550 1.1 mrg ssl a4 /* shift by 64 - a4 */
1551 1.1 mrg bgei a4, 32, .Lfixunssfdi_smallshift
1552 1.1 mrg srl xl, xh /* a4 < 32: the whole result fits in the low word */
1553 1.1 mrg movi xh, 0
1554 1.1 mrg
1555 1.1 mrg .Lfixunssfdi_shifted:
1556 1.1 mrg /* Negate the result if sign != 0. */
1557 1.1 mrg bgez a7, 1f
1558 1.1 mrg neg xl, xl
1559 1.1 mrg neg xh, xh
1560 1.1 mrg beqz xl, 1f /* no borrow into the high word when the low word is zero */
1561 1.1 mrg addi xh, xh, -1
1562 1.1 mrg 1: leaf_return
1563 1.1 mrg
1564 1.1 mrg .Lfixunssfdi_smallshift:
1565 1.1 mrg movi xl, 0
1566 1.1 mrg src xl, xh, xl /* low word = low half of (xh:0) shifted right by SAR */
1567 1.1 mrg srl xh, xh
1568 1.1 mrg j .Lfixunssfdi_shifted
1569 1.1 mrg
1570 1.1 mrg .Lfixunssfdi_nan_or_inf:
1571 1.1 mrg /* Handle Infinity and NaN. */
1572 1.1 mrg slli a4, a2, 9
1573 1.1 mrg beqz a4, .Lfixunssfdi_maxint /* zero mantissa: Infinity, saturate by sign */
1574 1.1 mrg
1575 1.1 mrg /* Translate NaN to 0xffffffff.... */
1576 1.1 mrg 1: movi xh, -1
1577 1.1 mrg movi xl, -1
1578 1.1 mrg leaf_return
1579 1.1 mrg
1580 1.1 mrg .Lfixunssfdi_maxint:
1581 1.1 mrg bgez a2, 1b /* non-negative: return all ones */
1582 1.1 mrg 2: slli xh, a6, 8 /* 0x80000000 */
1583 1.1 mrg movi xl, 0
1584 1.1 mrg leaf_return
1585 1.1 mrg
1586 1.1 mrg .Lfixunssfdi_zero:
1587 1.1 mrg movi xh, 0
1588 1.1 mrg movi xl, 0
1589 1.1 mrg leaf_return
1590 1.1 mrg
1591 1.1 mrg .Lfixunssfdi_bigexp:
1592 1.1 mrg /* Handle unsigned maximum exponent case. */
1593 1.1 mrg bltz a7, 2b /* negative: return 0x80000000:00000000 */
1594 1.1 mrg movi xl, 0 /* xh already holds the mantissa with its leading 1 in bit 31 */
1595 1.1 mrg leaf_return /* no shift needed */
1596 1.1 mrg
1597 1.1 mrg #endif /* L_fixunssfdi */
1598 1.1 mrg
1599 1.1 mrg #ifdef L_floatsisf
1600 1.1 mrg
1601 1.1 mrg .align 4
1602 1.1 mrg .global __floatunsisf
1603 1.1 mrg .type __floatunsisf, @function
/* __floatunsisf: convert unsigned 32-bit integer to single-precision
   float.
   In:  a2 = value.  Out: a2 = float bit pattern, rounded to nearest
   with ties going to even.
   Shares the normalize/round code of __floatsisf below, entering it
   with the sign (a7) forced to zero.
   Scratch: a4, a5, a6, a7. */
1604 1.1 mrg __floatunsisf:
1605 1.1 mrg leaf_entry sp, 16
1606 1.1 mrg beqz a2, .Lfloatsisf_return /* zero converts to +0.0, which is all-zero bits */
1607 1.1 mrg
1608 1.1 mrg /* Set the sign to zero and jump to the floatsisf code. */
1609 1.1 mrg movi a7, 0
1610 1.1 mrg j .Lfloatsisf_normalize
1611 1.1 mrg
1612 1.1 mrg .align 4
1613 1.1 mrg .global __floatsisf
1614 1.1 mrg .type __floatsisf, @function
/* __floatsisf: convert signed 32-bit integer to single-precision float.
   In:  a2 = value.  Out: a2 = float bit pattern, rounded to nearest
   with ties going to even.
   Scratch: a4, a5, a6, a7. */
1615 1.1 mrg __floatsisf:
1616 1.1 mrg leaf_entry sp, 16
1617 1.1 mrg
1618 1.1 mrg /* Check for zero. */
1619 1.1 mrg beqz a2, .Lfloatsisf_return
1620 1.1 mrg
1621 1.1 mrg /* Save the sign. */
1622 1.1 mrg extui a7, a2, 31, 1
1623 1.1 mrg
1624 1.1 mrg /* Get the absolute value. */
1625 1.1 mrg #if XCHAL_HAVE_ABS
1626 1.1 mrg abs a2, a2
1627 1.1 mrg #else
1628 1.1 mrg neg a4, a2
1629 1.1 mrg movltz a2, a4, a2 /* replace a2 with -a2 only when a2 is negative */
1630 1.1 mrg #endif
1631 1.1 mrg
1632 1.1 mrg .Lfloatsisf_normalize:
1633 1.1 mrg /* Normalize with the first 1 bit in the msb. */
1634 1.1 mrg do_nsau a4, a2, a5, a6 /* macro defined elsewhere; presumably a4 = leading-zero count of a2, with a5/a6 as scratch */
1635 1.1 mrg ssl a4
1636 1.1 mrg sll a5, a2 /* a5 = a2 << a4 */
1637 1.1 mrg
1638 1.1 mrg /* Shift the mantissa into position, with rounding bits in a6. */
1639 1.1 mrg srli a2, a5, 8 /* leading 1 lands in bit 23 */
1640 1.1 mrg slli a6, a5, (32 - 8) /* the 8 discarded bits, msb-aligned */
1641 1.1 mrg
1642 1.1 mrg /* Set the exponent. */
1643 1.1 mrg movi a5, 0x9d /* 0x7e + 31 */
1644 1.1 mrg sub a5, a5, a4
1645 1.1 mrg slli a5, a5, 23
1646 1.1 mrg add a2, a2, a5 /* the leading 1 in bit 23 adds one more to the exponent field */
1647 1.1 mrg
1648 1.1 mrg /* Add the sign. */
1649 1.1 mrg slli a7, a7, 31
1650 1.1 mrg or a2, a2, a7
1651 1.1 mrg
1652 1.1 mrg /* Round up if the leftover fraction is >= 1/2. */
1653 1.1 mrg bgez a6, .Lfloatsisf_return /* top discarded bit clear: fraction < 1/2 */
1654 1.1 mrg addi a2, a2, 1 /* Overflow to the exponent is OK. */
1655 1.1 mrg
1656 1.1 mrg /* Check if the leftover fraction is exactly 1/2. */
1657 1.1 mrg slli a6, a6, 1
1658 1.1 mrg beqz a6, .Lfloatsisf_exactlyhalf
1659 1.1 mrg
1660 1.1 mrg .Lfloatsisf_return:
1661 1.1 mrg leaf_return
1662 1.1 mrg
1663 1.1 mrg .Lfloatsisf_exactlyhalf:
1664 1.1 mrg /* Round down to the nearest even value. */
1665 1.1 mrg srli a2, a2, 1
1666 1.1 mrg slli a2, a2, 1 /* clear bit 0 of the already-incremented result */
1667 1.1 mrg leaf_return
1668 1.1 mrg
1669 1.1 mrg #endif /* L_floatsisf */
1670 1.1 mrg
1671 1.1 mrg #ifdef L_floatdisf
1672 1.1 mrg
1673 1.1 mrg .align 4
1674 1.1 mrg .global __floatundisf
1675 1.1 mrg .type __floatundisf, @function
/* __floatundisf: convert unsigned 64-bit integer to single-precision
   float.
   In:  xh:xl = value (xh/xl are the endian-dependent names for a2/a3
   defined at the top of the file).
   Out: a2 = float bit pattern, rounded to nearest with ties going to
   even.
   Shares the normalize/round code of __floatdisf below, entering it
   with the sign (a7) forced to zero.
   Scratch: a4, a5, a6, a7, and both input registers. */
1676 1.1 mrg __floatundisf:
1677 1.1 mrg leaf_entry sp, 16
1678 1.1 mrg
1679 1.1 mrg /* Check for zero. */
1680 1.1 mrg or a4, xh, xl
1681 1.1 mrg beqz a4, 2f /* label 2 is the shared leaf_return in __floatdisf */
1682 1.1 mrg
1683 1.1 mrg /* Set the sign to zero and jump to the floatdisf code. */
1684 1.1 mrg movi a7, 0
1685 1.1 mrg j .Lfloatdisf_normalize
1686 1.1 mrg
1687 1.1 mrg .align 4
1688 1.1 mrg .global __floatdisf
1689 1.1 mrg .type __floatdisf, @function
/* __floatdisf: convert signed 64-bit integer to single-precision float.
   In:  xh:xl = value.
   Out: a2 = float bit pattern, rounded to nearest with ties going to
   even.
   Scratch: a4, a5, a6, a7, and both input registers. */
1690 1.1 mrg __floatdisf:
1691 1.1 mrg leaf_entry sp, 16
1692 1.1 mrg
1693 1.1 mrg /* Check for zero. */
1694 1.1 mrg or a4, xh, xl
1695 1.1 mrg beqz a4, 2f
1696 1.1 mrg
1697 1.1 mrg /* Save the sign. */
1698 1.1 mrg extui a7, xh, 31, 1
1699 1.1 mrg
1700 1.1 mrg /* Get the absolute value. */
1701 1.1 mrg bgez xh, .Lfloatdisf_normalize
1702 1.1 mrg neg xl, xl
1703 1.1 mrg neg xh, xh
1704 1.1 mrg beqz xl, .Lfloatdisf_normalize /* no borrow into the high word when the low word is zero */
1705 1.1 mrg addi xh, xh, -1
1706 1.1 mrg
1707 1.1 mrg .Lfloatdisf_normalize:
1708 1.1 mrg /* Normalize with the first 1 bit in the msb of xh. */
1709 1.1 mrg beqz xh, .Lfloatdisf_bigshift
1710 1.1 mrg do_nsau a4, xh, a5, a6 /* macro defined elsewhere; presumably a4 = leading-zero count of xh, with a5/a6 as scratch */
1711 1.1 mrg ssl a4
1712 1.1 mrg src xh, xh, xl /* xh = high word of (xh:xl) << a4 */
1713 1.1 mrg sll xl, xl /* xl <<= a4 */
1714 1.1 mrg
1715 1.1 mrg .Lfloatdisf_shifted:
1716 1.1 mrg /* Shift the mantissa into position, with rounding bits in a6. */
1717 1.1 mrg ssai 8
1718 1.1 mrg sll a5, xl /* a5 = xl << 24: the bits that fall below a6 */
1719 1.1 mrg src a6, xh, xl /* a6 = low word of (xh:xl) >> 8: the rounding bits */
1720 1.1 mrg srl xh, xh /* xh >>= 8: leading 1 lands in bit 23 */
1721 1.1 mrg beqz a5, 1f
1722 1.1 mrg movi a5, 1
1723 1.1 mrg or a6, a6, a5 /* bit 0 of a6 records that lower bits were nonzero */
1724 1.1 mrg 1:
1725 1.1 mrg /* Set the exponent. */
1726 1.1 mrg movi a5, 0xbd /* 0x7e + 63 */
1727 1.1 mrg sub a5, a5, a4
1728 1.1 mrg slli a5, a5, 23
1729 1.1 mrg add a2, xh, a5 /* the leading 1 in bit 23 adds one more to the exponent field */
1730 1.1 mrg
1731 1.1 mrg /* Add the sign. */
1732 1.1 mrg slli a7, a7, 31
1733 1.1 mrg or a2, a2, a7
1734 1.1 mrg
1735 1.1 mrg /* Round up if the leftover fraction is >= 1/2. */
1736 1.1 mrg bgez a6, 2f /* top rounding bit clear: fraction < 1/2 */
1737 1.1 mrg addi a2, a2, 1 /* Overflow to the exponent is OK. */
1738 1.1 mrg
1739 1.1 mrg /* Check if the leftover fraction is exactly 1/2. */
1740 1.1 mrg slli a6, a6, 1
1741 1.1 mrg beqz a6, .Lfloatdisf_exactlyhalf
1742 1.1 mrg 2: leaf_return
1743 1.1 mrg
1744 1.1 mrg .Lfloatdisf_bigshift:
1745 1.1 mrg /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
1746 1.1 mrg do_nsau a4, xl, a5, a6 /* macro defined elsewhere; presumably a4 = leading-zero count of xl, with a5/a6 as scratch */
1747 1.1 mrg ssl a4
1748 1.1 mrg sll xh, xl /* xh = xl << a4 */
1749 1.1 mrg movi xl, 0
1750 1.1 mrg addi a4, a4, 32 /* account for the 32 zero bits of the original xh */
1751 1.1 mrg j .Lfloatdisf_shifted
1752 1.1 mrg
1753 1.1 mrg .Lfloatdisf_exactlyhalf:
1754 1.1 mrg /* Round down to the nearest even value. */
1755 1.1 mrg srli a2, a2, 1
1756 1.1 mrg slli a2, a2, 1 /* clear bit 0 of the already-incremented result */
1757 1.1 mrg leaf_return
1758 1.1 mrg
1759 1.1 mrg #endif /* L_floatdisf */
1760