;; Copyright (C) 2019-2022 Free Software Foundation, Inc.
;;
;; This file is part of LIBF7, which is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;; for more details.
;;
;; Under Section 7 of GPL version 3, you are granted additional
;; permissions described in the GCC Runtime Library Exception, version
;; 3.1, as published by the Free Software Foundation.
;;
;; You should have received a copy of the GNU General Public License and
;; a copy of the GCC Runtime Library Exception along with this program;
;; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
;; <http://www.gnu.org/licenses/>.  */

;; The code below is only assembled when __AVR_TINY__ is not defined.
#ifndef __AVR_TINY__

#define ASM_DEFS_HAVE_DEFUN

#include "asm-defs.h"
#include "libf7.h"

;; Short names for the two fixed registers used all over this file.
;; ZERO is expected to read as 0 except where a routine borrows it as
;; a scratch register or loop counter and clears it again afterwards.
#define ZERO    __zero_reg__
#define TMP     __tmp_reg__

;; Map a short routine name to its assembly symbol: the suffix _asm is
;; appended and the result is handed to F7_, which is not defined in this
;; file (presumably it comes from libf7.h and adds the library prefix).
#define F7(name)   F7_(name##_asm)

;; Call the assembly routine with the given short name.  The symbol is
;; declared global first so that it can be resolved from another module.
;; XCALL is not defined in this file (presumably asm-defs.h).
.macro F7call name
    .global F7(\name\())
    XCALL   F7(\name\())
.endm

;; Same as F7call, but as a jump (tail call) instead of a call.
.macro F7jmp name
    .global F7(\name\())
    XJMP    F7(\name\())
.endm

;; Just for visibility in disassembly.
;; Emit a global label with prefix LLL. followed by one nop.
.macro LLL name
    .global LLL.\name
    LLL.\name:
    nop
.endm

;; Open a routine: put it into its own .text.libf7.asm.* section, make its
;; assembly symbol global and start a .func scope.  Close with ENDF.
.macro DEFUN name
    .section .text.libf7.asm.\name, "ax", @progbits
    .global F7(\name\())
    .func F7(\name\())
    F7(\name\()) :
.endm

;; Close a routine opened with DEFUN: set the symbol size, end the .func.
.macro ENDF name
    .size F7(\name\()), . - F7(\name\())
    .endfunc
.endm

;; Additional global entry point inside a routine opened with DEFUN.
.macro LABEL name
    .global F7(\name\())
    F7(\name\()) :
.endm

;; Like DEFUN, but the name is used as is (no symbol mapping) and the
;; symbol is weak.  Close with _ENDF.
.macro _DEFUN name
    .section .text.libf7.asm.\name, "ax", @progbits
    .weak \name
    .type \name, @function
    \name :
.endm

;; Close a routine opened with _DEFUN: set the symbol size.
.macro _ENDF name
    .size \name, . - \name
.endm

;; Additional weak entry point; the name is used as is.
.macro _LABEL name
    .weak \name
    .type \name, @function
    \name :
.endm

#define F7_NAME(X)   F7_(X)

;; Make a weak alias.
.macro  ALIAS  sym
    .weak \sym
    .type \sym, @function
    \sym:
.endm

;; Make a weak alias if double is 64 bits wide.
.macro  DALIAS  sym
#if defined (WITH_LIBF7_MATH_SYMBOLS) && __SIZEOF_DOUBLE__ == 8
    ALIAS \sym
#endif
.endm

;; Make a weak alias if long double is 64 bits wide.
.macro  LALIAS  sym
#if defined (WITH_LIBF7_MATH_SYMBOLS) && __SIZEOF_LONG_DOUBLE__ == 8
    ALIAS \sym
#endif
.endm

;; Byte offsets into the object addressed by Z / X as used by the code
;; below: the flags byte is at offset 0, the mantissa bytes start at Off
;; (least significant byte first; byte 6 is tested for the top bit), and
;; the 2-byte exponent starts at Expo (low byte first).
#define     Off 1
#define     Expo (Off + F7_MANT_BYTES)

#ifdef F7MOD_classify_
;;  r24 = classify (*Z)
;;  NaN  ->  F7_FLAG_nan
;;  INF  ->  F7_FLAG_inf [ | F7_FLAG_sign ]
;;  ==0  ->  F7_FLAG_zero
;;  ...  ->  0 [ | F7_FLAG_sign ]

;; Clobbers:  None (no TMP, no T).
DEFUN classify

    ld      r24,    Z               ; r24 = .flags
    lsr     r24                     ; C = bit 0 of .flags
    brne .Lnan_or_inf               ; some other flag bit is set

    ;; Only bit 0 (or nothing) is set in .flags: look at the mantissa MSB.
    ldd     r24,    Z+6+Off
    tst     r24                     ; tst leaves C alone: C is still bit 0
    brpl 0f                         ; top mantissa bit clear => report zero
    sbc     r24,    r24             ; r24 = C ? 0xff : 0x00
    andi    r24,    F7_FLAG_sign
    ret

0:  ldi     r24,    F7_FLAG_zero
    ret

.Lnan_or_inf:
    rol     r24                     ; undo the lsr: r24 = .flags again
    ret

ENDF classify
#endif /* F7MOD_classify_ */

#ifdef F7MOD_clr_
;; Store zero to the flags byte, all 7 mantissa bytes and both exponent
;; bytes of *Z.  No register is changed.
DEFUN clr
    std     Z+0,        ZERO
    std     Z+0+Off,    ZERO
    std     Z+1+Off,    ZERO
    std     Z+2+Off,    ZERO
    std     Z+3+Off,    ZERO
    std     Z+4+Off,    ZERO
    std     Z+5+Off,    ZERO
    std     Z+6+Off,    ZERO
    std     Z+0+Expo,   ZERO
    std     Z+1+Expo,   ZERO
    ret
ENDF clr

#endif /* F7MOD_clr_ */

#ifdef F7MOD_clz_
;; The libgcc CLZ implementations like __clzsi2 aka. __builtin_clzl are
;; not very well suited for our purpose, so implement our own.

#define ZBITS   r26
;; Test one byte, starting from the most significant one.  ZERO is 0 on
;; entry, so the OR copies the byte into ZERO.  A non-zero byte is handed
;; to .Loop_bit in ZERO; a zero byte accounts for 8 more leading zeros.
.macro  .test.byte  reg
    or      ZERO,   \reg
    brne    .Loop_bit
    subi    ZBITS, -8
.endm

;; R26 = CLZ (uint64_t R18);  CLZ (0) = 64.
;; Unchanged: T
DEFUN clzdi2
    clr     ZBITS
    ;; Catch the common case of normalized .mant for speed-up.
    tst     r25
    brmi 9f
    .test.byte  r25
    .test.byte  r24
    .test.byte  r23
    .test.byte  r22
    .test.byte  r21
    .test.byte  r20
    .test.byte  r19
    .test.byte  r18
.Ldone:
    clr     ZERO                    ; ZERO was used as scratch: restore 0
9:  ret

    ;; ZERO holds the first non-zero byte.  Shift it left until the top
    ;; set bit drops into C, counting one leading zero per extra shift.
.Loop_bit:
    lsl     ZERO
    brcs .Ldone
    inc     ZBITS
    rjmp .Loop_bit

ENDF clzdi2
#undef  ZBITS
#endif /* F7MOD_clz_ */

#ifdef F7MOD_cmp_mant_
;; Compare the 7-byte mantissa of *X against the one of *Z as unsigned
;; numbers.
;; r24 = 0 if equal, 0xff if *X is smaller, 0x01 if *X is bigger.
;; X and Z are the same on return as on entry.  Clobbers: TMP.
DEFUN cmp_mant

    ;; Try the most significant byte first.
    adiw    X,  6 + Off
    ld      r24,    X       $ ldd   TMP,    Z+6+Off     $ SUB   r24, TMP
    brne .Lunequal

    ;; Full multi-byte compare of the 6 low bytes; X ends up at 6 + Off.
    sbiw    X,  6
    ld      r24,    X+      $ ldd   TMP,    Z+0+Off     $ SUB   r24, TMP
    ld      r24,    X+      $ ldd   TMP,    Z+1+Off     $ sbc   r24, TMP
    ld      r24,    X+      $ ldd   TMP,    Z+2+Off     $ sbc   r24, TMP
    ld      r24,    X+      $ ldd   TMP,    Z+3+Off     $ sbc   r24, TMP
    ld      r24,    X+      $ ldd   TMP,    Z+4+Off     $ sbc   r24, TMP
    ld      r24,    X+      $ ldd   TMP,    Z+5+Off     $ sbc   r24, TMP
    ;; MSBs are already known to be equal
    breq 9f
.Lunequal:
    sbc     r24,    r24             ; C = 1: 0xff,  C = 0: 0x00
    sbci    r24,    -1              ; C = 1: 0xff,  C = 0: 0x01
9:  sbiw    X,      6 + Off         ; restore X on all paths
    ret
ENDF cmp_mant
#endif /* F7MOD_cmp_mant_ */

;; Register numbers of the working value C[]: CA is the byte below the
;; mantissa (register 18), C0...C6 are the mantissa bytes (registers
;; 19...25).  Flags names the same register as CA.
#define CA 18
#define C0 CA+1
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3
#define C4 C0+4
#define C5 C0+5
#define C6 C0+6
#define Carry   r16
#define Flags   18

#ifdef F7MOD_store_
;; Z->flags = CA.
;; Z->mant  = C[7].
DEFUN store_mant.with_flags
    st      Z,      CA

;; Z->mant = C[7].
LABEL store_mant
    std     Z+0+Off,    C0
    std     Z+1+Off,    C1
    std     Z+2+Off,    C2
    std     Z+3+Off,    C3
    std     Z+4+Off,    C4
    std     Z+5+Off,    C5
    std     Z+6+Off,    C6
    ret
ENDF store_mant.with_flags
#endif /* F7MOD_store_ */

#ifdef F7MOD_load_
;; CA   = Z->flags
;; C[7] = Z->mant
DEFUN load_mant.with_flags
    ld      CA,     Z
    skipnext

;; CA   = 0
;; C[7] = Z->mant
LABEL load_mant.clr_CA
LABEL load_mant.clr_flags
    clr     CA      ; May be skipped

;; C[7] = Z->mant
LABEL load_mant
    ldd     C0,     Z+0+Off
    ldd     C1,     Z+1+Off
    ldd     C2,     Z+2+Off
    ldd     C3,     Z+3+Off
    ldd     C4,     Z+4+Off
    ldd     C5,     Z+5+Off
    ldd     C6,     Z+6+Off
    ret
ENDF load_mant.with_flags
#endif /* F7MOD_load_ */

#ifdef F7MOD_copy_
;; Copy 10 bytes from *X to *Z, highest address first.  Nothing is done
;; if X == Z.  X and Z are the same on return as on entry.
;; ZERO serves as loop counter and is 0 again when the loop ends.
;; Clobbers: TMP, and T when a copy takes place.
DEFUN copy
    cp      XL,     ZL
    cpc     XH,     ZH
    breq 9f
    adiw    XL,     10
    adiw    ZL,     10
    set
    bld     ZERO,   1
    bld     ZERO,   3   ; ZERO = 0b1010 = 10.
.Loop:
    ld      TMP,    -X
    st      -Z,     TMP
    dec     ZERO
    brne .Loop
9:  ret
ENDF copy
#endif /* F7MOD_copy_ */

#ifdef F7MOD_copy_P_
;; Copy 10 bytes read with lpm from address Z to the memory at X.
;; X and Z are the same on return as on entry.
;; ZERO serves as loop counter and is 0 again when the loop ends.
;; Clobbers: TMP, T.
DEFUN copy_P
    set
    bld     ZERO,   1
    bld     ZERO,   3   ; ZERO = 0b1010 = 10.
.Loop:
#ifdef __AVR_HAVE_LPMX__
    lpm     TMP,    Z+
#else
    lpm                 ; plain lpm: the byte is picked up from TMP below
    adiw    Z,      1
#endif /* Have LPMx */
    st      X+,     TMP
    dec     ZERO
    brne .Loop
    sbiw    X,      10
    sbiw    Z,      10
    ret
ENDF copy_P
#endif /* F7MOD_copy_P_ */

#ifdef F7MOD_copy_mant_
;; Copy the 7 bytes at offsets 1...7 from *X to *Z.  Nothing is done if
;; X == Z.  X and Z are the same on return as on entry.
;; ZERO serves as loop counter and is 0 again when the loop ends.
;; Clobbers: TMP, and T when a copy takes place.
DEFUN copy_mant
    cp      XL,     ZL
    cpc     XH,     ZH
    breq 9f
    adiw    XL,     1
    adiw    ZL,     1
    set
    bld     ZERO,   3
    dec     ZERO        ; ZERO = 7
.Loop:
    ld      TMP,    X+
    st      Z+,     TMP
    dec     ZERO
    brne .Loop
    sbiw    XL,     8   ; 1 + 7 bytes back to the start
    sbiw    ZL,     8
9:  ret
ENDF copy_mant
#endif /* F7MOD_copy_mant_ */


#ifdef F7MOD_clr_mant_lsbs_
;; In: r24 = pointer that receives the mantissa, r22 = pointer the
;;     mantissa is read from, r20 = shift count.
;; The mantissa is loaded into C[], shifted right by r20 bits, CA is
;; cleared, the value is shifted left by r20 bits again and C[] is
;; stored through r24.
;; r16 is saved and restored.  X is overwritten with the r24 pointer.
DEFUN clr_mant_lsbs
    push    r16
    mov     r16,    r20
    wmov    XL,     r24

    wmov    ZL,     r22
    F7call  load_mant

    F7call  lshrdi3

    clr     CA

    F7call  ashldi3

    pop     r16

    wmov    ZL,     XL
    F7jmp   store_mant

ENDF clr_mant_lsbs
#endif /* F7MOD_clr_mant_lsbs_ */


#ifdef F7MOD_normalize_with_carry_
;; Z = &f7_t
;; C[] = .mant may be not normalized
;; Carry === r16 = Addend to Z->expo in [-64, 128).
;; Normalize C[], set Flags, and adjust Z->expo.
;; Return CA (after normalization) in TMP.
;; Unchanged: T
#define Addend  r17
#define Zbits   r26
#define expL    r26
#define expH    r27
DEFUN normalize_with_carry
    mov     Addend, Carry
    tst     C6
    brmi .Lshift.0                  ; top bit already set: no shift needed
    ;; r26 = CLZ (uint64_t R18)
    F7call  clzdi2
    cpi     Zbits, 64
    breq .Lclr                      ; all 64 bits are zero
    sub     Addend, Zbits           ; shifting left by n costs n in .expo
    mov     r16,    Zbits

    F7call  ashldi3
    ;; Assert (R25.7 == 1)
.Lshift.0:
    mov     TMP,    CA              ; keep the byte below .mant for the caller
    ld      Flags,  Z               ; Flags is the same register as CA

    ;; .expo += Addend
    ldd     expL,   Z+0+Expo
    ldd     expH,   Z+1+Expo
    ;; Sign-extend Addend
    clr     r16
    sbrc    Addend, 7
    com     r16

    ;; exp += (int8_t) Addend, i.e. sign-extend Addend.
    add     expL,   Addend
    adc     expH,   r16
    brvc .Lnormal
    ;; Signed overflow of .expo: the sign of the addend tells the direction.
    tst     r16
    brmi .Lclr
    ;; Overflow
    ;; Only the Flags register is updated here; .expo is not stored.
#if F7_HAVE_Inf == 1
    ori     Flags,  F7_FLAG_inf
#else
    ldi     Flags,  F7_FLAG_nan
#endif /* Have Inf */
    ret

.Lnormal:
    std     Z+0+Expo,   expL
    std     Z+1+Expo,   expH
    ret

.Lclr:
    ;; Underflow or Zero.
    clr     TMP
    ;; Tail call; __clr_8 is defined outside this file.
    .global __clr_8
    XJMP    __clr_8

;; The three entry points below share one body: normalize C[] with
;; normalize_with_carry, optionally round, then store flags and mantissa
;; to *Z.  They differ in how T is set up:
;;   normalize.store_with_flags             T = 1, no rounding
;;   normalize.round.store_with_flags       T = 0, round
;;   normalize.maybe_round.store_with_flags T as passed in by the caller
;; The byte below .mant after normalization is returned in R24.
LABEL normalize.store_with_flags
    ;; no rounding
    set
    skipnext
LABEL normalize.round.store_with_flags
    ;; with rounding
    clt     ; skipped ?
LABEL normalize.maybe_round.store_with_flags
    F7call  normalize_with_carry
    ;; We have:
    ;; Z   = &f7_t
    ;; X   = .expo
    ;; C[] = .mant
    ;; R18 = .flags
    ;; TMP = byte below .mant after normalization
    ;; T = 1  =>  no rounding.
    brts .Lstore
    lsl     TMP                     ; C = top bit of the byte below .mant
    adc     C0,     ZERO            ; add it to the mantissa as round-up
    brcc .Lstore
    adc     C1,     ZERO
    adc     C2,     ZERO
    adc     C3,     ZERO
    adc     C4,     ZERO
    adc     C5,     ZERO
    adc     C6,     ZERO
    brcc .Lstore
    ;; We only come here if C6 overflowed, i.e. C[] is 0 now.
    ;; .mant = 1.0 by restoring the MSbit.
    ror     C6
    ;; .expo += 1 and override the .expo stored during normalize.
    adiw    expL,   1
    std     Z+0+Expo,   expL
    std     Z+1+Expo,   expH

.Lstore:
    F7call  store_mant.with_flags

    ;; Return the byte below .mant after normalization.
    ;; This is only useful without rounding; the caller will know.
    mov     R24,    TMP
    ret
ENDF normalize_with_carry
#endif /* F7MOD_normalize_with_carry_ */


#ifdef F7MOD_normalize_
;; Using above functionality from C.
;; f7_t* normalize (f7_t *cc)
;; Adjusts cc->expo
;; Clears cc->flags
DEFUN normalize
    push    r17                     ; r17 and r16 are used by the callee
    push    r16
    wmov    ZL,     r24
    F7call  load_mant.clr_CA
    clr     Carry                   ; nothing to add to .expo up front
    st      Z,      ZERO            ; cc->flags = 0
    F7call  normalize.store_with_flags
    wmov    r24,    Z               ; return cc
    pop     r16
    pop     r17
    ret
ENDF normalize
#endif /* F7MOD_normalize_ */


#ifdef F7MOD_store_expo_
#define Done    r24
#define expLO   r24
#define expHI   r25
;; expo == INT16_MAX  =>  *Z = Inf,         return Done = true.
;; expo == INT16_MIN  =>  *Z = 0x0,         return Done = true.
;; else               =>  Z->expo = expo,   return Done = false.
;; In: Z = pointer to the object, expHI:expLO (r25:r24) = expo.
;; Without F7_HAVE_Inf, the INT16_MAX case stores F7_FLAG_nan to the flags.
DEFUN store_expo
    cpi     expHI,   0x80
    cpc     expLO,  ZERO
    breq .Ltiny                     ; equal only for expo == 0x8000
    adiw    expLO,  1
    brvs .Lhuge                     ; signed overflow only for 0x7fff
    sbiw    expLO,  1
    std     Z+0+Expo,   expLO
    std     Z+1+Expo,   expHI
    ldi     Done,   0
    ret

.Lhuge:
#if F7_HAVE_Inf == 1
    ld      Done,   Z
    andi    Done,   F7_FLAG_sign    ; keep only the sign flag...
    ori     Done,   F7_FLAG_inf     ; ...and add the Inf flag
#else
    ldi     Done,   F7_FLAG_nan
#endif /* Have Inf */
    st      Z,      Done
    ldi     Done,   1
    ret

.Ltiny:
    ldi     Done,   1
    F7jmp   clr
ENDF store_expo
#endif /* F7MOD_store_expo_ */


#ifdef F7MOD_set_u64_
;; In: r17:r16 = pointer to the object to set, C[] together with CA
;;     (registers 18...25) = 64-bit value.
;; set_s64 treats the value as signed: if bit 7 of C6 is set, the sign
;; flag is recorded and the value is negated with __negdi2.
;; set_u64 treats the value as unsigned.
;; Flags and exponent are stored, then the value is normalized with
;; rounding and stored.  Out: r25:r24 = the pointer; r17:r16 is restored.
DEFUN set_s64
    set
    skipnext
    ;; ...
LABEL set_u64
    clt                 ; Skipped?
    wmov    Zl,     r16
    ;; TMP holds .flags.
    clr     TMP
    brtc .Lnot.negative

    bst     C6,     7
    brtc .Lnot.negative
    bld     TMP,    F7_FLAGNO_sign
    .global __negdi2
    XCALL   __negdi2

.Lnot.negative:
    st      Z,          TMP
    std     Z+0+Expo,   ZERO
    std     Z+1+Expo,   ZERO
    ldi     Carry,      63
    F7call  normalize.round.store_with_flags
    wmov    r24,        Z
    wmov    r16,        Z   ; Unclobber r16.
    ret
ENDF set_s64
#endif /* F7MOD_set_u64_ */


#ifdef F7MOD_to_integer_
#define Mask    r26
;; In: r25:r24 = pointer to the object to convert, r22 = Mask.
;; Mask is compared against .expo and also yields the right-shift count
;; ((low byte of .expo) XOR Mask) AND Mask.  Bits 5 and 4 of Mask select
;; how a negative result is negated: bit 5 set uses __negdi2, else bit 4
;; set uses __negsi2, else only C6:C5 are negated.
;; NOTE(review): this looks like Mask = (number of result bits) - 1;
;; confirm against the C callers.
;; Out: the result in C[] with its top byte in C6.
DEFUN to_integer
    wmov    ZL,     r24
    mov     Mask,   r22

    F7call  load_mant.with_flags

    sbrc    Flags, F7_FLAGNO_nan
    rjmp .Lset_0x8000

    sbrc    Flags, F7_FLAGNO_inf
    rjmp .Lsaturate

    sbrs    C6, 7
    rjmp .Lset_0x0000               ; top mantissa bit clear => return 0

    bst     Flags, F7_FLAGNO_sign   ; T = sign
    ldd     r27,    Z+0+Expo
    ;; Does .expo have bits outside Mask? ...
    mov     TMP,    Mask
    com     TMP
    and     TMP,    r27
    ldd     r27,    Z+1+Expo
    tst     r27
    brmi .Lset_0x0000           ; ...yes: .expo is < 0 => return 0
    or      TMP,    r27
    brne .Lsaturate.T           ; ...yes: .expo > Mask => saturate

    ;; ...no:  Shift right to meet .expo = 0.
    PUSH    r16
    ldd     r16,    Z+0+Expo
    eor     r16,    Mask
    and     r16,    Mask
    clr     CA
    F7call  lshrdi3
    POP     r16
    tst     C6
    brmi .Lsaturate.T           ;   > INTxx_MAX => saturate

    brtc 9f                     ;   >= 0 => return
    ;; Negative: each sbrc skips the jump that follows it when the
    ;; tested Mask bit is clear.
    sbrc    Mask,   5
    .global __negdi2
    XJMP    __negdi2
    sbrc    Mask,   4
    .global __negsi2
    XJMP    __negsi2
    neg     C6
    neg     C5
    sbci    C6,     0
9:  ret

.Lsaturate:
    bst     Flags, F7_FLAGNO_sign
.Lsaturate.T:

#if F7_HAVE_Inf
    brtc .Lset_0x7fff
    ;; -Inf => return 1 + INTxx_MIN
    mov     ZL,     Flags
    .global __clr_8
    XCALL   __clr_8
    ldi     C6,     0x80

    ;; Set the lowest byte of the result to 1.  Which register that is
    ;; depends on the width selected by Mask; each sbrs skips the ldi
    ;; that follows it when the tested bit is set.
    ldi     CA+0,   0x01

    sbrs    Mask,   5
    ldi     CA+4,   0x01

    sbrs    Mask,   4
    ldi     CA+6,   0x01
    ret

.Lset_0x7fff:
    ;; +Inf => return INTxx_MAX
    sec
    .global __sbc_8
    XCALL   __sbc_8
    ldi     C6,     0x7f
    ret
#endif /* F7_HAVE_Inf */

.Lset_0x8000:
    ;; NaN => return INTxx_MIN
    .global __clr_8
    XCALL   __clr_8
    ldi     C6,     0x80
    ret

.Lset_0x0000:
    ;; Small value => return 0x0
    .global __clr_8
    XJMP    __clr_8

ENDF to_integer
#endif /* F7MOD_to_integer_ */


#ifdef F7MOD_to_unsigned_
#define Mask    r26
;; In: r25:r24 = pointer to the object to convert, r22 = Mask, used as in
;;     to_integer for the range test and for the shift count.
;; Out: the result in C[].  NaN and +Inf take the 0xffff exit, any value
;;      with the sign flag set takes the 0x0000 exit.
DEFUN to_unsigned
    wmov    ZL,     r24
    mov     Mask,   r22

    F7call  load_mant.with_flags

    sbrc    Flags, F7_FLAGNO_nan
    rjmp .Lset_0xffff

    sbrc    Flags, F7_FLAGNO_sign
    rjmp .Lset_0x0000

    sbrc    Flags, F7_FLAGNO_inf
    rjmp .Lset_0xffff

    sbrs    C6, 7
    rjmp .Lset_0x0000               ; top mantissa bit clear => return 0

    ldd     r27,    Z+0+Expo
    ;; Does .expo have bits outside Mask? ...
    mov     TMP,    Mask
    com     TMP
    and     TMP,    r27
    ldd     r27,    Z+1+Expo
    tst     r27
    brmi .Lset_0x0000           ; ...yes: .expo is < 0 => return 0
    or      TMP,    r27
    brne .Lset_0xffff           ; ...yes: .expo > Mask => saturate

    ;; ...no:  Shift right to meet .expo = 0.
    PUSH    r16
    ldd     r16,    Z+0+Expo
    eor     r16,    Mask
    and     r16,    Mask
    clr     CA
    F7call  lshrdi3
    POP     r16
    ret

.Lset_0xffff:
    ;; return UINTxx_MAX
    sec
    .global __sbc_8
    XJMP    __sbc_8

.Lset_0x0000:
    ;; Small value => return 0x0
    .global __clr_8
    XJMP    __clr_8

ENDF to_unsigned
#endif /* F7MOD_to_unsigned_ */


#ifdef F7MOD_addsub_mant_scaled_
;; int8_t f7_addsub_mant_scaled_asm (f7_t *r24, const f7_t *r22, const f7_t *r20,
;;                                   uint8_t r18);
;; R18.0 = 1 : ADD
;; R18.0 = 0 : SUB
;; R18[7..1] : Scale
;; Compute *R24 = *R22 + *R20 >> R18[7..1].

;; Register numbers of the second operand B[]: BA is register 10 and
;; B0...B6 are registers 11...17.
#define BA  10
#define B0  BA+1
#define B1  B0+1
#define B2  B0+2
#define B3  B0+3
#define B4  B0+4
#define B5  B0+5
#define B6  B0+6

DEFUN addsub_mant_scaled
    do_prologue_saves  10

    bst     r18,    0  ;; ADD ?
    lsr     r18
    mov     r16,    r18             ; r16 = Scale = shift count

    wmov    ZL,     r20
    wmov    YL,     r22
    ;; C[] = bb >> shift
    wmov    XL,     r24

    F7call  load_mant.clr_CA
    F7call  lshrdi3

    ;; Move the shifted operand to B[] and load the other one into C[].
    wmov    BA,     CA
    wmov    B1,     C1
    wmov    B3,     C3
    wmov    B5,     C5
    wmov    ZL,     YL
    F7call  load_mant.clr_CA

    wmov    ZL,     XL              ; Z = result pointer for the store

    brts .Ladd

    .global __subdi3
    XCALL   __subdi3

    breq .Lzero
    brcc .Lround
    ;; C = 1: Can underflow happen at all ?
.Lzero:
    F7call  clr
    rjmp .Lepilogue

.Ladd:
    .global __adddi3
    XCALL   __adddi3
    brcc .Lround
    ;; Carry out of the addition: shift right by one and put the carry
    ;; back as the top bit.  Carry = 1 is then also the .expo addend.
    ldi     Carry,  1
    .global __lshrdi3
    XCALL   __lshrdi3
    ori     C6,     1 << 7
    skipnext
.Lround:
    clr     Carry                   ; skipped?
    F7call  normalize.round.store_with_flags

.Lepilogue:
    do_epilogue_restores 10

ENDF addsub_mant_scaled

#if !defined (__AVR_HAVE_MOVW__) || !defined (__AVR_HAVE_JMP_CALL__)
;; Without MOVW or without JMP / CALL: plain jumps to __lshrdi3 and
;; __ashldi3, which are defined outside this file.
DEFUN lshrdi3
    .global __lshrdi3
    XJMP    __lshrdi3
ENDF lshrdi3
DEFUN ashldi3
    .global __ashldi3
    XJMP    __ashldi3
ENDF ashldi3
#else

# Basically just a wrapper around libgcc's __lshrdi3.
;; Shift registers 18...25 (CA, C0...C6) right by r16.  Bits 5 and 4 of
;; r16 are handled here by moving register pairs; only bits 3...0 are
;; passed on to __lshrdi3.  r16 is saved and restored.
DEFUN lshrdi3
    ;; Handle bit 5 of shift offset.
    sbrs    r16,    5
    rjmp 4f
    wmov    CA,     C3
    wmov    C1,     C5
    clr     C6   $  clr C5  $   wmov    C3, C5
4:
    ;; Handle bit 4 of shift offset.
    sbrs    r16,    4
    rjmp 3f
    wmov    CA, C1
    wmov    C1, C3
    wmov    C3, C5
    clr     C6   $  clr C5
3:
    ;; Handle bits 3...0 of shift offset.
    push    r16
    andi    r16,    0xf
    breq 0f

    .global __lshrdi3
    XCALL   __lshrdi3
0:
    pop     r16
    ret
ENDF lshrdi3

# Basically just a wrapper around libgcc's __ashldi3.
;; Shift registers 18...25 (CA, C0...C6) left by r16.  Bits 5 and 4 of
;; r16 are handled here by moving register pairs; only bits 3...0 are
;; passed on to __ashldi3.  r16 is saved and restored.
DEFUN ashldi3
    ;; Handle bit 5 of shift offset.
    sbrs    r16,    5
    rjmp 4f
    wmov    C5,     C1
    wmov    C3,     CA
    clr     C2   $  clr C1  $   wmov    CA, C1
4:
    ;; Handle bit 4 of shift offset.
    sbrs    r16,    4
    rjmp 3f
    wmov    C5, C3
    wmov    C3, C1
    wmov    C1, CA
    clr     CA   $  clr C0
3:
    ;; Handle bits 3...0 of shift offset.
    push    r16
    andi    r16,    0xf
    breq 0f

    .global __ashldi3
    XCALL   __ashldi3
0:
    pop     r16
    ret
ENDF ashldi3
#endif /* Small device */

#endif /* F7MOD_addsub_mant_scaled_ */

#if defined F7MOD_mul_mant_ && defined (__AVR_HAVE_MUL__)
;; Register numbers of the first factor A[]: registers 11...17.
    #define     A0  11
    #define     A1  A0+1
    #define     A2  A0+2
    #define     A3  A0+3
    #define     A4  A0+4
    #define     A5  A0+5
    #define     A6  A0+6

;; Temporaries for partial products: registers 26...29.
    #define     TT0 26
    #define     TT1 TT0+1
    #define     TT2 28
    #define     TT3 TT2+1

;; Holds one byte of the second factor: register 10.
    #define     BB  10

;; R18.0 = 1: No rounding.

;; Multiply two mantissae.
;; In: r25:r24 = pointer that receives the result (kept on the stack and
;;     popped into Z before the final store), r22 = pointer to the first
;;     factor (its mantissa is loaded into A0...A6), r20 = pointer to the
;;     second factor (its mantissa bytes are read through Z), R18.0 is
;;     copied into T.
;; The product is accumulated in CA, C0...C6 from 8x8 bit partial
;; products.  In the comments below, "i * j -> h:l" means: A[i] times
;; byte j of the second factor is added into C[h]:C[l], where "a" stands
;; for CA and "-" for a low byte that is dropped.
;; ZERO (r1) is overwritten by each mul and cleared again before it is
;; used as zero.
DEFUN mul_mant
    do_prologue_saves 10
    bst     r18,    0
    push    r25
    push    r24
    movw    ZL,     r22
    LDD     A0,     Z+0+Off
    LDD     A1,     Z+1+Off
    LDD     A2,     Z+2+Off
    LDD     A3,     Z+3+Off
    LDD     A4,     Z+4+Off
    LDD     A5,     Z+5+Off
    LDD     A6,     Z+6+Off
    movw    ZL,     r20

    ;; 6 * 6 -> 6:5
    ;; 4 * 6 -> 4:3
    ;; 2 * 6 -> 2:1
    ;; 0 * 6 -> 0:a
    ldd     BB, Z+6+Off
    mul     A6, BB      $   movw    C5, r0
    mul     A4, BB      $   movw    C3, r0
    mul     A2, BB      $   movw    C1, r0
    mul     A0, BB      $   movw    CA, r0

    ;; 5 * 6 -> 5:4
    ;; 3 * 6 -> 3:2
    ;; 1 * 6 -> 1:0
    mul     A5, BB      $   movw    TT2, r0
    mul     A3, BB      $   movw    TT0, r0
    mul     A1, BB
    ADD     C0, r0      $   adc     C1, r1
    adc     C2, TT0     $   adc     C3, TT1
    adc     C4, TT2     $   adc     C5, TT3     $   clr ZERO
    adc     C6, ZERO
    ;; Done B6

    ;; 3 * 3 -> 0:a
    ;; 4 * 4 -> 2:1
    ;; 5 * 5 -> 4:3
    ldd     BB, Z+3+Off $   mul     A3, BB      $   movw    TT0, r0
    ldd     BB, Z+4+Off $   mul     A4, BB      $   movw    TT2, r0
    ldd     BB, Z+5+Off $   mul     A5, BB

    ADD     CA, TT0     $   adc     C0, TT1
    adc     C1, TT2     $   adc     C2, TT3
    adc     C3, r0      $   adc     C4, r1
    brcc .+2                        ; no carry: skip the adiw below
    adiw    C5, 1

    ;; 6 * 5 -> 5:4
    ;; 4 * 5 -> 3:2
    ;; 2 * 5 -> 1:0
    ;; 0 * 5 -> a:-
    mul     A0, BB
    ;; A0 done
#define Atmp A0

    mov     Atmp, r1
    mul     A6, BB      $   movw    TT2, r0
    mul     A4, BB      $   movw    TT0, r0
    mul     A2, BB

    ADD     CA, Atmp
    adc     C0, r0      $   adc     C1, r1
    adc     C2, TT0     $   adc     C3, TT1
    adc     C4, TT2     $   adc     C5, TT3     $   clr ZERO
    adc     C6, ZERO

    ;; 1 * 5 -> 0:a
    ;; 3 * 5 -> 2:1
    ;; 6 * 4 -> 4:3
    mul     A1, BB      $   movw    TT0, r0
    mul     A3, BB      $   movw    TT2, r0
    ldd     BB, Z+4+Off
    mul     A6, BB

    ADD     CA, TT0     $   adc     C0, TT1
    adc     C1, TT2     $   adc     C2, TT3
    adc     C3, r0      $   adc     C4, r1      $   clr ZERO
    adc     C5, ZERO    $   adc     C6, ZERO
    ;; B5 done

    ;; 6 * 3 -> 3:2
    ;; 6 * 1 -> 1:0
    ;; 4 * 1 -> a:-
    mov     TT0, A6     $   ldd     TMP,  Z+3+Off
    mov     BB,  A4     $   ldd     Atmp, Z+1+Off
    rcall   .Lmul.help.3

    ;; 5 * 4 -> 3:2
    ;; 5 * 2 -> 1:0
    ;; 3 * 2 -> a:-
    mov     TT0, A5     $   ldd     TMP,  Z+4+Off
    mov     BB,  A3     $   ldd     Atmp, Z+2+Off
    rcall   .Lmul.help.3

    ;; 4 *   -> 3:2 (=0)
    ;; 4 * 3 -> 1:0
    ;; 2 * 3 -> a:-
    mov     TT0, A4     $   clr     TMP
    mov     BB,  A2     $   ldd     Atmp, Z+3+Off
    rcall   .Lmul.help.3

    ;; 3 * . -> 3:2 (=0)
    ;; 3 * 4 -> 1:0
    ;; 1 * 4 -> a:-
    mov     TT0, A3     $   clr     TMP
    mov     BB,  A1     $   ldd     Atmp, Z+4+Off
    rcall   .Lmul.help.3

    ;; . * ? -> 3:2 (=0)
    ;; . * 0 -> 1:0 (=0)
    ;; 5 * 0 -> a:-
    clr     TT0
    mov     BB,  A5     $   ldd     Atmp, Z+0+Off
    rcall   .Lmul.help.3

    clr TT3  ;; Asserted by .Lmul.help.2
    ;; 6 * 2 -> 2:1
    ;; 6 * 0 -> 0:a
                        $   ldd     TMP,  Z+2+Off
    mov     BB, A6     ;$   ldd     Atmp, Z+0+Off
    rcall   .Lmul.help.2

    ;; 5 * 3 -> 2:1
    ;; 5 * 1 -> 0:a
                        $   ldd     TMP,  Z+3+Off
    mov     BB, A5      $   ldd     Atmp, Z+1+Off
    rcall   .Lmul.help.2

    ;; 4 * . -> 2:1 (=0)
    ;; 4 * 2 -> 0:a
                        $   clr     TMP
    mov     BB, A4      $   ldd     Atmp, Z+2+Off
    rcall   .Lmul.help.2

    ;; 2 * . -> 2:1 (=0)
    ;; 2 * 4 -> 0:a
                        $   clr     TMP
    mov     BB, A2      $   ldd     Atmp, Z+4+Off
    rcall   .Lmul.help.2

    ;; Finally...

    ;; Z = result pointer that was pushed from r25:r24 at the start.
    pop     ZL
    pop     ZH
    ;; The high byte is at least 0x40 and at most 0xfe.
    ;; The result has to be left-shifted by one in order to scale it
    ;; correctly.

    ldi     Carry,  1
    F7call  normalize.maybe_round.store_with_flags

    do_epilogue_restores 10

;; TT0 * Tmp  -> 3:2
;; TT0 * Atmp -> 1:0
;; BB  * Atmp -> a:-
;;
;; Clobbers : TMP, TT0...TT3.
;; Sets     : ZERO = 0.
.Lmul.help.3:
    mul     TT0, TMP    $   movw    TT2, r0
    mul     TT0, Atmp   $   movw    TT0, r0
    mul     BB,  Atmp

    ADD     CA, r1
    adc     C0, TT0     $   adc     C1, TT1
    adc     C2, TT2
.Lmul.help.3.C3:        $   adc     C3, TT3     $ clr ZERO
    adc     C4, ZERO    $   adc     C5, ZERO
    adc     C6, ZERO
    ret

;; BB * TMP  -> 2:1
;; BB * Atmp -> 0:a
;;
;; Asserts  : TT3 = 0
;; Clobbers : TMP, TT0, TT1.
;; Sets     : ZERO = 0.
.Lmul.help.2:
    mul     BB, TMP     $   movw    TT0, r0
    mul     BB, Atmp
    ADD     CA, r0      $   adc     C0, r1
    adc     C1, TT0     $   adc     C2, TT1
    rjmp .Lmul.help.3.C3            ; share the carry propagation tail

ENDF mul_mant
#endif /* F7MOD_mul_mant_ && MUL */


#if defined (F7MOD_div_)

;; Dividend is C[]

;; Divisor
#define A0       9
#define A1      10
#define A2      11
#define A3      12
#define A4      13
#define A5      14
#define A6      15

;; Quotient
#define Q0      0       /* === TMP  */
#define Q1      Q0+1    /* === ZERO */
#define Q2      26
#define Q3      Q2+1
#define Q4      28
#define Q5      Q4+1
#define Q6      16
#define Q7      Q6+1

#define Cnt     CA
#define QBits   r8

;; Divide two mantissae by shift and subtract, one quotient bit per
;; loop iteration.
;; In: r25:r24 = pointer to the dividend; the result is stored through
;;     the same pointer.  r22 = pointer to the divisor.  r20 = number of
;;     quotient bits to compute.
;; TMP and ZERO hold the two low quotient bytes during the loop; ZERO is
;; cleared again after the quotient has been moved to C[].
DEFUN div
    do_prologue_saves 12

    ;; Number of bits requested for the quotient.
    ;; This is usually 2 + F7_MANT_BITS.
    mov     QBits,  r20
    wmov    ZL,     r22
    LDD     A0,     Z+0+Off
    LDD     A1,     Z+1+Off
    LDD     A2,     Z+2+Off
    LDD     A3,     Z+3+Off
    LDD     A4,     Z+4+Off
    LDD     A5,     Z+5+Off
    LDD     A6,     Z+6+Off
    wmov    ZL,     r24
    F7call  load_mant

    ;; Clear quotient Q[].
    clr     Q0      ; === TMP
    ;clr    Q1      ; === ZERO
    wmov    Q2,     Q0
    wmov    Q4,     Q0
    wmov    Q6,     Q0

    ;; C[] and A[] are valid mantissae, i.e. their MSBit is set.  Therefore,
    ;; quotient Q[] will be in  [0x0.ff..., 0x0.40...]  and to adjust Q[] we
    ;; need at most 1 left-shift.  Compute F7_MANT_BITS + 2 bits of the
    ;; quotient:  One bit is used for rounding, and one bit might be consumed
    ;; by the mentioned left-shift.
    mov     Cnt,    QBits
    rjmp .Loop_start

.Loop:
    ;; Shift dividend.
    LSL     C0
    rol     C1
    rol     C2
    rol     C3
    rol     C4
    rol     C5
    rol     C6
    brcs .Lfits                     ; a bit was shifted out: divisor fits
    ;; Compare dividend against divisor.
.Loop_start:
    CP      C0,     A0
    cpc     C1,     A1
    cpc     C2,     A2
    cpc     C3,     A3
    cpc     C4,     A4
    cpc     C5,     A5
    cpc     C6,     A6
    ;; Shift 0 into quotient.
    brlo 1f
.Lfits:
    ;; Divisor fits into dividend.
    SUB     C0,     A0
    sbc     C1,     A1
    sbc     C2,     A2
    sbc     C3,     A3
    sbc     C4,     A4
    sbc     C5,     A5
    sbc     C6,     A6
    ;; Shift 1 into quotient.
    sec
    rol     Q0
    skipnext
1:  lsl     Q0
    rol     Q1
    rol     Q2
    rol     Q3
    rol     Q4
    rol     Q5
    rol     Q6
    rol     Q7
    dec     Cnt
    brne .Loop

    ;; Move the quotient to C[] and make ZERO read as 0 again.
    wmov    CA,     Q0
    wmov    C1,     Q2
    wmov    C3,     Q4
    wmov    C5,     Q6
    clr     ZERO

    ldi     Carry,  64
    sub     Carry,  QBits
    F7call  normalize.round.store_with_flags

    do_epilogue_restores 12
ENDF div

#endif /* F7MOD_div_ */


#if defined (F7MOD_sqrt16_) && defined (__AVR_HAVE_MUL__)

#define Mask    C6
#define Q0      C3      /*  = R22  */
#define Q1      C4      /*  = R23  */

;; uint16_t R24 = sqrt16_XXX (uint16_t R24);
;; Clobbers:  R22, R23, TMP.
;;
;; XXX = floor:  Return integral part of square-root of R25:R24 with R25 = 0.
;;               Error is in [0, -1 LSB).
;; XXX = round:  Return square-root of R25:R24 rounded to nearest integer.
;;               R25 = (Q[] >= 65281) = (Q > 0xff00), i.e. if Q[] is not
;;               bigger than 0xff00, then the result fits in 8 bits.
;;               Return C = 0 if the result is the same as for XXX = floor,
;;               error in [0, -1/2 LSB)
;;               Return C = 1 if the result is one higher than for XXX = floor,
;;               error in [1/2 LSB, 0).
DEFUN sqrt16_round
    set
    skipnext
    ;; ...
LABEL sqrt16_floor
    clt                 ; Skipped?
    movw    Q0,     r24
    clr     C5
    ldi     Mask,   1 << 7

    ;; Try the result bits from bit 7 down to bit 0: set the bit in C5
    ;; and take it back if C5^2 is bigger than the input.
.Loop_mask:
    add     C5,     Mask
    mul     C5,     C5
    cp      Q0,     R0
    cpc     Q1,     R1
    brsh 1f
    sub     C5,     Mask
1:  lsr     Mask
    brne .Loop_mask

    brtc .Ldone         ; No rounding  =>  C6 will be 0.

    ;; Rounding:  (X + 1/2)^2  =  X^2 + X + 1/4,  thus probing
    ;; for bit -1 is testing Q[] against  C5^2 + C5.
    mul     C5,     C5
    add     R0,     C5
    adc     R1,     C6  ; Exploit C6 === Mask = 0.
    cp      R0,     Q0
    cpc     R1,     Q1
    brcc .Ldone
    ;; If  C5^2 + C5 + 1/4  fits into Q[], then round up and C = 1.
    adiw    C5,     1   ; Exploit C6 === Mask = 0.
    sec

.Ldone:
    clr     __zero_reg__            ; r1 was overwritten by mul
    ret
ENDF sqrt16_round
#undef Mask
#undef Q0
#undef Q1
#endif /* F7MOD_sqrt16_ && MUL */

#ifdef F7MOD_sqrt_approx_
DEFUN sqrt_approx
    push    r17
    push    r16
    wmov    XL,     r24
    wmov    ZL,     r22

    ;; C[] = 0.
    .global __clr_8
    XCALL   __clr_8

    ldd     C5,     Z+5+Off
    ldd     C6,     Z+6+Off

    ldd     Carry,  Z+0+Expo
    ldd     TMP,    Z+1+Expo
    wmov    ZL,     XL

    st      Z,      ZERO

    asr     TMP
    ror     Carry
    std     Z+1+Expo,   TMP
    std     Z+0+Expo,   Carry

    ;; Re-interpreting our Q-format 1.xx mantissa as Q2.yy, we have to shift
    ;; the mantissa to the right by 1.  As we need an even exponent, multiply
    ;; the mantissa by 2 for odd exponents, i.e. only right-shift if .expo
    ;; is even.
1280 1.1 mrg 1281 1.1 mrg brcs 1f 1282 1.1 mrg lsr C6 1283 1.1 mrg ror C5 1284 1.1 mrg 1285 1.1 mrg 1: 1286 1.1 mrg F7call sqrt16_round 1287 1.1 mrg 1288 1.1 mrg ;; sqrt16_round() returns: C = 0: error in [0, -1/2 LSB). 1289 1.1 mrg ;; C = 1: error in [1/2 LSB, 0) 1290 1.1 mrg 1291 1.1 mrg brcc 2f 1292 1.1 mrg ;; Undo the round-up from sqrt16_round(); this will transform to 1293 1.1 mrg ;; error in [-1/2 LSB, -1 LSB). 1294 1.1 mrg sbiw C5, 1 1295 1.1 mrg ;; Together with the correct bit C4.7, the error is in [0, -1/2 LSB). 1296 1.1 mrg ori C4, 1 << 7 1297 1.1 mrg 1298 1.1 mrg 2: ;; Setting C4.6 adds 1/4 LSB and the error is now in [1/4 LSB, -1/4 LSB) 1299 1.1 mrg ;; in either case. 1300 1.1 mrg ori C4, 1 << 6 1301 1.1 mrg 1302 1.1 mrg ;; ???????????? 1303 1.1 mrg ;; sqrt16_round() runs on integers which means that it computes the 1304 1.1 mrg ;; square root of mant * 2^14 if we regard mant as Q-format 2.yy, 1305 1.1 mrg ;; i.e. 2 integral bits. The result is sqrt(mant) * 2^7, 1306 1.1 mrg ;; and in order to get the same scaling like the input, .expo has to 1307 1.1 mrg ;; be adjusted by 7. ??????????????? 
1308 1.1 mrg 1309 1.1 mrg ldi Carry, 8 1310 1.1 mrg F7call normalize.store_with_flags 1311 1.1 mrg 1312 1.1 mrg pop r16 1313 1.1 mrg pop r17 1314 1.1 mrg ret 1315 1.1 mrg 1316 1.1 mrg ENDF sqrt_approx 1317 1.1 mrg #endif /* F7MOD_sqrt_approx_ */ 1318 1.1 mrg 1319 1.1 mrg 1320 1.1 mrg #undef CA 1321 1.1 mrg #undef C0 1322 1.1 mrg #undef C1 1323 1.1 mrg #undef C2 1324 1.1 mrg #undef C3 1325 1.1 mrg #undef C4 1326 1.1 mrg #undef C5 1327 1.1 mrg #undef C6 1328 1.1 mrg #undef Carry 1329 1.1 mrg 1330 1.1 mrg 1331 1.1 mrg #ifdef F7MOD_D_fabs_ 1332 1.1 mrg _DEFUN __fabs 1333 1.1 mrg DALIAS fabs 1334 1.1 mrg LALIAS fabsl 1335 1.1 mrg andi R25, 0b01111111 1336 1.1 mrg ret 1337 1.1 mrg _ENDF __fabs 1338 1.1 mrg #endif /* F7MOD_D_fabs_ */ 1339 1.1 mrg 1340 1.1 mrg 1341 1.1 mrg #ifdef F7MOD_D_neg_ 1342 1.1 mrg _DEFUN __neg 1343 1.1 mrg _LABEL __negdf2 1344 1.1 mrg subi R25, 0b10000000 1345 1.1 mrg ret 1346 1.1 mrg _ENDF __neg 1347 1.1 mrg #endif /* F7MOD_D_neg_ */ 1348 1.1 mrg 1349 1.1 mrg 1350 1.1 mrg #ifdef F7MOD_D_signbit_ 1351 1.1 mrg _DEFUN __signbit 1352 1.1 mrg DALIAS signbit 1353 1.1 mrg LALIAS signbitl 1354 1.1 mrg bst R25, 7 1355 1.1 mrg clr R25 1356 1.1 mrg clr R24 1357 1.1 mrg bld R24, 0 1358 1.1 mrg ret 1359 1.1 mrg _ENDF __signbit 1360 1.1 mrg #endif /* F7MOD_D_signbit_ */ 1361 1.1 mrg 1362 1.1 mrg 1363 1.1 mrg #ifdef F7MOD_D_copysign_ 1364 1.1 mrg _DEFUN __copysign 1365 1.1 mrg DALIAS copysign 1366 1.1 mrg LALIAS copysignl 1367 1.1 mrg bst R17, 7 1368 1.1 mrg bld R25, 7 1369 1.1 mrg ret 1370 1.1 mrg _ENDF __copysign 1371 1.1 mrg #endif /* F7MOD_D_copysign_ */ 1372 1.1 mrg 1373 1.1 mrg 1374 1.1 mrg #ifdef F7MOD_D_isinf_ 1375 1.1 mrg _DEFUN __isinf 1376 1.1 mrg DALIAS isinf 1377 1.1 mrg LALIAS isinfl 1378 1.1 mrg F7call class_D 1379 1.1 mrg ;; Inf: T = Z = 1. 
1380 1.1 mrg brtc 0f 1381 1.1 mrg ldi R24, 1 1382 1.1 mrg breq 1f 1383 1.1 mrg 0: 1384 1.1 mrg clr R24 1385 1.1 mrg 1: 1386 1.1 mrg clr R25 1387 1.1 mrg ret 1388 1.1 mrg _ENDF __isinf 1389 1.1 mrg #endif /* F7MOD_D_isinf_ */ 1390 1.1 mrg 1391 1.1 mrg 1392 1.1 mrg #ifdef F7MOD_D_isnan_ 1393 1.1 mrg _DEFUN __isnan 1394 1.1 mrg DALIAS isnan 1395 1.1 mrg LALIAS isnanl 1396 1.1 mrg F7call class_D 1397 1.1 mrg ;; NaN: T = 1, Z = 0. 1398 1.1 mrg brtc 0f 1399 1.1 mrg ldi R24, 1 1400 1.1 mrg brne 1f 1401 1.1 mrg 0: 1402 1.1 mrg clr R24 1403 1.1 mrg 1: 1404 1.1 mrg clr R25 1405 1.1 mrg ret 1406 1.1 mrg _ENDF __isnan 1407 1.1 mrg #endif /* F7MOD_D_isnan_ */ 1408 1.1 mrg 1409 1.1 mrg 1410 1.1 mrg #ifdef F7MOD_D_isfinite_ 1411 1.1 mrg _DEFUN __isfinite 1412 1.1 mrg DALIAS isfinite 1413 1.1 mrg LALIAS isfinitel 1414 1.1 mrg F7call class_D 1415 1.1 mrg ;; Number <=> T = 0. 1416 1.1 mrg bld R24, 0 1417 1.1 mrg com R24 1418 1.1 mrg andi R24, 1 1419 1.1 mrg clr R25 1420 1.1 mrg ret 1421 1.1 mrg _ENDF __isfinite 1422 1.1 mrg #endif /* F7MOD_D_isfinite_ */ 1423 1.1 mrg 1424 1.1 mrg 1425 1.1 mrg #ifdef F7MOD_D_class_ 1426 1.1 mrg ;; The encoded exponent has 11 Bits. 1427 1.1 mrg #define MAX_BIASED_EXPO 0b0111111111110000 1428 1.1 mrg 1429 1.1 mrg ;; Classify a double in R18[] 1430 1.1 mrg ;; Number: T-Flag = 0. 1431 1.1 mrg ;; +-Inf : T-Flag = 1, Z-Flag = 1. 1432 1.1 mrg ;; NaN : T-Flag = 1, Z-Flag = 0. 1433 1.1 mrg DEFUN class_D 1434 1.1 mrg wmov R26, R24 1435 1.1 mrg andi R26, lo8 (MAX_BIASED_EXPO) 1436 1.1 mrg andi R27, hi8 (MAX_BIASED_EXPO) 1437 1.1 mrg subi R26, lo8 (MAX_BIASED_EXPO) 1438 1.1 mrg sbci R27, hi8 (MAX_BIASED_EXPO) 1439 1.1 mrg clt 1440 1.1 mrg brne .L.number 1441 1.1 mrg set 1442 1.1 mrg ;; Set sign and expo to 0. 1443 1.1 mrg clr R25 1444 1.1 mrg andi R24, lo8 (~MAX_BIASED_EXPO) 1445 1.1 mrg ;; What remains is the mantissa. 1446 1.1 mrg ;; Mantissa == 0 => +/-Inf. 1447 1.1 mrg ;; Mantissa != 0 => NaN. 
1448 1.1 mrg ;; Compare R18[] against sign_extend(R26) with R26 = 0. 1449 1.1 mrg .global __cmpdi2_s8 1450 1.1 mrg XJMP __cmpdi2_s8 1451 1.1 mrg .L.number: 1452 1.1 mrg ret 1453 1.1 mrg 1454 1.1 mrg ENDF class_D 1455 1.1 mrg #endif /* F7MOD_D_class_ */ 1456 1.1 mrg 1457 1.1 mrg 1458 1.1 mrg #ifdef F7MOD_call_dd_ 1459 1.1 mrg 1460 1.1 mrg ;; Provide double wrappers for functions that operate on f7_t and get f7_t*. 1461 1.1 mrg ;; 1462 1.1 mrg ;; We set up a frame of sizeof(f7_t), convert the input double in R18[] to 1463 1.1 mrg ;; f7_t in that frame location, then call *Z and finally convert the result f7_t 1464 1.1 mrg ;; to double R18[] if that's requested. 1465 1.1 mrg ;; 1466 1.1 mrg ;; call_dd: double func (double A) 1467 1.1 mrg ;; void (*Z) (f7_t *aa, const f7_t *aa) 1468 1.1 mrg ;; 1469 1.1 mrg ;; call_dx: double func (type_t A) , sizeof(type_t) <= 4 1470 1.1 mrg ;; void (*Z) (f7_t *aa, type_t) 1471 1.1 mrg ;; 1472 1.1 mrg ;; call_xd: type_t func (double A) 1473 1.1 mrg ;; type_t (*Z) (const f7_t *aa) 1474 1.1 mrg ;; 1475 1.1 mrg ;; call_ddx: double func (double A, word_t) , sizeof (word_t) <= 2 1476 1.1 mrg ;; void (*Z) (f7_t *aa, const f7_t *aa, word_t) 1477 1.1 mrg 1478 1.1 mrg #define WHAT R13 1479 1.1 mrg 1480 1.1 mrg DEFUN call_dd ; WHAT = R13 = 3 1481 1.1 mrg inc ZERO 1482 1.1 mrg LABEL call_xd ; WHAT = R13 = 2 1483 1.1 mrg inc ZERO 1484 1.1 mrg LABEL call_ddx ; WHAT = R13 = 1 1485 1.1 mrg inc ZERO 1486 1.1 mrg LABEL call_dx ; WHAT = R13 = 0 1487 1.1 mrg push WHAT 1488 1.1 mrg mov WHAT, ZERO 1489 1.1 mrg clr ZERO 1490 1.1 mrg ;; R14/R15 hold Z, the address of the f7_worker function, until we need it. 1491 1.1 mrg push r14 1492 1.1 mrg push r15 1493 1.1 mrg wmov r14, Z 1494 1.1 mrg 1495 1.1 mrg #define n_pushed 4 1496 1.1 mrg #define n_frame 10 1497 1.1 mrg 1498 1.1 mrg do_prologue_saves n_pushed, n_frame 1499 1.1 mrg ;; Y = FramePointer + 1 1500 1.1 mrg adiw Y, 1 1501 1.1 mrg dec WHAT 1502 1.1 mrg brmi .Ldx ; WHAT was initially 0. 
1503 1.1 mrg ;; FP + 1 = (f7_t) arg1 1504 1.1 mrg wmov r16, Y 1505 1.1 mrg ;; The double argument is in R18[]. 1506 1.1 mrg XCALL F7_NAME (set_double_impl) 1507 1.1 mrg tst WHAT 1508 1.1 mrg brne .Lno.ddx ; WHAT was initially != 1. 1509 1.1 mrg ;; call_ddx: Set R20/21 to the 2-byte scalar / pointer argument. 1510 1.1 mrg ;; Fetch it from where prologue_saves put it. 1511 1.1 mrg ldd r20, Y + n_frame + 3 ; Saved R16 1512 1.1 mrg ldd r21, Y + n_frame + 2 ; Saved R17 1513 1.1 mrg .Lno.ddx: 1514 1.1 mrg wmov r22, Y ; &arg1 (input) 1515 1.1 mrg .Ldo.dx: 1516 1.1 mrg wmov r24, Y ; &arg1 (output) 1517 1.1 mrg wmov Z, r14 1518 1.1 mrg XICALL 1519 1.1 mrg dec WHAT 1520 1.1 mrg breq .Lepilogue ; WHAT was initially 2: Return non-double. 1521 1.1 mrg wmov r24, Y ; &arg1 1522 1.1 mrg XCALL F7_NAME (get_double) 1523 1.1 mrg .Lepilogue: 1524 1.1 mrg ;; + 3 to account for R13...R15 pushed prior to do_prologue_saves. 1525 1.1 mrg do_epilogue_restores n_pushed + 3, n_frame 1526 1.1 mrg 1527 1.1 mrg .Ldx: 1528 1.1 mrg ;; call_dx: Copy the 4-byte input scalar from R22[4] to R20[4]. 1529 1.1 mrg wmov r20, r22 1530 1.1 mrg wmov r22, r24 1531 1.1 mrg rjmp .Ldo.dx 1532 1.1 mrg 1533 1.1 mrg ENDF call_dd 1534 1.1 mrg #endif /* F7MOD_call_dd_ */ 1535 1.1 mrg 1536 1.1 mrg 1537 1.1 mrg #ifdef F7MOD_call_ddd_ 1538 1.1 mrg 1539 1.1 mrg ;; Provide double wrappers for functions that operate on f7_t and get f7_t*. 1540 1.1 mrg ;; 1541 1.1 mrg ;; We set up a frame of 2 * sizeof(f7_t), convert the input doubles in R18[] 1542 1.1 mrg ;; and R10[] to f7_t in these frame locations, then call *Z and finally 1543 1.1 mrg ;; convert the result f7_t to double R18[] if that's requested. 
1544 1.1 mrg ;; 1545 1.1 mrg ;; call_ddd: double func (double A, double B) 1546 1.1 mrg ;; void (*Z) (f7_t *aa, const f7_t *aa, const f7_t *bb) 1547 1.1 mrg ;; 1548 1.1 mrg ;; call_xdd: type_t func (double A, double B) 1549 1.1 mrg ;; type_t (*Z) (const f7_t *aa, const f7_t *bb) 1550 1.1 mrg 1551 1.1 mrg DEFUN call_ddd 1552 1.1 mrg inc ZERO 1553 1.1 mrg LABEL call_xdd 1554 1.1 mrg ;; R8/R9 hold Z, the address of the f7_worker function, until we need it. 1555 1.1 mrg push r9 1556 1.1 mrg push r8 1557 1.1 mrg wmov r8, Z 1558 1.1 mrg ;; This is an argument to call.2 and will be accessed by the arg pointer. 1559 1.1 mrg push ZERO 1560 1.1 mrg clr ZERO 1561 1.1 mrg rcall call.2 1562 1.1 mrg pop TMP 1563 1.1 mrg pop r8 1564 1.1 mrg pop r9 1565 1.1 mrg ret 1566 1.1 mrg 1567 1.1 mrg #define n_pushed 4 1568 1.1 mrg #define n_frame 20 1569 1.1 mrg 1570 1.1 mrg call.2: 1571 1.1 mrg do_prologue_saves n_pushed, n_frame 1572 1.1 mrg ;; Y = FramePointer + 1 1573 1.1 mrg adiw Y, 1 1574 1.1 mrg ;; FP + 1 = (f7_t) arg1 1575 1.1 mrg wmov r16, Y 1576 1.1 mrg ;; First double argument is already in R18[]. 1577 1.1 mrg XCALL F7_NAME (set_double_impl) 1578 1.1 mrg ;; FP + 11 = (f7_t) arg2 1579 1.1 mrg wmov r16, Y 1580 1.1 mrg subi r16, lo8 (-10) 1581 1.1 mrg sbci r17, hi8 (-10) 1582 1.1 mrg ;; Move second double argument to R18[]. 1583 1.1 mrg wmov r18, r10 1584 1.1 mrg wmov r20, r12 1585 1.1 mrg wmov r22, r14 1586 1.1 mrg ;; Get high word of arg2 from where prologue_saves put it. 
1587 1.1 mrg ldd r24, Y + n_frame + 3 ; Saved R16 1588 1.1 mrg ldd r25, Y + n_frame + 2 ; Saved R17 1589 1.1 mrg XCALL F7_NAME (set_double_impl) 1590 1.1 mrg ;; Z (f7_t *arg1, const f7_t *arg1, const f7_t *arg2) 1591 1.1 mrg wmov Z, r8 1592 1.1 mrg wmov r24, Y ; &arg1 1593 1.1 mrg ;; WHAT == 0 => call_xdd 1594 1.1 mrg ;; WHAT != 0 => call_ddd 1595 1.1 mrg ldd TMP, Y + n_frame + n_pushed + PC_SIZE 1596 1.1 mrg tst TMP 1597 1.1 mrg breq .Lxdd 1598 1.1 mrg wmov r22, Y ; &arg1 1599 1.1 mrg wmov r20, r16 ; &arg2 1600 1.1 mrg XICALL 1601 1.1 mrg wmov r24, Y ; &arg1 1602 1.1 mrg XCALL F7_NAME (get_double) 1603 1.1 mrg .Lepilogue: 1604 1.1 mrg do_epilogue_restores n_pushed, n_frame 1605 1.1 mrg .Lxdd: 1606 1.1 mrg wmov r22, r16 ; &arg2 1607 1.1 mrg XICALL 1608 1.1 mrg rjmp .Lepilogue 1609 1.1 mrg ENDF call_ddd 1610 1.1 mrg #endif /* F7MOD_call_ddd_ */ 1611 1.1 mrg 1612 1.1 mrg #include "f7-wraps.h" 1613 1.1 mrg 1614 1.1 mrg #endif /* !AVR_TINY */ 1615