/*	$NetBSD: muldi3.S,v 1.2 2020/05/31 12:37:07 rin Exp $	*/

/*
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Rin Okuyama.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: muldi3.S,v 1.2 2020/05/31 12:37:07 rin Exp $")

| int64_t __muldi3(int64_t X, int64_t Y);
|
| * Return lower 64bit of (X * Y) into %d0:%d1
|   (%d0 = higher 32bit, %d1 = lower 32bit).
|
| * Intended for 68060:
|   - GCC does not emit __muldi3() for 68020-40, that have 32 * 32 --> 64 mulul.
|   - mulsl (and moveml) are not implemented for 68010.
|     NOTE(review): moveml looks like a base 68000 instruction; confirm
|     whether it really belongs in this list.
|
| * Notation:
|   - H32:L32 --> higher:lower 32bit of variable
|   - H:L     --> higher:lower 16bit of variable/register
|
| * Method (everything modulo 2^64):
|     X * Y = X_L32 * Y_L32 + ((X_L32 * Y_H32 + X_H32 * Y_L32) << 32)
|   The full 64bit value of X_L32 * Y_L32 is assembled from four
|   16 * 16 --> 32 muluw partial products:
|     A = X_L32(L) * Y_L32(L)		(weight 2^0)
|     B = X_L32(H) * Y_L32(L)		(weight 2^16)
|     D = X_L32(L) * Y_L32(H)		(weight 2^16)
|     C = X_L32(H) * Y_L32(H)		(weight 2^32)
|   The two cross products only contribute their lower 32bit, so a
|   single 32 * 32 --> 32 mulsl is used for each of them.
|
| * Register usage:
|   - %d2-%d4 are saved on entry and restored before return.
|   - %d0, %d1, %a0 and %a1 are overwritten without being saved.

#ifdef __mc68010__
#error "not for 68010"
#endif

| Argument offsets from %sp, valid once %d2-%d4 have been pushed: the
| three saved registers plus the return address are 4 longwords that sit
| in front of X.  Each 64bit argument is stored higher 32bit first.

#define X_H32	(4 * 4)
#define X_L32	(X_H32 + 4)
#define Y_H32	(X_L32 + 4)
#define Y_L32	(Y_H32 + 4)

ENTRY(__muldi3)
	moveml	%d2-%d4, -(%sp)		| push %d2-%d4

| First, calculate (X_L32 * Y_L32) as a 64bit integer.

	movel	X_L32(%sp), %a0		| save X_L32 (reloaded for mulsl below)
	movel	Y_L32(%sp), %a1		| save Y_L32 (reloaded for mulsl below)

	movel	%a0, %d2		| prepare for X_L32(H) in L
	movel	%a1, %d3		| prepare for Y_L32(H) in L

	movel	%a0, %d4		| X_L32(L) in L
	movel	%a1, %d1		| Y_L32(L) in L
	movel	%a0, %d0		| X_L32(L) in L (second copy, used for D)

	swap	%d2			| X_L32(H) in L
	swap	%d3			| Y_L32(H) in L

	| muluw only reads the lower 16bit of both operands and writes
	| a full 32bit result to the destination register.
	muluw	%d1, %d4		| A = X_L32(L) * Y_L32(L)
	muluw	%d2, %d1		| B = X_L32(H) * Y_L32(L)
	muluw	%d3, %d2		| C = X_L32(H) * Y_L32(H)
	muluw	%d0, %d3		| D = X_L32(L) * Y_L32(H)

	movel	%d4, %d0		| extract A(H)
	clrw	%d0			|   drop A(L) ...
	swap	%d0			|   ... %d0 = A(H), zero-extended

	| B <= 0xfffe0001 and A(H) <= 0xfffe, so this add cannot carry.
	addl	%d0, %d1		| B += A(H)

	| A carry out of this add has weight 2^(16 + 32) in the product,
	| which is 0x10000 in units of C (weight 2^32).
	addl	%d3, %d1		| B += D
	bccs	1f			| if (carry)
	addil	#0x10000, %d2		|	C += 0x10000

1:	swap	%d1			| B(H) <--> B(L)

| (%d0), (%d1), %d2 = C, %d3 = free, %d4 = A

	clrl	%d3			| extract B(H)
	movew	%d1, %d3		|   %d3 = B(H), zero-extended

	| A(H) has already been folded into B, so only A(L) is taken here.
	movew	%d4, %d1		| %d1 = (B(L) << 16) + A(L)

	addl	%d3, %d2		| C += B(H)

| We have (X_L32 * Y_L32) in %d2:%d1. Lower 32bit was completed.
| Add (X_L32 * Y_H32 + X_H32 * Y_L32) to higher 32bit.
|
| (%d0), (%d1), %d2 = C, %d3 = free, %d4 = free

	movel	%a0, %d0		| restore X_L32
	movel	%a1, %d3		| restore Y_L32

	| Only the lower 32bit of E and F is kept; that part of a product
	| is the same for signed and unsigned multiplication.
	mulsl	Y_H32(%sp), %d0		| E = X_L32 * Y_H32
	mulsl	X_H32(%sp), %d3		| F = X_H32 * Y_L32
	addl	%d2, %d0		| E += C
	addl	%d3, %d0		| %d0 = E + F

	moveml	(%sp)+, %d2-%d4		| pop %d2-%d4
	rts
END(__muldi3)