muldi3.S revision 1.2 1 1.2 rin /* $NetBSD: muldi3.S,v 1.2 2020/05/31 12:37:07 rin Exp $ */
2 1.1 rin
3 1.1 rin /*
4 1.1 rin * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 1.1 rin * All rights reserved.
6 1.1 rin *
7 1.1 rin * This code is derived from software contributed to The NetBSD Foundation
8 1.1 rin * by Rin Okuyama.
9 1.1 rin *
10 1.1 rin * Redistribution and use in source and binary forms, with or without
11 1.1 rin * modification, are permitted provided that the following conditions
12 1.1 rin * are met:
13 1.1 rin * 1. Redistributions of source code must retain the above copyright
14 1.1 rin * notice, this list of conditions and the following disclaimer.
15 1.1 rin * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 rin * notice, this list of conditions and the following disclaimer in the
17 1.1 rin * documentation and/or other materials provided with the distribution.
18 1.1 rin *
19 1.1 rin * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.1 rin * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.1 rin * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.1 rin * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.1 rin * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.1 rin * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.1 rin * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.1 rin * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.1 rin * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.1 rin * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.1 rin * POSSIBILITY OF SUCH DAMAGE.
30 1.1 rin */
31 1.1 rin
32 1.1 rin #include <machine/asm.h>
33 1.1 rin
34 1.2 rin RCSID("$NetBSD: muldi3.S,v 1.2 2020/05/31 12:37:07 rin Exp $")
35 1.1 rin
36 1.1 rin | int64_t __muldi3(int64_t X, int64_t Y);
37 1.1 rin |
38 1.1 rin | * Return the lower 64 bits of (X * Y) in %d0:%d1.
39 1.1 rin |
40 1.1 rin | * Intended for 68060:
41 1.1 rin | - GCC does not emit calls to __muldi3() for 68020-40, which have the 32 * 32 --> 64 mulul instruction.
42 1.1 rin | - mulsl (and moveml) are not implemented for 68010.
43 1.1 rin |
44 1.1 rin | * Notation:
45 1.1 rin | - H32:L32 --> higher:lower 32bit of variable
46 1.1 rin | - H:L --> higher:lower 16bit of variable/register
47 1.1 rin
48 1.1 rin #ifdef __mc68010__ /* refuse to build for 68010 */
49 1.1 rin #error "not for 68010"
50 1.1 rin #endif /* __mc68010__ */
51 1.1 rin
52 1.1 rin #define X_H32 (4 * 4) /* %sp offset of higher 32bit of X once %d2-%d4 are pushed: 3 saved longs + return address */
53 1.1 rin #define X_L32 (X_H32 + 4) /* %sp offset of lower 32bit of X */
54 1.1 rin #define Y_H32 (X_L32 + 4) /* %sp offset of higher 32bit of Y */
55 1.1 rin #define Y_L32 (Y_H32 + 4) /* %sp offset of lower 32bit of Y */
56 1.1 rin
57 1.1 rin ENTRY(__muldi3)
58 1.1 rin moveml %d2-%d4, -(%sp) | push %d2-%d4 (3 longs; restored before rts)
59 1.1 rin
60 1.1 rin | First, calculate (X_L32 * Y_L32) as a 64bit integer.
61 1.1 rin | X_L32 * Y_L32 = (C << 32) + ((B + D) << 16) + A, where A-D are the 16 * 16 --> 32 muluw products below.
62 1.1 rin movel X_L32(%sp), %a0 | save X_L32 in %a0 (reloaded for the cross product)
63 1.1 rin movel Y_L32(%sp), %a1 | save Y_L32 in %a1 (reloaded for the cross product)
64 1.1 rin
65 1.1 rin movel %a0, %d2 | prepare for X_L32(H) in L (swapped below)
66 1.1 rin movel %a1, %d3 | prepare for Y_L32(H) in L (swapped below)
67 1.1 rin
68 1.1 rin movel %a0, %d4 | X_L32(L) in L, operand of A
69 1.1 rin movel %a1, %d1 | Y_L32(L) in L, operand of A and B
70 1.1 rin movel %a0, %d0 | X_L32(L) in L, second copy used as operand of D
71 1.1 rin
72 1.1 rin swap %d2 | X_L32(H) in L
73 1.1 rin swap %d3 | Y_L32(H) in L
74 1.1 rin
75 1.1 rin muluw %d1, %d4 | A = X_L32(L) * Y_L32(L), in %d4
76 1.1 rin muluw %d2, %d1 | B = X_L32(H) * Y_L32(L), in %d1
77 1.1 rin muluw %d3, %d2 | C = X_L32(H) * Y_L32(H), in %d2
78 1.1 rin muluw %d0, %d3 | D = X_L32(L) * Y_L32(H), in %d3
79 1.1 rin
80 1.1 rin movel %d4, %d0 | extract A(H)
81 1.1 rin clrw %d0 | drop A(L)
82 1.1 rin swap %d0 | %d0 = A(H) in L, H is zero
83 1.1 rin
84 1.1 rin addl %d0, %d1 | B += A(H) (no carry; max 0xfffe0001 + 0xffff = 0xffff0000)
85 1.1 rin
86 1.1 rin addl %d3, %d1 | B += D (this one may carry out of bit 31)
87 1.1 rin bccs 1f | if (carry)
88 1.1 rin addil #0x10000, %d2 | C += 0x10000 (carry out of B is bit 48 of the product, i.e. bit 16 of C)
89 1.1 rin
90 1.1 rin 1: swap %d1 | B(H) <--> B(L)
91 1.1 rin
92 1.1 rin | (%d0), (%d1), %d2 = C, %d3 = free, %d4 = A
93 1.1 rin
94 1.1 rin clrl %d3 | extract B(H)
95 1.1 rin movew %d1, %d3 | %d3 = B(H), zero-extended to 32bit
96 1.1 rin
97 1.1 rin movew %d4, %d1 | %d1 = (B(L) << 16) + A(L)
98 1.1 rin
99 1.1 rin addl %d3, %d2 | C += B(H)
100 1.1 rin
101 1.1 rin | We have (X_L32 * Y_L32) in %d2:%d1. Lower 32bit was completed.
102 1.1 rin | Add (X_L32 * Y_H32 + X_H32 * Y_L32) to higher 32bit.
103 1.1 rin | Only the lower 32bit of each cross product can reach the lower 64bit of (X * Y), so the 32bit results of mulsl are enough.
104 1.1 rin | (%d0), (%d1), %d2 = C, %d3 = free, %d4 = free
105 1.1 rin
106 1.1 rin movel %a0, %d0 | restore X_L32
107 1.1 rin movel %a1, %d3 | restore Y_L32
108 1.1 rin mulsl Y_H32(%sp), %d0 | E = X_L32 * Y_H32
109 1.1 rin mulsl X_H32(%sp), %d3 | F = X_H32 * Y_L32
110 1.1 rin addl %d2, %d0 | E += C
111 1.1 rin addl %d3, %d0 | %d0 = E + F (higher 32bit of the result, with C included)
112 1.1 rin
113 1.2 rin moveml (%sp)+, %d2-%d4 | pop %d2-%d4
114 1.1 rin rts | result is in %d0:%d1
115 1.1 rin END(__muldi3)
116