hamdist.asm revision 1.1 1 1.1 mrg dnl SPARC v9 mpn_hamdist for T3/T4.
2 1.1 mrg
3 1.1 mrg dnl Contributed to the GNU project by David Miller.
4 1.1 mrg
5 1.1 mrg dnl Copyright 2013 Free Software Foundation, Inc.
6 1.1 mrg
7 1.1 mrg dnl This file is part of the GNU MP Library.
8 1.1 mrg dnl
9 1.1 mrg dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 1.1 mrg dnl it under the terms of either:
11 1.1 mrg dnl
12 1.1 mrg dnl * the GNU Lesser General Public License as published by the Free
13 1.1 mrg dnl Software Foundation; either version 3 of the License, or (at your
14 1.1 mrg dnl option) any later version.
15 1.1 mrg dnl
16 1.1 mrg dnl or
17 1.1 mrg dnl
18 1.1 mrg dnl * the GNU General Public License as published by the Free Software
19 1.1 mrg dnl Foundation; either version 2 of the License, or (at your option) any
20 1.1 mrg dnl later version.
21 1.1 mrg dnl
22 1.1 mrg dnl or both in parallel, as here.
23 1.1 mrg dnl
24 1.1 mrg dnl The GNU MP Library is distributed in the hope that it will be useful, but
25 1.1 mrg dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 1.1 mrg dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 1.1 mrg dnl for more details.
28 1.1 mrg dnl
29 1.1 mrg dnl You should have received copies of the GNU General Public License and the
30 1.1 mrg dnl GNU Lesser General Public License along with the GNU MP Library. If not,
31 1.1 mrg dnl see https://www.gnu.org/licenses/.
32 1.1 mrg
33 1.1 mrg include(`../config.m4')
34 1.1 mrg
35 1.1 mrg C cycles/limb
36 1.1 mrg C UltraSPARC T3: 18
37 1.1 mrg C UltraSPARC T4: 3.5
38 1.1 mrg
C INPUT PARAMETERS
C   up    address of the first operand, read 8 bytes (one limb) at a time
C   vp    address of the second operand, read the same way
C   n     number of limbs to compare.  Callers are expected to pass
C         n >= 1: with n = 0 the loop below would still load two limbs
C         from each operand.
C
C INTERNAL
C   pcnt  running total of differing bits, moved to %o0 on return
define(`up',   `%o0')
define(`vp',   `%o1')
define(`n',    `%o2')
define(`pcnt', `%o5')

C mpn_hamdist -- count the bit positions in which the n limbs at up and
C the n limbs at vp differ.  Each pair of limbs is xor-ed and the set
C bits of the result are counted with popc.
C
C The main loop handles two limbs per iteration; an odd trailing limb is
C handled at final_one.  Scratch registers: %g1, %g2, %g3, %o4.

ASM_START()
	REGISTER(%g2,#scratch)
	REGISTER(%g3,#scratch)
PROLOGUE(mpn_hamdist)
	subcc	n, 1, n			C n = count - 1, sets icc and xcc
C Test the 64-bit condition codes: n is a full 64-bit quantity (the loop
C control below uses brgz/brlz on the whole register), so a count whose
C low 32 bits equal 1 must not be mistaken for a count of exactly 1.
	be	%xcc, L(final_one)	C exactly one limb: skip the loop
	 clr	pcnt			C delay slot, always executed

C Loop invariant: on entry n = (limbs still to process) - 1 and n > 0,
C so at least two limbs remain.
L(top):
	ldx	[up + 0], %g1
	ldx	[vp + 0], %g2
	ldx	[up + 8], %o4
	ldx	[vp + 8], %g3
	sub	n, 2, n			C two limbs consumed
	xor	%g1, %g2, %g1		C differing bits of limb 0
	add	up, 16, up
	popc	%g1, %g2
	add	vp, 16, vp
	xor	%o4, %g3, %o4		C differing bits of limb 1
	add	pcnt, %g2, pcnt
	popc	%o4, %g3
	brgz	n, L(top)		C n > 0: two or more limbs remain
	 add	pcnt, %g3, pcnt		C delay slot, always executed

C Here n is -1 (nothing left) or 0 (exactly one limb left).
	brlz,pt	n, L(done)
	 nop

L(final_one):
	ldx	[up + 0], %g1
	ldx	[vp + 0], %g2
	xor	%g1, %g2, %g1
	popc	%g1, %g2
	add	pcnt, %g2, pcnt

L(done):
	retl
	 mov	pcnt, %o0		C delay slot: return the bit count
EPILOGUE()
79