bzero.S revision 1.4 1 /* $NetBSD: bzero.S,v 1.4 2008/04/28 20:22:57 martin Exp $ */
2
3 /*-
4 * Copyright (c) 2006 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Ross Harvey.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <machine/asm.h>
33
34 .file "bzero.c"
/*
 * bzero and memset for 64-bit PowerPC (uses cmpldi, rldicl, sldi, std).
 *
 * Presumably called as bzero(dst, len) and memset(dst, c, len) with the
 * arguments in r3, r4, r5 in that order -- the code below is consistent
 * with that, but the C prototypes are not visible here; confirm against
 * the ABI and the libc headers.
 *
 * bzero has no body of its own: it copies r4 (its length) into r5, loads
 * 0 into r4 as the fill value, and runs on into memset.  This relies on
 * ENTRY (from <machine/asm.h>, not visible here) emitting no instructions
 * of its own at that point -- TODO confirm.
 *
 * Register use in memset:
 *	r3	destination; never written, so unchanged at every blr
 *	r4	fill value; only the low 8 bits are used
 *	r5	byte count; decremented only by the doubleword loops
 *	r0	value being stored (one byte, or that byte replicated x8)
 *	r9	byte-loop store pointer; scratch elsewhere
 *	r10	doubleword store pointer
 *	r11	scratch while replicating the fill byte
 *	cr0, cr7 and ctr are also written.
 *
 * Paths:
 *	len <= 7, or dst not 8-byte aligned:  one stb per byte for all of len.
 *	otherwise:  32 bytes per iteration (4 x std) while at least 32 remain,
 *	then 8 bytes per iteration while at least 8 remain, then the byte
 *	loop for the final 0..7 bytes.
 *
 * Numeric local labels are reused: "2" is defined three times, and each
 * 2f/2b reference binds to the nearest definition in that direction.
 */
35 ENTRY(bzero)
36 mr 5, 4 # r5 = r4: length moves to where memset expects it
37 li 4, 0 # r4 = 0: the fill value memset will use
38 ENTRY(memset)
39 cmpldi 7, 5, 7 # cr7 = len vs 7 (unsigned); only optimize if len >= 8
40 mr 9, 3 # r9 = dst, store pointer for the byte loop
41 ble 7, 2f # len <= 7: go to the byte path
42 rldicl. 0, 3, 0, 61 # r0 = dst & 7, result recorded in cr0
43 beq 0, 4f # dst is 8-byte aligned: go to the doubleword path
44 2: # byte path: len <= 7 or dst unaligned
45 cmpdi 7, 5, 0
46 beqlr 7 # len == 0: return without storing
47 3: # byte loop setup; also entered from 8: with r9 = tail pointer, r5 = 1..7
48 mtctr 5 # ctr = number of bytes to store
49 rldicl 0, 4, 0, 56 # r0 = fill value & 0xff
50 .p2align 4,, 15 # align the loop top to 16 bytes
51 2:
52 stb 0, 0(9) # byte-at-a-time loop
53 addi 9, 9, 1
54 bdnz 2b # decrement ctr, loop while it is nonzero
55 blr # done; r3 still holds dst
56 4: # doubleword path: dst aligned and len >= 8
57 rldicl 4, 4, 0, 56 # r4 = fill value & 0xff; start building the 8-byte pattern
58 cmpldi 7, 5, 31 # cr7 = len vs 31, consumed by the ble below
59 mr 10, 3 # r10 = dst, store pointer for the std loops
60 sldi 0, 4, 8
61 or 0, 0, 4 # r0 = fill byte in the low 2 bytes
62 sldi 9, 0, 16
63 or 0, 0, 9 # r0 = fill byte in the low 4 bytes
64 sldi 11, 0, 32
65 or 0, 0, 11 # r0 = fill byte in all 8 bytes
66 ble 7, 5f # len <= 31: skip the 32-byte loop
67 addi 9, 5, -32
68 srdi 9, 9, 5
69 addi 9, 9, 1 # r9 = ((len - 32) >> 5) + 1 = number of whole 32-byte chunks
70 mtctr 9
71 .p2align 4,, 15
72 2:
73 std 0, 0(10) # 32 bytes per iteration: four 8-byte stores
74 std 0, 8(10)
75 addi 5, 5, -32 # schedule count decrement
76 std 0, 16(10)
77 std 0, 24(10)
78 addi 10, 10, 32
79 bdnz 2b
80 cmpldi 7, 5, 7
81 ble 7, 8f # fewer than 8 bytes left: skip the 8-byte loop
82 5: # r5 is 8..31 here
83 addi 9, 5, -8
84 srdi 9, 9, 3
85 addi 9, 9, 1 # r9 = ((r5 - 8) >> 3) + 1 = number of whole doublewords left
86 mtctr 9
87 .p2align 4,, 15
88 14:
89 std 0, 0(10) # one 8-byte store per iteration (not unrolled)
90 addi 5, 5, -8
91 addi 10, 10, 8
92 bdnz 14b
93 8: # r5 is 0..7 here
94 cmpdi 7, 5, 0
95 mr 9, 10 # r9 = first byte not yet stored
96 bne 7, 3b # 1..7 bytes left: finish in the byte loop
97 blr # nothing left; r3 still holds dst
98 .long 0 # NOTE(review): data after the final blr, presumably a traceback table - confirm
99 .byte 0, 0, 0, 0, 0, 0, 0, 0
100 .size .memset, .-.memset # size of .memset runs from its label to here, data words included
101