/*	$NetBSD: bcopy_page.S,v 1.10 2013/12/17 01:27:21 joerg Exp $	*/

/*
 * Copyright (c) 1995 Scott Stevens
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Scott Stevens.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * RiscBSD kernel project
 *
 * bcopy_page.S
 *
 * page optimised bcopy and bzero routines
 *
 * Created      : 08/04/95
 */

#include <machine/asm.h>

#include "assym.h"

#ifndef __XSCALE__

/* #define BIG_LOOPS */

/*
 * bcopy_page(src, dest)
 *
 * Optimised copy page routine.
 *
 * On entry:
 *   r0 - src address
 *   r1 - dest address
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS),
 *   128 otherwise.
 *
 * Register usage inside the loop: r3-r8, ip and lr carry one 32-byte
 * chunk per COPY_CHUNK; r2 is the remaining-iteration counter.
 */

#define	CHUNK_SIZE	32		/* bytes moved by one COPY_CHUNK */

/* Hooks for CPU-specific prefetching; no-ops in the generic version. */
#define	PREFETCH_FIRST_CHUNK	/* nothing */
#define	PREFETCH_NEXT_CHUNK	/* nothing */

#ifndef COPY_CHUNK
/* Move one 32-byte chunk: eight words loaded then stored in a burst. */
#define	COPY_CHUNK \
	PREFETCH_NEXT_CHUNK ; \
	ldmia	r0!, {r3-r8,ip,lr} ; \
	stmia	r1!, {r3-r8,ip,lr}
#endif /* ! COPY_CHUNK */

#ifndef SAVE_REGS
/* r4-r8 are callee-saved; popping lr straight into pc returns. */
#define	SAVE_REGS	push {r4-r8, lr}
#define	RESTORE_REGS	pop {r4-r8, pc}
#endif

ENTRY(bcopy_page)
	PREFETCH_FIRST_CHUNK
	SAVE_REGS
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)	/* each pass moves 512 bytes */
#else
	mov	r2, #(PAGE_SIZE >> 7)	/* each pass moves 128 bytes */
#endif

1:
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK

#ifdef BIG_LOOPS
	/*
	 * There is little point making the loop any larger; unless we are
	 * running with the cache off, the load/store overheads will
	 * completely dominate this loop.
	 */
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK

	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK

	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
#endif
	subs	r2, r2, #1
	bne	1b

	RESTORE_REGS		/* ...and return. */
END(bcopy_page)

/*
 * bzero_page(dest)
 *
 * Optimised zero page routine.
 *
 * On entry:
 *   r0 - dest address
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS),
 *   128 otherwise
 *
 * Zeroes are staged in r3-r8, ip and lr so each stmia clears 32 bytes.
 */

ENTRY(bzero_page)
	push	{r4-r8, lr}
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)	/* each pass clears 512 bytes */
#else
	mov	r2, #(PAGE_SIZE >> 7)	/* each pass clears 128 bytes */
#endif
	mov	r3, #0
	mov	r4, #0
	mov	r5, #0
	mov	r6, #0
	mov	r7, #0
	mov	r8, #0
	mov	ip, #0
	mov	lr, #0

1:
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

#ifdef BIG_LOOPS
	/*
	 * There is little point making the loop any larger; unless we are
	 * running with the cache off, the load/store overheads will
	 * completely dominate this loop.
	 */
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

#endif

	subs	r2, r2, #1
	bne	1b

	pop	{r4-r8, pc}
END(bzero_page)

#else	/* __XSCALE__ */

/*
 * XSCALE version of bcopy_page
 *
 * Software-pipelined: loads and doubleword stores are interleaved so a
 * pair of words is always in flight, and pld keeps the source one cache
 * line ahead.  ip counts 32 iterations of 128 bytes (4 KB page).
 * NOTE(review): strd requires 8-byte-aligned addresses on XScale —
 * presumably guaranteed since both arguments are page addresses.
 */
ENTRY(bcopy_page)
	pld	[r0]
	push	{r4, r5}
	mov	ip, #32
	ldr	r2, [r0], #0x04		/* 0x00 */
	ldr	r3, [r0], #0x04		/* 0x04 */
1:	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldr	r4, [r0], #0x04		/* 0x08 */
	ldr	r5, [r0], #0x04		/* 0x0c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x10 */
	ldr	r3, [r0], #0x04		/* 0x14 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x18 */
	ldr	r5, [r0], #0x04		/* 0x1c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x20 */
	ldr	r3, [r0], #0x04		/* 0x24 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x28 */
	ldr	r5, [r0], #0x04		/* 0x2c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x30 */
	ldr	r3, [r0], #0x04		/* 0x34 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x38 */
	ldr	r5, [r0], #0x04		/* 0x3c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x40 */
	ldr	r3, [r0], #0x04		/* 0x44 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x48 */
	ldr	r5, [r0], #0x04		/* 0x4c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x50 */
	ldr	r3, [r0], #0x04		/* 0x54 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x58 */
	ldr	r5, [r0], #0x04		/* 0x5c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x60 */
	ldr	r3, [r0], #0x04		/* 0x64 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x68 */
	ldr	r5, [r0], #0x04		/* 0x6c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x70 */
	ldr	r3, [r0], #0x04		/* 0x74 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x78 */
	ldr	r5, [r0], #0x04		/* 0x7c */
	strd	r2, r3, [r1], #0x08
	subs	ip, ip, #0x01
	ldrgt	r2, [r0], #0x04		/* 0x80 */
	ldrgt	r3, [r0], #0x04		/* 0x84 */
	strd	r4, r5, [r1], #0x08
	bgt	1b
	pop	{r4, r5}
	RET
END(bcopy_page)

/*
 * XSCALE version of bzero_page
 *
 * Sixteen doubleword stores clear 128 bytes per pass; r1 counts bytes
 * remaining down from PAGE_SIZE.
 */
ENTRY(bzero_page)
	mov	r1, #PAGE_SIZE
	mov	r2, #0
	mov	r3, #0
1:	strd	r2, r3, [r0], #8	/* 32 */
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8	/* 64 */
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8	/* 96 */
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8	/* 128 */
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	subs	r1, r1, #128
	bne	1b
	RET
END(bzero_page)
#endif	/* __XSCALE__ */