/*
 * Copyright (c) 2012
 *      MIPS Technologies, Inc., California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "pixman-mips-dspr2-asm.h"

/*
 * This routine could be optimized for MIPS64. The current code only
 * uses MIPS32 instructions.
 */

/*
 * Endian-neutral names for the unaligned word access instructions:
 * "HI" is the instruction used at the lowest byte address of the word,
 * "LO" the one used at the highest byte address (offset +3).
 */
#ifdef EB
#  define LWHI  lwl     /* high part is left in big-endian */
#  define SWHI  swl     /* high part is left in big-endian */
#  define LWLO  lwr     /* low part is right in big-endian */
#  define SWLO  swr     /* low part is right in big-endian */
#else
#  define LWHI  lwr     /* high part is right in little-endian */
#  define SWHI  swr     /* high part is right in little-endian */
#  define LWLO  lwl     /* low part is left in little-endian */
#  define SWLO  swl     /* low part is left in little-endian */
#endif

/*
 * pixman_mips_fast_memcpy
 *
 * In:    a0 = dst, a1 = src, a2 = byte count
 * Out:   v0 = the dst pointer as passed in
 * Uses:  AT, v1, a3, t0-t9 as scratch; a0, a1 and a2 are modified.
 *
 * Paths:
 *   - count < 8: plain byte loop ($last8).
 *   - (a0 ^ a1) & 3 == 0: dst is first brought to a word boundary, then
 *     data moves with lw/sw in 64-byte, 32-byte and 4-byte steps, and the
 *     tail is finished by the byte loop.
 *   - otherwise ($unaligned): same step structure, but dst is word-aligned
 *     while src is not, so loads are LWHI/LWLO pairs and stores are sw.
 *
 * The count is tested with signed instructions (slti/blez), so a count
 * with the top bit set takes the "< 8" path and copies nothing.
 *
 * NOTE(review): the instruction written right after each branch is relied
 * on as its branch delay slot.  That presumes LEAF_MIPS32R2 (defined in
 * pixman-mips-dspr2-asm.h, not visible here) selects ".set noreorder" --
 * confirm before reordering anything in this file.
 */
LEAF_MIPS32R2(pixman_mips_fast_memcpy)

    slti    AT, a2, 8
    bne     AT, zero, $last8
    move    v0, a0                  /* memcpy returns the dst pointer */

/* Test if the src and dst are word-aligned, or can be made word-aligned */
    xor     t8, a1, a0
    andi    t8, t8, 0x3             /* t8 is a0/a1 word-displacement */

    bne     t8, zero, $unaligned
    negu    a3, a0

    andi    a3, a3, 0x3             /* we need to copy a3 bytes to make a0/a1 aligned */
    beq     a3, zero, $chk16w       /* when a3=0 then the dst (a0) is word-aligned */
    subu    a2, a2, a3              /* now a2 is the remaining bytes count */

    LWHI    t8, 0(a1)
    addu    a1, a1, a3
    SWHI    t8, 0(a0)
    addu    a0, a0, a3

/* Now the dst/src are mutually word-aligned with word-aligned addresses */
$chk16w:
    andi    t8, a2, 0x3f            /* any whole 64-byte chunks? */
                                    /* t8 is the byte count after 64-byte chunks */

    beq     a2, t8, $chk8w          /* if a2==t8, no 64-byte chunks */
                                    /* There will be at most 1 32-byte chunk after it */
    subu    a3, a2, t8              /* subtract from a2 the remainder */
                                    /* Here a3 counts bytes in 16w chunks */
    addu    a3, a0, a3              /* Now a3 is the final dst after 64-byte chunks */

    addu    t0, a0, a2              /* t0 is the "past the end" address */

/*
 * When in the loop we exercise "pref 30, x(a0)", the a0+x should not be past
 * the "t0-32" address
 * This means: for x=128 the last "safe" a0 address is "t0-160"
 * Alternatively, for x=64 the last "safe" a0 address is "t0-96"
 * In the current version we use "pref 30, 128(a0)", so "t0-160" is the limit
 */
    subu    t9, t0, 160             /* t9 is the "last safe pref 30, 128(a0)" address */

    pref    0, 0(a1)                /* bring the first line of src, addr 0 */
    pref    0, 32(a1)               /* bring the second line of src, addr 32 */
    pref    0, 64(a1)               /* bring the third line of src, addr 64 */
    pref    30, 32(a0)              /* safe, as we have at least 64 bytes ahead */
/* In case the a0 > t9 don't use "pref 30" at all */
    sgtu    v1, a0, t9
    bgtz    v1, $loop16w            /* skip "pref 30, 64(a0)" for too short arrays */
    nop
/* otherwise, start with using pref30 */
    pref    30, 64(a0)
$loop16w:
    pref    0, 96(a1)
    lw      t0, 0(a1)
    bgtz    v1, $skip_pref30_96     /* skip "pref 30, 96(a0)" */
    lw      t1, 4(a1)
    pref    30, 96(a0)              /* continue setting up the dest, addr 96 */
$skip_pref30_96:
    lw      t2, 8(a1)
    lw      t3, 12(a1)
    lw      t4, 16(a1)
    lw      t5, 20(a1)
    lw      t6, 24(a1)
    lw      t7, 28(a1)
    pref    0, 128(a1)              /* bring the next lines of src, addr 128 */

    sw      t0, 0(a0)
    sw      t1, 4(a0)
    sw      t2, 8(a0)
    sw      t3, 12(a0)
    sw      t4, 16(a0)
    sw      t5, 20(a0)
    sw      t6, 24(a0)
    sw      t7, 28(a0)

    lw      t0, 32(a1)
    bgtz    v1, $skip_pref30_128    /* skip "pref 30, 128(a0)" */
    lw      t1, 36(a1)
    pref    30, 128(a0)             /* continue setting up the dest, addr 128 */
$skip_pref30_128:
    lw      t2, 40(a1)
    lw      t3, 44(a1)
    lw      t4, 48(a1)
    lw      t5, 52(a1)
    lw      t6, 56(a1)
    lw      t7, 60(a1)
    pref    0, 160(a1)              /* bring the next lines of src, addr 160 */

    sw      t0, 32(a0)
    sw      t1, 36(a0)
    sw      t2, 40(a0)
    sw      t3, 44(a0)
    sw      t4, 48(a0)
    sw      t5, 52(a0)
    sw      t6, 56(a0)
    sw      t7, 60(a0)

    addiu   a0, a0, 64              /* adding 64 to dest */
    sgtu    v1, a0, t9              /* refresh the "too close to the end for pref 30" flag */
    bne     a0, a3, $loop16w
    addiu   a1, a1, 64              /* adding 64 to src */
    move    a2, t8                  /* a2 = bytes left after the 64-byte chunks */

/* Here we have src and dest word-aligned but less than 64-bytes to go */

$chk8w:
    pref    0, 0x0(a1)
    andi    t8, a2, 0x1f            /* is there a 32-byte chunk? */
                                    /* the t8 is the remainder count past 32-bytes */
    beq     a2, t8, $chk1w          /* when a2=t8, no 32-byte chunk */
    nop

    lw      t0, 0(a1)
    lw      t1, 4(a1)
    lw      t2, 8(a1)
    lw      t3, 12(a1)
    lw      t4, 16(a1)
    lw      t5, 20(a1)
    lw      t6, 24(a1)
    lw      t7, 28(a1)
    addiu   a1, a1, 32

    sw      t0, 0(a0)
    sw      t1, 4(a0)
    sw      t2, 8(a0)
    sw      t3, 12(a0)
    sw      t4, 16(a0)
    sw      t5, 20(a0)
    sw      t6, 24(a0)
    sw      t7, 28(a0)
    addiu   a0, a0, 32

$chk1w:
    andi    a2, t8, 0x3             /* now a2 is the remainder past 1w chunks */
    beq     a2, t8, $last8
    subu    a3, t8, a2              /* a3 is count of bytes in 1w chunks */
    addu    a3, a0, a3              /* now a3 is the dst address past the 1w chunks */

/* copying in words (4-byte chunks) */
$wordCopy_loop:
    lw      t3, 0(a1)               /* the first t3 may be equal t0 ... optimize? */
    addiu   a1, a1, 4
    addiu   a0, a0, 4
    bne     a0, a3, $wordCopy_loop
    sw      t3, -4(a0)              /* a0 was already advanced, hence the -4 */

/* For the last (<8) bytes */
$last8:
    blez    a2, leave
    addu    a3, a0, a2              /* a3 is the last dst address */
$last8loop:
    lb      v1, 0(a1)
    addiu   a1, a1, 1
    addiu   a0, a0, 1
    bne     a0, a3, $last8loop
    sb      v1, -1(a0)              /* a0 was already advanced, hence the -1 */

leave:
    j       ra
    nop

/*
 * UNALIGNED case
 */

$unaligned:
    /* got here with a3="negu a0" */
    andi    a3, a3, 0x3             /* test if the a0 is word aligned */
    beqz    a3, $ua_chk16w
    subu    a2, a2, a3              /* bytes left after initial a3 bytes */

    LWHI    v1, 0(a1)
    LWLO    v1, 3(a1)
    addu    a1, a1, a3              /* a3 may be here 1, 2 or 3 */
    SWHI    v1, 0(a0)
    addu    a0, a0, a3              /* below the dst will be word aligned (NOTE1) */

$ua_chk16w:
    andi    t8, a2, 0x3f            /* any whole 64-byte chunks? */
                                    /* t8 is the byte count after 64-byte chunks */
    beq     a2, t8, $ua_chk8w       /* if a2==t8, no 64-byte chunks */
                                    /* There will be at most 1 32-byte chunk after it */
    subu    a3, a2, t8              /* subtract from a2 the remainder */
                                    /* Here a3 counts bytes in 16w chunks */
    addu    a3, a0, a3              /* Now a3 is the final dst after 64-byte chunks */

    addu    t0, a0, a2              /* t0 is the "past the end" address */

    subu    t9, t0, 160             /* t9 is the "last safe pref 30, 128(a0)" address */

    pref    0, 0(a1)                /* bring the first line of src, addr 0 */
    pref    0, 32(a1)               /* bring the second line of src, addr 32 */
    pref    0, 64(a1)               /* bring the third line of src, addr 64 */
    pref    30, 32(a0)              /* safe, as we have at least 64 bytes ahead */
/* In case the a0 > t9 don't use "pref 30" at all */
    sgtu    v1, a0, t9
    bgtz    v1, $ua_loop16w         /* skip "pref 30, 64(a0)" for too short arrays */
    nop
/* otherwise, start with using pref30 */
    pref    30, 64(a0)
$ua_loop16w:
    pref    0, 96(a1)
    LWHI    t0, 0(a1)
    LWLO    t0, 3(a1)
    LWHI    t1, 4(a1)
    bgtz    v1, $ua_skip_pref30_96  /* skip "pref 30, 96(a0)" */
    LWLO    t1, 7(a1)
    pref    30, 96(a0)              /* continue setting up the dest, addr 96 */
$ua_skip_pref30_96:
    LWHI    t2, 8(a1)
    LWLO    t2, 11(a1)
    LWHI    t3, 12(a1)
    LWLO    t3, 15(a1)
    LWHI    t4, 16(a1)
    LWLO    t4, 19(a1)
    LWHI    t5, 20(a1)
    LWLO    t5, 23(a1)
    LWHI    t6, 24(a1)
    LWLO    t6, 27(a1)
    LWHI    t7, 28(a1)
    LWLO    t7, 31(a1)
    pref    0, 128(a1)              /* bring the next lines of src, addr 128 */

    sw      t0, 0(a0)
    sw      t1, 4(a0)
    sw      t2, 8(a0)
    sw      t3, 12(a0)
    sw      t4, 16(a0)
    sw      t5, 20(a0)
    sw      t6, 24(a0)
    sw      t7, 28(a0)

    LWHI    t0, 32(a1)
    LWLO    t0, 35(a1)
    LWHI    t1, 36(a1)
    bgtz    v1, $ua_skip_pref30_128 /* skip "pref 30, 128(a0)" */
    LWLO    t1, 39(a1)
    pref    30, 128(a0)             /* continue setting up the dest, addr 128 */
$ua_skip_pref30_128:
    LWHI    t2, 40(a1)
    LWLO    t2, 43(a1)
    LWHI    t3, 44(a1)
    LWLO    t3, 47(a1)
    LWHI    t4, 48(a1)
    LWLO    t4, 51(a1)
    LWHI    t5, 52(a1)
    LWLO    t5, 55(a1)
    LWHI    t6, 56(a1)
    LWLO    t6, 59(a1)
    LWHI    t7, 60(a1)
    LWLO    t7, 63(a1)
    pref    0, 160(a1)              /* bring the next lines of src, addr 160 */

    sw      t0, 32(a0)
    sw      t1, 36(a0)
    sw      t2, 40(a0)
    sw      t3, 44(a0)
    sw      t4, 48(a0)
    sw      t5, 52(a0)
    sw      t6, 56(a0)
    sw      t7, 60(a0)

    addiu   a0, a0, 64              /* adding 64 to dest */
    sgtu    v1, a0, t9              /* refresh the "too close to the end for pref 30" flag */
    bne     a0, a3, $ua_loop16w
    addiu   a1, a1, 64              /* adding 64 to src */
    move    a2, t8                  /* a2 = bytes left after the 64-byte chunks */

/* Here the dest is word-aligned (the src is not) and less than 64 bytes remain */

$ua_chk8w:
    pref    0, 0x0(a1)
    andi    t8, a2, 0x1f            /* is there a 32-byte chunk? */
                                    /* the t8 is the remainder count */
    beq     a2, t8, $ua_chk1w       /* when a2=t8, no 32-byte chunk */
    /*
     * No nop follows the branch above: the LWHI below is the next
     * instruction, so (assuming noreorder, see the function header) it
     * also executes when the branch is taken.  t0 is not read on the
     * $ua_chk1w path, so the extra load has no effect on the result.
     */
    LWHI    t0, 0(a1)
    LWLO    t0, 3(a1)
    LWHI    t1, 4(a1)
    LWLO    t1, 7(a1)
    LWHI    t2, 8(a1)
    LWLO    t2, 11(a1)
    LWHI    t3, 12(a1)
    LWLO    t3, 15(a1)
    LWHI    t4, 16(a1)
    LWLO    t4, 19(a1)
    LWHI    t5, 20(a1)
    LWLO    t5, 23(a1)
    LWHI    t6, 24(a1)
    LWLO    t6, 27(a1)
    LWHI    t7, 28(a1)
    LWLO    t7, 31(a1)
    addiu   a1, a1, 32

    sw      t0, 0(a0)
    sw      t1, 4(a0)
    sw      t2, 8(a0)
    sw      t3, 12(a0)
    sw      t4, 16(a0)
    sw      t5, 20(a0)
    sw      t6, 24(a0)
    sw      t7, 28(a0)
    addiu   a0, a0, 32

$ua_chk1w:
    andi    a2, t8, 0x3             /* now a2 is the remainder past 1w chunks */
    beq     a2, t8, $ua_smallCopy
    subu    a3, t8, a2              /* a3 is count of bytes in 1w chunks */
    addu    a3, a0, a3              /* now a3 is the dst address past the 1w chunks */

/* copying in words (4-byte chunks) */
$ua_wordCopy_loop:
    LWHI    v1, 0(a1)
    LWLO    v1, 3(a1)
    addiu   a1, a1, 4
    addiu   a0, a0, 4               /* note: dst=a0 is word aligned here, see NOTE1 */
    bne     a0, a3, $ua_wordCopy_loop
    sw      v1, -4(a0)              /* a0 was already advanced, hence the -4 */

/* Now less than 4 bytes (value in a2) left to copy */
$ua_smallCopy:
    beqz    a2, leave
    addu    a3, a0, a2              /* a3 is the last dst address */
$ua_smallCopy_loop:
    lb      v1, 0(a1)
    addiu   a1, a1, 1
    addiu   a0, a0, 1
    bne     a0, a3, $ua_smallCopy_loop
    sb      v1, -1(a0)              /* a0 was already advanced, hence the -1 */

    j       ra
    nop

END(pixman_mips_fast_memcpy)