1 1.7 mrg /* Copyright (C) 2012-2022 Free Software Foundation, Inc. 2 1.1 mrg 3 1.1 mrg This file is part of GCC. 4 1.1 mrg 5 1.1 mrg GCC is free software; you can redistribute it and/or modify it 6 1.1 mrg under the terms of the GNU General Public License as published by 7 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 8 1.1 mrg any later version. 9 1.1 mrg 10 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT 11 1.1 mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 12 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 13 1.1 mrg License for more details. 14 1.1 mrg 15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 16 1.1 mrg permissions described in the GCC Runtime Library Exception, version 17 1.1 mrg 3.1, as published by the Free Software Foundation. 18 1.1 mrg 19 1.1 mrg You should have received a copy of the GNU General Public License and 20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 1.1 mrg <http://www.gnu.org/licenses/>. */ 23 1.1 mrg 24 1.1 mrg /* This file must be kept in sync with newlib/libc/machine/visium/memcpy.c */ 25 1.1 mrg 26 1.1 mrg #include <stddef.h> 27 1.1 mrg #include "memcpy.h" 28 1.1 mrg 29 1.1 mrg #define INST_BARRIER __asm__ __volatile__ ("":::"memory"); 30 1.1 mrg 31 1.1 mrg #define MOVE_32_OBJECTS(in,out) \ 32 1.1 mrg do { \ 33 1.1 mrg INST_BARRIER \ 34 1.1 mrg m0 = in [0]; \ 35 1.1 mrg m1 = in [1]; \ 36 1.1 mrg m2 = in [2]; \ 37 1.1 mrg m3 = in [3]; \ 38 1.1 mrg out [0] = m0; \ 39 1.1 mrg out [1] = m1; \ 40 1.1 mrg out [2] = m2; \ 41 1.1 mrg out [3] = m3; \ 42 1.1 mrg INST_BARRIER \ 43 1.1 mrg m0 = in [4]; \ 44 1.1 mrg m1 = in [5]; \ 45 1.1 mrg m2 = in [6]; \ 46 1.1 mrg m3 = in [7]; \ 47 1.1 mrg out [4] = m0; \ 48 1.1 mrg out [5] = m1; \ 49 1.1 mrg out [6] = m2; \ 50 1.1 mrg out [7] = m3; \ 51 1.1 mrg INST_BARRIER \ 52 1.1 mrg m0 = in [8]; \ 53 1.1 mrg m1 = in [9]; \ 54 1.1 mrg m2 = in [10]; \ 55 1.1 mrg m3 = in [11]; \ 56 1.1 mrg out [8] = m0; \ 57 1.1 mrg out [9] = m1; \ 58 1.1 mrg out [10] = m2; \ 59 1.1 mrg out [11] = m3; \ 60 1.1 mrg INST_BARRIER \ 61 1.1 mrg m0 = in [12]; \ 62 1.1 mrg m1 = in [13]; \ 63 1.1 mrg m2 = in [14]; \ 64 1.1 mrg m3 = in [15]; \ 65 1.1 mrg out [12] = m0; \ 66 1.1 mrg out [13] = m1; \ 67 1.1 mrg out [14] = m2; \ 68 1.1 mrg out [15] = m3; \ 69 1.1 mrg INST_BARRIER \ 70 1.1 mrg m0 = in [16]; \ 71 1.1 mrg m1 = in [17]; \ 72 1.1 mrg m2 = in [18]; \ 73 1.1 mrg m3 = in [19]; \ 74 1.1 mrg out [16] = m0; \ 75 1.1 mrg out [17] = m1; \ 76 1.1 mrg out [18] = m2; \ 77 1.1 mrg out [19] = m3; \ 78 1.1 mrg INST_BARRIER \ 79 1.1 mrg m0 = in [20]; \ 80 1.1 mrg m1 = in [21]; \ 81 1.1 mrg m2 = in [22]; \ 82 1.1 mrg m3 = in [23]; \ 83 1.1 mrg out [20] = m0; \ 84 1.1 mrg out [21] = m1; \ 85 1.1 mrg out [22] = m2; \ 86 1.1 mrg out [23] = m3; \ 87 1.1 mrg INST_BARRIER \ 88 1.1 mrg m0 = in [24]; \ 89 1.1 mrg m1 = in [25]; \ 90 1.1 mrg m2 = in [26]; \ 91 1.1 mrg m3 = in [27]; \ 92 1.1 mrg out [24] = m0; \ 93 1.1 mrg out [25] = m1; \ 94 1.1 mrg out [26] = m2; \ 95 1.1 mrg out [27] = m3; \ 96 1.1 mrg INST_BARRIER \ 97 1.1 mrg m0 = in [28]; \ 98 1.1 mrg m1 = in [29]; \ 99 1.1 mrg m2 = in [30]; \ 100 1.1 mrg m3 = in [31]; \ 101 1.1 mrg out [28] = m0; \ 102 1.1 mrg out [29] = m1; \ 103 1.1 mrg out [30] = m2; \ 104 1.1 mrg out [31] = m3; \ 105 1.1 mrg INST_BARRIER \ 106 1.1 mrg in += 32; \ 107 1.1 mrg out += 32; \ 108 1.1 mrg } while(0) 109 1.1 mrg 110 1.1 mrg #define MOVE_16_OBJECTS(in,out) \ 111 1.1 mrg do { \ 112 1.1 mrg INST_BARRIER \ 113 1.1 mrg m0 = in [0]; \ 114 1.1 mrg m1 = in [1]; \ 115 1.1 mrg m2 = in [2]; \ 116 1.1 mrg m3 = in [3]; \ 117 1.1 mrg out [0] = m0; \ 118 1.1 mrg out [1] = m1; \ 119 1.1 mrg out [2] = m2; \ 120 1.1 mrg out [3] = m3; \ 121 1.1 mrg INST_BARRIER \ 122 1.1 mrg m0 = in [4]; \ 123 1.1 mrg m1 = in [5]; \ 124 1.1 mrg m2 = in [6]; \ 125 1.1 mrg m3 = in [7]; \ 126 1.1 mrg out [4] = m0; \ 127 1.1 mrg out [5] = m1; \ 128 1.1 mrg out [6] = m2; \ 129 1.1 mrg out [7] = m3; \ 130 1.1 mrg INST_BARRIER \ 131 1.1 mrg m0 = in [8]; \ 132 1.1 mrg m1 = in [9]; \ 133 1.1 mrg m2 = in [10]; \ 134 1.1 mrg m3 = in [11]; \ 135 1.1 mrg out [8] = m0; \ 136 1.1 mrg out [9] = m1; \ 137 1.1 mrg out [10] = m2; \ 138 1.1 mrg out [11] = m3; \ 139 1.1 mrg INST_BARRIER \ 140 1.1 mrg m0 = in [12]; \ 141 1.1 mrg m1 = in [13]; \ 142 1.1 mrg m2 = in [14]; \ 143 1.1 mrg m3 = in [15]; \ 144 1.1 mrg out [12] = m0; \ 145 1.1 mrg out [13] = m1; \ 146 1.1 mrg out [14] = m2; \ 147 1.1 mrg out [15] = m3; \ 148 1.1 mrg INST_BARRIER \ 149 1.1 mrg in += 16; \ 150 1.1 mrg out += 16; \ 151 1.1 mrg } while(0) 152 1.1 mrg 153 1.1 mrg #define MOVE_12_OBJECTS(in,out) \ 154 1.1 mrg do { \ 155 1.1 mrg INST_BARRIER \ 156 1.1 mrg m0 = in [0]; \ 157 1.1 mrg m1 = in [1]; \ 158 1.1 mrg m2 = in [2]; \ 159 1.1 mrg m3 = in [3]; \ 160 1.1 mrg out [0] = m0; \ 161 1.1 mrg out [1] = m1; \ 162 1.1 mrg out [2] = m2; \ 163 1.1 mrg out [3] = m3; \ 164 1.1 mrg INST_BARRIER \ 165 1.1 mrg m0 = in [4]; \ 166 1.1 mrg m1 = in [5]; \ 167 1.1 mrg m2 = in [6]; \ 168 1.1 mrg m3 = in [7]; \ 169 1.1 mrg out [4] = m0; \ 170 1.1 mrg out [5] = m1; \ 171 1.1 mrg out [6] = m2; \ 172 1.1 mrg out [7] = m3; \ 173 1.1 mrg INST_BARRIER \ 174 1.1 mrg m0 = in [8]; \ 175 1.1 mrg m1 = in [9]; \ 176 1.1 mrg m2 = in [10]; \ 177 1.1 mrg m3 = in [11]; \ 178 1.1 mrg out [8] = m0; \ 179 1.1 mrg out [9] = m1; \ 180 1.1 mrg out [10] = m2; \ 181 1.1 mrg out [11] = m3; \ 182 1.1 mrg INST_BARRIER \ 183 1.1 mrg in += 12; \ 184 1.1 mrg out += 12; \ 185 1.1 mrg } while(0) 186 1.1 mrg 187 1.1 mrg #define MOVE_11_OBJECTS(in,out) \ 188 1.1 mrg do { \ 189 1.1 mrg INST_BARRIER \ 190 1.1 mrg m0 = in [0]; \ 191 1.1 mrg m1 = in [1]; \ 192 1.1 mrg m2 = in [2]; \ 193 1.1 mrg m3 = in [3]; \ 194 1.1 mrg out [0] = m0; \ 195 1.1 mrg out [1] = m1; \ 196 1.1 mrg out [2] = m2; \ 197 1.1 mrg out [3] = m3; \ 198 1.1 mrg INST_BARRIER \ 199 1.1 mrg m0 = in [4]; \ 200 1.1 mrg m1 = in [5]; \ 201 1.1 mrg m2 = in [6]; \ 202 1.1 mrg m3 = in [7]; \ 203 1.1 mrg out [4] = m0; \ 204 1.1 mrg out [5] = m1; \ 205 1.1 mrg out [6] = m2; \ 206 1.1 mrg out [7] = m3; \ 207 1.1 mrg INST_BARRIER \ 208 1.1 mrg m0 = in [8]; \ 209 1.1 mrg m1 = in [9]; \ 210 1.1 mrg m2 = in [10]; \ 211 1.1 mrg out [8] = m0; \ 212 1.1 mrg out [9] = m1; \ 213 1.1 mrg out [10] = m2; \ 214 1.1 mrg INST_BARRIER \ 215 1.1 mrg in += 11; \ 216 1.1 mrg out += 11; \ 217 1.1 mrg } while(0) 218 1.1 mrg 219 1.1 mrg #define MOVE_10_OBJECTS(in,out) \ 220 1.1 mrg do { \ 221 1.1 mrg INST_BARRIER \ 222 1.1 mrg m0 = in [0]; \ 223 1.1 mrg m1 = in [1]; \ 224 1.1 mrg m2 = in [2]; \ 225 1.1 mrg m3 = in [3]; \ 226 1.1 mrg out [0] = m0; \ 227 1.1 mrg out [1] = m1; \ 228 1.1 mrg out [2] = m2; \ 229 1.1 mrg out [3] = m3; \ 230 1.1 mrg INST_BARRIER \ 231 1.1 mrg m0 = in [4]; \ 232 1.1 mrg m1 = in [5]; \ 233 1.1 mrg m2 = in [6]; \ 234 1.1 mrg m3 = in [7]; \ 235 1.1 mrg out [4] = m0; \ 236 1.1 mrg m0 = in [8]; \ 237 1.1 mrg out [5] = m1; \ 238 1.1 mrg m1 = in [9]; \ 239 1.1 mrg out [6] = m2; \ 240 1.1 mrg out [7] = m3; \ 241 1.1 mrg out [8] = m0; \ 242 1.1 mrg out [9] = m1; \ 243 1.1 mrg INST_BARRIER \ 244 1.1 mrg in += 10; \ 245 1.1 mrg out += 10; \ 246 1.1 mrg } while(0) 247 1.1 mrg 248 1.1 mrg #define MOVE_9_OBJECTS(in,out) \ 249 1.1 mrg do { \ 250 1.1 mrg INST_BARRIER \ 251 1.1 mrg m0 = in [0]; \ 252 1.1 mrg m1 = in [1]; \ 253 1.1 mrg m2 = in [2]; \ 254 1.1 mrg m3 = in [3]; \ 255 1.1 mrg out [0] = m0; \ 256 1.1 mrg out [1] = m1; \ 257 1.1 mrg out [2] = m2; \ 258 1.1 mrg out [3] = m3; \ 259 1.1 mrg INST_BARRIER \ 260 1.1 mrg m0 = in [4]; \ 261 1.1 mrg m1 = in [5]; \ 262 1.1 mrg m2 = in [6]; \ 263 1.1 mrg m3 = in [7]; \ 264 1.1 mrg out [4] = m0; \ 265 1.1 mrg out [5] = m1; \ 266 1.1 mrg out [6] = m2; \ 267 1.1 mrg out [7] = m3; \ 268 1.1 mrg INST_BARRIER \ 269 1.1 mrg m0 = in [8]; \ 270 1.1 mrg out [8] = m0; \ 271 1.1 mrg in += 9; \ 272 1.1 mrg out += 9; \ 273 1.1 mrg } while(0) 274 1.1 mrg 275 1.1 mrg #define MOVE_8_OBJECTS(in,out) \ 276 1.1 mrg do { \ 277 1.1 mrg INST_BARRIER \ 278 1.1 mrg m0 = in [0]; \ 279 1.1 mrg m1 = in [1]; \ 280 1.1 mrg m2 = in [2]; \ 281 1.1 mrg m3 = in [3]; \ 282 1.1 mrg out [0] = m0; \ 283 1.1 mrg out [1] = m1; \ 284 1.1 mrg out [2] = m2; \ 285 1.1 mrg out [3] = m3; \ 286 1.1 mrg INST_BARRIER \ 287 1.1 mrg m0 = in [4]; \ 288 1.1 mrg m1 = in [5]; \ 289 1.1 mrg m2 = in [6]; \ 290 1.1 mrg m3 = in [7]; \ 291 1.1 mrg out [4] = m0; \ 292 1.1 mrg out [5] = m1; \ 293 1.1 mrg out [6] = m2; \ 294 1.1 mrg out [7] = m3; \ 295 1.1 mrg INST_BARRIER \ 296 1.1 mrg in += 8; \ 297 1.1 mrg out += 8; \ 298 1.1 mrg } while(0) 299 1.1 mrg 300 1.1 mrg #define MOVE_7_OBJECTS(in,out) \ 301 1.1 mrg do { \ 302 1.1 mrg INST_BARRIER \ 303 1.1 mrg m0 = in [0]; \ 304 1.1 mrg m1 = in [1]; \ 305 1.1 mrg m2 = in [2]; \ 306 1.1 mrg m3 = in [3]; \ 307 1.1 mrg out [0] = m0; \ 308 1.1 mrg out [1] = m1; \ 309 1.1 mrg out [2] = m2; \ 310 1.1 mrg out [3] = m3; \ 311 1.1 mrg INST_BARRIER \ 312 1.1 mrg m0 = in [4]; \ 313 1.1 mrg m1 = in [5]; \ 314 1.1 mrg m2 = in [6]; \ 315 1.1 mrg out [4] = m0; \ 316 1.1 mrg out [5] = m1; \ 317 1.1 mrg out [6] = m2; \ 318 1.1 mrg INST_BARRIER \ 319 1.1 mrg in += 7; \ 320 1.1 mrg out += 7; \ 321 1.1 mrg } while(0) 322 1.1 mrg 323 1.1 mrg #define MOVE_6_OBJECTS(in,out) \ 324 1.1 mrg do { \ 325 1.1 mrg INST_BARRIER \ 326 1.1 mrg m0 = in [0]; \ 327 1.1 mrg m1 = in [1]; \ 328 1.1 mrg m2 = in [2]; \ 329 1.1 mrg m3 = in [3]; \ 330 1.1 mrg out [0] = m0; \ 331 1.1 mrg INST_BARRIER \ 332 1.1 mrg m0 = in [4]; \ 333 1.1 mrg out [1] = m1; \ 334 1.1 mrg INST_BARRIER \ 335 1.1 mrg m1 = in [5]; \ 336 1.1 mrg out [2] = m2; \ 337 1.1 mrg out [3] = m3; \ 338 1.1 mrg out [4] = m0; \ 339 1.1 mrg out [5] = m1; \ 340 1.1 mrg INST_BARRIER \ 341 1.1 mrg in += 6; \ 342 1.1 mrg out += 6; \ 343 1.1 mrg } while(0) 344 1.1 mrg 345 1.1 mrg #define MOVE_5_OBJECTS(in,out) \ 346 1.1 mrg do { \ 347 1.1 mrg INST_BARRIER \ 348 1.1 mrg m0 = in [0]; \ 349 1.1 mrg m1 = in [1]; \ 350 1.1 mrg m2 = in [2]; \ 351 1.1 mrg m3 = in [3]; \ 352 1.1 mrg INST_BARRIER \ 353 1.1 mrg out [0] = m0; \ 354 1.1 mrg m0 = in [4]; \ 355 1.1 mrg INST_BARRIER \ 356 1.1 mrg out [1] = m1; \ 357 1.1 mrg out [2] = m2; \ 358 1.1 mrg out [3] = m3; \ 359 1.1 mrg out [4] = m0; \ 360 1.1 mrg INST_BARRIER \ 361 1.1 mrg in += 5; \ 362 1.1 mrg out += 5; \ 363 1.1 mrg } while(0) 364 1.1 mrg 365 1.1 mrg #define MOVE_4_OBJECTS(in,out) \ 366 1.1 mrg do { \ 367 1.1 mrg INST_BARRIER \ 368 1.1 mrg m0 = in [0]; \ 369 1.1 mrg m1 = in [1]; \ 370 1.1 mrg m2 = in [2]; \ 371 1.1 mrg m3 = in [3]; \ 372 1.1 mrg out [0] = m0; \ 373 1.1 mrg out [1] = m1; \ 374 1.1 mrg out [2] = m2; \ 375 1.1 mrg out [3] = m3; \ 376 1.1 mrg INST_BARRIER \ 377 1.1 mrg in += 4; \ 378 1.1 mrg out += 4; \ 379 1.1 mrg } while(0) 380 1.1 mrg 381 1.1 mrg #define MOVE_3_OBJECTS(in,out) \ 382 1.1 mrg do { \ 383 1.1 mrg INST_BARRIER \ 384 1.1 mrg m0 = in [0]; \ 385 1.1 mrg m1 = in [1]; \ 386 1.1 mrg m2 = in [2]; \ 387 1.1 mrg out [0] = m0; \ 388 1.1 mrg out [1] = m1; \ 389 1.1 mrg out [2] = m2; \ 390 1.1 mrg INST_BARRIER \ 391 1.1 mrg in += 3; \ 392 1.1 mrg out += 3; \ 393 1.1 mrg } while(0) 394 1.1 mrg 395 1.1 mrg #define MOVE_2_OBJECTS(in,out) \ 396 1.1 mrg do { \ 397 1.1 mrg INST_BARRIER \ 398 1.1 mrg m0 = in [0]; \ 399 1.1 mrg m1 = in [1]; \ 400 1.1 mrg out [0] = m0; \ 401 1.1 mrg out [1] = m1; \ 402 1.1 mrg INST_BARRIER \ 403 1.1 mrg in += 2; \ 404 1.1 mrg out += 2; \ 405 1.1 mrg } while(0) 406 1.1 mrg 407 1.1 mrg #define MOVE_1_OBJECT(in,out) \ 408 1.1 mrg do { \ 409 1.1 mrg INST_BARRIER \ 410 1.1 mrg m0 = in [0]; \ 411 1.1 mrg out [0] = m0; \ 412 1.1 mrg INST_BARRIER \ 413 1.1 mrg in += 1; \ 414 1.1 mrg out += 1; \ 415 1.1 mrg } while(0) 416 1.1 mrg 417 1.1 mrg 418 1.1 mrg static inline void 419 1.1 mrg __int_memcpy (void *__restrict s1, const void *__restrict s2, size_t n) 420 1.1 mrg { 421 1.1 mrg int value = n; 422 1.1 mrg int loop_var; 423 1.1 mrg const int *in = s2; 424 1.1 mrg int *out = s1; 425 1.1 mrg int count; 426 1.1 mrg int m0,m1,m2,m3; 427 1.1 mrg 428 1.1 mrg /* This code currently give a stall for any value with a 1->2 in the low 5 429 1.1 mrg bits, i.e. 1,2, 33,34 ? not acceptable! */ 430 1.1 mrg switch (value & 0x1f) 431 1.1 mrg { 432 1.1 mrg case 0: 433 1.1 mrg break; 434 1.1 mrg case 1: 435 1.1 mrg MOVE_1_OBJECT (in, out); 436 1.1 mrg break; 437 1.1 mrg case 2: 438 1.1 mrg MOVE_2_OBJECTS (in, out); 439 1.1 mrg break; 440 1.1 mrg case 3: 441 1.1 mrg MOVE_3_OBJECTS (in, out); 442 1.1 mrg break; 443 1.1 mrg case 4: 444 1.1 mrg MOVE_4_OBJECTS (in, out); 445 1.1 mrg break; 446 1.1 mrg case 5: 447 1.1 mrg MOVE_5_OBJECTS (in, out); 448 1.1 mrg break; 449 1.1 mrg case 6: 450 1.1 mrg MOVE_6_OBJECTS (in, out); 451 1.1 mrg break; 452 1.1 mrg case 7: 453 1.1 mrg MOVE_7_OBJECTS (in, out); 454 1.1 mrg break; 455 1.1 mrg case 8: 456 1.1 mrg MOVE_8_OBJECTS (in, out); 457 1.1 mrg break; 458 1.1 mrg case 9: 459 1.1 mrg MOVE_9_OBJECTS (in, out); 460 1.1 mrg break; 461 1.1 mrg case 10: 462 1.1 mrg MOVE_10_OBJECTS (in, out); 463 1.1 mrg break; 464 1.1 mrg case 11: 465 1.1 mrg MOVE_11_OBJECTS (in, out); 466 1.1 mrg break; 467 1.1 mrg case 12: 468 1.1 mrg MOVE_12_OBJECTS (in, out); 469 1.1 mrg break; 470 1.1 mrg case 13: 471 1.1 mrg MOVE_9_OBJECTS (in, out); 472 1.1 mrg MOVE_4_OBJECTS (in, out); 473 1.1 mrg break; 474 1.1 mrg case 14: 475 1.1 mrg MOVE_12_OBJECTS (in, out); 476 1.1 mrg MOVE_2_OBJECTS (in, out); 477 1.1 mrg break; 478 1.1 mrg case 15: 479 1.1 mrg MOVE_11_OBJECTS (in, out); 480 1.1 mrg MOVE_4_OBJECTS (in, out); 481 1.1 mrg break; 482 1.1 mrg case 16: 483 1.1 mrg MOVE_16_OBJECTS (in, out); 484 1.1 mrg break; 485 1.1 mrg case 17: 486 1.1 mrg MOVE_11_OBJECTS (in, out); 487 1.1 mrg MOVE_6_OBJECTS (in, out); 488 1.1 mrg break; 489 1.1 mrg case 18: 490 1.1 mrg MOVE_9_OBJECTS (in, out); 491 1.1 mrg MOVE_9_OBJECTS (in, out); 492 1.1 mrg break; 493 1.1 mrg case 19: 494 1.1 mrg MOVE_16_OBJECTS (in, out); 495 1.1 mrg MOVE_3_OBJECTS (in, out); 496 1.1 mrg break; 497 1.1 mrg case 20: 498 1.1 mrg MOVE_16_OBJECTS (in, out); 499 1.1 mrg MOVE_4_OBJECTS (in, out); 500 1.1 mrg break; 501 1.1 mrg case 21: 502 1.1 mrg MOVE_16_OBJECTS (in, out); 503 1.1 mrg MOVE_5_OBJECTS (in, out); 504 1.1 mrg break; 505 1.1 mrg case 22: 506 1.1 mrg MOVE_16_OBJECTS (in, out); 507 1.1 mrg MOVE_6_OBJECTS (in, out); 508 1.1 mrg break; 509 1.1 mrg case 23: 510 1.1 mrg MOVE_16_OBJECTS (in, out); 511 1.1 mrg MOVE_7_OBJECTS (in, out); 512 1.1 mrg break; 513 1.1 mrg case 24: 514 1.1 mrg MOVE_16_OBJECTS (in, out); 515 1.1 mrg MOVE_8_OBJECTS (in, out); 516 1.1 mrg break; 517 1.1 mrg case 25: 518 1.1 mrg MOVE_16_OBJECTS (in, out); 519 1.1 mrg MOVE_9_OBJECTS (in, out); 520 1.1 mrg break; 521 1.1 mrg case 26: 522 1.1 mrg MOVE_16_OBJECTS (in, out); 523 1.1 mrg MOVE_10_OBJECTS (in, out); 524 1.1 mrg break; 525 1.1 mrg case 27: 526 1.1 mrg MOVE_16_OBJECTS (in, out); 527 1.1 mrg MOVE_11_OBJECTS (in, out); 528 1.1 mrg break; 529 1.1 mrg case 28: 530 1.1 mrg MOVE_16_OBJECTS (in, out); 531 1.1 mrg MOVE_8_OBJECTS (in, out); 532 1.1 mrg MOVE_4_OBJECTS (in, out); 533 1.1 mrg break; 534 1.1 mrg case 29: 535 1.1 mrg MOVE_16_OBJECTS (in, out); 536 1.1 mrg MOVE_9_OBJECTS (in, out); 537 1.1 mrg MOVE_4_OBJECTS (in, out); 538 1.1 mrg break; 539 1.1 mrg case 30: 540 1.1 mrg MOVE_16_OBJECTS (in, out); 541 1.1 mrg MOVE_12_OBJECTS (in, out); 542 1.1 mrg MOVE_2_OBJECTS (in, out); 543 1.1 mrg break; 544 1.1 mrg case 31: 545 1.1 mrg MOVE_16_OBJECTS (in, out); 546 1.1 mrg MOVE_11_OBJECTS (in, out); 547 1.1 mrg MOVE_4_OBJECTS (in, out); 548 1.1 mrg break; 549 1.1 mrg } 550 1.1 mrg 551 1.1 mrg /* This loop governs the asmptoptic behaviour of this algorithm, for long 552 1.1 mrg word copies. */ 553 1.1 mrg count = value >> 5; 554 1.1 mrg for (loop_var = 0; loop_var < count; loop_var++) 555 1.1 mrg MOVE_32_OBJECTS (in, out); 556 1.1 mrg } 557 1.1 mrg 558 1.1 mrg static inline void 559 1.1 mrg __shrt_int_memcpy (void *__restrict s1, const void *__restrict s2, size_t n) 560 1.1 mrg { 561 1.1 mrg int value = n; 562 1.1 mrg int loop_var; 563 1.1 mrg const short int *in = s2; 564 1.1 mrg int short *out = s1; 565 1.1 mrg int count; 566 1.1 mrg int m0,m1,m2,m3; 567 1.1 mrg 568 1.1 mrg /* This code currently give a stall for any value with a 1->2 in the low 5 569 1.1 mrg bits, i.e. 1,2, 33,34 ? not acceptable! */ 570 1.1 mrg switch (value & 0x1f) 571 1.1 mrg { 572 1.1 mrg case 0: 573 1.1 mrg break; 574 1.1 mrg case 1: 575 1.1 mrg MOVE_1_OBJECT (in, out); 576 1.1 mrg break; 577 1.1 mrg case 2: 578 1.1 mrg MOVE_2_OBJECTS (in, out); 579 1.1 mrg break; 580 1.1 mrg case 3: 581 1.1 mrg MOVE_3_OBJECTS (in, out); 582 1.1 mrg break; 583 1.1 mrg case 4: 584 1.1 mrg MOVE_4_OBJECTS (in, out); 585 1.1 mrg break; 586 1.1 mrg case 5: 587 1.1 mrg MOVE_5_OBJECTS (in, out); 588 1.1 mrg break; 589 1.1 mrg case 6: 590 1.1 mrg MOVE_6_OBJECTS (in, out); 591 1.1 mrg break; 592 1.1 mrg case 7: 593 1.1 mrg MOVE_7_OBJECTS (in, out); 594 1.1 mrg break; 595 1.1 mrg case 8: 596 1.1 mrg MOVE_8_OBJECTS (in, out); 597 1.1 mrg break; 598 1.1 mrg case 9: 599 1.1 mrg MOVE_9_OBJECTS (in, out); 600 1.1 mrg break; 601 1.1 mrg case 10: 602 1.1 mrg MOVE_10_OBJECTS (in, out); 603 1.1 mrg break; 604 1.1 mrg case 11: 605 1.1 mrg MOVE_11_OBJECTS (in, out); 606 1.1 mrg break; 607 1.1 mrg case 12: 608 1.1 mrg MOVE_12_OBJECTS (in, out); 609 1.1 mrg break; 610 1.1 mrg case 13: 611 1.1 mrg MOVE_9_OBJECTS (in, out); 612 1.1 mrg MOVE_4_OBJECTS (in, out); 613 1.1 mrg break; 614 1.1 mrg case 14: 615 1.1 mrg MOVE_12_OBJECTS (in, out); 616 1.1 mrg MOVE_2_OBJECTS (in, out); 617 1.1 mrg break; 618 1.1 mrg case 15: 619 1.1 mrg MOVE_11_OBJECTS (in, out); 620 1.1 mrg MOVE_4_OBJECTS (in, out); 621 1.1 mrg break; 622 1.1 mrg case 16: 623 1.1 mrg MOVE_16_OBJECTS (in, out); 624 1.1 mrg break; 625 1.1 mrg case 17: 626 1.1 mrg MOVE_11_OBJECTS (in, out); 627 1.1 mrg MOVE_6_OBJECTS (in, out); 628 1.1 mrg break; 629 1.1 mrg case 18: 630 1.1 mrg MOVE_9_OBJECTS (in, out); 631 1.1 mrg MOVE_9_OBJECTS (in, out); 632 1.1 mrg break; 633 1.1 mrg case 19: 634 1.1 mrg MOVE_16_OBJECTS (in, out); 635 1.1 mrg MOVE_3_OBJECTS (in, out); 636 1.1 mrg break; 637 1.1 mrg case 20: 638 1.1 mrg MOVE_16_OBJECTS (in, out); 639 1.1 mrg MOVE_4_OBJECTS (in, out); 640 1.1 mrg break; 641 1.1 mrg case 21: 642 1.1 mrg MOVE_16_OBJECTS (in, out); 643 1.1 mrg MOVE_5_OBJECTS (in, out); 644 1.1 mrg break; 645 1.1 mrg case 22: 646 1.1 mrg MOVE_16_OBJECTS (in, out); 647 1.1 mrg MOVE_6_OBJECTS (in, out); 648 1.1 mrg break; 649 1.1 mrg case 23: 650 1.1 mrg MOVE_16_OBJECTS (in, out); 651 1.1 mrg MOVE_7_OBJECTS (in, out); 652 1.1 mrg break; 653 1.1 mrg case 24: 654 1.1 mrg MOVE_16_OBJECTS (in, out); 655 1.1 mrg MOVE_8_OBJECTS (in, out); 656 1.1 mrg break; 657 1.1 mrg case 25: 658 1.1 mrg MOVE_16_OBJECTS (in, out); 659 1.1 mrg MOVE_9_OBJECTS (in, out); 660 1.1 mrg break; 661 1.1 mrg case 26: 662 1.1 mrg MOVE_16_OBJECTS (in, out); 663 1.1 mrg MOVE_10_OBJECTS (in, out); 664 1.1 mrg break; 665 1.1 mrg case 27: 666 1.1 mrg MOVE_16_OBJECTS (in, out); 667 1.1 mrg MOVE_11_OBJECTS (in, out); 668 1.1 mrg break; 669 1.1 mrg case 28: 670 1.1 mrg MOVE_16_OBJECTS (in, out); 671 1.1 mrg MOVE_8_OBJECTS (in, out); 672 1.1 mrg MOVE_4_OBJECTS (in, out); 673 1.1 mrg break; 674 1.1 mrg case 29: 675 1.1 mrg MOVE_16_OBJECTS (in, out); 676 1.1 mrg MOVE_9_OBJECTS (in, out); 677 1.1 mrg MOVE_4_OBJECTS (in, out); 678 1.1 mrg break; 679 1.1 mrg case 30: 680 1.1 mrg MOVE_16_OBJECTS (in, out); 681 1.1 mrg MOVE_12_OBJECTS (in, out); 682 1.1 mrg MOVE_2_OBJECTS (in, out); 683 1.1 mrg break; 684 1.1 mrg case 31: 685 1.1 mrg MOVE_16_OBJECTS (in, out); 686 1.1 mrg MOVE_11_OBJECTS (in, out); 687 1.1 mrg MOVE_4_OBJECTS (in, out); 688 1.1 mrg break; 689 1.1 mrg } 690 1.1 mrg 691 1.1 mrg /* This loop governs the asmptoptic behaviour of this algorithm, for long 692 1.1 mrg word copies. */ 693 1.1 mrg count = value >> 5; 694 1.1 mrg for (loop_var = 0; loop_var < count; loop_var++) 695 1.1 mrg MOVE_32_OBJECTS (in, out); 696 1.1 mrg } 697 1.1 mrg 698 1.1 mrg 699 1.1 mrg static inline void 700 1.1 mrg __byte_memcpy (void *__restrict s1, const void *__restrict s2, size_t n) 701 1.1 mrg { 702 1.1 mrg int value = n; 703 1.1 mrg int loop_var; 704 1.1 mrg const char *in = s2; 705 1.1 mrg char *out = s1; 706 1.1 mrg int count; 707 1.1 mrg int m0,m1,m2,m3; 708 1.1 mrg 709 1.1 mrg /* This code currently give a stall for any value with a 1->2 in the low 5 710 1.1 mrg bits, i.e. 1,2, 33,34 ? not acceptable! */ 711 1.1 mrg switch (value & 0x1f) 712 1.1 mrg { 713 1.1 mrg case 0: 714 1.1 mrg break; 715 1.1 mrg case 1: 716 1.1 mrg MOVE_1_OBJECT (in, out); 717 1.1 mrg break; 718 1.1 mrg case 2: 719 1.1 mrg MOVE_2_OBJECTS (in, out); 720 1.1 mrg break; 721 1.1 mrg case 3: 722 1.1 mrg MOVE_3_OBJECTS (in, out); 723 1.1 mrg break; 724 1.1 mrg case 4: 725 1.1 mrg MOVE_4_OBJECTS (in, out); 726 1.1 mrg break; 727 1.1 mrg case 5: 728 1.1 mrg MOVE_5_OBJECTS (in, out); 729 1.1 mrg break; 730 1.1 mrg case 6: 731 1.1 mrg MOVE_6_OBJECTS (in, out); 732 1.1 mrg break; 733 1.1 mrg case 7: 734 1.1 mrg MOVE_7_OBJECTS (in, out); 735 1.1 mrg break; 736 1.1 mrg case 8: 737 1.1 mrg MOVE_8_OBJECTS (in, out); 738 1.1 mrg break; 739 1.1 mrg case 9: 740 1.1 mrg MOVE_9_OBJECTS (in, out); 741 1.1 mrg break; 742 1.1 mrg case 10: 743 1.1 mrg MOVE_10_OBJECTS (in, out); 744 1.1 mrg break; 745 1.1 mrg case 11: 746 1.1 mrg MOVE_11_OBJECTS (in, out); 747 1.1 mrg break; 748 1.1 mrg case 12: 749 1.1 mrg MOVE_12_OBJECTS (in, out); 750 1.1 mrg break; 751 1.1 mrg case 13: 752 1.1 mrg MOVE_9_OBJECTS (in, out); 753 1.1 mrg MOVE_4_OBJECTS (in, out); 754 1.1 mrg break; 755 1.1 mrg case 14: 756 1.1 mrg MOVE_12_OBJECTS (in, out); 757 1.1 mrg MOVE_2_OBJECTS (in, out); 758 1.1 mrg break; 759 1.1 mrg case 15: 760 1.1 mrg MOVE_11_OBJECTS (in, out); 761 1.1 mrg MOVE_4_OBJECTS (in, out); 762 1.1 mrg break; 763 1.1 mrg case 16: 764 1.1 mrg MOVE_16_OBJECTS (in, out); 765 1.1 mrg break; 766 1.1 mrg case 17: 767 1.1 mrg MOVE_11_OBJECTS (in, out); 768 1.1 mrg MOVE_6_OBJECTS (in, out); 769 1.1 mrg break; 770 1.1 mrg case 18: 771 1.1 mrg MOVE_9_OBJECTS (in, out); 772 1.1 mrg MOVE_9_OBJECTS (in, out); 773 1.1 mrg break; 774 1.1 mrg case 19: 775 1.1 mrg MOVE_16_OBJECTS (in, out); 776 1.1 mrg MOVE_3_OBJECTS (in, out); 777 1.1 mrg break; 778 1.1 mrg case 20: 779 1.1 mrg MOVE_16_OBJECTS (in, out); 780 1.1 mrg MOVE_4_OBJECTS (in, out); 781 1.1 mrg break; 782 1.1 mrg case 21: 783 1.1 mrg MOVE_16_OBJECTS (in, out); 784 1.1 mrg MOVE_5_OBJECTS (in, out); 785 1.1 mrg break; 786 1.1 mrg case 22: 787 1.1 mrg MOVE_16_OBJECTS (in, out); 788 1.1 mrg MOVE_6_OBJECTS (in, out); 789 1.1 mrg break; 790 1.1 mrg case 23: 791 1.1 mrg MOVE_16_OBJECTS (in, out); 792 1.1 mrg MOVE_7_OBJECTS (in, out); 793 1.1 mrg break; 794 1.1 mrg case 24: 795 1.1 mrg MOVE_16_OBJECTS (in, out); 796 1.1 mrg MOVE_8_OBJECTS (in, out); 797 1.1 mrg break; 798 1.1 mrg case 25: 799 1.1 mrg MOVE_16_OBJECTS (in, out); 800 1.1 mrg MOVE_9_OBJECTS (in, out); 801 1.1 mrg break; 802 1.1 mrg case 26: 803 1.1 mrg MOVE_16_OBJECTS (in, out); 804 1.1 mrg MOVE_10_OBJECTS (in, out); 805 1.1 mrg break; 806 1.1 mrg case 27: 807 1.1 mrg MOVE_16_OBJECTS (in, out); 808 1.1 mrg MOVE_11_OBJECTS (in, out); 809 1.1 mrg break; 810 1.1 mrg case 28: 811 1.1 mrg MOVE_16_OBJECTS (in, out); 812 1.1 mrg MOVE_8_OBJECTS (in, out); 813 1.1 mrg MOVE_4_OBJECTS (in, out); 814 1.1 mrg break; 815 1.1 mrg case 29: 816 1.1 mrg MOVE_16_OBJECTS (in, out); 817 1.1 mrg MOVE_9_OBJECTS (in, out); 818 1.1 mrg MOVE_4_OBJECTS (in, out); 819 1.1 mrg break; 820 1.1 mrg case 30: 821 1.1 mrg MOVE_16_OBJECTS (in, out); 822 1.1 mrg MOVE_12_OBJECTS (in, out); 823 1.1 mrg MOVE_2_OBJECTS (in, out); 824 1.1 mrg break; 825 1.1 mrg case 31: 826 1.1 mrg MOVE_16_OBJECTS (in, out); 827 1.1 mrg MOVE_11_OBJECTS (in, out); 828 1.1 mrg MOVE_4_OBJECTS (in, out); 829 1.1 mrg break; 830 1.1 mrg } 831 1.1 mrg 832 1.1 mrg /* This loop governs the asmptoptic behaviour of this algorithm, for long 833 1.1 mrg word copies. */ 834 1.1 mrg count = value >> 5; 835 1.1 mrg for (loop_var = 0; loop_var < count; loop_var++) 836 1.1 mrg MOVE_32_OBJECTS (in, out); 837 1.1 mrg } 838 1.1 mrg 839 1.1 mrg 840 1.1 mrg /* Exposed interface. */ 841 1.1 mrg 842 1.1 mrg #ifndef __VISIUM_ARCH_BMI__ 843 1.1 mrg 844 1.1 mrg void 845 1.1 mrg __long_int_memcpy (void *__restrict s1, const void *__restrict s2, size_t n) 846 1.1 mrg { 847 1.1 mrg __int_memcpy (s1, s2, n); 848 1.1 mrg } 849 1.1 mrg 850 1.1 mrg #endif /* !__VISIUM_ARCH_BMI__ */ 851 1.1 mrg 852 1.1 mrg void 853 1.1 mrg __wrd_memcpy (void *__restrict s1, const void *__restrict s2, size_t n) 854 1.1 mrg { 855 1.1 mrg __shrt_int_memcpy (s1, s2, n); 856 1.1 mrg } 857 1.1 mrg 858 1.1 mrg void 859 1.1 mrg __byt_memcpy (void *__restrict s1, const void *__restrict s2, size_t n) 860 1.1 mrg { 861 1.1 mrg __byte_memcpy (s1, s2, n); 862 1.1 mrg } 863