/* Copyright (C) 2012-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
23 1.1 mrg
/* This file must be kept in sync with newlib/libc/machine/visium/memcpy.c  */
25 1.1 mrg
26 1.1 mrg #include <stddef.h>
27 1.1 mrg #include "memcpy.h"
28 1.1 mrg
/* Compiler-level memory barrier.  The empty asm emits no instructions, but
   its "memory" clobber tells the compiler not to move memory accesses from
   one side of it to the other.  The expansion already ends in a semicolon,
   so the macro is written without one at its point of use.  */
#define INST_BARRIER __asm__ __volatile__ ("":::"memory");
30 1.1 mrg
/* Copy 32 consecutive objects from IN to OUT as eight groups of four loads
   followed by four stores, with INST_BARRIER before each group and after
   the last, then advance both pointers by 32.  IN and OUT are evaluated
   many times and incremented, so they must be plain pointer variables;
   the scratch variables m0..m3 must be declared by the user.  */
#define MOVE_32_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  m0 = in [4]; \
  m1 = in [5]; \
  m2 = in [6]; \
  m3 = in [7]; \
  out [4] = m0; \
  out [5] = m1; \
  out [6] = m2; \
  out [7] = m3; \
  INST_BARRIER \
  m0 = in [8]; \
  m1 = in [9]; \
  m2 = in [10]; \
  m3 = in [11]; \
  out [8] = m0; \
  out [9] = m1; \
  out [10] = m2; \
  out [11] = m3; \
  INST_BARRIER \
  m0 = in [12]; \
  m1 = in [13]; \
  m2 = in [14]; \
  m3 = in [15]; \
  out [12] = m0; \
  out [13] = m1; \
  out [14] = m2; \
  out [15] = m3; \
  INST_BARRIER \
  m0 = in [16]; \
  m1 = in [17]; \
  m2 = in [18]; \
  m3 = in [19]; \
  out [16] = m0; \
  out [17] = m1; \
  out [18] = m2; \
  out [19] = m3; \
  INST_BARRIER \
  m0 = in [20]; \
  m1 = in [21]; \
  m2 = in [22]; \
  m3 = in [23]; \
  out [20] = m0; \
  out [21] = m1; \
  out [22] = m2; \
  out [23] = m3; \
  INST_BARRIER \
  m0 = in [24]; \
  m1 = in [25]; \
  m2 = in [26]; \
  m3 = in [27]; \
  out [24] = m0; \
  out [25] = m1; \
  out [26] = m2; \
  out [27] = m3; \
  INST_BARRIER \
  m0 = in [28]; \
  m1 = in [29]; \
  m2 = in [30]; \
  m3 = in [31]; \
  out [28] = m0; \
  out [29] = m1; \
  out [30] = m2; \
  out [31] = m3; \
  INST_BARRIER \
  in += 32; \
  out += 32; \
} while(0)
109 1.1 mrg
/* Copy 16 consecutive objects from IN to OUT as four barrier-separated
   groups of four loads followed by four stores, then advance both pointers
   by 16.  IN and OUT must be plain pointer variables (they are evaluated
   repeatedly and incremented); m0..m3 must be declared by the user.  */
#define MOVE_16_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  m0 = in [4]; \
  m1 = in [5]; \
  m2 = in [6]; \
  m3 = in [7]; \
  out [4] = m0; \
  out [5] = m1; \
  out [6] = m2; \
  out [7] = m3; \
  INST_BARRIER \
  m0 = in [8]; \
  m1 = in [9]; \
  m2 = in [10]; \
  m3 = in [11]; \
  out [8] = m0; \
  out [9] = m1; \
  out [10] = m2; \
  out [11] = m3; \
  INST_BARRIER \
  m0 = in [12]; \
  m1 = in [13]; \
  m2 = in [14]; \
  m3 = in [15]; \
  out [12] = m0; \
  out [13] = m1; \
  out [14] = m2; \
  out [15] = m3; \
  INST_BARRIER \
  in += 16; \
  out += 16; \
} while(0)
152 1.1 mrg
/* Copy 12 consecutive objects from IN to OUT as three barrier-separated
   groups of four loads followed by four stores, then advance both pointers
   by 12.  IN and OUT must be plain pointer variables; m0..m3 must be
   declared by the user.  */
#define MOVE_12_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  m0 = in [4]; \
  m1 = in [5]; \
  m2 = in [6]; \
  m3 = in [7]; \
  out [4] = m0; \
  out [5] = m1; \
  out [6] = m2; \
  out [7] = m3; \
  INST_BARRIER \
  m0 = in [8]; \
  m1 = in [9]; \
  m2 = in [10]; \
  m3 = in [11]; \
  out [8] = m0; \
  out [9] = m1; \
  out [10] = m2; \
  out [11] = m3; \
  INST_BARRIER \
  in += 12; \
  out += 12; \
} while(0)
186 1.1 mrg
/* Copy 11 consecutive objects from IN to OUT as two groups of four and a
   final group of three, each group fenced by INST_BARRIER, then advance
   both pointers by 11.  IN and OUT must be plain pointer variables;
   m0..m3 must be declared by the user.  */
#define MOVE_11_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  m0 = in [4]; \
  m1 = in [5]; \
  m2 = in [6]; \
  m3 = in [7]; \
  out [4] = m0; \
  out [5] = m1; \
  out [6] = m2; \
  out [7] = m3; \
  INST_BARRIER \
  m0 = in [8]; \
  m1 = in [9]; \
  m2 = in [10]; \
  out [8] = m0; \
  out [9] = m1; \
  out [10] = m2; \
  INST_BARRIER \
  in += 11; \
  out += 11; \
} while(0)
218 1.1 mrg
/* Copy 10 consecutive objects from IN to OUT, then advance both pointers
   by 10.  The first four objects form an ordinary load/store group; in the
   second group the loads of objects 8 and 9 are interleaved with the
   stores of objects 4..7, reusing m0 and m1 once their previous values
   have been stored.  The statement order is deliberate and must be kept.
   IN and OUT must be plain pointer variables; m0..m3 must be declared by
   the user.  */
#define MOVE_10_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  m0 = in [4]; \
  m1 = in [5]; \
  m2 = in [6]; \
  m3 = in [7]; \
  out [4] = m0; \
  m0 = in [8]; \
  out [5] = m1; \
  m1 = in [9]; \
  out [6] = m2; \
  out [7] = m3; \
  out [8] = m0; \
  out [9] = m1; \
  INST_BARRIER \
  in += 10; \
  out += 10; \
} while(0)
247 1.1 mrg
/* Copy 9 consecutive objects from IN to OUT as two groups of four and one
   single object, then advance both pointers by 9.  Unlike its siblings,
   this sequence has no INST_BARRIER after the final store.  IN and OUT
   must be plain pointer variables; m0..m3 must be declared by the user.  */
#define MOVE_9_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  m0 = in [4]; \
  m1 = in [5]; \
  m2 = in [6]; \
  m3 = in [7]; \
  out [4] = m0; \
  out [5] = m1; \
  out [6] = m2; \
  out [7] = m3; \
  INST_BARRIER \
  m0 = in [8]; \
  out [8] = m0; \
  in += 9; \
  out += 9; \
} while(0)
274 1.1 mrg
/* Copy 8 consecutive objects from IN to OUT as two barrier-separated
   groups of four loads followed by four stores, then advance both pointers
   by 8.  IN and OUT must be plain pointer variables; m0..m3 must be
   declared by the user.  */
#define MOVE_8_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  m0 = in [4]; \
  m1 = in [5]; \
  m2 = in [6]; \
  m3 = in [7]; \
  out [4] = m0; \
  out [5] = m1; \
  out [6] = m2; \
  out [7] = m3; \
  INST_BARRIER \
  in += 8; \
  out += 8; \
} while(0)
299 1.1 mrg
/* Copy 7 consecutive objects from IN to OUT as a group of four and a
   group of three, each fenced by INST_BARRIER, then advance both pointers
   by 7.  IN and OUT must be plain pointer variables; m0..m3 must be
   declared by the user.  */
#define MOVE_7_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  m0 = in [4]; \
  m1 = in [5]; \
  m2 = in [6]; \
  out [4] = m0; \
  out [5] = m1; \
  out [6] = m2; \
  INST_BARRIER \
  in += 7; \
  out += 7; \
} while(0)
322 1.1 mrg
/* Copy 6 consecutive objects from IN to OUT, then advance both pointers
   by 6.  After the first four loads, the loads of objects 4 and 5 are
   interleaved with the stores of objects 0 and 1 (reusing m0 and m1 once
   their previous values have been stored), with barriers between the
   steps.  The statement order is deliberate and must be kept.  IN and OUT
   must be plain pointer variables; m0..m3 must be declared by the user.  */
#define MOVE_6_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  INST_BARRIER \
  m0 = in [4]; \
  out [1] = m1; \
  INST_BARRIER \
  m1 = in [5]; \
  out [2] = m2; \
  out [3] = m3; \
  out [4] = m0; \
  out [5] = m1; \
  INST_BARRIER \
  in += 6; \
  out += 6; \
} while(0)
344 1.1 mrg
/* Copy 5 consecutive objects from IN to OUT, then advance both pointers
   by 5.  Four objects are loaded first; object 0 is stored and m0 is then
   reloaded with object 4 before the remaining stores, with barriers
   between the steps.  The statement order is deliberate and must be kept.
   IN and OUT must be plain pointer variables; m0..m3 must be declared by
   the user.  */
#define MOVE_5_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  INST_BARRIER \
  out [0] = m0; \
  m0 = in [4]; \
  INST_BARRIER \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  out [4] = m0; \
  INST_BARRIER \
  in += 5; \
  out += 5; \
} while(0)
364 1.1 mrg
/* Copy 4 consecutive objects from IN to OUT (four loads, then four
   stores, fenced by INST_BARRIER) and advance both pointers by 4.  IN and
   OUT must be plain pointer variables; m0..m3 must be declared by the
   user.  */
#define MOVE_4_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  in += 4; \
  out += 4; \
} while(0)
380 1.1 mrg
/* Copy 3 consecutive objects from IN to OUT (three loads, then three
   stores, fenced by INST_BARRIER) and advance both pointers by 3.  IN and
   OUT must be plain pointer variables; m0..m2 must be declared by the
   user.  */
#define MOVE_3_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  INST_BARRIER \
  in += 3; \
  out += 3; \
} while(0)
394 1.1 mrg
/* Copy 2 consecutive objects from IN to OUT (two loads, then two stores,
   fenced by INST_BARRIER) and advance both pointers by 2.  IN and OUT must
   be plain pointer variables; m0 and m1 must be declared by the user.  */
#define MOVE_2_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  out [0] = m0; \
  out [1] = m1; \
  INST_BARRIER \
  in += 2; \
  out += 2; \
} while(0)
406 1.1 mrg
/* Copy a single object from IN to OUT through m0, fenced by INST_BARRIER,
   and advance both pointers by 1.  IN and OUT must be plain pointer
   variables; m0 must be declared by the user.  */
#define MOVE_1_OBJECT(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  out [0] = m0; \
  INST_BARRIER \
  in += 1; \
  out += 1; \
} while(0)
416 1.1 mrg
417 1.1 mrg
/* Copy N objects of type int from S2 to S1.  N is an object count, not a
   byte count.  The regions are __restrict-qualified, so they must not
   overlap.

   The N % 32 leading objects are copied first by the unrolled sequence
   selected from the low five bits of N; the remaining N / 32 groups are
   then copied 32 objects at a time.

   N is kept as a size_t throughout.  Narrowing it to int would turn large
   counts into negative or truncated values and silently skip part of the
   copy.  */

static inline void
__int_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
{
  const int *in = s2;
  int *out = s1;
  size_t count;
  int m0, m1, m2, m3;	/* Scratch values used by the MOVE_* macros.  */

  /* FIXME (original author's note): this code currently gives a stall for
     any value with a 1->2 in the low 5 bits, i.e. 1, 2, 33, 34.  */
  switch (n & 0x1f)	/* All 32 possible values are handled.  */
    {
    case 0: break;
    case 1: MOVE_1_OBJECT (in, out); break;
    case 2: MOVE_2_OBJECTS (in, out); break;
    case 3: MOVE_3_OBJECTS (in, out); break;
    case 4: MOVE_4_OBJECTS (in, out); break;
    case 5: MOVE_5_OBJECTS (in, out); break;
    case 6: MOVE_6_OBJECTS (in, out); break;
    case 7: MOVE_7_OBJECTS (in, out); break;
    case 8: MOVE_8_OBJECTS (in, out); break;
    case 9: MOVE_9_OBJECTS (in, out); break;
    case 10: MOVE_10_OBJECTS (in, out); break;
    case 11: MOVE_11_OBJECTS (in, out); break;
    case 12: MOVE_12_OBJECTS (in, out); break;
    case 13: MOVE_9_OBJECTS (in, out); MOVE_4_OBJECTS (in, out); break;
    case 14: MOVE_12_OBJECTS (in, out); MOVE_2_OBJECTS (in, out); break;
    case 15: MOVE_11_OBJECTS (in, out); MOVE_4_OBJECTS (in, out); break;
    case 16: MOVE_16_OBJECTS (in, out); break;
    case 17: MOVE_11_OBJECTS (in, out); MOVE_6_OBJECTS (in, out); break;
    case 18: MOVE_9_OBJECTS (in, out); MOVE_9_OBJECTS (in, out); break;
    case 19: MOVE_16_OBJECTS (in, out); MOVE_3_OBJECTS (in, out); break;
    case 20: MOVE_16_OBJECTS (in, out); MOVE_4_OBJECTS (in, out); break;
    case 21: MOVE_16_OBJECTS (in, out); MOVE_5_OBJECTS (in, out); break;
    case 22: MOVE_16_OBJECTS (in, out); MOVE_6_OBJECTS (in, out); break;
    case 23: MOVE_16_OBJECTS (in, out); MOVE_7_OBJECTS (in, out); break;
    case 24: MOVE_16_OBJECTS (in, out); MOVE_8_OBJECTS (in, out); break;
    case 25: MOVE_16_OBJECTS (in, out); MOVE_9_OBJECTS (in, out); break;
    case 26: MOVE_16_OBJECTS (in, out); MOVE_10_OBJECTS (in, out); break;
    case 27: MOVE_16_OBJECTS (in, out); MOVE_11_OBJECTS (in, out); break;
    case 28:
      MOVE_16_OBJECTS (in, out); MOVE_8_OBJECTS (in, out);
      MOVE_4_OBJECTS (in, out);
      break;
    case 29:
      MOVE_16_OBJECTS (in, out); MOVE_9_OBJECTS (in, out);
      MOVE_4_OBJECTS (in, out);
      break;
    case 30:
      MOVE_16_OBJECTS (in, out); MOVE_12_OBJECTS (in, out);
      MOVE_2_OBJECTS (in, out);
      break;
    case 31:
      MOVE_16_OBJECTS (in, out); MOVE_11_OBJECTS (in, out);
      MOVE_4_OBJECTS (in, out);
      break;
    }

  /* This loop governs the asymptotic behaviour of this algorithm, for long
     word copies.  */
  for (count = n >> 5; count != 0; count--)
    {
      MOVE_32_OBJECTS (in, out);
    }
}
557 1.1 mrg
/* Copy N objects of type short int from S2 to S1.  N is an object count,
   not a byte count.  The regions are __restrict-qualified, so they must
   not overlap.

   The N % 32 leading objects are copied first by the unrolled sequence
   selected from the low five bits of N; the remaining N / 32 groups are
   then copied 32 objects at a time.

   N is kept as a size_t throughout.  Narrowing it to int would turn large
   counts into negative or truncated values and silently skip part of the
   copy.  */

static inline void
__shrt_int_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
{
  const short int *in = s2;
  short int *out = s1;
  size_t count;
  int m0, m1, m2, m3;	/* Scratch values used by the MOVE_* macros.  */

  /* FIXME (original author's note): this code currently gives a stall for
     any value with a 1->2 in the low 5 bits, i.e. 1, 2, 33, 34.  */
  switch (n & 0x1f)	/* All 32 possible values are handled.  */
    {
    case 0: break;
    case 1: MOVE_1_OBJECT (in, out); break;
    case 2: MOVE_2_OBJECTS (in, out); break;
    case 3: MOVE_3_OBJECTS (in, out); break;
    case 4: MOVE_4_OBJECTS (in, out); break;
    case 5: MOVE_5_OBJECTS (in, out); break;
    case 6: MOVE_6_OBJECTS (in, out); break;
    case 7: MOVE_7_OBJECTS (in, out); break;
    case 8: MOVE_8_OBJECTS (in, out); break;
    case 9: MOVE_9_OBJECTS (in, out); break;
    case 10: MOVE_10_OBJECTS (in, out); break;
    case 11: MOVE_11_OBJECTS (in, out); break;
    case 12: MOVE_12_OBJECTS (in, out); break;
    case 13: MOVE_9_OBJECTS (in, out); MOVE_4_OBJECTS (in, out); break;
    case 14: MOVE_12_OBJECTS (in, out); MOVE_2_OBJECTS (in, out); break;
    case 15: MOVE_11_OBJECTS (in, out); MOVE_4_OBJECTS (in, out); break;
    case 16: MOVE_16_OBJECTS (in, out); break;
    case 17: MOVE_11_OBJECTS (in, out); MOVE_6_OBJECTS (in, out); break;
    case 18: MOVE_9_OBJECTS (in, out); MOVE_9_OBJECTS (in, out); break;
    case 19: MOVE_16_OBJECTS (in, out); MOVE_3_OBJECTS (in, out); break;
    case 20: MOVE_16_OBJECTS (in, out); MOVE_4_OBJECTS (in, out); break;
    case 21: MOVE_16_OBJECTS (in, out); MOVE_5_OBJECTS (in, out); break;
    case 22: MOVE_16_OBJECTS (in, out); MOVE_6_OBJECTS (in, out); break;
    case 23: MOVE_16_OBJECTS (in, out); MOVE_7_OBJECTS (in, out); break;
    case 24: MOVE_16_OBJECTS (in, out); MOVE_8_OBJECTS (in, out); break;
    case 25: MOVE_16_OBJECTS (in, out); MOVE_9_OBJECTS (in, out); break;
    case 26: MOVE_16_OBJECTS (in, out); MOVE_10_OBJECTS (in, out); break;
    case 27: MOVE_16_OBJECTS (in, out); MOVE_11_OBJECTS (in, out); break;
    case 28:
      MOVE_16_OBJECTS (in, out); MOVE_8_OBJECTS (in, out);
      MOVE_4_OBJECTS (in, out);
      break;
    case 29:
      MOVE_16_OBJECTS (in, out); MOVE_9_OBJECTS (in, out);
      MOVE_4_OBJECTS (in, out);
      break;
    case 30:
      MOVE_16_OBJECTS (in, out); MOVE_12_OBJECTS (in, out);
      MOVE_2_OBJECTS (in, out);
      break;
    case 31:
      MOVE_16_OBJECTS (in, out); MOVE_11_OBJECTS (in, out);
      MOVE_4_OBJECTS (in, out);
      break;
    }

  /* This loop governs the asymptotic behaviour of this algorithm, for long
     word copies.  */
  for (count = n >> 5; count != 0; count--)
    {
      MOVE_32_OBJECTS (in, out);
    }
}
697 1.1 mrg
698 1.1 mrg
/* Copy N objects of type char (i.e. N bytes) from S2 to S1.  The regions
   are __restrict-qualified, so they must not overlap.

   The N % 32 leading bytes are copied first by the unrolled sequence
   selected from the low five bits of N; the remaining N / 32 groups are
   then copied 32 bytes at a time.

   N is kept as a size_t throughout.  Narrowing it to int would turn large
   counts into negative or truncated values and silently skip part of the
   copy.  */

static inline void
__byte_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
{
  const char *in = s2;
  char *out = s1;
  size_t count;
  int m0, m1, m2, m3;	/* Scratch values used by the MOVE_* macros.  */

  /* FIXME (original author's note): this code currently gives a stall for
     any value with a 1->2 in the low 5 bits, i.e. 1, 2, 33, 34.  */
  switch (n & 0x1f)	/* All 32 possible values are handled.  */
    {
    case 0: break;
    case 1: MOVE_1_OBJECT (in, out); break;
    case 2: MOVE_2_OBJECTS (in, out); break;
    case 3: MOVE_3_OBJECTS (in, out); break;
    case 4: MOVE_4_OBJECTS (in, out); break;
    case 5: MOVE_5_OBJECTS (in, out); break;
    case 6: MOVE_6_OBJECTS (in, out); break;
    case 7: MOVE_7_OBJECTS (in, out); break;
    case 8: MOVE_8_OBJECTS (in, out); break;
    case 9: MOVE_9_OBJECTS (in, out); break;
    case 10: MOVE_10_OBJECTS (in, out); break;
    case 11: MOVE_11_OBJECTS (in, out); break;
    case 12: MOVE_12_OBJECTS (in, out); break;
    case 13: MOVE_9_OBJECTS (in, out); MOVE_4_OBJECTS (in, out); break;
    case 14: MOVE_12_OBJECTS (in, out); MOVE_2_OBJECTS (in, out); break;
    case 15: MOVE_11_OBJECTS (in, out); MOVE_4_OBJECTS (in, out); break;
    case 16: MOVE_16_OBJECTS (in, out); break;
    case 17: MOVE_11_OBJECTS (in, out); MOVE_6_OBJECTS (in, out); break;
    case 18: MOVE_9_OBJECTS (in, out); MOVE_9_OBJECTS (in, out); break;
    case 19: MOVE_16_OBJECTS (in, out); MOVE_3_OBJECTS (in, out); break;
    case 20: MOVE_16_OBJECTS (in, out); MOVE_4_OBJECTS (in, out); break;
    case 21: MOVE_16_OBJECTS (in, out); MOVE_5_OBJECTS (in, out); break;
    case 22: MOVE_16_OBJECTS (in, out); MOVE_6_OBJECTS (in, out); break;
    case 23: MOVE_16_OBJECTS (in, out); MOVE_7_OBJECTS (in, out); break;
    case 24: MOVE_16_OBJECTS (in, out); MOVE_8_OBJECTS (in, out); break;
    case 25: MOVE_16_OBJECTS (in, out); MOVE_9_OBJECTS (in, out); break;
    case 26: MOVE_16_OBJECTS (in, out); MOVE_10_OBJECTS (in, out); break;
    case 27: MOVE_16_OBJECTS (in, out); MOVE_11_OBJECTS (in, out); break;
    case 28:
      MOVE_16_OBJECTS (in, out); MOVE_8_OBJECTS (in, out);
      MOVE_4_OBJECTS (in, out);
      break;
    case 29:
      MOVE_16_OBJECTS (in, out); MOVE_9_OBJECTS (in, out);
      MOVE_4_OBJECTS (in, out);
      break;
    case 30:
      MOVE_16_OBJECTS (in, out); MOVE_12_OBJECTS (in, out);
      MOVE_2_OBJECTS (in, out);
      break;
    case 31:
      MOVE_16_OBJECTS (in, out); MOVE_11_OBJECTS (in, out);
      MOVE_4_OBJECTS (in, out);
      break;
    }

  /* This loop governs the asymptotic behaviour of this algorithm, for long
     word copies.  */
  for (count = n >> 5; count != 0; count--)
    {
      MOVE_32_OBJECTS (in, out);
    }
}
838 1.1 mrg
839 1.1 mrg
/* Exposed interface.  */
841 1.1 mrg
#ifndef __VISIUM_ARCH_BMI__

/* Externally visible entry point: copy N objects of type int from S2 to
   S1 by calling __int_memcpy.  N is an object count, not a byte count,
   and the regions must not overlap.  Only compiled when
   __VISIUM_ARCH_BMI__ is not defined.  */

void
__long_int_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
{
  __int_memcpy (s1, s2, n);
}

#endif /* !__VISIUM_ARCH_BMI__ */
851 1.1 mrg
/* Externally visible entry point: copy N objects of type short int from
   S2 to S1 by calling __shrt_int_memcpy.  N is an object count, not a
   byte count, and the regions must not overlap.  */

void
__wrd_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
{
  __shrt_int_memcpy (s1, s2, n);
}
857 1.1 mrg
/* Externally visible entry point: copy N bytes from S2 to S1 by calling
   __byte_memcpy.  The regions must not overlap.  */

void
__byt_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
{
  __byte_memcpy (s1, s2, n);
}
863