rf_nwayxor.c revision 1.3.8.1 1 /* $NetBSD: rf_nwayxor.c,v 1.3.8.1 2000/11/20 11:42:56 bouyer Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland, Daniel Stodolsky
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /************************************************************
30 *
31 * nwayxor.c -- code to do N-way xors for reconstruction
32 *
33 * nWayXorN xors N input buffers into the destination buffer.
34 * adapted from danner's longword_bxor code.
35 *
36 ************************************************************/
37
38 #include "rf_nwayxor.h"
39 #include "rf_shutdown.h"
40
41 static int callcount[10];
42 static void rf_ShutdownNWayXor(void *);
43
44 static void
45 rf_ShutdownNWayXor(ignored)
46 void *ignored;
47 {
48 int i;
49
50 if (rf_showXorCallCounts == 0)
51 return;
52 printf("Call counts for n-way xor routines: ");
53 for (i = 0; i < 10; i++)
54 printf("%d ", callcount[i]);
55 printf("\n");
56 }
57
58 int
59 rf_ConfigureNWayXor(listp)
60 RF_ShutdownList_t **listp;
61 {
62 int i, rc;
63
64 for (i = 0; i < 10; i++)
65 callcount[i] = 0;
66 rc = rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL);
67 return (rc);
68 }
69
70 void
71 rf_nWayXor1(src_rbs, dest_rb, len)
72 RF_ReconBuffer_t **src_rbs;
73 RF_ReconBuffer_t *dest_rb;
74 int len;
75 {
76 unsigned long *src = (unsigned long *) src_rbs[0]->buffer;
77 unsigned long *dest = (unsigned long *) dest_rb->buffer;
78 unsigned long *end = src + len;
79 unsigned long d0, d1, d2, d3, s0, s1, s2, s3;
80
81 callcount[1]++;
82 while (len >= 4) {
83 d0 = dest[0];
84 d1 = dest[1];
85 d2 = dest[2];
86 d3 = dest[3];
87 s0 = src[0];
88 s1 = src[1];
89 s2 = src[2];
90 s3 = src[3];
91 dest[0] = d0 ^ s0;
92 dest[1] = d1 ^ s1;
93 dest[2] = d2 ^ s2;
94 dest[3] = d3 ^ s3;
95 src += 4;
96 dest += 4;
97 len -= 4;
98 }
99 while (src < end) {
100 *dest++ ^= *src++;
101 }
102 }
103
104 void
105 rf_nWayXor2(src_rbs, dest_rb, len)
106 RF_ReconBuffer_t **src_rbs;
107 RF_ReconBuffer_t *dest_rb;
108 int len;
109 {
110 unsigned long *dst = (unsigned long *) dest_rb->buffer;
111 unsigned long *a = dst;
112 unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
113 unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
114 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
115
116 callcount[2]++;
117 /* align dest to cache line */
118 while ((((unsigned long) dst) & 0x1f)) {
119 *dst++ = *a++ ^ *b++ ^ *c++;
120 len--;
121 }
122 while (len > 4) {
123 a0 = a[0];
124 len -= 4;
125
126 a1 = a[1];
127 a2 = a[2];
128
129 a3 = a[3];
130 a += 4;
131
132 b0 = b[0];
133 b1 = b[1];
134
135 b2 = b[2];
136 b3 = b[3];
137 /* start dual issue */
138 a0 ^= b0;
139 b0 = c[0];
140
141 b += 4;
142 a1 ^= b1;
143
144 a2 ^= b2;
145 a3 ^= b3;
146
147 b1 = c[1];
148 a0 ^= b0;
149
150 b2 = c[2];
151 a1 ^= b1;
152
153 b3 = c[3];
154 a2 ^= b2;
155
156 dst[0] = a0;
157 a3 ^= b3;
158 dst[1] = a1;
159 c += 4;
160 dst[2] = a2;
161 dst[3] = a3;
162 dst += 4;
163 }
164 while (len) {
165 *dst++ = *a++ ^ *b++ ^ *c++;
166 len--;
167 }
168 }
/*
 * Building blocks for the four-words-per-iteration loops of the
 * rf_nWayXorN routines that follow.
 *
 * All three macros operate on locals of the enclosing function:
 *   a0..a3  running XOR accumulators
 *   b0..b3  the most recently loaded group of source words
 *   len     remaining word count (modified by LOAD_FIRST only)
 * Pointer arguments that are advanced must be modifiable lvalues.
 * Each macro expands to exactly one statement, so it can be used safely in
 * an unbraced if/else; invoke it with a trailing semicolon.
 */

/*
 * Load four words from dst_ into a0..a3 and four words from src_ into
 * b0..b3, and subtract 4 from len.
 * Note that the first arg is not incremented but the 2nd arg is.
 */
#define LOAD_FIRST(dst_, src_) \
	do { \
		a0 = (dst_)[0]; len -= 4; \
		a1 = (dst_)[1]; \
		a2 = (dst_)[2]; \
		a3 = (dst_)[3]; \
		b0 = (src_)[0]; \
		b1 = (src_)[1]; \
		b2 = (src_)[2]; \
		b3 = (src_)[3]; (src_) += 4; \
	} while (0)

/*
 * Fold b0..b3 into a0..a3, then reload b0..b3 from next_.
 * Note: the arg is incremented (by four words).
 */
#define XOR_AND_LOAD_NEXT(next_) \
	do { \
		a0 ^= b0; b0 = (next_)[0]; \
		a1 ^= b1; b1 = (next_)[1]; \
		a2 ^= b2; b2 = (next_)[2]; \
		a3 ^= b3; b3 = (next_)[3]; \
		(next_) += 4; \
	} while (0)

/*
 * Fold b0..b3 into a0..a3 and write the four results to dst_.
 * The arg is incremented (by four words).
 */
#define XOR_AND_STORE(dst_) \
	do { \
		a0 ^= b0; (dst_)[0] = a0; \
		a1 ^= b1; (dst_)[1] = a1; \
		a2 ^= b2; (dst_)[2] = a2; \
		a3 ^= b3; (dst_)[3] = a3; \
		(dst_) += 4; \
	} while (0)
195
196
197 void
198 rf_nWayXor3(src_rbs, dest_rb, len)
199 RF_ReconBuffer_t **src_rbs;
200 RF_ReconBuffer_t *dest_rb;
201 int len;
202 {
203 unsigned long *dst = (unsigned long *) dest_rb->buffer;
204 unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
205 unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
206 unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
207 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
208
209 callcount[3]++;
210 /* align dest to cache line */
211 while ((((unsigned long) dst) & 0x1f)) {
212 *dst++ ^= *b++ ^ *c++ ^ *d++;
213 len--;
214 }
215 while (len > 4) {
216 LOAD_FIRST(dst, b);
217 XOR_AND_LOAD_NEXT(c);
218 XOR_AND_LOAD_NEXT(d);
219 XOR_AND_STORE(dst);
220 }
221 while (len) {
222 *dst++ ^= *b++ ^ *c++ ^ *d++;
223 len--;
224 }
225 }
226
227 void
228 rf_nWayXor4(src_rbs, dest_rb, len)
229 RF_ReconBuffer_t **src_rbs;
230 RF_ReconBuffer_t *dest_rb;
231 int len;
232 {
233 unsigned long *dst = (unsigned long *) dest_rb->buffer;
234 unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
235 unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
236 unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
237 unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
238 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
239
240 callcount[4]++;
241 /* align dest to cache line */
242 while ((((unsigned long) dst) & 0x1f)) {
243 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
244 len--;
245 }
246 while (len > 4) {
247 LOAD_FIRST(dst, b);
248 XOR_AND_LOAD_NEXT(c);
249 XOR_AND_LOAD_NEXT(d);
250 XOR_AND_LOAD_NEXT(e);
251 XOR_AND_STORE(dst);
252 }
253 while (len) {
254 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
255 len--;
256 }
257 }
258
259 void
260 rf_nWayXor5(src_rbs, dest_rb, len)
261 RF_ReconBuffer_t **src_rbs;
262 RF_ReconBuffer_t *dest_rb;
263 int len;
264 {
265 unsigned long *dst = (unsigned long *) dest_rb->buffer;
266 unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
267 unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
268 unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
269 unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
270 unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
271 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
272
273 callcount[5]++;
274 /* align dest to cache line */
275 while ((((unsigned long) dst) & 0x1f)) {
276 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
277 len--;
278 }
279 while (len > 4) {
280 LOAD_FIRST(dst, b);
281 XOR_AND_LOAD_NEXT(c);
282 XOR_AND_LOAD_NEXT(d);
283 XOR_AND_LOAD_NEXT(e);
284 XOR_AND_LOAD_NEXT(f);
285 XOR_AND_STORE(dst);
286 }
287 while (len) {
288 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
289 len--;
290 }
291 }
292
293 void
294 rf_nWayXor6(src_rbs, dest_rb, len)
295 RF_ReconBuffer_t **src_rbs;
296 RF_ReconBuffer_t *dest_rb;
297 int len;
298 {
299 unsigned long *dst = (unsigned long *) dest_rb->buffer;
300 unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
301 unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
302 unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
303 unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
304 unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
305 unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
306 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
307
308 callcount[6]++;
309 /* align dest to cache line */
310 while ((((unsigned long) dst) & 0x1f)) {
311 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
312 len--;
313 }
314 while (len > 4) {
315 LOAD_FIRST(dst, b);
316 XOR_AND_LOAD_NEXT(c);
317 XOR_AND_LOAD_NEXT(d);
318 XOR_AND_LOAD_NEXT(e);
319 XOR_AND_LOAD_NEXT(f);
320 XOR_AND_LOAD_NEXT(g);
321 XOR_AND_STORE(dst);
322 }
323 while (len) {
324 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
325 len--;
326 }
327 }
328
329 void
330 rf_nWayXor7(src_rbs, dest_rb, len)
331 RF_ReconBuffer_t **src_rbs;
332 RF_ReconBuffer_t *dest_rb;
333 int len;
334 {
335 unsigned long *dst = (unsigned long *) dest_rb->buffer;
336 unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
337 unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
338 unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
339 unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
340 unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
341 unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
342 unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
343 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
344
345 callcount[7]++;
346 /* align dest to cache line */
347 while ((((unsigned long) dst) & 0x1f)) {
348 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
349 len--;
350 }
351 while (len > 4) {
352 LOAD_FIRST(dst, b);
353 XOR_AND_LOAD_NEXT(c);
354 XOR_AND_LOAD_NEXT(d);
355 XOR_AND_LOAD_NEXT(e);
356 XOR_AND_LOAD_NEXT(f);
357 XOR_AND_LOAD_NEXT(g);
358 XOR_AND_LOAD_NEXT(h);
359 XOR_AND_STORE(dst);
360 }
361 while (len) {
362 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
363 len--;
364 }
365 }
366
367 void
368 rf_nWayXor8(src_rbs, dest_rb, len)
369 RF_ReconBuffer_t **src_rbs;
370 RF_ReconBuffer_t *dest_rb;
371 int len;
372 {
373 unsigned long *dst = (unsigned long *) dest_rb->buffer;
374 unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
375 unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
376 unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
377 unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
378 unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
379 unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
380 unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
381 unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
382 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
383
384 callcount[8]++;
385 /* align dest to cache line */
386 while ((((unsigned long) dst) & 0x1f)) {
387 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
388 len--;
389 }
390 while (len > 4) {
391 LOAD_FIRST(dst, b);
392 XOR_AND_LOAD_NEXT(c);
393 XOR_AND_LOAD_NEXT(d);
394 XOR_AND_LOAD_NEXT(e);
395 XOR_AND_LOAD_NEXT(f);
396 XOR_AND_LOAD_NEXT(g);
397 XOR_AND_LOAD_NEXT(h);
398 XOR_AND_LOAD_NEXT(i);
399 XOR_AND_STORE(dst);
400 }
401 while (len) {
402 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
403 len--;
404 }
405 }
406
407
408 void
409 rf_nWayXor9(src_rbs, dest_rb, len)
410 RF_ReconBuffer_t **src_rbs;
411 RF_ReconBuffer_t *dest_rb;
412 int len;
413 {
414 unsigned long *dst = (unsigned long *) dest_rb->buffer;
415 unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
416 unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
417 unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
418 unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
419 unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
420 unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
421 unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
422 unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
423 unsigned long *j = (unsigned long *) src_rbs[8]->buffer;
424 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
425
426 callcount[9]++;
427 /* align dest to cache line */
428 while ((((unsigned long) dst) & 0x1f)) {
429 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
430 len--;
431 }
432 while (len > 4) {
433 LOAD_FIRST(dst, b);
434 XOR_AND_LOAD_NEXT(c);
435 XOR_AND_LOAD_NEXT(d);
436 XOR_AND_LOAD_NEXT(e);
437 XOR_AND_LOAD_NEXT(f);
438 XOR_AND_LOAD_NEXT(g);
439 XOR_AND_LOAD_NEXT(h);
440 XOR_AND_LOAD_NEXT(i);
441 XOR_AND_LOAD_NEXT(j);
442 XOR_AND_STORE(dst);
443 }
444 while (len) {
445 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
446 len--;
447 }
448 }
449