rf_nwayxor.c revision 1.5 1 /* $NetBSD: rf_nwayxor.c,v 1.5 2001/11/13 07:11:15 lukem Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland, Daniel Stodolsky
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /************************************************************
30 *
31 * nwayxor.c -- code to do N-way xors for reconstruction
32 *
33 * nWayXorN xors N input buffers into the destination buffer.
34 * adapted from danner's longword_bxor code.
35 *
36 ************************************************************/
37
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: rf_nwayxor.c,v 1.5 2001/11/13 07:11:15 lukem Exp $");
40
41 #include "rf_nwayxor.h"
42 #include "rf_shutdown.h"
43
44 static int callcount[10];
45 static void rf_ShutdownNWayXor(void *);
46
47 static void
48 rf_ShutdownNWayXor(ignored)
49 void *ignored;
50 {
51 int i;
52
53 if (rf_showXorCallCounts == 0)
54 return;
55 printf("Call counts for n-way xor routines: ");
56 for (i = 0; i < 10; i++)
57 printf("%d ", callcount[i]);
58 printf("\n");
59 }
60
61 int
62 rf_ConfigureNWayXor(listp)
63 RF_ShutdownList_t **listp;
64 {
65 int i, rc;
66
67 for (i = 0; i < 10; i++)
68 callcount[i] = 0;
69 rc = rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL);
70 return (rc);
71 }
72
73 void
74 rf_nWayXor1(src_rbs, dest_rb, len)
75 RF_ReconBuffer_t **src_rbs;
76 RF_ReconBuffer_t *dest_rb;
77 int len;
78 {
79 unsigned long *src = (unsigned long *) src_rbs[0]->buffer;
80 unsigned long *dest = (unsigned long *) dest_rb->buffer;
81 unsigned long *end = src + len;
82 unsigned long d0, d1, d2, d3, s0, s1, s2, s3;
83
84 callcount[1]++;
85 while (len >= 4) {
86 d0 = dest[0];
87 d1 = dest[1];
88 d2 = dest[2];
89 d3 = dest[3];
90 s0 = src[0];
91 s1 = src[1];
92 s2 = src[2];
93 s3 = src[3];
94 dest[0] = d0 ^ s0;
95 dest[1] = d1 ^ s1;
96 dest[2] = d2 ^ s2;
97 dest[3] = d3 ^ s3;
98 src += 4;
99 dest += 4;
100 len -= 4;
101 }
102 while (src < end) {
103 *dest++ ^= *src++;
104 }
105 }
106
107 void
108 rf_nWayXor2(src_rbs, dest_rb, len)
109 RF_ReconBuffer_t **src_rbs;
110 RF_ReconBuffer_t *dest_rb;
111 int len;
112 {
113 unsigned long *dst = (unsigned long *) dest_rb->buffer;
114 unsigned long *a = dst;
115 unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
116 unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
117 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
118
119 callcount[2]++;
120 /* align dest to cache line */
121 while ((((unsigned long) dst) & 0x1f)) {
122 *dst++ = *a++ ^ *b++ ^ *c++;
123 len--;
124 }
125 while (len > 4) {
126 a0 = a[0];
127 len -= 4;
128
129 a1 = a[1];
130 a2 = a[2];
131
132 a3 = a[3];
133 a += 4;
134
135 b0 = b[0];
136 b1 = b[1];
137
138 b2 = b[2];
139 b3 = b[3];
140 /* start dual issue */
141 a0 ^= b0;
142 b0 = c[0];
143
144 b += 4;
145 a1 ^= b1;
146
147 a2 ^= b2;
148 a3 ^= b3;
149
150 b1 = c[1];
151 a0 ^= b0;
152
153 b2 = c[2];
154 a1 ^= b1;
155
156 b3 = c[3];
157 a2 ^= b2;
158
159 dst[0] = a0;
160 a3 ^= b3;
161 dst[1] = a1;
162 c += 4;
163 dst[2] = a2;
164 dst[3] = a3;
165 dst += 4;
166 }
167 while (len) {
168 *dst++ = *a++ ^ *b++ ^ *c++;
169 len--;
170 }
171 }
/*
 * Building blocks for the four-word unrolled loops in rf_nWayXor3 and up.
 * They are not self-contained: each one reads and writes the caller's
 * locals a0..a3 and b0..b3, and LOAD_FIRST also decrements the caller's
 * `len'.  Every macro expands to a single statement (do { } while (0)),
 * so it can be used safely as the body of an unbraced if/else.
 */

/*
 * Load four destination words into a0..a3 and four words of the first
 * source into b0..b3, and subtract 4 from len.
 * Note that the first arg is not incremented but the 2nd arg is.
 */
#define LOAD_FIRST(dstp, srcp) \
	do { \
		a0 = (dstp)[0]; len -= 4; \
		a1 = (dstp)[1]; \
		a2 = (dstp)[2]; \
		a3 = (dstp)[3]; \
		b0 = (srcp)[0]; \
		b1 = (srcp)[1]; \
		b2 = (srcp)[2]; \
		b3 = (srcp)[3]; (srcp) += 4; \
	} while (0)

/*
 * Fold the pending source words b0..b3 into a0..a3, then load the next
 * four source words from the argument.  Note: arg is incremented.
 */
#define XOR_AND_LOAD_NEXT(srcp) \
	do { \
		a0 ^= b0; b0 = (srcp)[0]; \
		a1 ^= b1; b1 = (srcp)[1]; \
		a2 ^= b2; b2 = (srcp)[2]; \
		a3 ^= b3; b3 = (srcp)[3]; \
		(srcp) += 4; \
	} while (0)

/*
 * Fold the last pending source words into a0..a3 and write the four
 * results to the destination.  Arg is incremented.
 */
#define XOR_AND_STORE(dstp) \
	do { \
		a0 ^= b0; (dstp)[0] = a0; \
		a1 ^= b1; (dstp)[1] = a1; \
		a2 ^= b2; (dstp)[2] = a2; \
		a3 ^= b3; (dstp)[3] = a3; \
		(dstp) += 4; \
	} while (0)
198
199
200 void
201 rf_nWayXor3(src_rbs, dest_rb, len)
202 RF_ReconBuffer_t **src_rbs;
203 RF_ReconBuffer_t *dest_rb;
204 int len;
205 {
206 unsigned long *dst = (unsigned long *) dest_rb->buffer;
207 unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
208 unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
209 unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
210 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
211
212 callcount[3]++;
213 /* align dest to cache line */
214 while ((((unsigned long) dst) & 0x1f)) {
215 *dst++ ^= *b++ ^ *c++ ^ *d++;
216 len--;
217 }
218 while (len > 4) {
219 LOAD_FIRST(dst, b);
220 XOR_AND_LOAD_NEXT(c);
221 XOR_AND_LOAD_NEXT(d);
222 XOR_AND_STORE(dst);
223 }
224 while (len) {
225 *dst++ ^= *b++ ^ *c++ ^ *d++;
226 len--;
227 }
228 }
229
230 void
231 rf_nWayXor4(src_rbs, dest_rb, len)
232 RF_ReconBuffer_t **src_rbs;
233 RF_ReconBuffer_t *dest_rb;
234 int len;
235 {
236 unsigned long *dst = (unsigned long *) dest_rb->buffer;
237 unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
238 unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
239 unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
240 unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
241 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
242
243 callcount[4]++;
244 /* align dest to cache line */
245 while ((((unsigned long) dst) & 0x1f)) {
246 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
247 len--;
248 }
249 while (len > 4) {
250 LOAD_FIRST(dst, b);
251 XOR_AND_LOAD_NEXT(c);
252 XOR_AND_LOAD_NEXT(d);
253 XOR_AND_LOAD_NEXT(e);
254 XOR_AND_STORE(dst);
255 }
256 while (len) {
257 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
258 len--;
259 }
260 }
261
262 void
263 rf_nWayXor5(src_rbs, dest_rb, len)
264 RF_ReconBuffer_t **src_rbs;
265 RF_ReconBuffer_t *dest_rb;
266 int len;
267 {
268 unsigned long *dst = (unsigned long *) dest_rb->buffer;
269 unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
270 unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
271 unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
272 unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
273 unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
274 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
275
276 callcount[5]++;
277 /* align dest to cache line */
278 while ((((unsigned long) dst) & 0x1f)) {
279 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
280 len--;
281 }
282 while (len > 4) {
283 LOAD_FIRST(dst, b);
284 XOR_AND_LOAD_NEXT(c);
285 XOR_AND_LOAD_NEXT(d);
286 XOR_AND_LOAD_NEXT(e);
287 XOR_AND_LOAD_NEXT(f);
288 XOR_AND_STORE(dst);
289 }
290 while (len) {
291 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
292 len--;
293 }
294 }
295
296 void
297 rf_nWayXor6(src_rbs, dest_rb, len)
298 RF_ReconBuffer_t **src_rbs;
299 RF_ReconBuffer_t *dest_rb;
300 int len;
301 {
302 unsigned long *dst = (unsigned long *) dest_rb->buffer;
303 unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
304 unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
305 unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
306 unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
307 unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
308 unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
309 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
310
311 callcount[6]++;
312 /* align dest to cache line */
313 while ((((unsigned long) dst) & 0x1f)) {
314 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
315 len--;
316 }
317 while (len > 4) {
318 LOAD_FIRST(dst, b);
319 XOR_AND_LOAD_NEXT(c);
320 XOR_AND_LOAD_NEXT(d);
321 XOR_AND_LOAD_NEXT(e);
322 XOR_AND_LOAD_NEXT(f);
323 XOR_AND_LOAD_NEXT(g);
324 XOR_AND_STORE(dst);
325 }
326 while (len) {
327 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
328 len--;
329 }
330 }
331
332 void
333 rf_nWayXor7(src_rbs, dest_rb, len)
334 RF_ReconBuffer_t **src_rbs;
335 RF_ReconBuffer_t *dest_rb;
336 int len;
337 {
338 unsigned long *dst = (unsigned long *) dest_rb->buffer;
339 unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
340 unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
341 unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
342 unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
343 unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
344 unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
345 unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
346 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
347
348 callcount[7]++;
349 /* align dest to cache line */
350 while ((((unsigned long) dst) & 0x1f)) {
351 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
352 len--;
353 }
354 while (len > 4) {
355 LOAD_FIRST(dst, b);
356 XOR_AND_LOAD_NEXT(c);
357 XOR_AND_LOAD_NEXT(d);
358 XOR_AND_LOAD_NEXT(e);
359 XOR_AND_LOAD_NEXT(f);
360 XOR_AND_LOAD_NEXT(g);
361 XOR_AND_LOAD_NEXT(h);
362 XOR_AND_STORE(dst);
363 }
364 while (len) {
365 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
366 len--;
367 }
368 }
369
370 void
371 rf_nWayXor8(src_rbs, dest_rb, len)
372 RF_ReconBuffer_t **src_rbs;
373 RF_ReconBuffer_t *dest_rb;
374 int len;
375 {
376 unsigned long *dst = (unsigned long *) dest_rb->buffer;
377 unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
378 unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
379 unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
380 unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
381 unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
382 unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
383 unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
384 unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
385 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
386
387 callcount[8]++;
388 /* align dest to cache line */
389 while ((((unsigned long) dst) & 0x1f)) {
390 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
391 len--;
392 }
393 while (len > 4) {
394 LOAD_FIRST(dst, b);
395 XOR_AND_LOAD_NEXT(c);
396 XOR_AND_LOAD_NEXT(d);
397 XOR_AND_LOAD_NEXT(e);
398 XOR_AND_LOAD_NEXT(f);
399 XOR_AND_LOAD_NEXT(g);
400 XOR_AND_LOAD_NEXT(h);
401 XOR_AND_LOAD_NEXT(i);
402 XOR_AND_STORE(dst);
403 }
404 while (len) {
405 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
406 len--;
407 }
408 }
409
410
411 void
412 rf_nWayXor9(src_rbs, dest_rb, len)
413 RF_ReconBuffer_t **src_rbs;
414 RF_ReconBuffer_t *dest_rb;
415 int len;
416 {
417 unsigned long *dst = (unsigned long *) dest_rb->buffer;
418 unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
419 unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
420 unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
421 unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
422 unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
423 unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
424 unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
425 unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
426 unsigned long *j = (unsigned long *) src_rbs[8]->buffer;
427 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
428
429 callcount[9]++;
430 /* align dest to cache line */
431 while ((((unsigned long) dst) & 0x1f)) {
432 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
433 len--;
434 }
435 while (len > 4) {
436 LOAD_FIRST(dst, b);
437 XOR_AND_LOAD_NEXT(c);
438 XOR_AND_LOAD_NEXT(d);
439 XOR_AND_LOAD_NEXT(e);
440 XOR_AND_LOAD_NEXT(f);
441 XOR_AND_LOAD_NEXT(g);
442 XOR_AND_LOAD_NEXT(h);
443 XOR_AND_LOAD_NEXT(i);
444 XOR_AND_LOAD_NEXT(j);
445 XOR_AND_STORE(dst);
446 }
447 while (len) {
448 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
449 len--;
450 }
451 }
452