rf_nwayxor.c revision 1.2 1 1.2 oster /* $NetBSD: rf_nwayxor.c,v 1.2 1999/01/26 02:33:59 oster Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Author: Mark Holland, Daniel Stodolsky
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster
29 1.1 oster /************************************************************
30 1.1 oster *
31 1.1 oster * nwayxor.c -- code to do N-way xors for reconstruction
32 1.1 oster *
33 1.1 oster * nWayXorN xors N input buffers into the destination buffer.
34 1.1 oster * adapted from danner's longword_bxor code.
35 1.1 oster *
36 1.1 oster ************************************************************/
37 1.1 oster
38 1.1 oster #include "rf_nwayxor.h"
39 1.1 oster #include "rf_shutdown.h"
40 1.1 oster
41 1.1 oster static int callcount[10];
42 1.1 oster static void rf_ShutdownNWayXor(void *);
43 1.1 oster
44 1.1 oster static void rf_ShutdownNWayXor(ignored)
45 1.1 oster void *ignored;
46 1.1 oster {
47 1.1 oster int i;
48 1.1 oster
49 1.1 oster if (rf_showXorCallCounts == 0)
50 1.1 oster return;
51 1.1 oster printf("Call counts for n-way xor routines: ");
52 1.1 oster for (i=0; i<10; i++)
53 1.1 oster printf("%d ",callcount[i]);
54 1.1 oster printf("\n");
55 1.1 oster }
56 1.1 oster
57 1.1 oster int rf_ConfigureNWayXor(listp)
58 1.1 oster RF_ShutdownList_t **listp;
59 1.1 oster {
60 1.1 oster int i, rc;
61 1.1 oster
62 1.1 oster for (i=0; i<10; i++)
63 1.1 oster callcount[i] = 0;
64 1.1 oster rc = rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL);
65 1.1 oster return(rc);
66 1.1 oster }
67 1.1 oster
68 1.1 oster void rf_nWayXor1(src_rbs, dest_rb, len)
69 1.1 oster RF_ReconBuffer_t **src_rbs;
70 1.1 oster RF_ReconBuffer_t *dest_rb;
71 1.1 oster int len;
72 1.1 oster {
73 1.1 oster register unsigned long *src = (unsigned long *) src_rbs[0]->buffer;
74 1.1 oster register unsigned long *dest= (unsigned long *) dest_rb->buffer;
75 1.1 oster register unsigned long *end = src+len;
76 1.1 oster register unsigned long d0, d1, d2, d3, s0, s1, s2, s3;
77 1.1 oster
78 1.1 oster callcount[1]++;
79 1.1 oster while (len >= 4 )
80 1.1 oster {
81 1.1 oster d0 = dest[0];
82 1.1 oster d1 = dest[1];
83 1.1 oster d2 = dest[2];
84 1.1 oster d3 = dest[3];
85 1.1 oster s0 = src[0];
86 1.1 oster s1 = src[1];
87 1.1 oster s2 = src[2];
88 1.1 oster s3 = src[3];
89 1.1 oster dest[0] = d0 ^ s0;
90 1.1 oster dest[1] = d1 ^ s1;
91 1.1 oster dest[2] = d2 ^ s2;
92 1.1 oster dest[3] = d3 ^ s3;
93 1.1 oster src += 4;
94 1.1 oster dest += 4;
95 1.1 oster len -= 4;
96 1.1 oster }
97 1.1 oster while (src < end) {*dest++ ^= *src++;}
98 1.1 oster }
99 1.1 oster
100 1.1 oster void rf_nWayXor2(src_rbs, dest_rb, len)
101 1.1 oster RF_ReconBuffer_t **src_rbs;
102 1.1 oster RF_ReconBuffer_t *dest_rb;
103 1.1 oster int len;
104 1.1 oster {
105 1.1 oster register unsigned long *dst = (unsigned long *) dest_rb->buffer;
106 1.1 oster register unsigned long *a = dst;
107 1.1 oster register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
108 1.1 oster register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
109 1.1 oster unsigned long a0,a1,a2,a3, b0,b1,b2,b3;
110 1.1 oster
111 1.1 oster callcount[2]++;
112 1.1 oster /* align dest to cache line */
113 1.1 oster while ((((unsigned long) dst) & 0x1f))
114 1.1 oster {
115 1.1 oster *dst++ = *a++ ^ *b++ ^ *c++;
116 1.1 oster len--;
117 1.1 oster }
118 1.1 oster while (len > 4 )
119 1.1 oster {
120 1.1 oster a0 = a[0]; len -= 4;
121 1.1 oster
122 1.1 oster a1 = a[1];
123 1.1 oster a2 = a[2];
124 1.1 oster
125 1.1 oster a3 = a[3]; a += 4;
126 1.1 oster
127 1.1 oster b0 = b[0];
128 1.1 oster b1 = b[1];
129 1.1 oster
130 1.1 oster b2 = b[2];
131 1.1 oster b3 = b[3];
132 1.1 oster /* start dual issue */
133 1.1 oster a0 ^= b0; b0 = c[0];
134 1.1 oster
135 1.1 oster b += 4; a1 ^= b1;
136 1.1 oster
137 1.1 oster a2 ^= b2; a3 ^= b3;
138 1.1 oster
139 1.1 oster b1 = c[1]; a0 ^= b0;
140 1.1 oster
141 1.1 oster b2 = c[2]; a1 ^= b1;
142 1.1 oster
143 1.1 oster b3 = c[3]; a2 ^= b2;
144 1.1 oster
145 1.1 oster dst[0] = a0; a3 ^= b3;
146 1.1 oster dst[1] = a1; c += 4;
147 1.1 oster dst[2] = a2;
148 1.1 oster dst[3] = a3; dst += 4;
149 1.1 oster }
150 1.1 oster while (len)
151 1.1 oster {
152 1.1 oster *dst++ = *a++ ^ *b++ ^ *c++;
153 1.1 oster len--;
154 1.1 oster }
155 1.1 oster }
156 1.1 oster
/*
 * Building blocks for the four-words-per-pass loops of the n-way xor
 * routines.  All three rely on the enclosing scope declaring
 * unsigned long a0..a3 and b0..b3; LOAD_FIRST additionally updates a
 * variable named len from that scope.  Each expansion is wrapped in
 * do { } while (0) so it behaves as one statement (safe in an unbraced
 * if/else), and every argument use is parenthesized.
 */

/*
 * Load four words from dstp into a0..a3 and four words from srcp into
 * b0..b3, and subtract 4 from len.
 * Note that dstp is not advanced but srcp is (by four words).
 */
#define LOAD_FIRST(dstp, srcp) do { \
	a0 = (dstp)[0]; len -= 4; \
	a1 = (dstp)[1]; \
	a2 = (dstp)[2]; \
	a3 = (dstp)[3]; \
	b0 = (srcp)[0]; \
	b1 = (srcp)[1]; \
	b2 = (srcp)[2]; \
	b3 = (srcp)[3]; (srcp) += 4; \
} while (0)

/*
 * Fold the pending b0..b3 into a0..a3, then reload b0..b3 from nextp.
 * Note: nextp is advanced by four words.
 */
#define XOR_AND_LOAD_NEXT(nextp) do { \
	a0 ^= b0; b0 = (nextp)[0]; \
	a1 ^= b1; b1 = (nextp)[1]; \
	a2 ^= b2; b2 = (nextp)[2]; \
	a3 ^= b3; b3 = (nextp)[3]; \
	(nextp) += 4; \
} while (0)

/*
 * Fold the pending b0..b3 into a0..a3 and write the result to dstp.
 * Note: dstp is advanced by four words.
 */
#define XOR_AND_STORE(dstp) do { \
	a0 ^= b0; (dstp)[0] = a0; \
	a1 ^= b1; (dstp)[1] = a1; \
	a2 ^= b2; (dstp)[2] = a2; \
	a3 ^= b3; (dstp)[3] = a3; \
	(dstp) += 4; \
} while (0)
183 1.1 oster
184 1.1 oster
185 1.1 oster void rf_nWayXor3(src_rbs, dest_rb, len)
186 1.1 oster RF_ReconBuffer_t **src_rbs;
187 1.1 oster RF_ReconBuffer_t *dest_rb;
188 1.1 oster int len;
189 1.1 oster {
190 1.1 oster register unsigned long *dst = (unsigned long *) dest_rb->buffer;
191 1.1 oster register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
192 1.1 oster register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
193 1.1 oster register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
194 1.1 oster unsigned long a0,a1,a2,a3, b0,b1,b2,b3;
195 1.1 oster
196 1.1 oster callcount[3]++;
197 1.1 oster /* align dest to cache line */
198 1.1 oster while ((((unsigned long) dst) & 0x1f)) {
199 1.1 oster *dst++ ^= *b++ ^ *c++ ^ *d++;
200 1.1 oster len--;
201 1.1 oster }
202 1.1 oster while (len > 4 ) {
203 1.1 oster LOAD_FIRST(dst,b);
204 1.1 oster XOR_AND_LOAD_NEXT(c);
205 1.1 oster XOR_AND_LOAD_NEXT(d);
206 1.1 oster XOR_AND_STORE(dst);
207 1.1 oster }
208 1.1 oster while (len) {
209 1.1 oster *dst++ ^= *b++ ^ *c++ ^ *d++;
210 1.1 oster len--;
211 1.1 oster }
212 1.1 oster }
213 1.1 oster
214 1.1 oster void rf_nWayXor4(src_rbs, dest_rb, len)
215 1.1 oster RF_ReconBuffer_t **src_rbs;
216 1.1 oster RF_ReconBuffer_t *dest_rb;
217 1.1 oster int len;
218 1.1 oster {
219 1.1 oster register unsigned long *dst = (unsigned long *) dest_rb->buffer;
220 1.1 oster register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
221 1.1 oster register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
222 1.1 oster register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
223 1.1 oster register unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
224 1.1 oster unsigned long a0,a1,a2,a3, b0,b1,b2,b3;
225 1.1 oster
226 1.1 oster callcount[4]++;
227 1.1 oster /* align dest to cache line */
228 1.1 oster while ((((unsigned long) dst) & 0x1f)) {
229 1.1 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
230 1.1 oster len--;
231 1.1 oster }
232 1.1 oster while (len > 4 ) {
233 1.1 oster LOAD_FIRST(dst,b);
234 1.1 oster XOR_AND_LOAD_NEXT(c);
235 1.1 oster XOR_AND_LOAD_NEXT(d);
236 1.1 oster XOR_AND_LOAD_NEXT(e);
237 1.1 oster XOR_AND_STORE(dst);
238 1.1 oster }
239 1.1 oster while (len) {
240 1.1 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
241 1.1 oster len--;
242 1.1 oster }
243 1.1 oster }
244 1.1 oster
245 1.1 oster void rf_nWayXor5(src_rbs, dest_rb, len)
246 1.1 oster RF_ReconBuffer_t **src_rbs;
247 1.1 oster RF_ReconBuffer_t *dest_rb;
248 1.1 oster int len;
249 1.1 oster {
250 1.1 oster register unsigned long *dst = (unsigned long *) dest_rb->buffer;
251 1.1 oster register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
252 1.1 oster register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
253 1.1 oster register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
254 1.1 oster register unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
255 1.1 oster register unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
256 1.1 oster unsigned long a0,a1,a2,a3, b0,b1,b2,b3;
257 1.1 oster
258 1.1 oster callcount[5]++;
259 1.1 oster /* align dest to cache line */
260 1.1 oster while ((((unsigned long) dst) & 0x1f)) {
261 1.1 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
262 1.1 oster len--;
263 1.1 oster }
264 1.1 oster while (len > 4 ) {
265 1.1 oster LOAD_FIRST(dst,b);
266 1.1 oster XOR_AND_LOAD_NEXT(c);
267 1.1 oster XOR_AND_LOAD_NEXT(d);
268 1.1 oster XOR_AND_LOAD_NEXT(e);
269 1.1 oster XOR_AND_LOAD_NEXT(f);
270 1.1 oster XOR_AND_STORE(dst);
271 1.1 oster }
272 1.1 oster while (len) {
273 1.1 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
274 1.1 oster len--;
275 1.1 oster }
276 1.1 oster }
277 1.1 oster
278 1.1 oster void rf_nWayXor6(src_rbs, dest_rb, len)
279 1.1 oster RF_ReconBuffer_t **src_rbs;
280 1.1 oster RF_ReconBuffer_t *dest_rb;
281 1.1 oster int len;
282 1.1 oster {
283 1.1 oster register unsigned long *dst = (unsigned long *) dest_rb->buffer;
284 1.1 oster register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
285 1.1 oster register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
286 1.1 oster register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
287 1.1 oster register unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
288 1.1 oster register unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
289 1.1 oster register unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
290 1.1 oster unsigned long a0,a1,a2,a3, b0,b1,b2,b3;
291 1.1 oster
292 1.1 oster callcount[6]++;
293 1.1 oster /* align dest to cache line */
294 1.1 oster while ((((unsigned long) dst) & 0x1f)) {
295 1.1 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
296 1.1 oster len--;
297 1.1 oster }
298 1.1 oster while (len > 4 ) {
299 1.1 oster LOAD_FIRST(dst,b);
300 1.1 oster XOR_AND_LOAD_NEXT(c);
301 1.1 oster XOR_AND_LOAD_NEXT(d);
302 1.1 oster XOR_AND_LOAD_NEXT(e);
303 1.1 oster XOR_AND_LOAD_NEXT(f);
304 1.1 oster XOR_AND_LOAD_NEXT(g);
305 1.1 oster XOR_AND_STORE(dst);
306 1.1 oster }
307 1.1 oster while (len) {
308 1.1 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
309 1.1 oster len--;
310 1.1 oster }
311 1.1 oster }
312 1.1 oster
313 1.1 oster void rf_nWayXor7(src_rbs, dest_rb, len)
314 1.1 oster RF_ReconBuffer_t **src_rbs;
315 1.1 oster RF_ReconBuffer_t *dest_rb;
316 1.1 oster int len;
317 1.1 oster {
318 1.1 oster register unsigned long *dst = (unsigned long *) dest_rb->buffer;
319 1.1 oster register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
320 1.1 oster register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
321 1.1 oster register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
322 1.1 oster register unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
323 1.1 oster register unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
324 1.1 oster register unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
325 1.1 oster register unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
326 1.1 oster unsigned long a0,a1,a2,a3, b0,b1,b2,b3;
327 1.1 oster
328 1.1 oster callcount[7]++;
329 1.1 oster /* align dest to cache line */
330 1.1 oster while ((((unsigned long) dst) & 0x1f)) {
331 1.1 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
332 1.1 oster len--;
333 1.1 oster }
334 1.1 oster while (len > 4 ) {
335 1.1 oster LOAD_FIRST(dst,b);
336 1.1 oster XOR_AND_LOAD_NEXT(c);
337 1.1 oster XOR_AND_LOAD_NEXT(d);
338 1.1 oster XOR_AND_LOAD_NEXT(e);
339 1.1 oster XOR_AND_LOAD_NEXT(f);
340 1.1 oster XOR_AND_LOAD_NEXT(g);
341 1.1 oster XOR_AND_LOAD_NEXT(h);
342 1.1 oster XOR_AND_STORE(dst);
343 1.1 oster }
344 1.1 oster while (len) {
345 1.1 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
346 1.1 oster len--;
347 1.1 oster }
348 1.1 oster }
349 1.1 oster
350 1.1 oster void rf_nWayXor8(src_rbs, dest_rb, len)
351 1.1 oster RF_ReconBuffer_t **src_rbs;
352 1.1 oster RF_ReconBuffer_t *dest_rb;
353 1.1 oster int len;
354 1.1 oster {
355 1.1 oster register unsigned long *dst = (unsigned long *) dest_rb->buffer;
356 1.1 oster register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
357 1.1 oster register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
358 1.1 oster register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
359 1.1 oster register unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
360 1.1 oster register unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
361 1.1 oster register unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
362 1.1 oster register unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
363 1.1 oster register unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
364 1.1 oster unsigned long a0,a1,a2,a3, b0,b1,b2,b3;
365 1.1 oster
366 1.1 oster callcount[8]++;
367 1.1 oster /* align dest to cache line */
368 1.1 oster while ((((unsigned long) dst) & 0x1f)) {
369 1.1 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
370 1.1 oster len--;
371 1.1 oster }
372 1.1 oster while (len > 4 ) {
373 1.1 oster LOAD_FIRST(dst,b);
374 1.1 oster XOR_AND_LOAD_NEXT(c);
375 1.1 oster XOR_AND_LOAD_NEXT(d);
376 1.1 oster XOR_AND_LOAD_NEXT(e);
377 1.1 oster XOR_AND_LOAD_NEXT(f);
378 1.1 oster XOR_AND_LOAD_NEXT(g);
379 1.1 oster XOR_AND_LOAD_NEXT(h);
380 1.1 oster XOR_AND_LOAD_NEXT(i);
381 1.1 oster XOR_AND_STORE(dst);
382 1.1 oster }
383 1.1 oster while (len) {
384 1.1 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
385 1.1 oster len--;
386 1.1 oster }
387 1.1 oster }
388 1.1 oster
389 1.1 oster
390 1.1 oster void rf_nWayXor9(src_rbs, dest_rb, len)
391 1.1 oster RF_ReconBuffer_t **src_rbs;
392 1.1 oster RF_ReconBuffer_t *dest_rb;
393 1.1 oster int len;
394 1.1 oster {
395 1.1 oster register unsigned long *dst = (unsigned long *) dest_rb->buffer;
396 1.1 oster register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
397 1.1 oster register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
398 1.1 oster register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
399 1.1 oster register unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
400 1.1 oster register unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
401 1.1 oster register unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
402 1.1 oster register unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
403 1.1 oster register unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
404 1.1 oster register unsigned long *j = (unsigned long *) src_rbs[8]->buffer;
405 1.1 oster unsigned long a0,a1,a2,a3, b0,b1,b2,b3;
406 1.1 oster
407 1.1 oster callcount[9]++;
408 1.1 oster /* align dest to cache line */
409 1.1 oster while ((((unsigned long) dst) & 0x1f)) {
410 1.1 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
411 1.1 oster len--;
412 1.1 oster }
413 1.1 oster while (len > 4 ) {
414 1.1 oster LOAD_FIRST(dst,b);
415 1.1 oster XOR_AND_LOAD_NEXT(c);
416 1.1 oster XOR_AND_LOAD_NEXT(d);
417 1.1 oster XOR_AND_LOAD_NEXT(e);
418 1.1 oster XOR_AND_LOAD_NEXT(f);
419 1.1 oster XOR_AND_LOAD_NEXT(g);
420 1.1 oster XOR_AND_LOAD_NEXT(h);
421 1.1 oster XOR_AND_LOAD_NEXT(i);
422 1.1 oster XOR_AND_LOAD_NEXT(j);
423 1.1 oster XOR_AND_STORE(dst);
424 1.1 oster }
425 1.1 oster while (len) {
426 1.1 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
427 1.1 oster len--;
428 1.1 oster }
429 1.1 oster }
430