rf_nwayxor.c revision 1.7.8.1 1 1.7.8.1 kent /* $NetBSD: rf_nwayxor.c,v 1.7.8.1 2005/04/29 11:29:15 kent Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Author: Mark Holland, Daniel Stodolsky
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster
29 1.1 oster /************************************************************
30 1.1 oster *
31 1.1 oster * nwayxor.c -- code to do N-way xors for reconstruction
32 1.1 oster *
33 1.1 oster * nWayXorN xors N input buffers into the destination buffer.
34 1.1 oster * adapted from danner's longword_bxor code.
35 1.1 oster *
36 1.1 oster ************************************************************/
37 1.5 lukem
38 1.5 lukem #include <sys/cdefs.h>
39 1.7.8.1 kent __KERNEL_RCSID(0, "$NetBSD: rf_nwayxor.c,v 1.7.8.1 2005/04/29 11:29:15 kent Exp $");
40 1.1 oster
41 1.1 oster #include "rf_nwayxor.h"
42 1.1 oster #include "rf_shutdown.h"
43 1.1 oster
/* Per-routine invocation counters: rf_nWayXorN increments callcount[N]. */
44 1.1 oster static int callcount[10];
/* Forward declaration; the function is defined below and handed to rf_ShutdownCreate(). */
45 1.1 oster static void rf_ShutdownNWayXor(void *);
46 1.1 oster
47 1.7.8.1 kent static void
48 1.6 oster rf_ShutdownNWayXor(void *ignored)
49 1.3 oster {
50 1.3 oster int i;
51 1.3 oster
52 1.3 oster if (rf_showXorCallCounts == 0)
53 1.3 oster return;
54 1.3 oster printf("Call counts for n-way xor routines: ");
55 1.3 oster for (i = 0; i < 10; i++)
56 1.3 oster printf("%d ", callcount[i]);
57 1.3 oster printf("\n");
58 1.1 oster }
59 1.1 oster
60 1.7.8.1 kent int
61 1.6 oster rf_ConfigureNWayXor(RF_ShutdownList_t **listp)
62 1.3 oster {
63 1.7 oster int i;
64 1.3 oster
65 1.3 oster for (i = 0; i < 10; i++)
66 1.3 oster callcount[i] = 0;
67 1.7 oster rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL);
68 1.7 oster return (0);
69 1.1 oster }
70 1.1 oster
71 1.7.8.1 kent void
72 1.6 oster rf_nWayXor1(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
73 1.3 oster {
74 1.4 augustss unsigned long *src = (unsigned long *) src_rbs[0]->buffer;
75 1.4 augustss unsigned long *dest = (unsigned long *) dest_rb->buffer;
76 1.4 augustss unsigned long *end = src + len;
77 1.4 augustss unsigned long d0, d1, d2, d3, s0, s1, s2, s3;
78 1.3 oster
79 1.3 oster callcount[1]++;
80 1.3 oster while (len >= 4) {
81 1.3 oster d0 = dest[0];
82 1.3 oster d1 = dest[1];
83 1.3 oster d2 = dest[2];
84 1.3 oster d3 = dest[3];
85 1.3 oster s0 = src[0];
86 1.3 oster s1 = src[1];
87 1.3 oster s2 = src[2];
88 1.3 oster s3 = src[3];
89 1.3 oster dest[0] = d0 ^ s0;
90 1.3 oster dest[1] = d1 ^ s1;
91 1.3 oster dest[2] = d2 ^ s2;
92 1.3 oster dest[3] = d3 ^ s3;
93 1.3 oster src += 4;
94 1.3 oster dest += 4;
95 1.3 oster len -= 4;
96 1.3 oster }
97 1.3 oster while (src < end) {
98 1.3 oster *dest++ ^= *src++;
99 1.3 oster }
100 1.1 oster }
101 1.1 oster
102 1.7.8.1 kent void
103 1.6 oster rf_nWayXor2(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
104 1.3 oster {
105 1.4 augustss unsigned long *dst = (unsigned long *) dest_rb->buffer;
106 1.4 augustss unsigned long *a = dst;
107 1.4 augustss unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
108 1.4 augustss unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
109 1.3 oster unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
110 1.3 oster
111 1.3 oster callcount[2]++;
112 1.3 oster /* align dest to cache line */
113 1.3 oster while ((((unsigned long) dst) & 0x1f)) {
114 1.3 oster *dst++ = *a++ ^ *b++ ^ *c++;
115 1.3 oster len--;
116 1.3 oster }
117 1.3 oster while (len > 4) {
118 1.3 oster a0 = a[0];
119 1.3 oster len -= 4;
120 1.3 oster
121 1.3 oster a1 = a[1];
122 1.3 oster a2 = a[2];
123 1.3 oster
124 1.3 oster a3 = a[3];
125 1.3 oster a += 4;
126 1.3 oster
127 1.3 oster b0 = b[0];
128 1.3 oster b1 = b[1];
129 1.3 oster
130 1.3 oster b2 = b[2];
131 1.3 oster b3 = b[3];
132 1.3 oster /* start dual issue */
133 1.3 oster a0 ^= b0;
134 1.3 oster b0 = c[0];
135 1.3 oster
136 1.3 oster b += 4;
137 1.3 oster a1 ^= b1;
138 1.3 oster
139 1.3 oster a2 ^= b2;
140 1.3 oster a3 ^= b3;
141 1.3 oster
142 1.3 oster b1 = c[1];
143 1.3 oster a0 ^= b0;
144 1.3 oster
145 1.3 oster b2 = c[2];
146 1.3 oster a1 ^= b1;
147 1.3 oster
148 1.3 oster b3 = c[3];
149 1.3 oster a2 ^= b2;
150 1.3 oster
151 1.3 oster dst[0] = a0;
152 1.3 oster a3 ^= b3;
153 1.3 oster dst[1] = a1;
154 1.3 oster c += 4;
155 1.3 oster dst[2] = a2;
156 1.3 oster dst[3] = a3;
157 1.3 oster dst += 4;
158 1.3 oster }
159 1.3 oster while (len) {
160 1.3 oster *dst++ = *a++ ^ *b++ ^ *c++;
161 1.3 oster len--;
162 1.3 oster }
163 1.1 oster }
/*
 * Helper macros for the unrolled loops of rf_nWayXor3 .. rf_nWayXor9.
 *
 * They operate on locals that the caller must declare: the accumulators
 * a0..a3, the staging registers b0..b3 and the word counter len.  Each
 * macro is wrapped in do { } while (0) so that it expands to exactly one
 * statement and is safe in an unbraced if/else; invoke it with a
 * trailing semicolon.
 */

/*
 * Load four destination words into a0..a3 and four words of the first
 * source into b0..b3, and subtract 4 from len.
 * Note that first arg is not incremented but 2nd arg is.
 */
#define LOAD_FIRST(_dst,_b) \
	do { \
		a0 = (_dst)[0]; len -= 4; \
		a1 = (_dst)[1]; \
		a2 = (_dst)[2]; \
		a3 = (_dst)[3]; \
		b0 = (_b)[0]; \
		b1 = (_b)[1]; \
		b2 = (_b)[2]; \
		b3 = (_b)[3]; (_b) += 4; \
	} while (0)

/*
 * Fold the staged words into the accumulators, then stage the next four
 * words from _n.  Note: arg is incremented.
 */
#define XOR_AND_LOAD_NEXT(_n) \
	do { \
		a0 ^= b0; b0 = (_n)[0]; \
		a1 ^= b1; b1 = (_n)[1]; \
		a2 ^= b2; b2 = (_n)[2]; \
		a3 ^= b3; b3 = (_n)[3]; \
		(_n) += 4; \
	} while (0)

/*
 * Fold the last staged words into the accumulators and write the four
 * results to _dst.  Arg is incremented.
 */
#define XOR_AND_STORE(_dst) \
	do { \
		a0 ^= b0; (_dst)[0] = a0; \
		a1 ^= b1; (_dst)[1] = a1; \
		a2 ^= b2; (_dst)[2] = a2; \
		a3 ^= b3; (_dst)[3] = a3; \
		(_dst) += 4; \
	} while (0)
190 1.1 oster
191 1.1 oster
192 1.7.8.1 kent void
193 1.6 oster rf_nWayXor3(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
194 1.3 oster {
195 1.4 augustss unsigned long *dst = (unsigned long *) dest_rb->buffer;
196 1.4 augustss unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
197 1.4 augustss unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
198 1.4 augustss unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
199 1.3 oster unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
200 1.3 oster
201 1.3 oster callcount[3]++;
202 1.3 oster /* align dest to cache line */
203 1.3 oster while ((((unsigned long) dst) & 0x1f)) {
204 1.3 oster *dst++ ^= *b++ ^ *c++ ^ *d++;
205 1.3 oster len--;
206 1.3 oster }
207 1.3 oster while (len > 4) {
208 1.3 oster LOAD_FIRST(dst, b);
209 1.3 oster XOR_AND_LOAD_NEXT(c);
210 1.3 oster XOR_AND_LOAD_NEXT(d);
211 1.3 oster XOR_AND_STORE(dst);
212 1.3 oster }
213 1.3 oster while (len) {
214 1.3 oster *dst++ ^= *b++ ^ *c++ ^ *d++;
215 1.3 oster len--;
216 1.3 oster }
217 1.1 oster }
218 1.1 oster
219 1.7.8.1 kent void
220 1.6 oster rf_nWayXor4(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
221 1.3 oster {
222 1.4 augustss unsigned long *dst = (unsigned long *) dest_rb->buffer;
223 1.4 augustss unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
224 1.4 augustss unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
225 1.4 augustss unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
226 1.4 augustss unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
227 1.3 oster unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
228 1.3 oster
229 1.3 oster callcount[4]++;
230 1.3 oster /* align dest to cache line */
231 1.3 oster while ((((unsigned long) dst) & 0x1f)) {
232 1.3 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
233 1.3 oster len--;
234 1.3 oster }
235 1.3 oster while (len > 4) {
236 1.3 oster LOAD_FIRST(dst, b);
237 1.3 oster XOR_AND_LOAD_NEXT(c);
238 1.3 oster XOR_AND_LOAD_NEXT(d);
239 1.3 oster XOR_AND_LOAD_NEXT(e);
240 1.3 oster XOR_AND_STORE(dst);
241 1.3 oster }
242 1.3 oster while (len) {
243 1.3 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
244 1.3 oster len--;
245 1.3 oster }
246 1.1 oster }
247 1.1 oster
248 1.7.8.1 kent void
249 1.6 oster rf_nWayXor5(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
250 1.3 oster {
251 1.4 augustss unsigned long *dst = (unsigned long *) dest_rb->buffer;
252 1.4 augustss unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
253 1.4 augustss unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
254 1.4 augustss unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
255 1.4 augustss unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
256 1.4 augustss unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
257 1.3 oster unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
258 1.3 oster
259 1.3 oster callcount[5]++;
260 1.3 oster /* align dest to cache line */
261 1.3 oster while ((((unsigned long) dst) & 0x1f)) {
262 1.3 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
263 1.3 oster len--;
264 1.3 oster }
265 1.3 oster while (len > 4) {
266 1.3 oster LOAD_FIRST(dst, b);
267 1.3 oster XOR_AND_LOAD_NEXT(c);
268 1.3 oster XOR_AND_LOAD_NEXT(d);
269 1.3 oster XOR_AND_LOAD_NEXT(e);
270 1.3 oster XOR_AND_LOAD_NEXT(f);
271 1.3 oster XOR_AND_STORE(dst);
272 1.3 oster }
273 1.3 oster while (len) {
274 1.3 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
275 1.3 oster len--;
276 1.3 oster }
277 1.1 oster }
278 1.1 oster
279 1.7.8.1 kent void
280 1.6 oster rf_nWayXor6(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
281 1.3 oster {
282 1.4 augustss unsigned long *dst = (unsigned long *) dest_rb->buffer;
283 1.4 augustss unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
284 1.4 augustss unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
285 1.4 augustss unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
286 1.4 augustss unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
287 1.4 augustss unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
288 1.4 augustss unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
289 1.3 oster unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
290 1.3 oster
291 1.3 oster callcount[6]++;
292 1.3 oster /* align dest to cache line */
293 1.3 oster while ((((unsigned long) dst) & 0x1f)) {
294 1.3 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
295 1.3 oster len--;
296 1.3 oster }
297 1.3 oster while (len > 4) {
298 1.3 oster LOAD_FIRST(dst, b);
299 1.3 oster XOR_AND_LOAD_NEXT(c);
300 1.3 oster XOR_AND_LOAD_NEXT(d);
301 1.3 oster XOR_AND_LOAD_NEXT(e);
302 1.3 oster XOR_AND_LOAD_NEXT(f);
303 1.3 oster XOR_AND_LOAD_NEXT(g);
304 1.3 oster XOR_AND_STORE(dst);
305 1.3 oster }
306 1.3 oster while (len) {
307 1.3 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
308 1.3 oster len--;
309 1.3 oster }
310 1.1 oster }
311 1.1 oster
312 1.7.8.1 kent void
313 1.6 oster rf_nWayXor7(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
314 1.3 oster {
315 1.4 augustss unsigned long *dst = (unsigned long *) dest_rb->buffer;
316 1.4 augustss unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
317 1.4 augustss unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
318 1.4 augustss unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
319 1.4 augustss unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
320 1.4 augustss unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
321 1.4 augustss unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
322 1.4 augustss unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
323 1.3 oster unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
324 1.3 oster
325 1.3 oster callcount[7]++;
326 1.3 oster /* align dest to cache line */
327 1.3 oster while ((((unsigned long) dst) & 0x1f)) {
328 1.3 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
329 1.3 oster len--;
330 1.3 oster }
331 1.3 oster while (len > 4) {
332 1.3 oster LOAD_FIRST(dst, b);
333 1.3 oster XOR_AND_LOAD_NEXT(c);
334 1.3 oster XOR_AND_LOAD_NEXT(d);
335 1.3 oster XOR_AND_LOAD_NEXT(e);
336 1.3 oster XOR_AND_LOAD_NEXT(f);
337 1.3 oster XOR_AND_LOAD_NEXT(g);
338 1.3 oster XOR_AND_LOAD_NEXT(h);
339 1.3 oster XOR_AND_STORE(dst);
340 1.3 oster }
341 1.3 oster while (len) {
342 1.3 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
343 1.3 oster len--;
344 1.3 oster }
345 1.1 oster }
346 1.1 oster
347 1.7.8.1 kent void
348 1.6 oster rf_nWayXor8(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
349 1.3 oster {
350 1.4 augustss unsigned long *dst = (unsigned long *) dest_rb->buffer;
351 1.4 augustss unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
352 1.4 augustss unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
353 1.4 augustss unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
354 1.4 augustss unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
355 1.4 augustss unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
356 1.4 augustss unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
357 1.4 augustss unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
358 1.4 augustss unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
359 1.3 oster unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
360 1.3 oster
361 1.3 oster callcount[8]++;
362 1.3 oster /* align dest to cache line */
363 1.3 oster while ((((unsigned long) dst) & 0x1f)) {
364 1.3 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
365 1.3 oster len--;
366 1.3 oster }
367 1.3 oster while (len > 4) {
368 1.3 oster LOAD_FIRST(dst, b);
369 1.3 oster XOR_AND_LOAD_NEXT(c);
370 1.3 oster XOR_AND_LOAD_NEXT(d);
371 1.3 oster XOR_AND_LOAD_NEXT(e);
372 1.3 oster XOR_AND_LOAD_NEXT(f);
373 1.3 oster XOR_AND_LOAD_NEXT(g);
374 1.3 oster XOR_AND_LOAD_NEXT(h);
375 1.3 oster XOR_AND_LOAD_NEXT(i);
376 1.3 oster XOR_AND_STORE(dst);
377 1.3 oster }
378 1.3 oster while (len) {
379 1.3 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
380 1.3 oster len--;
381 1.3 oster }
382 1.1 oster }
383 1.1 oster
384 1.1 oster
385 1.7.8.1 kent void
386 1.6 oster rf_nWayXor9(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
387 1.3 oster {
388 1.4 augustss unsigned long *dst = (unsigned long *) dest_rb->buffer;
389 1.4 augustss unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
390 1.4 augustss unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
391 1.4 augustss unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
392 1.4 augustss unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
393 1.4 augustss unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
394 1.4 augustss unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
395 1.4 augustss unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
396 1.4 augustss unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
397 1.4 augustss unsigned long *j = (unsigned long *) src_rbs[8]->buffer;
398 1.3 oster unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
399 1.3 oster
400 1.3 oster callcount[9]++;
401 1.3 oster /* align dest to cache line */
402 1.3 oster while ((((unsigned long) dst) & 0x1f)) {
403 1.3 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
404 1.3 oster len--;
405 1.3 oster }
406 1.3 oster while (len > 4) {
407 1.3 oster LOAD_FIRST(dst, b);
408 1.3 oster XOR_AND_LOAD_NEXT(c);
409 1.3 oster XOR_AND_LOAD_NEXT(d);
410 1.3 oster XOR_AND_LOAD_NEXT(e);
411 1.3 oster XOR_AND_LOAD_NEXT(f);
412 1.3 oster XOR_AND_LOAD_NEXT(g);
413 1.3 oster XOR_AND_LOAD_NEXT(h);
414 1.3 oster XOR_AND_LOAD_NEXT(i);
415 1.3 oster XOR_AND_LOAD_NEXT(j);
416 1.3 oster XOR_AND_STORE(dst);
417 1.3 oster }
418 1.3 oster while (len) {
419 1.3 oster *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
420 1.3 oster len--;
421 1.3 oster }
422 1.1 oster }
423