rf_netbsdkintf.c revision 1.47 1 /* $NetBSD: rf_netbsdkintf.c,v 1.47 2000/01/21 23:39:59 thorpej Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_copyback.h"
141 #include "rf_dag.h"
142 #include "rf_dagflags.h"
143 #include "rf_diskqueue.h"
144 #include "rf_acctrace.h"
145 #include "rf_etimer.h"
146 #include "rf_general.h"
147 #include "rf_debugMem.h"
148 #include "rf_kintf.h"
149 #include "rf_options.h"
150 #include "rf_driver.h"
151 #include "rf_parityscan.h"
152 #include "rf_debugprint.h"
153 #include "rf_threadstuff.h"
154
155 int rf_kdebug_level = 0;
156
157 #ifdef DEBUG
158 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
159 #else /* DEBUG */
160 #define db1_printf(a) { }
161 #endif /* DEBUG */
162
163 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
164
165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
166
167 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
168 * spare table */
169 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
170 * installation process */
171
172 /* prototypes */
173 static void KernelWakeupFunc(struct buf * bp);
174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
175 dev_t dev, RF_SectorNum_t startSect,
176 RF_SectorCount_t numSect, caddr_t buf,
177 void (*cbFunc) (struct buf *), void *cbArg,
178 int logBytesPerSector, struct proc * b_proc);
179 static int raidinit __P((dev_t, RF_Raid_t *, int));
180
181 void raidattach __P((int));
182 int raidsize __P((dev_t));
183 int raidopen __P((dev_t, int, int, struct proc *));
184 int raidclose __P((dev_t, int, int, struct proc *));
185 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
186 int raidwrite __P((dev_t, struct uio *, int));
187 int raidread __P((dev_t, struct uio *, int));
188 void raidstrategy __P((struct buf *));
189 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
190
191 /*
192 * Pilfered from ccd.c
193 */
194
/*
 * Per-component I/O descriptor (modelled on ccd.c's cbuf): wraps the
 * buf that is handed to a component disk and remembers the RAIDframe
 * request it belongs to, so completion can be routed back.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! --
				 * presumably so the raidbuf can be
				 * recovered from a struct buf *;
				 * TODO confirm against callers */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int rf_flags;		/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
201
202
203 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
204 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
205
206 /* XXX Not sure if the following should be replacing the raidPtrs above,
207 or if it should be used in conjunction with that... */
208
/*
 * Per-unit software state for a RAID pseudo-device: flags, size,
 * the generic disk glue, a pool for component bufs, and the queue
 * of pending bufs fed to raidstart() by raidstrategy().
 */
struct raid_softc {
	int sc_flags;		/* flags (RAIDF_* below) */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	dev_t sc_dev;		/* our device.. */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf_queue buf_queue;	/* used for the device queue */
};
219 /* sc_flags */
220 #define RAIDF_INITED 0x01 /* unit has been initialized */
221 #define RAIDF_WLABEL 0x02 /* label area is writable */
222 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
223 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
224 #define RAIDF_LOCKED 0x80 /* unit is locked */
225
226 #define raidunit(x) DISKUNIT(x)
227 static int numraid = 0;
228
229 /*
230 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
231 * Be aware that large numbers can allow the driver to consume a lot of
232 * kernel memory, especially on writes, and in degraded mode reads.
233 *
234 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
235 * a single 64K write will typically require 64K for the old data,
236 * 64K for the old parity, and 64K for the new parity, for a total
237 * of 192K (if the parity buffer is not re-used immediately).
 238  * Even if it is used immediately, that's still 128K, which when multiplied
239 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
240 *
241 * Now in degraded mode, for example, a 64K read on the above setup may
242 * require data reconstruction, which will require *all* of the 4 remaining
243 * disks to participate -- 4 * 32K/disk == 128K again.
244 */
245
246 #ifndef RAIDOUTSTANDING
247 #define RAIDOUTSTANDING 6
248 #endif
249
250 #define RAIDLABELDEV(dev) \
251 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
252
253 /* declared here, and made public, for the benefit of KVM stuff.. */
254 struct raid_softc *raid_softc;
255
256 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
257 struct disklabel *));
258 static void raidgetdisklabel __P((dev_t));
259 static void raidmakedisklabel __P((struct raid_softc *));
260
261 static int raidlock __P((struct raid_softc *));
262 static void raidunlock __P((struct raid_softc *));
263
264 static void rf_markalldirty __P((RF_Raid_t *));
265
266 void rf_ReconThread __P((struct rf_recon_req *));
267 /* XXX what I want is: */
268 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
269 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
270 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
271 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
272
273 void
274 raidattach(num)
275 int num;
276 {
277 int raidID;
278 int i, rc;
279
280 #ifdef DEBUG
281 printf("raidattach: Asked for %d units\n", num);
282 #endif
283
284 if (num <= 0) {
285 #ifdef DIAGNOSTIC
286 panic("raidattach: count <= 0");
287 #endif
288 return;
289 }
290 /* This is where all the initialization stuff gets done. */
291
292 /* Make some space for requested number of units... */
293
294 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
295 if (raidPtrs == NULL) {
296 panic("raidPtrs is NULL!!\n");
297 }
298
299 rc = rf_mutex_init(&rf_sparet_wait_mutex);
300 if (rc) {
301 RF_PANIC();
302 }
303
304 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
305
306 for (i = 0; i < numraid; i++)
307 raidPtrs[i] = NULL;
308 rc = rf_BootRaidframe();
309 if (rc == 0)
310 printf("Kernelized RAIDframe activated\n");
311 else
312 panic("Serious error booting RAID!!\n");
313
314 /* put together some datastructures like the CCD device does.. This
315 * lets us lock the device and what-not when it gets opened. */
316
317 raid_softc = (struct raid_softc *)
318 malloc(num * sizeof(struct raid_softc),
319 M_RAIDFRAME, M_NOWAIT);
320 if (raid_softc == NULL) {
321 printf("WARNING: no memory for RAIDframe driver\n");
322 return;
323 }
324 numraid = num;
325 bzero(raid_softc, num * sizeof(struct raid_softc));
326
327 for (raidID = 0; raidID < num; raidID++) {
328 BUFQ_INIT(&raid_softc[raidID].buf_queue);
329 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
330 (RF_Raid_t *));
331 if (raidPtrs[raidID] == NULL) {
332 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
333 numraid = raidID;
334 return;
335 }
336 }
337 }
338
339
340 int
341 raidsize(dev)
342 dev_t dev;
343 {
344 struct raid_softc *rs;
345 struct disklabel *lp;
346 int part, unit, omask, size;
347
348 unit = raidunit(dev);
349 if (unit >= numraid)
350 return (-1);
351 rs = &raid_softc[unit];
352
353 if ((rs->sc_flags & RAIDF_INITED) == 0)
354 return (-1);
355
356 part = DISKPART(dev);
357 omask = rs->sc_dkdev.dk_openmask & (1 << part);
358 lp = rs->sc_dkdev.dk_label;
359
360 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
361 return (-1);
362
363 if (lp->d_partitions[part].p_fstype != FS_SWAP)
364 size = -1;
365 else
366 size = lp->d_partitions[part].p_size *
367 (lp->d_secsize / DEV_BSIZE);
368
369 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
370 return (-1);
371
372 return (size);
373
374 }
375
376 int
377 raiddump(dev, blkno, va, size)
378 dev_t dev;
379 daddr_t blkno;
380 caddr_t va;
381 size_t size;
382 {
383 /* Not implemented. */
384 return ENXIO;
385 }
386 /* ARGSUSED */
387 int
388 raidopen(dev, flags, fmt, p)
389 dev_t dev;
390 int flags, fmt;
391 struct proc *p;
392 {
393 int unit = raidunit(dev);
394 struct raid_softc *rs;
395 struct disklabel *lp;
396 int part, pmask;
397 int error = 0;
398
399 if (unit >= numraid)
400 return (ENXIO);
401 rs = &raid_softc[unit];
402
403 if ((error = raidlock(rs)) != 0)
404 return (error);
405 lp = rs->sc_dkdev.dk_label;
406
407 part = DISKPART(dev);
408 pmask = (1 << part);
409
410 db1_printf(("Opening raid device number: %d partition: %d\n",
411 unit, part));
412
413
414 if ((rs->sc_flags & RAIDF_INITED) &&
415 (rs->sc_dkdev.dk_openmask == 0))
416 raidgetdisklabel(dev);
417
418 /* make sure that this partition exists */
419
420 if (part != RAW_PART) {
421 db1_printf(("Not a raw partition..\n"));
422 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
423 ((part >= lp->d_npartitions) ||
424 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
425 error = ENXIO;
426 raidunlock(rs);
427 db1_printf(("Bailing out...\n"));
428 return (error);
429 }
430 }
431 /* Prevent this unit from being unconfigured while open. */
432 switch (fmt) {
433 case S_IFCHR:
434 rs->sc_dkdev.dk_copenmask |= pmask;
435 break;
436
437 case S_IFBLK:
438 rs->sc_dkdev.dk_bopenmask |= pmask;
439 break;
440 }
441
442 if ((rs->sc_dkdev.dk_openmask == 0) &&
443 ((rs->sc_flags & RAIDF_INITED) != 0)) {
444 /* First one... mark things as dirty... Note that we *MUST*
445 have done a configure before this. I DO NOT WANT TO BE
446 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
447 THAT THEY BELONG TOGETHER!!!!! */
448 /* XXX should check to see if we're only open for reading
449 here... If so, we needn't do this, but then need some
450 other way of keeping track of what's happened.. */
451
452 rf_markalldirty( raidPtrs[unit] );
453 }
454
455
456 rs->sc_dkdev.dk_openmask =
457 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
458
459 raidunlock(rs);
460
461 return (error);
462
463
464 }
465 /* ARGSUSED */
466 int
467 raidclose(dev, flags, fmt, p)
468 dev_t dev;
469 int flags, fmt;
470 struct proc *p;
471 {
472 int unit = raidunit(dev);
473 struct raid_softc *rs;
474 int error = 0;
475 int part;
476
477 if (unit >= numraid)
478 return (ENXIO);
479 rs = &raid_softc[unit];
480
481 if ((error = raidlock(rs)) != 0)
482 return (error);
483
484 part = DISKPART(dev);
485
486 /* ...that much closer to allowing unconfiguration... */
487 switch (fmt) {
488 case S_IFCHR:
489 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
490 break;
491
492 case S_IFBLK:
493 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
494 break;
495 }
496 rs->sc_dkdev.dk_openmask =
497 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
498
499 if ((rs->sc_dkdev.dk_openmask == 0) &&
500 ((rs->sc_flags & RAIDF_INITED) != 0)) {
501 /* Last one... device is not unconfigured yet.
502 Device shutdown has taken care of setting the
503 clean bits if RAIDF_INITED is not set
504 mark things as clean... */
505 rf_update_component_labels( raidPtrs[unit] );
506 }
507
508 raidunlock(rs);
509 return (0);
510
511 }
512
513 void
514 raidstrategy(bp)
515 register struct buf *bp;
516 {
517 register int s;
518
519 unsigned int raidID = raidunit(bp->b_dev);
520 RF_Raid_t *raidPtr;
521 struct raid_softc *rs = &raid_softc[raidID];
522 struct disklabel *lp;
523 int wlabel;
524
525 if ((rs->sc_flags & RAIDF_INITED) ==0) {
526 bp->b_error = ENXIO;
527 bp->b_flags = B_ERROR;
528 bp->b_resid = bp->b_bcount;
529 biodone(bp);
530 return;
531 }
532 if (raidID >= numraid || !raidPtrs[raidID]) {
533 bp->b_error = ENODEV;
534 bp->b_flags |= B_ERROR;
535 bp->b_resid = bp->b_bcount;
536 biodone(bp);
537 return;
538 }
539 raidPtr = raidPtrs[raidID];
540 if (!raidPtr->valid) {
541 bp->b_error = ENODEV;
542 bp->b_flags |= B_ERROR;
543 bp->b_resid = bp->b_bcount;
544 biodone(bp);
545 return;
546 }
547 if (bp->b_bcount == 0) {
548 db1_printf(("b_bcount is zero..\n"));
549 biodone(bp);
550 return;
551 }
552 lp = rs->sc_dkdev.dk_label;
553
554 /*
555 * Do bounds checking and adjust transfer. If there's an
556 * error, the bounds check will flag that for us.
557 */
558
559 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
560 if (DISKPART(bp->b_dev) != RAW_PART)
561 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
562 db1_printf(("Bounds check failed!!:%d %d\n",
563 (int) bp->b_blkno, (int) wlabel));
564 biodone(bp);
565 return;
566 }
567 s = splbio();
568
569 bp->b_resid = 0;
570
571 /* stuff it onto our queue */
572 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
573
574 raidstart(raidPtrs[raidID]);
575
576 splx(s);
577 }
578 /* ARGSUSED */
579 int
580 raidread(dev, uio, flags)
581 dev_t dev;
582 struct uio *uio;
583 int flags;
584 {
585 int unit = raidunit(dev);
586 struct raid_softc *rs;
587 int part;
588
589 if (unit >= numraid)
590 return (ENXIO);
591 rs = &raid_softc[unit];
592
593 if ((rs->sc_flags & RAIDF_INITED) == 0)
594 return (ENXIO);
595 part = DISKPART(dev);
596
597 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
598
599 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
600
601 }
602 /* ARGSUSED */
603 int
604 raidwrite(dev, uio, flags)
605 dev_t dev;
606 struct uio *uio;
607 int flags;
608 {
609 int unit = raidunit(dev);
610 struct raid_softc *rs;
611
612 if (unit >= numraid)
613 return (ENXIO);
614 rs = &raid_softc[unit];
615
616 if ((rs->sc_flags & RAIDF_INITED) == 0)
617 return (ENXIO);
618 db1_printf(("raidwrite\n"));
619 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
620
621 }
622
623 int
624 raidioctl(dev, cmd, data, flag, p)
625 dev_t dev;
626 u_long cmd;
627 caddr_t data;
628 int flag;
629 struct proc *p;
630 {
631 int unit = raidunit(dev);
632 int error = 0;
633 int part, pmask;
634 struct raid_softc *rs;
635 RF_Config_t *k_cfg, *u_cfg;
636 RF_Raid_t *raidPtr;
637 RF_AccTotals_t *totals;
638 RF_DeviceConfig_t *d_cfg, **ucfgp;
639 u_char *specific_buf;
640 int retcode = 0;
641 int row;
642 int column;
643 struct rf_recon_req *rrcopy, *rr;
644 RF_ComponentLabel_t *component_label;
645 RF_ComponentLabel_t ci_label;
646 RF_ComponentLabel_t **c_label_ptr;
647 RF_SingleComponent_t *sparePtr,*componentPtr;
648 RF_SingleComponent_t hot_spare;
649 RF_SingleComponent_t component;
650 int i, j, d;
651
652 if (unit >= numraid)
653 return (ENXIO);
654 rs = &raid_softc[unit];
655 raidPtr = raidPtrs[unit];
656
657 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
658 (int) DISKPART(dev), (int) unit, (int) cmd));
659
660 /* Must be open for writes for these commands... */
661 switch (cmd) {
662 case DIOCSDINFO:
663 case DIOCWDINFO:
664 case DIOCWLABEL:
665 if ((flag & FWRITE) == 0)
666 return (EBADF);
667 }
668
669 /* Must be initialized for these... */
670 switch (cmd) {
671 case DIOCGDINFO:
672 case DIOCSDINFO:
673 case DIOCWDINFO:
674 case DIOCGPART:
675 case DIOCWLABEL:
676 case DIOCGDEFLABEL:
677 case RAIDFRAME_SHUTDOWN:
678 case RAIDFRAME_REWRITEPARITY:
679 case RAIDFRAME_GET_INFO:
680 case RAIDFRAME_RESET_ACCTOTALS:
681 case RAIDFRAME_GET_ACCTOTALS:
682 case RAIDFRAME_KEEP_ACCTOTALS:
683 case RAIDFRAME_GET_SIZE:
684 case RAIDFRAME_FAIL_DISK:
685 case RAIDFRAME_COPYBACK:
686 case RAIDFRAME_CHECK_RECON_STATUS:
687 case RAIDFRAME_GET_COMPONENT_LABEL:
688 case RAIDFRAME_SET_COMPONENT_LABEL:
689 case RAIDFRAME_ADD_HOT_SPARE:
690 case RAIDFRAME_REMOVE_HOT_SPARE:
691 case RAIDFRAME_INIT_LABELS:
692 case RAIDFRAME_REBUILD_IN_PLACE:
693 case RAIDFRAME_CHECK_PARITY:
694 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
695 case RAIDFRAME_CHECK_COPYBACK_STATUS:
696 if ((rs->sc_flags & RAIDF_INITED) == 0)
697 return (ENXIO);
698 }
699
700 switch (cmd) {
701
702 /* configure the system */
703 case RAIDFRAME_CONFIGURE:
704 /* copy-in the configuration information */
705 /* data points to a pointer to the configuration structure */
706
707 u_cfg = *((RF_Config_t **) data);
708 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
709 if (k_cfg == NULL) {
710 return (ENOMEM);
711 }
712 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
713 sizeof(RF_Config_t));
714 if (retcode) {
715 RF_Free(k_cfg, sizeof(RF_Config_t));
716 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
717 retcode));
718 return (retcode);
719 }
720 /* allocate a buffer for the layout-specific data, and copy it
721 * in */
722 if (k_cfg->layoutSpecificSize) {
723 if (k_cfg->layoutSpecificSize > 10000) {
724 /* sanity check */
725 RF_Free(k_cfg, sizeof(RF_Config_t));
726 return (EINVAL);
727 }
728 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
729 (u_char *));
730 if (specific_buf == NULL) {
731 RF_Free(k_cfg, sizeof(RF_Config_t));
732 return (ENOMEM);
733 }
734 retcode = copyin(k_cfg->layoutSpecific,
735 (caddr_t) specific_buf,
736 k_cfg->layoutSpecificSize);
737 if (retcode) {
738 RF_Free(k_cfg, sizeof(RF_Config_t));
739 RF_Free(specific_buf,
740 k_cfg->layoutSpecificSize);
741 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
742 retcode));
743 return (retcode);
744 }
745 } else
746 specific_buf = NULL;
747 k_cfg->layoutSpecific = specific_buf;
748
749 /* should do some kind of sanity check on the configuration.
750 * Store the sum of all the bytes in the last byte? */
751
752 /* configure the system */
753
754 raidPtr->raidid = unit;
755
756 retcode = rf_Configure(raidPtr, k_cfg);
757
758 if (retcode == 0) {
759
760 /* allow this many simultaneous IO's to
761 this RAID device */
762 raidPtr->openings = RAIDOUTSTANDING;
763
764 /* XXX should be moved to rf_Configure() */
765
766 raidPtr->copyback_in_progress = 0;
767 raidPtr->parity_rewrite_in_progress = 0;
768 raidPtr->recon_in_progress = 0;
769
770 retcode = raidinit(dev, raidPtr, unit);
771 rf_markalldirty( raidPtr );
772 }
773 /* free the buffers. No return code here. */
774 if (k_cfg->layoutSpecificSize) {
775 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
776 }
777 RF_Free(k_cfg, sizeof(RF_Config_t));
778
779 return (retcode);
780
781 /* shutdown the system */
782 case RAIDFRAME_SHUTDOWN:
783
784 if ((error = raidlock(rs)) != 0)
785 return (error);
786
787 /*
788 * If somebody has a partition mounted, we shouldn't
789 * shutdown.
790 */
791
792 part = DISKPART(dev);
793 pmask = (1 << part);
794 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
795 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
796 (rs->sc_dkdev.dk_copenmask & pmask))) {
797 raidunlock(rs);
798 return (EBUSY);
799 }
800
801 retcode = rf_Shutdown(raidPtr);
802
803 pool_destroy(&rs->sc_cbufpool);
804
805 /* It's no longer initialized... */
806 rs->sc_flags &= ~RAIDF_INITED;
807
808 /* Detach the disk. */
809 disk_detach(&rs->sc_dkdev);
810
811 raidunlock(rs);
812
813 return (retcode);
814 case RAIDFRAME_GET_COMPONENT_LABEL:
815 c_label_ptr = (RF_ComponentLabel_t **) data;
816 /* need to read the component label for the disk indicated
817 by row,column in component_label */
818
 819 		/* For practice, let's get it directly from disk, rather
820 than from the in-core copy */
821 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
822 (RF_ComponentLabel_t *));
823 if (component_label == NULL)
824 return (ENOMEM);
825
826 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
827
828 retcode = copyin( *c_label_ptr, component_label,
829 sizeof(RF_ComponentLabel_t));
830
831 if (retcode) {
832 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
833 return(retcode);
834 }
835
836 row = component_label->row;
837 column = component_label->column;
838
839 if ((row < 0) || (row >= raidPtr->numRow) ||
840 (column < 0) || (column >= raidPtr->numCol)) {
841 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
842 return(EINVAL);
843 }
844
845 raidread_component_label(
846 raidPtr->Disks[row][column].dev,
847 raidPtr->raid_cinfo[row][column].ci_vp,
848 component_label );
849
850 retcode = copyout((caddr_t) component_label,
851 (caddr_t) *c_label_ptr,
852 sizeof(RF_ComponentLabel_t));
853 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
854 return (retcode);
855
856 case RAIDFRAME_SET_COMPONENT_LABEL:
857 component_label = (RF_ComponentLabel_t *) data;
858
859 /* XXX check the label for valid stuff... */
860 /* Note that some things *should not* get modified --
861 the user should be re-initing the labels instead of
862 trying to patch things.
863 */
864
865 printf("Got component label:\n");
866 printf("Version: %d\n",component_label->version);
867 printf("Serial Number: %d\n",component_label->serial_number);
868 printf("Mod counter: %d\n",component_label->mod_counter);
869 printf("Row: %d\n", component_label->row);
870 printf("Column: %d\n", component_label->column);
871 printf("Num Rows: %d\n", component_label->num_rows);
872 printf("Num Columns: %d\n", component_label->num_columns);
873 printf("Clean: %d\n", component_label->clean);
874 printf("Status: %d\n", component_label->status);
875
876 row = component_label->row;
877 column = component_label->column;
878
879 if ((row < 0) || (row >= raidPtr->numRow) ||
880 (column < 0) || (column >= raidPtr->numCol)) {
881 return(EINVAL);
882 }
883
884 /* XXX this isn't allowed to do anything for now :-) */
885 #if 0
886 raidwrite_component_label(
887 raidPtr->Disks[row][column].dev,
888 raidPtr->raid_cinfo[row][column].ci_vp,
889 component_label );
890 #endif
891 return (0);
892
893 case RAIDFRAME_INIT_LABELS:
894 component_label = (RF_ComponentLabel_t *) data;
895 /*
896 we only want the serial number from
897 the above. We get all the rest of the information
898 from the config that was used to create this RAID
899 set.
900 */
901
902 raidPtr->serial_number = component_label->serial_number;
903 /* current version number */
904 ci_label.version = RF_COMPONENT_LABEL_VERSION;
905 ci_label.serial_number = component_label->serial_number;
906 ci_label.mod_counter = raidPtr->mod_counter;
907 ci_label.num_rows = raidPtr->numRow;
908 ci_label.num_columns = raidPtr->numCol;
909 ci_label.clean = RF_RAID_DIRTY; /* not clean */
910 ci_label.status = rf_ds_optimal; /* "It's good!" */
911
912 for(row=0;row<raidPtr->numRow;row++) {
913 ci_label.row = row;
914 for(column=0;column<raidPtr->numCol;column++) {
915 ci_label.column = column;
916 raidwrite_component_label(
917 raidPtr->Disks[row][column].dev,
918 raidPtr->raid_cinfo[row][column].ci_vp,
919 &ci_label );
920 }
921 }
922
923 return (retcode);
924
925 /* initialize all parity */
926 case RAIDFRAME_REWRITEPARITY:
927
928 if (raidPtr->Layout.map->faultsTolerated == 0) {
929 /* Parity for RAID 0 is trivially correct */
930 raidPtr->parity_good = RF_RAID_CLEAN;
931 return(0);
932 }
933
934 if (raidPtr->parity_rewrite_in_progress == 1) {
935 /* Re-write is already in progress! */
936 return(EINVAL);
937 }
938
939 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
940 rf_RewriteParityThread,
941 raidPtr,"raid_parity");
942 return (retcode);
943
944
945 case RAIDFRAME_ADD_HOT_SPARE:
946 sparePtr = (RF_SingleComponent_t *) data;
947 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
948 printf("Adding spare\n");
949 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
950 return(retcode);
951
952 case RAIDFRAME_REMOVE_HOT_SPARE:
953 return(retcode);
954
955 case RAIDFRAME_REBUILD_IN_PLACE:
956
957 if (raidPtr->Layout.map->faultsTolerated == 0) {
958 /* Can't do this on a RAID 0!! */
959 return(EINVAL);
960 }
961
962 if (raidPtr->recon_in_progress == 1) {
963 /* a reconstruct is already in progress! */
964 return(EINVAL);
965 }
966
967 componentPtr = (RF_SingleComponent_t *) data;
968 memcpy( &component, componentPtr,
969 sizeof(RF_SingleComponent_t));
970 row = component.row;
971 column = component.column;
972 printf("Rebuild: %d %d\n",row, column);
973 if ((row < 0) || (row >= raidPtr->numRow) ||
974 (column < 0) || (column >= raidPtr->numCol)) {
975 return(EINVAL);
976 }
977
978 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
979 if (rrcopy == NULL)
980 return(ENOMEM);
981
982 rrcopy->raidPtr = (void *) raidPtr;
983 rrcopy->row = row;
984 rrcopy->col = column;
985
986 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
987 rf_ReconstructInPlaceThread,
988 rrcopy,"raid_reconip");
989 return(retcode);
990
991 case RAIDFRAME_GET_INFO:
992 if (!raidPtr->valid)
993 return (ENODEV);
994 ucfgp = (RF_DeviceConfig_t **) data;
995 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
996 (RF_DeviceConfig_t *));
997 if (d_cfg == NULL)
998 return (ENOMEM);
999 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1000 d_cfg->rows = raidPtr->numRow;
1001 d_cfg->cols = raidPtr->numCol;
1002 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1003 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1004 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1005 return (ENOMEM);
1006 }
1007 d_cfg->nspares = raidPtr->numSpare;
1008 if (d_cfg->nspares >= RF_MAX_DISKS) {
1009 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1010 return (ENOMEM);
1011 }
1012 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1013 d = 0;
1014 for (i = 0; i < d_cfg->rows; i++) {
1015 for (j = 0; j < d_cfg->cols; j++) {
1016 d_cfg->devs[d] = raidPtr->Disks[i][j];
1017 d++;
1018 }
1019 }
1020 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1021 d_cfg->spares[i] = raidPtr->Disks[0][j];
1022 }
1023 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1024 sizeof(RF_DeviceConfig_t));
1025 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1026
1027 return (retcode);
1028
1029 case RAIDFRAME_CHECK_PARITY:
1030 *(int *) data = raidPtr->parity_good;
1031 return (0);
1032
1033 case RAIDFRAME_RESET_ACCTOTALS:
1034 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1035 return (0);
1036
1037 case RAIDFRAME_GET_ACCTOTALS:
1038 totals = (RF_AccTotals_t *) data;
1039 *totals = raidPtr->acc_totals;
1040 return (0);
1041
1042 case RAIDFRAME_KEEP_ACCTOTALS:
1043 raidPtr->keep_acc_totals = *(int *)data;
1044 return (0);
1045
1046 case RAIDFRAME_GET_SIZE:
1047 *(int *) data = raidPtr->totalSectors;
1048 return (0);
1049
1050 /* fail a disk & optionally start reconstruction */
1051 case RAIDFRAME_FAIL_DISK:
1052
1053 if (raidPtr->Layout.map->faultsTolerated == 0) {
1054 /* Can't do this on a RAID 0!! */
1055 return(EINVAL);
1056 }
1057
1058 rr = (struct rf_recon_req *) data;
1059
1060 if (rr->row < 0 || rr->row >= raidPtr->numRow
1061 || rr->col < 0 || rr->col >= raidPtr->numCol)
1062 return (EINVAL);
1063
1064 printf("raid%d: Failing the disk: row: %d col: %d\n",
1065 unit, rr->row, rr->col);
1066
1067 /* make a copy of the recon request so that we don't rely on
1068 * the user's buffer */
1069 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1070 if (rrcopy == NULL)
1071 return(ENOMEM);
1072 bcopy(rr, rrcopy, sizeof(*rr));
1073 rrcopy->raidPtr = (void *) raidPtr;
1074
1075 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1076 rf_ReconThread,
1077 rrcopy,"raid_recon");
1078 return (0);
1079
1080 /* invoke a copyback operation after recon on whatever disk
1081 * needs it, if any */
1082 case RAIDFRAME_COPYBACK:
1083
1084 if (raidPtr->Layout.map->faultsTolerated == 0) {
1085 /* This makes no sense on a RAID 0!! */
1086 return(EINVAL);
1087 }
1088
1089 if (raidPtr->copyback_in_progress == 1) {
1090 /* Copyback is already in progress! */
1091 return(EINVAL);
1092 }
1093
1094 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1095 rf_CopybackThread,
1096 raidPtr,"raid_copyback");
1097 return (retcode);
1098
1099 /* return the percentage completion of reconstruction */
1100 case RAIDFRAME_CHECK_RECON_STATUS:
1101 if (raidPtr->Layout.map->faultsTolerated == 0) {
1102 /* This makes no sense on a RAID 0 */
1103 return(EINVAL);
1104 }
1105 row = 0; /* XXX we only consider a single row... */
1106 if (raidPtr->status[row] != rf_rs_reconstructing)
1107 *(int *) data = 100;
1108 else
1109 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1110 return (0);
1111
1112 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1113 if (raidPtr->Layout.map->faultsTolerated == 0) {
1114 /* This makes no sense on a RAID 0 */
1115 return(EINVAL);
1116 }
1117 if (raidPtr->parity_rewrite_in_progress == 1) {
1118 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1119 } else {
1120 *(int *) data = 100;
1121 }
1122 return (0);
1123
1124 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1125 if (raidPtr->Layout.map->faultsTolerated == 0) {
1126 /* This makes no sense on a RAID 0 */
1127 return(EINVAL);
1128 }
1129 if (raidPtr->copyback_in_progress == 1) {
1130 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1131 raidPtr->Layout.numStripe;
1132 } else {
1133 *(int *) data = 100;
1134 }
1135 return (0);
1136
1137
1138 /* the sparetable daemon calls this to wait for the kernel to
1139 * need a spare table. this ioctl does not return until a
1140 * spare table is needed. XXX -- calling mpsleep here in the
1141 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1142 * -- I should either compute the spare table in the kernel,
1143 * or have a different -- XXX XXX -- interface (a different
1144 * character device) for delivering the table -- XXX */
1145 #if 0
1146 case RAIDFRAME_SPARET_WAIT:
1147 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1148 while (!rf_sparet_wait_queue)
1149 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1150 waitreq = rf_sparet_wait_queue;
1151 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1152 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1153
1154 /* structure assignment */
1155 *((RF_SparetWait_t *) data) = *waitreq;
1156
1157 RF_Free(waitreq, sizeof(*waitreq));
1158 return (0);
1159
1160 /* wakes up a process waiting on SPARET_WAIT and puts an error
1161 * code in it that will cause the dameon to exit */
1162 case RAIDFRAME_ABORT_SPARET_WAIT:
1163 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1164 waitreq->fcol = -1;
1165 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1166 waitreq->next = rf_sparet_wait_queue;
1167 rf_sparet_wait_queue = waitreq;
1168 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1169 wakeup(&rf_sparet_wait_queue);
1170 return (0);
1171
1172 /* used by the spare table daemon to deliver a spare table
1173 * into the kernel */
1174 case RAIDFRAME_SEND_SPARET:
1175
1176 /* install the spare table */
1177 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1178
1179 /* respond to the requestor. the return status of the spare
1180 * table installation is passed in the "fcol" field */
1181 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1182 waitreq->fcol = retcode;
1183 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1184 waitreq->next = rf_sparet_resp_queue;
1185 rf_sparet_resp_queue = waitreq;
1186 wakeup(&rf_sparet_resp_queue);
1187 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1188
1189 return (retcode);
1190 #endif
1191
1192 default:
1193 break; /* fall through to the os-specific code below */
1194
1195 }
1196
1197 if (!raidPtr->valid)
1198 return (EINVAL);
1199
1200 /*
1201 * Add support for "regular" device ioctls here.
1202 */
1203
1204 switch (cmd) {
1205 case DIOCGDINFO:
1206 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1207 break;
1208
1209 case DIOCGPART:
1210 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1211 ((struct partinfo *) data)->part =
1212 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1213 break;
1214
1215 case DIOCWDINFO:
1216 case DIOCSDINFO:
1217 if ((error = raidlock(rs)) != 0)
1218 return (error);
1219
1220 rs->sc_flags |= RAIDF_LABELLING;
1221
1222 error = setdisklabel(rs->sc_dkdev.dk_label,
1223 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1224 if (error == 0) {
1225 if (cmd == DIOCWDINFO)
1226 error = writedisklabel(RAIDLABELDEV(dev),
1227 raidstrategy, rs->sc_dkdev.dk_label,
1228 rs->sc_dkdev.dk_cpulabel);
1229 }
1230 rs->sc_flags &= ~RAIDF_LABELLING;
1231
1232 raidunlock(rs);
1233
1234 if (error)
1235 return (error);
1236 break;
1237
1238 case DIOCWLABEL:
1239 if (*(int *) data != 0)
1240 rs->sc_flags |= RAIDF_WLABEL;
1241 else
1242 rs->sc_flags &= ~RAIDF_WLABEL;
1243 break;
1244
1245 case DIOCGDEFLABEL:
1246 raidgetdefaultlabel(raidPtr, rs,
1247 (struct disklabel *) data);
1248 break;
1249
1250 default:
1251 retcode = ENOTTY;
1252 }
1253 return (retcode);
1254
1255 }
1256
1257
1258 /* raidinit -- complete the rest of the initialization for the
1259 RAIDframe device. */
1260
1261
1262 static int
1263 raidinit(dev, raidPtr, unit)
1264 dev_t dev;
1265 RF_Raid_t *raidPtr;
1266 int unit;
1267 {
1268 int retcode;
1269 struct raid_softc *rs;
1270
1271 retcode = 0;
1272
1273 rs = &raid_softc[unit];
1274 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1275 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1276
1277
1278 /* XXX should check return code first... */
1279 rs->sc_flags |= RAIDF_INITED;
1280
1281 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1282
1283 rs->sc_dkdev.dk_name = rs->sc_xname;
1284
1285 /* disk_attach actually creates space for the CPU disklabel, among
1286 * other things, so it's critical to call this *BEFORE* we try putzing
1287 * with disklabels. */
1288
1289 disk_attach(&rs->sc_dkdev);
1290
1291 /* XXX There may be a weird interaction here between this, and
1292 * protectedSectors, as used in RAIDframe. */
1293
1294 rs->sc_size = raidPtr->totalSectors;
1295 rs->sc_dev = dev;
1296
1297 return (retcode);
1298 }
1299
1300 /* wake up the daemon & tell it to get us a spare table
1301 * XXX
1302 * the entries in the queues should be tagged with the raidPtr
1303 * so that in the extremely rare case that two recons happen at once,
1304 * we know for which device were requesting a spare table
1305 * XXX
1306 *
1307 * XXX This code is not currently used. GO
1308 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	/* queue our request and wake the sparetable daemon blocked in
	 * the RAIDFRAME_SPARET_WAIT ioctl */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* NOTE(review): an older comment here said "mpsleep unlocks the
	 * mutex", but tsleep() does not release rf_sparet_wait_mutex --
	 * confirm that sleeping with this mutex held cannot deadlock
	 * against the daemon's response path. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* dequeue the daemon's response; note that this is a *different*
	 * RF_SparetWait_t than the one we queued above */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* the daemon passes the installation status back in fcol */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1334
1335 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1336 * bp & passes it down.
1337 * any calls originating in the kernel must use non-blocking I/O
1338 * do some extra sanity checking to return "appropriate" error values for
1339 * certain conditions (to make some standard utilities work)
1340 *
1341 * Formerly known as: rf_DoAccessKernel
1342 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Check to see if we're at the limit... */
	RF_LOCK_MUTEX(raidPtr->mutex);
	/* Loop invariant: raidPtr->mutex is held when the `openings' test
	 * is evaluated (and when the loop exits).  It is dropped while a
	 * request is prepared and re-acquired at the bottom of the loop
	 * and on every `continue' path. */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}
		BUFQ_REMOVE(&rs->buf_queue, bp);

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		/* sum is the end address; the `sum <' comparisons below
		 * catch arithmetic wrap-around as well */
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			/* request runs past the end of the array (or
			 * overflowed): fail it with ENOSPC */
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		if (bp->b_bcount & raidPtr->sectorMask) {
			/* transfer size is not a multiple of the sector
			 * size: reject it */
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening for this request */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* NOTE(review): retcode is not examined here; completion
		 * and errors come back through the non-blocking I/O
		 * callback path. */
		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				      do_async, raid_addr, num_blocks,
				      bp->b_un.b_addr, bp, NULL, NULL,
				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);


		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1452
1453
1454
1455
1456 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1457
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;
	int s;

	s=0;
	/* s = splbio();*/ /* want to test this */
	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	/* remember which queue this request was dispatched from, for
	 * KernelWakeupFunc() */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* wrap the caller's buf in a raidbuf from the per-unit pool */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): the extra parens make this a plain printf
		 * of the string; presumably db1_printf was intended --
		 * confirm before changing. */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete immediately through the normal callback */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* set up the raidbuf's buf for the component I/O;
		 * KernelWakeupFunc runs at biodone time */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must bump the vnode's output counter */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	/* splx(s); */ /* want to test this */
	return (0);
}
1572 /* this is the callback function associated with a I/O invoked from
1573 kernel code.
1574 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	/* recover the request/queue context stashed by rf_DispatchKernelIO */
	req = raidbp->req;

	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	if (raidbp->rf_buf.b_flags & B_ERROR) {
		/* propagate the component error to the original buf */
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	if (req->tracerec) {
		/* charge the elapsed physical I/O time to the trace record */
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* return the raidbuf to the per-unit pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		/* NOTE(review): disk_unbusy() is skipped when b_resid != 0
		 * (e.g. on error) -- confirm the busy count cannot leak. */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	}

	/* tell RAIDframe this queue entry is done (1 == failed) */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);

	splx(s);
}
1655
1656
1657
1658 /*
1659 * initialize a buf structure for doing an I/O in the kernel.
1660 */
1661 static void
1662 InitBP(
1663 struct buf * bp,
1664 struct vnode * b_vp,
1665 unsigned rw_flag,
1666 dev_t dev,
1667 RF_SectorNum_t startSect,
1668 RF_SectorCount_t numSect,
1669 caddr_t buf,
1670 void (*cbFunc) (struct buf *),
1671 void *cbArg,
1672 int logBytesPerSector,
1673 struct proc * b_proc)
1674 {
1675 /* bp->b_flags = B_PHYS | rw_flag; */
1676 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1677 bp->b_bcount = numSect << logBytesPerSector;
1678 bp->b_bufsize = bp->b_bcount;
1679 bp->b_error = 0;
1680 bp->b_dev = dev;
1681 bp->b_un.b_addr = buf;
1682 bp->b_blkno = startSect;
1683 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1684 if (bp->b_bcount == 0) {
1685 panic("bp->b_bcount is zero in InitBP!!\n");
1686 }
1687 bp->b_proc = b_proc;
1688 bp->b_iodone = cbFunc;
1689 bp->b_vp = b_vp;
1690
1691 }
1692
1693 static void
1694 raidgetdefaultlabel(raidPtr, rs, lp)
1695 RF_Raid_t *raidPtr;
1696 struct raid_softc *rs;
1697 struct disklabel *lp;
1698 {
1699 db1_printf(("Building a default label...\n"));
1700 bzero(lp, sizeof(*lp));
1701
1702 /* fabricate a label... */
1703 lp->d_secperunit = raidPtr->totalSectors;
1704 lp->d_secsize = raidPtr->bytesPerSector;
1705 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1706 lp->d_ntracks = 1;
1707 lp->d_ncylinders = raidPtr->totalSectors /
1708 (lp->d_nsectors * lp->d_ntracks);
1709 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1710
1711 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1712 lp->d_type = DTYPE_RAID;
1713 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1714 lp->d_rpm = 3600;
1715 lp->d_interleave = 1;
1716 lp->d_flags = 0;
1717
1718 lp->d_partitions[RAW_PART].p_offset = 0;
1719 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1720 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1721 lp->d_npartitions = RAW_PART + 1;
1722
1723 lp->d_magic = DISKMAGIC;
1724 lp->d_magic2 = DISKMAGIC;
1725 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1726
1727 }
1728 /*
1729 * Read the disklabel from the raid device. If one is not present, fake one
1730 * up.
1731 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated default so readdisklabel() has sane
	 * geometry to work with */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no on-disk label found: finish up the fabricated one */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same componets are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
1785 /*
1786 * Take care of things one might want to take care of in the event
1787 * that a disklabel isn't present.
1788 */
1789 static void
1790 raidmakedisklabel(rs)
1791 struct raid_softc *rs;
1792 {
1793 struct disklabel *lp = rs->sc_dkdev.dk_label;
1794 db1_printf(("Making a label..\n"));
1795
1796 /*
1797 * For historical reasons, if there's no disklabel present
1798 * the raw partition must be marked FS_BSDFFS.
1799 */
1800
1801 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1802
1803 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1804
1805 lp->d_checksum = dkcksum(lp);
1806 }
1807 /*
1808 * Lookup the provided name in the filesystem. If the file exists,
1809 * is a valid block device, and isn't being used by anyone else,
1810 * set *vpp to the file's vnode.
1811 * You'll find the original of this in ccd.c
1812 */
1813 int
1814 raidlookup(path, p, vpp)
1815 char *path;
1816 struct proc *p;
1817 struct vnode **vpp; /* result */
1818 {
1819 struct nameidata nd;
1820 struct vnode *vp;
1821 struct vattr va;
1822 int error;
1823
1824 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1825 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1826 #ifdef DEBUG
1827 printf("RAIDframe: vn_open returned %d\n", error);
1828 #endif
1829 return (error);
1830 }
1831 vp = nd.ni_vp;
1832 if (vp->v_usecount > 1) {
1833 VOP_UNLOCK(vp, 0);
1834 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1835 return (EBUSY);
1836 }
1837 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1838 VOP_UNLOCK(vp, 0);
1839 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1840 return (error);
1841 }
1842 /* XXX: eventually we should handle VREG, too. */
1843 if (va.va_type != VBLK) {
1844 VOP_UNLOCK(vp, 0);
1845 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1846 return (ENOTBLK);
1847 }
1848 VOP_UNLOCK(vp, 0);
1849 *vpp = vp;
1850 return (0);
1851 }
1852 /*
1853 * Wait interruptibly for an exclusive lock.
1854 *
1855 * XXX
1856 * Several drivers do this; it should be abstracted and made MP-safe.
1857 * (Hmm... where have we seen this warning before :-> GO )
1858 */
1859 static int
1860 raidlock(rs)
1861 struct raid_softc *rs;
1862 {
1863 int error;
1864
1865 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1866 rs->sc_flags |= RAIDF_WANTED;
1867 if ((error =
1868 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1869 return (error);
1870 }
1871 rs->sc_flags |= RAIDF_LOCKED;
1872 return (0);
1873 }
1874 /*
1875 * Unlock and wake up any waiters.
1876 */
1877 static void
1878 raidunlock(rs)
1879 struct raid_softc *rs;
1880 {
1881
1882 rs->sc_flags &= ~RAIDF_LOCKED;
1883 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
1884 rs->sc_flags &= ~RAIDF_WANTED;
1885 wakeup(rs);
1886 }
1887 }
1888
1889
1890 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
1891 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
1892
1893 int
1894 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
1895 {
1896 RF_ComponentLabel_t component_label;
1897 raidread_component_label(dev, b_vp, &component_label);
1898 component_label.mod_counter = mod_counter;
1899 component_label.clean = RF_RAID_CLEAN;
1900 raidwrite_component_label(dev, b_vp, &component_label);
1901 return(0);
1902 }
1903
1904
1905 int
1906 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
1907 {
1908 RF_ComponentLabel_t component_label;
1909 raidread_component_label(dev, b_vp, &component_label);
1910 component_label.mod_counter = mod_counter;
1911 component_label.clean = RF_RAID_DIRTY;
1912 raidwrite_component_label(dev, b_vp, &component_label);
1913 return(0);
1914 }
1915
1916 /* ARGSUSED */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read: the label lives at byte
	 * offset RF_COMPONENT_INFO_OFFSET on the component */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is set in DEV_BSIZE units while b_bcount
	 * is in bytes -- confirm this is what the strategy routine
	 * expects. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* issue the read directly through the component's block device */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* discard the temporary buffer */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
1966 /* ARGSUSED */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write: same on-disk location as
	 * raidread_component_label() reads from */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid in DEV_BSIZE units vs. b_bcount in bytes
	 * -- same question as in raidread_component_label(). */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero the full info area, then place the label at its start */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* discard the temporary buffer */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2000
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	/* bump the mod counter once for the whole pass so every component
	 * label in this array agrees */
	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
	/* NOTE(review): the disabled spare-handling block below references
	 * variables (sparecol, r, i, j, srow, scol) that are not all
	 * declared in this function, and calls raidmarkclean() with only
	 * two arguments -- it would need fixing before being enabled.
	 * Compare rf_update_component_labels(), which has a live version
	 * of this logic. */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
2095
2096
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	/* first pass: refresh the label of every optimal component, and
	 * mark it clean if parity is known good */
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			else if (raidPtr->Disks[r][c].status !=
				 rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	/* second pass: spares that are in service get a full label rewrite
	 * recording which (row, col) slot they stand in for.
	 * NOTE(review): spares are addressed as row 0 throughout -- this
	 * appears to assume a single-row array; confirm for multi-row
	 * configurations. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the (srow, scol) this spare replaces */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
					raidPtr->raid_cinfo[0][sparecol].ci_vp,
					raidPtr->mod_counter);
			}
		}
	}
	/* printf("Component labels updated\n"); */
}
2213
2214 void
2215 rf_ReconThread(req)
2216 struct rf_recon_req *req;
2217 {
2218 int s;
2219 RF_Raid_t *raidPtr;
2220
2221 s = splbio();
2222 raidPtr = (RF_Raid_t *) req->raidPtr;
2223 raidPtr->recon_in_progress = 1;
2224
2225 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2226 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2227
2228 /* XXX get rid of this! we don't need it at all.. */
2229 RF_Free(req, sizeof(*req));
2230
2231 raidPtr->recon_in_progress = 0;
2232 splx(s);
2233
2234 /* That's all... */
2235 kthread_exit(0); /* does not return */
2236 }
2237
2238 void
2239 rf_RewriteParityThread(raidPtr)
2240 RF_Raid_t *raidPtr;
2241 {
2242 int retcode;
2243 int s;
2244
2245 raidPtr->parity_rewrite_in_progress = 1;
2246 s = splbio();
2247 retcode = rf_RewriteParity(raidPtr);
2248 splx(s);
2249 if (retcode) {
2250 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2251 } else {
2252 /* set the clean bit! If we shutdown correctly,
2253 the clean bit on each component label will get
2254 set */
2255 raidPtr->parity_good = RF_RAID_CLEAN;
2256 }
2257 raidPtr->parity_rewrite_in_progress = 0;
2258
2259 /* That's all... */
2260 kthread_exit(0); /* does not return */
2261 }
2262
2263
2264 void
2265 rf_CopybackThread(raidPtr)
2266 RF_Raid_t *raidPtr;
2267 {
2268 int s;
2269
2270 raidPtr->copyback_in_progress = 1;
2271 s = splbio();
2272 rf_CopybackReconstructedData(raidPtr);
2273 splx(s);
2274 raidPtr->copyback_in_progress = 0;
2275
2276 /* That's all... */
2277 kthread_exit(0); /* does not return */
2278 }
2279
2280
2281 void
2282 rf_ReconstructInPlaceThread(req)
2283 struct rf_recon_req *req;
2284 {
2285 int retcode;
2286 int s;
2287 RF_Raid_t *raidPtr;
2288
2289 s = splbio();
2290 raidPtr = req->raidPtr;
2291 raidPtr->recon_in_progress = 1;
2292 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2293 RF_Free(req, sizeof(*req));
2294 raidPtr->recon_in_progress = 0;
2295 splx(s);
2296
2297 /* That's all... */
2298 kthread_exit(0); /* does not return */
2299 }
2300