1 /* $NetBSD: rf_netbsdkintf.c,v 1.89 2000/06/01 00:50:01 matt Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include "raid.h"
139 #include "opt_raid_autoconfig.h"
140 #include "rf_raid.h"
141 #include "rf_raidframe.h"
142 #include "rf_copyback.h"
143 #include "rf_dag.h"
144 #include "rf_dagflags.h"
145 #include "rf_diskqueue.h"
146 #include "rf_acctrace.h"
147 #include "rf_etimer.h"
148 #include "rf_general.h"
149 #include "rf_debugMem.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_debugprint.h"
155 #include "rf_threadstuff.h"
156 #include "rf_configure.h"
157
158 int rf_kdebug_level = 0;
159
160 #ifdef DEBUG
161 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
162 #else /* DEBUG */
163 #define db1_printf(a) { }
164 #endif /* DEBUG */
165
166 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
167
168 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
169
170 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
171 * spare table */
172 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
173 * installation process */
174
175 /* prototypes */
176 static void KernelWakeupFunc(struct buf * bp);
177 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
178 dev_t dev, RF_SectorNum_t startSect,
179 RF_SectorCount_t numSect, caddr_t buf,
180 void (*cbFunc) (struct buf *), void *cbArg,
181 int logBytesPerSector, struct proc * b_proc);
182 static void raidinit __P((RF_Raid_t *));
183
184 void raidattach __P((int));
185 int raidsize __P((dev_t));
186 int raidopen __P((dev_t, int, int, struct proc *));
187 int raidclose __P((dev_t, int, int, struct proc *));
188 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
189 int raidwrite __P((dev_t, struct uio *, int));
190 int raidread __P((dev_t, struct uio *, int));
191 void raidstrategy __P((struct buf *));
192 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
193
194 /*
195 * Pilfered from ccd.c
196 */
197
198 struct raidbuf {
199 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
200 struct buf *rf_obp; /* ptr. to original I/O buf */
201 int rf_flags; /* misc. flags */
202 RF_DiskQueueData_t *req;/* the request that this was part of.. */
203 };
204
205
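/* The two macros below allocate and release a struct raidbuf from the
 * per-unit component buffer pool.  pool_get() is called with PR_NOWAIT,
 * so it never sleeps and may return NULL; the buffers are obtained in
 * rf_DispatchKernelIO() and returned in KernelWakeupFunc() below. */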
206 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
207 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
208
209 /* XXX Not sure if the following should be replacing the raidPtrs above,
210 or if it should be used in conjunction with that...
211 */
212
213 struct raid_softc {
214 int sc_flags; /* flags */
215 int sc_cflags; /* configuration flags */
216 size_t sc_size; /* size of the raid device */
217 char sc_xname[20]; /* XXX external name */
218 struct disk sc_dkdev; /* generic disk device info */
219 struct pool sc_cbufpool; /* component buffer pool */
220 struct buf_queue buf_queue; /* used for the device queue */
221 };
222 /* sc_flags */
223 #define RAIDF_INITED 0x01 /* unit has been initialized */
224 #define RAIDF_WLABEL 0x02 /* label area is writable */
225 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
226 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
227 #define RAIDF_LOCKED 0x80 /* unit is locked */
228
229 #define raidunit(x) DISKUNIT(x)
230 int numraid = 0;
231
232 /*
233 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
234 * Be aware that large numbers can allow the driver to consume a lot of
235 * kernel memory, especially on writes, and in degraded mode reads.
236 *
237 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
238 * a single 64K write will typically require 64K for the old data,
239 * 64K for the old parity, and 64K for the new parity, for a total
240 * of 192K (if the parity buffer is not re-used immediately).
241  * Even if it is used immediately, that's still 128K, which when multiplied
242 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
243 *
244 * Now in degraded mode, for example, a 64K read on the above setup may
245 * require data reconstruction, which will require *all* of the 4 remaining
246 * disks to participate -- 4 * 32K/disk == 128K again.
247 */
248
249 #ifndef RAIDOUTSTANDING
250 #define RAIDOUTSTANDING 6
251 #endif
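/* A back-of-the-envelope bound implied by the comment above (a sketch,
 * assuming the 5-disk, 32k-stripe example and no parity-buffer reuse):
 * each 64K write ties up 64K incoming data + 64K old data + 64K old parity
 * + 64K new parity = 256K, so RAIDOUTSTANDING == 6 outstanding writes can
 * pin roughly 6 * 256K = 1536K of kernel memory. */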
252
253 #define RAIDLABELDEV(dev) \
254 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
255
256 /* declared here, and made public, for the benefit of KVM stuff.. */
257 struct raid_softc *raid_softc;
258
259 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
260 struct disklabel *));
261 static void raidgetdisklabel __P((dev_t));
262 static void raidmakedisklabel __P((struct raid_softc *));
263
264 static int raidlock __P((struct raid_softc *));
265 static void raidunlock __P((struct raid_softc *));
266
267 static void rf_markalldirty __P((RF_Raid_t *));
268 void rf_mountroot_hook __P((struct device *));
269
270 struct device *raidrootdev;
271
272 void rf_ReconThread __P((struct rf_recon_req *));
273 /* XXX what I want is: */
274 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
275 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
276 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
277 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
278 void rf_buildroothack __P((void *));
279
280 RF_AutoConfig_t *rf_find_raid_components __P((void));
281 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
282 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
283 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
284 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
285 RF_Raid_t *));
286 int rf_set_autoconfig __P((RF_Raid_t *, int));
287 int rf_set_rootpartition __P((RF_Raid_t *, int));
288 void rf_release_all_vps __P((RF_ConfigSet_t *));
289 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
290 int rf_have_enough_components __P((RF_ConfigSet_t *));
291 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
292
293 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
294 allow autoconfig to take place.
295 Note that this is overridden by having
296 RAID_AUTOCONFIG as an option in the
297 kernel config file. */
298
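/* raidattach() -- pseudo-device attach routine.  Allocates the global
 * raidPtrs[] array and the per-unit raid_softc and raidrootdev structures
 * for `num' units, boots the RAIDframe core, and, if RAID_AUTOCONFIG is
 * enabled, scans for RAID components, sorts them into configuration sets,
 * and schedules rf_buildroothack() via kthread_create() to configure any
 * auto-configurable sets. */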
299 void
300 raidattach(num)
301 int num;
302 {
303 int raidID;
304 int i, rc;
305 RF_AutoConfig_t *ac_list; /* autoconfig list */
306 RF_ConfigSet_t *config_sets;
307
308 #ifdef DEBUG
309 printf("raidattach: Asked for %d units\n", num);
310 #endif
311
312 if (num <= 0) {
313 #ifdef DIAGNOSTIC
314 panic("raidattach: count <= 0");
315 #endif
316 return;
317 }
318 /* This is where all the initialization stuff gets done. */
319
320 numraid = num;
321
322 /* Make some space for requested number of units... */
323
324 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
325 if (raidPtrs == NULL) {
326 panic("raidPtrs is NULL!!\n");
327 }
328
329 rc = rf_mutex_init(&rf_sparet_wait_mutex);
330 if (rc) {
331 RF_PANIC();
332 }
333
334 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
335
336 for (i = 0; i < num; i++)
337 raidPtrs[i] = NULL;
338 rc = rf_BootRaidframe();
339 if (rc == 0)
340 printf("Kernelized RAIDframe activated\n");
341 else
342 panic("Serious error booting RAID!!\n");
343
344 /* put together some datastructures like the CCD device does.. This
345 * lets us lock the device and what-not when it gets opened. */
346
347 raid_softc = (struct raid_softc *)
348 malloc(num * sizeof(struct raid_softc),
349 M_RAIDFRAME, M_NOWAIT);
350 if (raid_softc == NULL) {
351 printf("WARNING: no memory for RAIDframe driver\n");
352 return;
353 }
354
355 bzero(raid_softc, num * sizeof(struct raid_softc));
356
357 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
358 M_RAIDFRAME, M_NOWAIT);
359 if (raidrootdev == NULL) {
360 panic("No memory for RAIDframe driver!!?!?!\n");
361 }
362
363 for (raidID = 0; raidID < num; raidID++) {
364 BUFQ_INIT(&raid_softc[raidID].buf_queue);
365
366 raidrootdev[raidID].dv_class = DV_DISK;
367 raidrootdev[raidID].dv_cfdata = NULL;
368 raidrootdev[raidID].dv_unit = raidID;
369 raidrootdev[raidID].dv_parent = NULL;
370 raidrootdev[raidID].dv_flags = 0;
371 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
372
373 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
374 (RF_Raid_t *));
375 if (raidPtrs[raidID] == NULL) {
376 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
377 numraid = raidID;
378 return;
379 }
380 }
381
382 #if RAID_AUTOCONFIG
383 raidautoconfig = 1;
384 #endif
385
386 if (raidautoconfig) {
387 /* 1. locate all RAID components on the system */
388
389 #if DEBUG
390 printf("Searching for raid components...\n");
391 #endif
392 ac_list = rf_find_raid_components();
393
394 /* 2. sort them into their respective sets */
395
396 config_sets = rf_create_auto_sets(ac_list);
397
398 /* 3. evaluate each set and configure the valid ones
399 This gets done in rf_buildroothack() */
400
401 /* schedule the creation of the thread to do the
402 "/ on RAID" stuff */
403
404 kthread_create(rf_buildroothack,config_sets);
405
406 #if 0
407 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
408 #endif
409 }
410
411 }
412
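/* rf_buildroothack() -- kernel thread body scheduled from raidattach().
 * Walks the list of detected configuration sets, auto-configures those
 * that have enough components and are marked for autoconfiguration, and
 * releases the resources of the rest.  Unless RB_ASKNAME was given, if
 * exactly one configured set is rootable, booted_device is pointed at the
 * corresponding raidrootdev entry; with more than one candidate it falls
 * back to RB_ASKNAME. */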
413 void
414 rf_buildroothack(arg)
415 void *arg;
416 {
417 RF_ConfigSet_t *config_sets = arg;
418 RF_ConfigSet_t *cset;
419 RF_ConfigSet_t *next_cset;
420 int retcode;
421 int raidID;
422 int rootID;
423 int num_root;
424
425 num_root = 0;
426 cset = config_sets;
427 while(cset != NULL ) {
428 next_cset = cset->next;
429 if (rf_have_enough_components(cset) &&
430 cset->ac->clabel->autoconfigure==1) {
431 retcode = rf_auto_config_set(cset,&raidID);
432 if (!retcode) {
433 if (cset->rootable) {
434 rootID = raidID;
435 num_root++;
436 }
437 } else {
438 /* The autoconfig didn't work :( */
439 #if DEBUG
440 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
441 #endif
442 rf_release_all_vps(cset);
443 }
444 } else {
445 /* we're not autoconfiguring this set...
446 release the associated resources */
447 rf_release_all_vps(cset);
448 }
449 /* cleanup */
450 rf_cleanup_config_set(cset);
451 cset = next_cset;
452 }
453 if (boothowto & RB_ASKNAME) {
454 /* We don't auto-config... */
455 } else {
456 /* They didn't ask, and we found something bootable... */
457
458 if (num_root == 1) {
459 booted_device = &raidrootdev[rootID];
460 } else if (num_root > 1) {
461 /* we can't guess.. require the user to answer... */
462 boothowto |= RB_ASKNAME;
463 }
464 }
465 }
466
467
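/* raidsize() -- return the size, in DEV_BSIZE units, of the swap partition
 * on the given device, or -1 if the unit is not configured or the partition
 * is not of type FS_SWAP.  The device is opened and closed temporarily if
 * it was not already open. */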
468 int
469 raidsize(dev)
470 dev_t dev;
471 {
472 struct raid_softc *rs;
473 struct disklabel *lp;
474 int part, unit, omask, size;
475
476 unit = raidunit(dev);
477 if (unit >= numraid)
478 return (-1);
479 rs = &raid_softc[unit];
480
481 if ((rs->sc_flags & RAIDF_INITED) == 0)
482 return (-1);
483
484 part = DISKPART(dev);
485 omask = rs->sc_dkdev.dk_openmask & (1 << part);
486 lp = rs->sc_dkdev.dk_label;
487
488 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
489 return (-1);
490
491 if (lp->d_partitions[part].p_fstype != FS_SWAP)
492 size = -1;
493 else
494 size = lp->d_partitions[part].p_size *
495 (lp->d_secsize / DEV_BSIZE);
496
497 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
498 return (-1);
499
500 return (size);
501
502 }
503
504 int
505 raiddump(dev, blkno, va, size)
506 dev_t dev;
507 daddr_t blkno;
508 caddr_t va;
509 size_t size;
510 {
511 /* Not implemented. */
512 return ENXIO;
513 }
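/* raidopen() -- open a RAID device partition.  On the first open of an
 * initialized unit the disklabel is (re)read and the component labels are
 * marked dirty via rf_markalldirty(); the open fails with ENXIO for
 * partitions that do not exist or units that are not configured. */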
514 /* ARGSUSED */
515 int
516 raidopen(dev, flags, fmt, p)
517 dev_t dev;
518 int flags, fmt;
519 struct proc *p;
520 {
521 int unit = raidunit(dev);
522 struct raid_softc *rs;
523 struct disklabel *lp;
524 int part, pmask;
525 int error = 0;
526
527 if (unit >= numraid)
528 return (ENXIO);
529 rs = &raid_softc[unit];
530
531 if ((error = raidlock(rs)) != 0)
532 return (error);
533 lp = rs->sc_dkdev.dk_label;
534
535 part = DISKPART(dev);
536 pmask = (1 << part);
537
538 db1_printf(("Opening raid device number: %d partition: %d\n",
539 unit, part));
540
541
542 if ((rs->sc_flags & RAIDF_INITED) &&
543 (rs->sc_dkdev.dk_openmask == 0))
544 raidgetdisklabel(dev);
545
546 /* make sure that this partition exists */
547
548 if (part != RAW_PART) {
549 db1_printf(("Not a raw partition..\n"));
550 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
551 ((part >= lp->d_npartitions) ||
552 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
553 error = ENXIO;
554 raidunlock(rs);
555 db1_printf(("Bailing out...\n"));
556 return (error);
557 }
558 }
559 /* Prevent this unit from being unconfigured while open. */
560 switch (fmt) {
561 case S_IFCHR:
562 rs->sc_dkdev.dk_copenmask |= pmask;
563 break;
564
565 case S_IFBLK:
566 rs->sc_dkdev.dk_bopenmask |= pmask;
567 break;
568 }
569
570 if ((rs->sc_dkdev.dk_openmask == 0) &&
571 ((rs->sc_flags & RAIDF_INITED) != 0)) {
572 /* First one... mark things as dirty... Note that we *MUST*
573 have done a configure before this. I DO NOT WANT TO BE
574 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
575 THAT THEY BELONG TOGETHER!!!!! */
576 /* XXX should check to see if we're only open for reading
577 here... If so, we needn't do this, but then need some
578 other way of keeping track of what's happened.. */
579
580 rf_markalldirty( raidPtrs[unit] );
581 }
582
583
584 rs->sc_dkdev.dk_openmask =
585 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
586
587 raidunlock(rs);
588
589 return (error);
590
591
592 }
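/* raidclose() -- close a RAID device partition.  On the last close of a
 * configured unit, rf_final_update_component_labels() is called to mark
 * the component labels clean. */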
593 /* ARGSUSED */
594 int
595 raidclose(dev, flags, fmt, p)
596 dev_t dev;
597 int flags, fmt;
598 struct proc *p;
599 {
600 int unit = raidunit(dev);
601 struct raid_softc *rs;
602 int error = 0;
603 int part;
604
605 if (unit >= numraid)
606 return (ENXIO);
607 rs = &raid_softc[unit];
608
609 if ((error = raidlock(rs)) != 0)
610 return (error);
611
612 part = DISKPART(dev);
613
614 /* ...that much closer to allowing unconfiguration... */
615 switch (fmt) {
616 case S_IFCHR:
617 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
618 break;
619
620 case S_IFBLK:
621 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
622 break;
623 }
624 rs->sc_dkdev.dk_openmask =
625 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
626
627 if ((rs->sc_dkdev.dk_openmask == 0) &&
628 ((rs->sc_flags & RAIDF_INITED) != 0)) {
629  /* Last one... The device is not unconfigured yet.
630  (If RAIDF_INITED were not set, device shutdown
631  would already have taken care of setting the
632  clean bits.)  Mark things as clean... */
633 #if 0
634 printf("Last one on raid%d. Updating status.\n",unit);
635 #endif
636 rf_final_update_component_labels( raidPtrs[unit] );
637 }
638
639 raidunlock(rs);
640 return (0);
641
642 }
643
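/* raidstrategy() -- block I/O entry point.  Validates the unit and the
 * request, performs bounds checking against the disklabel for non-raw
 * partitions, queues the buf on the per-unit buf_queue at splbio(), and
 * calls raidstart() to feed the request into RAIDframe. */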
644 void
645 raidstrategy(bp)
646 struct buf *bp;
647 {
648 int s;
649
650 unsigned int raidID = raidunit(bp->b_dev);
651 RF_Raid_t *raidPtr;
652 struct raid_softc *rs = &raid_softc[raidID];
653 struct disklabel *lp;
654 int wlabel;
655
656  if ((rs->sc_flags & RAIDF_INITED) == 0) {
657  bp->b_error = ENXIO;
658  bp->b_flags |= B_ERROR;
659 bp->b_resid = bp->b_bcount;
660 biodone(bp);
661 return;
662 }
663 if (raidID >= numraid || !raidPtrs[raidID]) {
664 bp->b_error = ENODEV;
665 bp->b_flags |= B_ERROR;
666 bp->b_resid = bp->b_bcount;
667 biodone(bp);
668 return;
669 }
670 raidPtr = raidPtrs[raidID];
671 if (!raidPtr->valid) {
672 bp->b_error = ENODEV;
673 bp->b_flags |= B_ERROR;
674 bp->b_resid = bp->b_bcount;
675 biodone(bp);
676 return;
677 }
678 if (bp->b_bcount == 0) {
679 db1_printf(("b_bcount is zero..\n"));
680 biodone(bp);
681 return;
682 }
683 lp = rs->sc_dkdev.dk_label;
684
685 /*
686 * Do bounds checking and adjust transfer. If there's an
687 * error, the bounds check will flag that for us.
688 */
689
690 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
691 if (DISKPART(bp->b_dev) != RAW_PART)
692 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
693 db1_printf(("Bounds check failed!!:%d %d\n",
694 (int) bp->b_blkno, (int) wlabel));
695 biodone(bp);
696 return;
697 }
698 s = splbio();
699
700 bp->b_resid = 0;
701
702 /* stuff it onto our queue */
703 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
704
705 raidstart(raidPtrs[raidID]);
706
707 splx(s);
708 }
709 /* ARGSUSED */
710 int
711 raidread(dev, uio, flags)
712 dev_t dev;
713 struct uio *uio;
714 int flags;
715 {
716 int unit = raidunit(dev);
717 struct raid_softc *rs;
718 int part;
719
720 if (unit >= numraid)
721 return (ENXIO);
722 rs = &raid_softc[unit];
723
724 if ((rs->sc_flags & RAIDF_INITED) == 0)
725 return (ENXIO);
726 part = DISKPART(dev);
727
728 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
729
730 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
731
732 }
733 /* ARGSUSED */
734 int
735 raidwrite(dev, uio, flags)
736 dev_t dev;
737 struct uio *uio;
738 int flags;
739 {
740 int unit = raidunit(dev);
741 struct raid_softc *rs;
742
743 if (unit >= numraid)
744 return (ENXIO);
745 rs = &raid_softc[unit];
746
747 if ((rs->sc_flags & RAIDF_INITED) == 0)
748 return (ENXIO);
749 db1_printf(("raidwrite\n"));
750 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
751
752 }
753
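/* raidioctl() -- handle both the RAIDframe-specific ioctls (configure,
 * shutdown, component label manipulation, rebuild/copyback/parity status,
 * etc.) and the standard disk ioctls (DIOCGDINFO, DIOCSDINFO, ...), which
 * are dispatched in the second switch near the end of the function. */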
754 int
755 raidioctl(dev, cmd, data, flag, p)
756 dev_t dev;
757 u_long cmd;
758 caddr_t data;
759 int flag;
760 struct proc *p;
761 {
762 int unit = raidunit(dev);
763 int error = 0;
764 int part, pmask;
765 struct raid_softc *rs;
766 RF_Config_t *k_cfg, *u_cfg;
767 RF_Raid_t *raidPtr;
768 RF_RaidDisk_t *diskPtr;
769 RF_AccTotals_t *totals;
770 RF_DeviceConfig_t *d_cfg, **ucfgp;
771 u_char *specific_buf;
772 int retcode = 0;
773 int row;
774 int column;
775 struct rf_recon_req *rrcopy, *rr;
776 RF_ComponentLabel_t *clabel;
777 RF_ComponentLabel_t ci_label;
778 RF_ComponentLabel_t **clabel_ptr;
779 RF_SingleComponent_t *sparePtr,*componentPtr;
780 RF_SingleComponent_t hot_spare;
781 RF_SingleComponent_t component;
782 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
783 int i, j, d;
784
785 if (unit >= numraid)
786 return (ENXIO);
787 rs = &raid_softc[unit];
788 raidPtr = raidPtrs[unit];
789
790 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
791 (int) DISKPART(dev), (int) unit, (int) cmd));
792
793 /* Must be open for writes for these commands... */
794 switch (cmd) {
795 case DIOCSDINFO:
796 case DIOCWDINFO:
797 case DIOCWLABEL:
798 if ((flag & FWRITE) == 0)
799 return (EBADF);
800 }
801
802 /* Must be initialized for these... */
803 switch (cmd) {
804 case DIOCGDINFO:
805 case DIOCSDINFO:
806 case DIOCWDINFO:
807 case DIOCGPART:
808 case DIOCWLABEL:
809 case DIOCGDEFLABEL:
810 case RAIDFRAME_SHUTDOWN:
811 case RAIDFRAME_REWRITEPARITY:
812 case RAIDFRAME_GET_INFO:
813 case RAIDFRAME_RESET_ACCTOTALS:
814 case RAIDFRAME_GET_ACCTOTALS:
815 case RAIDFRAME_KEEP_ACCTOTALS:
816 case RAIDFRAME_GET_SIZE:
817 case RAIDFRAME_FAIL_DISK:
818 case RAIDFRAME_COPYBACK:
819 case RAIDFRAME_CHECK_RECON_STATUS:
820 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
821 case RAIDFRAME_GET_COMPONENT_LABEL:
822 case RAIDFRAME_SET_COMPONENT_LABEL:
823 case RAIDFRAME_ADD_HOT_SPARE:
824 case RAIDFRAME_REMOVE_HOT_SPARE:
825 case RAIDFRAME_INIT_LABELS:
826 case RAIDFRAME_REBUILD_IN_PLACE:
827 case RAIDFRAME_CHECK_PARITY:
828 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
829 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
830 case RAIDFRAME_CHECK_COPYBACK_STATUS:
831 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
832 case RAIDFRAME_SET_AUTOCONFIG:
833 case RAIDFRAME_SET_ROOT:
834 case RAIDFRAME_DELETE_COMPONENT:
835 case RAIDFRAME_INCORPORATE_HOT_SPARE:
836 if ((rs->sc_flags & RAIDF_INITED) == 0)
837 return (ENXIO);
838 }
839
840 switch (cmd) {
841
842 /* configure the system */
843 case RAIDFRAME_CONFIGURE:
844
845 if (raidPtr->valid) {
846 /* There is a valid RAID set running on this unit! */
847 printf("raid%d: Device already configured!\n",unit);
848 return(EINVAL);
849 }
850
851 /* copy-in the configuration information */
852 /* data points to a pointer to the configuration structure */
853
854 u_cfg = *((RF_Config_t **) data);
855 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
856 if (k_cfg == NULL) {
857 return (ENOMEM);
858 }
859 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
860 sizeof(RF_Config_t));
861 if (retcode) {
862 RF_Free(k_cfg, sizeof(RF_Config_t));
863 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
864 retcode));
865 return (retcode);
866 }
867 /* allocate a buffer for the layout-specific data, and copy it
868 * in */
869 if (k_cfg->layoutSpecificSize) {
870 if (k_cfg->layoutSpecificSize > 10000) {
871 /* sanity check */
872 RF_Free(k_cfg, sizeof(RF_Config_t));
873 return (EINVAL);
874 }
875 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
876 (u_char *));
877 if (specific_buf == NULL) {
878 RF_Free(k_cfg, sizeof(RF_Config_t));
879 return (ENOMEM);
880 }
881 retcode = copyin(k_cfg->layoutSpecific,
882 (caddr_t) specific_buf,
883 k_cfg->layoutSpecificSize);
884 if (retcode) {
885 RF_Free(k_cfg, sizeof(RF_Config_t));
886 RF_Free(specific_buf,
887 k_cfg->layoutSpecificSize);
888 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
889 retcode));
890 return (retcode);
891 }
892 } else
893 specific_buf = NULL;
894 k_cfg->layoutSpecific = specific_buf;
895
896 /* should do some kind of sanity check on the configuration.
897 * Store the sum of all the bytes in the last byte? */
898
899 /* configure the system */
900
901 /*
902 * Clear the entire RAID descriptor, just to make sure
903 * there is no stale data left in the case of a
904 * reconfiguration
905 */
906 bzero((char *) raidPtr, sizeof(RF_Raid_t));
907 raidPtr->raidid = unit;
908
909 retcode = rf_Configure(raidPtr, k_cfg, NULL);
910
911 if (retcode == 0) {
912
913 /* allow this many simultaneous IO's to
914 this RAID device */
915 raidPtr->openings = RAIDOUTSTANDING;
916
917 raidinit(raidPtr);
918 rf_markalldirty(raidPtr);
919 }
920 /* free the buffers. No return code here. */
921 if (k_cfg->layoutSpecificSize) {
922 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
923 }
924 RF_Free(k_cfg, sizeof(RF_Config_t));
925
926 return (retcode);
927
928 /* shutdown the system */
929 case RAIDFRAME_SHUTDOWN:
930
931 if ((error = raidlock(rs)) != 0)
932 return (error);
933
934 /*
935 * If somebody has a partition mounted, we shouldn't
936 * shutdown.
937 */
938
939 part = DISKPART(dev);
940 pmask = (1 << part);
941 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
942 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
943 (rs->sc_dkdev.dk_copenmask & pmask))) {
944 raidunlock(rs);
945 return (EBUSY);
946 }
947
948 retcode = rf_Shutdown(raidPtr);
949
950 pool_destroy(&rs->sc_cbufpool);
951
952 /* It's no longer initialized... */
953 rs->sc_flags &= ~RAIDF_INITED;
954
955 /* Detach the disk. */
956 disk_detach(&rs->sc_dkdev);
957
958 raidunlock(rs);
959
960 return (retcode);
961 case RAIDFRAME_GET_COMPONENT_LABEL:
962 clabel_ptr = (RF_ComponentLabel_t **) data;
963 /* need to read the component label for the disk indicated
964 by row,column in clabel */
965
966  /* For practice, let's get it directly from disk, rather
967 than from the in-core copy */
968 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
969 (RF_ComponentLabel_t *));
970 if (clabel == NULL)
971 return (ENOMEM);
972
973 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
974
975 retcode = copyin( *clabel_ptr, clabel,
976 sizeof(RF_ComponentLabel_t));
977
978 if (retcode) {
979 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
980 return(retcode);
981 }
982
983 row = clabel->row;
984 column = clabel->column;
985
986 if ((row < 0) || (row >= raidPtr->numRow) ||
987 (column < 0) || (column >= raidPtr->numCol)) {
988 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
989 return(EINVAL);
990 }
991
992 raidread_component_label(raidPtr->Disks[row][column].dev,
993 raidPtr->raid_cinfo[row][column].ci_vp,
994 clabel );
995
996 retcode = copyout((caddr_t) clabel,
997 (caddr_t) *clabel_ptr,
998 sizeof(RF_ComponentLabel_t));
999 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1000 return (retcode);
1001
1002 case RAIDFRAME_SET_COMPONENT_LABEL:
1003 clabel = (RF_ComponentLabel_t *) data;
1004
1005 /* XXX check the label for valid stuff... */
1006 /* Note that some things *should not* get modified --
1007 the user should be re-initing the labels instead of
1008 trying to patch things.
1009 */
1010
1011 printf("Got component label:\n");
1012 printf("Version: %d\n",clabel->version);
1013 printf("Serial Number: %d\n",clabel->serial_number);
1014 printf("Mod counter: %d\n",clabel->mod_counter);
1015 printf("Row: %d\n", clabel->row);
1016 printf("Column: %d\n", clabel->column);
1017 printf("Num Rows: %d\n", clabel->num_rows);
1018 printf("Num Columns: %d\n", clabel->num_columns);
1019 printf("Clean: %d\n", clabel->clean);
1020 printf("Status: %d\n", clabel->status);
1021
1022 row = clabel->row;
1023 column = clabel->column;
1024
1025 if ((row < 0) || (row >= raidPtr->numRow) ||
1026 (column < 0) || (column >= raidPtr->numCol)) {
1027 return(EINVAL);
1028 }
1029
1030 /* XXX this isn't allowed to do anything for now :-) */
1031
1032 /* XXX and before it is, we need to fill in the rest
1033 of the fields!?!?!?! */
1034 #if 0
1035 raidwrite_component_label(
1036 raidPtr->Disks[row][column].dev,
1037 raidPtr->raid_cinfo[row][column].ci_vp,
1038 clabel );
1039 #endif
1040 return (0);
1041
1042 case RAIDFRAME_INIT_LABELS:
1043 clabel = (RF_ComponentLabel_t *) data;
1044 /*
1045 we only want the serial number from
1046 the above. We get all the rest of the information
1047 from the config that was used to create this RAID
1048 set.
1049 */
1050
1051 raidPtr->serial_number = clabel->serial_number;
1052
1053 raid_init_component_label(raidPtr, &ci_label);
1054 ci_label.serial_number = clabel->serial_number;
1055
1056 for(row=0;row<raidPtr->numRow;row++) {
1057 ci_label.row = row;
1058 for(column=0;column<raidPtr->numCol;column++) {
1059 diskPtr = &raidPtr->Disks[row][column];
1060 ci_label.partitionSize = diskPtr->partitionSize;
1061 ci_label.column = column;
1062 raidwrite_component_label(
1063 raidPtr->Disks[row][column].dev,
1064 raidPtr->raid_cinfo[row][column].ci_vp,
1065 &ci_label );
1066 }
1067 }
1068
1069 return (retcode);
1070 case RAIDFRAME_SET_AUTOCONFIG:
1071 d = rf_set_autoconfig(raidPtr, *(int *) data);
1072 printf("New autoconfig value is: %d\n", d);
1073 *(int *) data = d;
1074 return (retcode);
1075
1076 case RAIDFRAME_SET_ROOT:
1077 d = rf_set_rootpartition(raidPtr, *(int *) data);
1078 printf("New rootpartition value is: %d\n", d);
1079 *(int *) data = d;
1080 return (retcode);
1081
1082 /* initialize all parity */
1083 case RAIDFRAME_REWRITEPARITY:
1084
1085 if (raidPtr->Layout.map->faultsTolerated == 0) {
1086 /* Parity for RAID 0 is trivially correct */
1087 raidPtr->parity_good = RF_RAID_CLEAN;
1088 return(0);
1089 }
1090
1091 if (raidPtr->parity_rewrite_in_progress == 1) {
1092 /* Re-write is already in progress! */
1093 return(EINVAL);
1094 }
1095
1096 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1097 rf_RewriteParityThread,
1098 raidPtr,"raid_parity");
1099 return (retcode);
1100
1101
1102 case RAIDFRAME_ADD_HOT_SPARE:
1103 sparePtr = (RF_SingleComponent_t *) data;
1104 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1105 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1106 return(retcode);
1107
1108 case RAIDFRAME_REMOVE_HOT_SPARE:
1109 return(retcode);
1110
1111 case RAIDFRAME_DELETE_COMPONENT:
1112 componentPtr = (RF_SingleComponent_t *)data;
1113 memcpy( &component, componentPtr,
1114 sizeof(RF_SingleComponent_t));
1115 retcode = rf_delete_component(raidPtr, &component);
1116 return(retcode);
1117
1118 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1119 componentPtr = (RF_SingleComponent_t *)data;
1120 memcpy( &component, componentPtr,
1121 sizeof(RF_SingleComponent_t));
1122 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1123 return(retcode);
1124
1125 case RAIDFRAME_REBUILD_IN_PLACE:
1126
1127 if (raidPtr->Layout.map->faultsTolerated == 0) {
1128 /* Can't do this on a RAID 0!! */
1129 return(EINVAL);
1130 }
1131
1132 if (raidPtr->recon_in_progress == 1) {
1133 /* a reconstruct is already in progress! */
1134 return(EINVAL);
1135 }
1136
1137 componentPtr = (RF_SingleComponent_t *) data;
1138 memcpy( &component, componentPtr,
1139 sizeof(RF_SingleComponent_t));
1140 row = component.row;
1141 column = component.column;
1142 printf("Rebuild: %d %d\n",row, column);
1143 if ((row < 0) || (row >= raidPtr->numRow) ||
1144 (column < 0) || (column >= raidPtr->numCol)) {
1145 return(EINVAL);
1146 }
1147
1148 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1149 if (rrcopy == NULL)
1150 return(ENOMEM);
1151
1152 rrcopy->raidPtr = (void *) raidPtr;
1153 rrcopy->row = row;
1154 rrcopy->col = column;
1155
1156 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1157 rf_ReconstructInPlaceThread,
1158 rrcopy,"raid_reconip");
1159 return(retcode);
1160
1161 case RAIDFRAME_GET_INFO:
1162 if (!raidPtr->valid)
1163 return (ENODEV);
1164 ucfgp = (RF_DeviceConfig_t **) data;
1165 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1166 (RF_DeviceConfig_t *));
1167 if (d_cfg == NULL)
1168 return (ENOMEM);
1169 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1170 d_cfg->rows = raidPtr->numRow;
1171 d_cfg->cols = raidPtr->numCol;
1172 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1173 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1174 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1175 return (ENOMEM);
1176 }
1177 d_cfg->nspares = raidPtr->numSpare;
1178 if (d_cfg->nspares >= RF_MAX_DISKS) {
1179 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1180 return (ENOMEM);
1181 }
1182 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1183 d = 0;
1184 for (i = 0; i < d_cfg->rows; i++) {
1185 for (j = 0; j < d_cfg->cols; j++) {
1186 d_cfg->devs[d] = raidPtr->Disks[i][j];
1187 d++;
1188 }
1189 }
1190 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1191 d_cfg->spares[i] = raidPtr->Disks[0][j];
1192 }
1193 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1194 sizeof(RF_DeviceConfig_t));
1195 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1196
1197 return (retcode);
1198
1199 case RAIDFRAME_CHECK_PARITY:
1200 *(int *) data = raidPtr->parity_good;
1201 return (0);
1202
1203 case RAIDFRAME_RESET_ACCTOTALS:
1204 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1205 return (0);
1206
1207 case RAIDFRAME_GET_ACCTOTALS:
1208 totals = (RF_AccTotals_t *) data;
1209 *totals = raidPtr->acc_totals;
1210 return (0);
1211
1212 case RAIDFRAME_KEEP_ACCTOTALS:
1213 raidPtr->keep_acc_totals = *(int *)data;
1214 return (0);
1215
1216 case RAIDFRAME_GET_SIZE:
1217 *(int *) data = raidPtr->totalSectors;
1218 return (0);
1219
1220 /* fail a disk & optionally start reconstruction */
1221 case RAIDFRAME_FAIL_DISK:
1222
1223 if (raidPtr->Layout.map->faultsTolerated == 0) {
1224 /* Can't do this on a RAID 0!! */
1225 return(EINVAL);
1226 }
1227
1228 rr = (struct rf_recon_req *) data;
1229
1230 if (rr->row < 0 || rr->row >= raidPtr->numRow
1231 || rr->col < 0 || rr->col >= raidPtr->numCol)
1232 return (EINVAL);
1233
1234 printf("raid%d: Failing the disk: row: %d col: %d\n",
1235 unit, rr->row, rr->col);
1236
1237 /* make a copy of the recon request so that we don't rely on
1238 * the user's buffer */
1239 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1240 if (rrcopy == NULL)
1241 return(ENOMEM);
1242 bcopy(rr, rrcopy, sizeof(*rr));
1243 rrcopy->raidPtr = (void *) raidPtr;
1244
1245 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1246 rf_ReconThread,
1247 rrcopy,"raid_recon");
1248 return (0);
1249
1250 /* invoke a copyback operation after recon on whatever disk
1251 * needs it, if any */
1252 case RAIDFRAME_COPYBACK:
1253
1254 if (raidPtr->Layout.map->faultsTolerated == 0) {
1255 /* This makes no sense on a RAID 0!! */
1256 return(EINVAL);
1257 }
1258
1259 if (raidPtr->copyback_in_progress == 1) {
1260 /* Copyback is already in progress! */
1261 return(EINVAL);
1262 }
1263
1264 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1265 rf_CopybackThread,
1266 raidPtr,"raid_copyback");
1267 return (retcode);
1268
1269 /* return the percentage completion of reconstruction */
1270 case RAIDFRAME_CHECK_RECON_STATUS:
1271 if (raidPtr->Layout.map->faultsTolerated == 0) {
1272 /* This makes no sense on a RAID 0, so tell the
1273 user it's done. */
1274 *(int *) data = 100;
1275 return(0);
1276 }
1277 row = 0; /* XXX we only consider a single row... */
1278 if (raidPtr->status[row] != rf_rs_reconstructing)
1279 *(int *) data = 100;
1280 else
1281 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1282 return (0);
1283 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1284 progressInfoPtr = (RF_ProgressInfo_t **) data;
1285 row = 0; /* XXX we only consider a single row... */
1286 if (raidPtr->status[row] != rf_rs_reconstructing) {
1287 progressInfo.remaining = 0;
1288 progressInfo.completed = 100;
1289 progressInfo.total = 100;
1290 } else {
1291 progressInfo.total =
1292 raidPtr->reconControl[row]->numRUsTotal;
1293 progressInfo.completed =
1294 raidPtr->reconControl[row]->numRUsComplete;
1295 progressInfo.remaining = progressInfo.total -
1296 progressInfo.completed;
1297 }
1298 retcode = copyout((caddr_t) &progressInfo,
1299 (caddr_t) *progressInfoPtr,
1300 sizeof(RF_ProgressInfo_t));
1301 return (retcode);
1302
1303 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1304 if (raidPtr->Layout.map->faultsTolerated == 0) {
1305 /* This makes no sense on a RAID 0, so tell the
1306 user it's done. */
1307 *(int *) data = 100;
1308 return(0);
1309 }
1310 if (raidPtr->parity_rewrite_in_progress == 1) {
1311 *(int *) data = 100 *
1312 raidPtr->parity_rewrite_stripes_done /
1313 raidPtr->Layout.numStripe;
1314 } else {
1315 *(int *) data = 100;
1316 }
1317 return (0);
1318
1319 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1320 progressInfoPtr = (RF_ProgressInfo_t **) data;
1321 if (raidPtr->parity_rewrite_in_progress == 1) {
1322 progressInfo.total = raidPtr->Layout.numStripe;
1323 progressInfo.completed =
1324 raidPtr->parity_rewrite_stripes_done;
1325 progressInfo.remaining = progressInfo.total -
1326 progressInfo.completed;
1327 } else {
1328 progressInfo.remaining = 0;
1329 progressInfo.completed = 100;
1330 progressInfo.total = 100;
1331 }
1332 retcode = copyout((caddr_t) &progressInfo,
1333 (caddr_t) *progressInfoPtr,
1334 sizeof(RF_ProgressInfo_t));
1335 return (retcode);
1336
1337 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1338 if (raidPtr->Layout.map->faultsTolerated == 0) {
1339 /* This makes no sense on a RAID 0 */
1340 *(int *) data = 100;
1341 return(0);
1342 }
1343 if (raidPtr->copyback_in_progress == 1) {
1344 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1345 raidPtr->Layout.numStripe;
1346 } else {
1347 *(int *) data = 100;
1348 }
1349 return (0);
1350
1351  case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1352  progressInfoPtr = (RF_ProgressInfo_t **) data;
1353  if (raidPtr->copyback_in_progress == 1) {
1354  progressInfo.total = raidPtr->Layout.numStripe;
1355  progressInfo.completed = raidPtr->copyback_stripes_done;
1356 progressInfo.remaining = progressInfo.total -
1357 progressInfo.completed;
1358 } else {
1359 progressInfo.remaining = 0;
1360 progressInfo.completed = 100;
1361 progressInfo.total = 100;
1362 }
1363 retcode = copyout((caddr_t) &progressInfo,
1364 (caddr_t) *progressInfoPtr,
1365 sizeof(RF_ProgressInfo_t));
1366 return (retcode);
1367
1368 /* the sparetable daemon calls this to wait for the kernel to
1369 * need a spare table. this ioctl does not return until a
1370 * spare table is needed. XXX -- calling mpsleep here in the
1371 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1372 * -- I should either compute the spare table in the kernel,
1373 * or have a different -- XXX XXX -- interface (a different
1374 * character device) for delivering the table -- XXX */
1375 #if 0
1376 case RAIDFRAME_SPARET_WAIT:
1377 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1378 while (!rf_sparet_wait_queue)
1379 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1380 waitreq = rf_sparet_wait_queue;
1381 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1382 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1383
1384 /* structure assignment */
1385 *((RF_SparetWait_t *) data) = *waitreq;
1386
1387 RF_Free(waitreq, sizeof(*waitreq));
1388 return (0);
1389
1390 /* wakes up a process waiting on SPARET_WAIT and puts an error
1391  * code in it that will cause the daemon to exit */
1392 case RAIDFRAME_ABORT_SPARET_WAIT:
1393 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1394 waitreq->fcol = -1;
1395 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1396 waitreq->next = rf_sparet_wait_queue;
1397 rf_sparet_wait_queue = waitreq;
1398 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1399 wakeup(&rf_sparet_wait_queue);
1400 return (0);
1401
1402 /* used by the spare table daemon to deliver a spare table
1403 * into the kernel */
1404 case RAIDFRAME_SEND_SPARET:
1405
1406 /* install the spare table */
1407 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1408
1409 /* respond to the requestor. the return status of the spare
1410 * table installation is passed in the "fcol" field */
1411 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1412 waitreq->fcol = retcode;
1413 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1414 waitreq->next = rf_sparet_resp_queue;
1415 rf_sparet_resp_queue = waitreq;
1416 wakeup(&rf_sparet_resp_queue);
1417 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1418
1419 return (retcode);
1420 #endif
1421
1422 default:
1423 break; /* fall through to the os-specific code below */
1424
1425 }
1426
1427 if (!raidPtr->valid)
1428 return (EINVAL);
1429
1430 /*
1431 * Add support for "regular" device ioctls here.
1432 */
1433
1434 switch (cmd) {
1435 case DIOCGDINFO:
1436 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1437 break;
1438
1439 case DIOCGPART:
1440 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1441 ((struct partinfo *) data)->part =
1442 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1443 break;
1444
1445 case DIOCWDINFO:
1446 case DIOCSDINFO:
1447 if ((error = raidlock(rs)) != 0)
1448 return (error);
1449
1450 rs->sc_flags |= RAIDF_LABELLING;
1451
1452 error = setdisklabel(rs->sc_dkdev.dk_label,
1453 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1454 if (error == 0) {
1455 if (cmd == DIOCWDINFO)
1456 error = writedisklabel(RAIDLABELDEV(dev),
1457 raidstrategy, rs->sc_dkdev.dk_label,
1458 rs->sc_dkdev.dk_cpulabel);
1459 }
1460 rs->sc_flags &= ~RAIDF_LABELLING;
1461
1462 raidunlock(rs);
1463
1464 if (error)
1465 return (error);
1466 break;
1467
1468 case DIOCWLABEL:
1469 if (*(int *) data != 0)
1470 rs->sc_flags |= RAIDF_WLABEL;
1471 else
1472 rs->sc_flags &= ~RAIDF_WLABEL;
1473 break;
1474
1475 case DIOCGDEFLABEL:
1476 raidgetdefaultlabel(raidPtr, rs,
1477 (struct disklabel *) data);
1478 break;
1479
1480 default:
1481 retcode = ENOTTY;
1482 }
1483 return (retcode);
1484
1485 }
1486
1487
1488 /* raidinit -- complete the rest of the initialization for the
1489 RAIDframe device. */
1490
1491
1492 static void
1493 raidinit(raidPtr)
1494 RF_Raid_t *raidPtr;
1495 {
1496 struct raid_softc *rs;
1497 int unit;
1498
1499 unit = raidPtr->raidid;
1500
1501 rs = &raid_softc[unit];
1502 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1503 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1504
1505
1506 /* XXX should check return code first... */
1507 rs->sc_flags |= RAIDF_INITED;
1508
1509 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1510
1511 rs->sc_dkdev.dk_name = rs->sc_xname;
1512
1513 /* disk_attach actually creates space for the CPU disklabel, among
1514 * other things, so it's critical to call this *BEFORE* we try putzing
1515 * with disklabels. */
1516
1517 disk_attach(&rs->sc_dkdev);
1518
1519 /* XXX There may be a weird interaction here between this, and
1520 * protectedSectors, as used in RAIDframe. */
1521
1522 rs->sc_size = raidPtr->totalSectors;
1523
1524 }
1525
1526 /* wake up the daemon & tell it to get us a spare table
1527 * XXX
1528 * the entries in the queues should be tagged with the raidPtr
1529 * so that in the extremely rare case that two recons happen at once,
1530  * we know for which device we're requesting a spare table
1531 * XXX
1532 *
1533 * XXX This code is not currently used. GO
1534 */
1535 int
1536 rf_GetSpareTableFromDaemon(req)
1537 RF_SparetWait_t *req;
1538 {
1539 int retcode;
1540
1541 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1542 req->next = rf_sparet_wait_queue;
1543 rf_sparet_wait_queue = req;
1544 wakeup(&rf_sparet_wait_queue);
1545
1546 /* mpsleep unlocks the mutex */
1547 while (!rf_sparet_resp_queue) {
1548 tsleep(&rf_sparet_resp_queue, PRIBIO,
1549 "raidframe getsparetable", 0);
1550 }
1551 req = rf_sparet_resp_queue;
1552 rf_sparet_resp_queue = req->next;
1553 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1554
1555 retcode = req->fcol;
1556 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1557 * alloc'd */
1558 return (retcode);
1559 }
1560
1561 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1562 * bp & passes it down.
1563 * any calls originating in the kernel must use non-blocking I/O
1564 * do some extra sanity checking to return "appropriate" error values for
1565 * certain conditions (to make some standard utilities work)
1566 *
1567 * Formerly known as: rf_DoAccessKernel
1568 */
1569 void
1570 raidstart(raidPtr)
1571 RF_Raid_t *raidPtr;
1572 {
1573 RF_SectorCount_t num_blocks, pb, sum;
1574 RF_RaidAddr_t raid_addr;
1575 int retcode;
1576 struct partition *pp;
1577 daddr_t blocknum;
1578 int unit;
1579 struct raid_softc *rs;
1580 int do_async;
1581 struct buf *bp;
1582
1583 unit = raidPtr->raidid;
1584 rs = &raid_softc[unit];
1585
1586 /* quick check to see if anything has died recently */
1587 RF_LOCK_MUTEX(raidPtr->mutex);
1588 if (raidPtr->numNewFailures > 0) {
1589 rf_update_component_labels(raidPtr);
1590 raidPtr->numNewFailures--;
1591 }
1592 RF_UNLOCK_MUTEX(raidPtr->mutex);
1593
1594 /* Check to see if we're at the limit... */
1595 RF_LOCK_MUTEX(raidPtr->mutex);
1596 while (raidPtr->openings > 0) {
1597 RF_UNLOCK_MUTEX(raidPtr->mutex);
1598
1599 /* get the next item, if any, from the queue */
1600 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1601 /* nothing more to do */
1602 return;
1603 }
1604 BUFQ_REMOVE(&rs->buf_queue, bp);
1605
1606 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1607 * partition.. Need to make it absolute to the underlying
1608 * device.. */
1609
1610 blocknum = bp->b_blkno;
1611 if (DISKPART(bp->b_dev) != RAW_PART) {
1612 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1613 blocknum += pp->p_offset;
1614 }
1615
1616 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1617 (int) blocknum));
1618
1619 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1620 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1621
1622 /* *THIS* is where we adjust what block we're going to...
1623 * but DO NOT TOUCH bp->b_blkno!!! */
1624 raid_addr = blocknum;
1625
1626 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1627 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1628 sum = raid_addr + num_blocks + pb;
1629 if (1 || rf_debugKernelAccess) {
1630 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1631 (int) raid_addr, (int) sum, (int) num_blocks,
1632 (int) pb, (int) bp->b_resid));
1633 }
1634 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1635 || (sum < num_blocks) || (sum < pb)) {
1636 bp->b_error = ENOSPC;
1637 bp->b_flags |= B_ERROR;
1638 bp->b_resid = bp->b_bcount;
1639 biodone(bp);
1640 RF_LOCK_MUTEX(raidPtr->mutex);
1641 continue;
1642 }
1643 /*
1644 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1645 */
1646
1647 if (bp->b_bcount & raidPtr->sectorMask) {
1648 bp->b_error = EINVAL;
1649 bp->b_flags |= B_ERROR;
1650 bp->b_resid = bp->b_bcount;
1651 biodone(bp);
1652 RF_LOCK_MUTEX(raidPtr->mutex);
1653 continue;
1654
1655 }
1656 db1_printf(("Calling DoAccess..\n"));
1657
1658
1659 RF_LOCK_MUTEX(raidPtr->mutex);
1660 raidPtr->openings--;
1661 RF_UNLOCK_MUTEX(raidPtr->mutex);
1662
1663 /*
1664 * Everything is async.
1665 */
1666 do_async = 1;
1667
1668 /* don't ever condition on bp->b_flags & B_WRITE.
1669 * always condition on B_READ instead */
1670
1671 /* XXX we're still at splbio() here... do we *really*
1672 need to be? */
1673
1674
1675 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1676 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1677 do_async, raid_addr, num_blocks,
1678 bp->b_data, bp, NULL, NULL,
1679 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1680
1681
1682 RF_LOCK_MUTEX(raidPtr->mutex);
1683 }
1684 RF_UNLOCK_MUTEX(raidPtr->mutex);
1685 }
1686
1687
1688
1689
1690 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1691
1692 int
1693 rf_DispatchKernelIO(queue, req)
1694 RF_DiskQueue_t *queue;
1695 RF_DiskQueueData_t *req;
1696 {
1697 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1698 struct buf *bp;
1699 struct raidbuf *raidbp = NULL;
1700 struct raid_softc *rs;
1701 int unit;
1702 int s;
1703
1704 s=0;
1705 /* s = splbio();*/ /* want to test this */
1706 /* XXX along with the vnode, we also need the softc associated with
1707 * this device.. */
1708
1709 req->queue = queue;
1710
1711 unit = queue->raidPtr->raidid;
1712
1713 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1714
1715 if (unit >= numraid) {
1716 printf("Invalid unit number: %d %d\n", unit, numraid);
1717 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1718 }
1719 rs = &raid_softc[unit];
1720
1721 /* XXX is this the right place? */
1722 disk_busy(&rs->sc_dkdev);
1723
1724 bp = req->bp;
1725 #if 1
1726 /* XXX when there is a physical disk failure, someone is passing us a
1727 * buffer that contains old stuff!! Attempt to deal with this problem
1728 * without taking a performance hit... (not sure where the real bug
1729 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1730
1731 if (bp->b_flags & B_ERROR) {
1732 bp->b_flags &= ~B_ERROR;
1733 }
1734 if (bp->b_error != 0) {
1735 bp->b_error = 0;
1736 }
1737 #endif
1738 raidbp = RAIDGETBUF(rs);
1739
1740 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1741
1742 /*
1743 * context for raidiodone
1744 */
1745 raidbp->rf_obp = bp;
1746 raidbp->req = req;
1747
1748 LIST_INIT(&raidbp->rf_buf.b_dep);
1749
1750 switch (req->type) {
1751 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1752 /* XXX need to do something extra here.. */
1753 /* I'm leaving this in, as I've never actually seen it used,
1754 * and I'd like folks to report it... GO */
1755  printf("WAKEUP CALLED\n");
1756 queue->numOutstanding++;
1757
1758 /* XXX need to glue the original buffer into this?? */
1759
1760 KernelWakeupFunc(&raidbp->rf_buf);
1761 break;
1762
1763 case RF_IO_TYPE_READ:
1764 case RF_IO_TYPE_WRITE:
1765
1766 if (req->tracerec) {
1767 RF_ETIMER_START(req->tracerec->timer);
1768 }
1769 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1770 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1771 req->sectorOffset, req->numSector,
1772 req->buf, KernelWakeupFunc, (void *) req,
1773 queue->raidPtr->logBytesPerSector, req->b_proc);
1774
1775 if (rf_debugKernelAccess) {
1776 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1777 (long) bp->b_blkno));
1778 }
1779 queue->numOutstanding++;
1780 queue->last_deq_sector = req->sectorOffset;
1781 /* acc wouldn't have been let in if there were any pending
1782 * reqs at any other priority */
1783 queue->curPriority = req->priority;
1784
1785 db1_printf(("Going for %c to unit %d row %d col %d\n",
1786 req->type, unit, queue->row, queue->col));
1787 db1_printf(("sector %d count %d (%d bytes) %d\n",
1788 (int) req->sectorOffset, (int) req->numSector,
1789 (int) (req->numSector <<
1790 queue->raidPtr->logBytesPerSector),
1791 (int) queue->raidPtr->logBytesPerSector));
1792 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1793 raidbp->rf_buf.b_vp->v_numoutput++;
1794 }
1795 VOP_STRATEGY(&raidbp->rf_buf);
1796
1797 break;
1798
1799 default:
1800 panic("bad req->type in rf_DispatchKernelIO");
1801 }
1802 db1_printf(("Exiting from DispatchKernelIO\n"));
1803 /* splx(s); */ /* want to test this */
1804 return (0);
1805 }
1806 /* this is the callback function associated with an I/O invoked from
1807 kernel code.
1808 */
1809 static void
1810 KernelWakeupFunc(vbp)
1811 struct buf *vbp;
1812 {
1813 RF_DiskQueueData_t *req = NULL;
1814 RF_DiskQueue_t *queue;
1815 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1816 struct buf *bp;
1817 struct raid_softc *rs;
1818 int unit;
1819 int s;
1820
1821 s = splbio();
1822 db1_printf(("recovering the request queue:\n"));
1823 req = raidbp->req;
1824
1825 bp = raidbp->rf_obp;
1826
1827 queue = (RF_DiskQueue_t *) req->queue;
1828
1829 if (raidbp->rf_buf.b_flags & B_ERROR) {
1830 bp->b_flags |= B_ERROR;
1831 bp->b_error = raidbp->rf_buf.b_error ?
1832 raidbp->rf_buf.b_error : EIO;
1833 }
1834
1835 /* XXX methinks this could be wrong... */
1836 #if 1
1837 bp->b_resid = raidbp->rf_buf.b_resid;
1838 #endif
1839
1840 if (req->tracerec) {
1841 RF_ETIMER_STOP(req->tracerec->timer);
1842 RF_ETIMER_EVAL(req->tracerec->timer);
1843 RF_LOCK_MUTEX(rf_tracing_mutex);
1844 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1845 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1846 req->tracerec->num_phys_ios++;
1847 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1848 }
1849 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1850
1851 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1852
1853
1854 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1855 * ballistic, and mark the component as hosed... */
1856
1857 if (bp->b_flags & B_ERROR) {
1858 /* Mark the disk as dead */
1859 /* but only mark it once... */
1860 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1861 rf_ds_optimal) {
1862 printf("raid%d: IO Error. Marking %s as failed.\n",
1863 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1864 queue->raidPtr->Disks[queue->row][queue->col].status =
1865 rf_ds_failed;
1866 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1867 queue->raidPtr->numFailures++;
1868 queue->raidPtr->numNewFailures++;
1869 /* XXX here we should bump the version number for each component, and write that data out */
1870 } else { /* Disk is already dead... */
1871 /* printf("Disk already marked as dead!\n"); */
1872 }
1873
1874 }
1875
1876 rs = &raid_softc[unit];
1877 RAIDPUTBUF(rs, raidbp);
1878
1879
1880 if (bp->b_resid == 0) {
1881 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1882 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1883 }
1884
1885 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1886 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1887
1888 splx(s);
1889 }
1890
1891
1892
1893 /*
1894 * initialize a buf structure for doing an I/O in the kernel.
1895 */
1896 static void
1897 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1898 logBytesPerSector, b_proc)
1899 struct buf *bp;
1900 struct vnode *b_vp;
1901 unsigned rw_flag;
1902 dev_t dev;
1903 RF_SectorNum_t startSect;
1904 RF_SectorCount_t numSect;
1905 caddr_t buf;
1906 void (*cbFunc) (struct buf *);
1907 void *cbArg;
1908 int logBytesPerSector;
1909 struct proc *b_proc;
1910 {
1911 /* bp->b_flags = B_PHYS | rw_flag; */
1912 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1913 bp->b_bcount = numSect << logBytesPerSector;
1914 bp->b_bufsize = bp->b_bcount;
1915 bp->b_error = 0;
1916 bp->b_dev = dev;
1917 bp->b_data = buf;
1918 bp->b_blkno = startSect;
1919 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1920 if (bp->b_bcount == 0) {
1921 panic("bp->b_bcount is zero in InitBP!!\n");
1922 }
1923 bp->b_proc = b_proc;
1924 bp->b_iodone = cbFunc;
1925 bp->b_vp = b_vp;
1926
1927 }
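
/*
 * Illustrative sketch only (not part of the driver): roughly how
 * rf_DispatchKernelIO() above fills in a struct buf via InitBP() before
 * handing it to VOP_STRATEGY().  component_vp, component_dev and io_proc
 * are hypothetical stand-ins for the target component's vnode, device
 * number and requesting process.
 */
#if 0
	/* a write request; B_READ would be passed for reads instead */
	InitBP(&raidbp->rf_buf, component_vp, B_WRITE, component_dev,
	    req->sectorOffset, req->numSector, req->buf,
	    KernelWakeupFunc, (void *) req,
	    queue->raidPtr->logBytesPerSector, io_proc);
#endif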
1928
1929 static void
1930 raidgetdefaultlabel(raidPtr, rs, lp)
1931 RF_Raid_t *raidPtr;
1932 struct raid_softc *rs;
1933 struct disklabel *lp;
1934 {
1935 db1_printf(("Building a default label...\n"));
1936 bzero(lp, sizeof(*lp));
1937
1938 /* fabricate a label... */
1939 lp->d_secperunit = raidPtr->totalSectors;
1940 lp->d_secsize = raidPtr->bytesPerSector;
1941 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1942 lp->d_ntracks = 1;
1943 lp->d_ncylinders = raidPtr->totalSectors /
1944 (lp->d_nsectors * lp->d_ntracks);
1945 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1946
1947 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1948 lp->d_type = DTYPE_RAID;
1949 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1950 lp->d_rpm = 3600;
1951 lp->d_interleave = 1;
1952 lp->d_flags = 0;
1953
1954 lp->d_partitions[RAW_PART].p_offset = 0;
1955 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1956 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1957 lp->d_npartitions = RAW_PART + 1;
1958
1959 lp->d_magic = DISKMAGIC;
1960 lp->d_magic2 = DISKMAGIC;
1961 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1962
1963 }
1964 /*
1965 * Read the disklabel from the raid device. If one is not present, fake one
1966 * up.
1967 */
1968 static void
1969 raidgetdisklabel(dev)
1970 dev_t dev;
1971 {
1972 int unit = raidunit(dev);
1973 struct raid_softc *rs = &raid_softc[unit];
1974 char *errstring;
1975 struct disklabel *lp = rs->sc_dkdev.dk_label;
1976 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1977 RF_Raid_t *raidPtr;
1978
1979 db1_printf(("Getting the disklabel...\n"));
1980
1981 bzero(clp, sizeof(*clp));
1982
1983 raidPtr = raidPtrs[unit];
1984
1985 raidgetdefaultlabel(raidPtr, rs, lp);
1986
1987 /*
1988 * Call the generic disklabel extraction routine.
1989 */
1990 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1991 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1992 if (errstring)
1993 raidmakedisklabel(rs);
1994 else {
1995 int i;
1996 struct partition *pp;
1997
1998 /*
1999 * Sanity check whether the found disklabel is valid.
2000 *
2001 * This is necessary since the total size of the raid device
2002 * may vary when the interleave is changed even though exactly the
2003 * same components are used, and the old disklabel may be used
2004 * if one is found.
2005 */
2006 if (lp->d_secperunit != rs->sc_size)
2007 printf("WARNING: %s: "
2008 "total sector size in disklabel (%d) != "
2009 "the size of raid (%ld)\n", rs->sc_xname,
2010 lp->d_secperunit, (long) rs->sc_size);
2011 for (i = 0; i < lp->d_npartitions; i++) {
2012 pp = &lp->d_partitions[i];
2013 if (pp->p_offset + pp->p_size > rs->sc_size)
2014 printf("WARNING: %s: end of partition `%c' "
2015 "exceeds the size of raid (%ld)\n",
2016 rs->sc_xname, 'a' + i, (long) rs->sc_size);
2017 }
2018 }
2019
2020 }
2021 /*
2022 * Take care of things one might want to take care of in the event
2023 * that a disklabel isn't present.
2024 */
2025 static void
2026 raidmakedisklabel(rs)
2027 struct raid_softc *rs;
2028 {
2029 struct disklabel *lp = rs->sc_dkdev.dk_label;
2030 db1_printf(("Making a label..\n"));
2031
2032 /*
2033 * For historical reasons, if there's no disklabel present
2034 * the raw partition must be marked FS_BSDFFS.
2035 */
2036
2037 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2038
2039 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2040
2041 lp->d_checksum = dkcksum(lp);
2042 }
2043 /*
2044 * Lookup the provided name in the filesystem. If the file exists,
2045 * is a valid block device, and isn't being used by anyone else,
2046 * set *vpp to the file's vnode.
2047 * You'll find the original of this in ccd.c
2048 */
2049 int
2050 raidlookup(path, p, vpp)
2051 char *path;
2052 struct proc *p;
2053 struct vnode **vpp; /* result */
2054 {
2055 struct nameidata nd;
2056 struct vnode *vp;
2057 struct vattr va;
2058 int error;
2059
2060 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2061 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2062 #ifdef DEBUG
2063 printf("RAIDframe: vn_open returned %d\n", error);
2064 #endif
2065 return (error);
2066 }
2067 vp = nd.ni_vp;
2068 if (vp->v_usecount > 1) {
2069 VOP_UNLOCK(vp, 0);
2070 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2071 return (EBUSY);
2072 }
2073 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2074 VOP_UNLOCK(vp, 0);
2075 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2076 return (error);
2077 }
2078 /* XXX: eventually we should handle VREG, too. */
2079 if (va.va_type != VBLK) {
2080 VOP_UNLOCK(vp, 0);
2081 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2082 return (ENOTBLK);
2083 }
2084 VOP_UNLOCK(vp, 0);
2085 *vpp = vp;
2086 return (0);
2087 }
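
/*
 * Illustrative sketch only (not part of the driver): how a configuration
 * path might use raidlookup() to turn a component's pathname into an
 * open vnode.  The "/dev/sd0e" path is purely an example.
 */
#if 0
	struct vnode *vp;
	int error;

	error = raidlookup("/dev/sd0e", curproc, &vp);
	if (error != 0)
		printf("raidframe: component lookup failed (%d)\n", error);
	/* on success, vp can be stashed in raid_cinfo[][].ci_vp */
#endif
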
2088 /*
2089 * Wait interruptibly for an exclusive lock.
2090 *
2091 * XXX
2092 * Several drivers do this; it should be abstracted and made MP-safe.
2093 * (Hmm... where have we seen this warning before :-> GO )
2094 */
2095 static int
2096 raidlock(rs)
2097 struct raid_softc *rs;
2098 {
2099 int error;
2100
2101 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2102 rs->sc_flags |= RAIDF_WANTED;
2103 if ((error =
2104 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2105 return (error);
2106 }
2107 rs->sc_flags |= RAIDF_LOCKED;
2108 return (0);
2109 }
2110 /*
2111 * Unlock and wake up any waiters.
2112 */
2113 static void
2114 raidunlock(rs)
2115 struct raid_softc *rs;
2116 {
2117
2118 rs->sc_flags &= ~RAIDF_LOCKED;
2119 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2120 rs->sc_flags &= ~RAIDF_WANTED;
2121 wakeup(rs);
2122 }
2123 }
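
/*
 * Illustrative sketch only (not part of the driver): the usual pattern
 * for callers of raidlock()/raidunlock().  raid_do_locked_work() is a
 * hypothetical caller.
 */
#if 0
static int
raid_do_locked_work(rs)
	struct raid_softc *rs;
{
	int error;

	if ((error = raidlock(rs)) != 0)
		return (error);		/* interrupted while sleeping */

	/* ... the unit is now exclusively ours; poke at rs here ... */

	raidunlock(rs);
	return (0);
}
#endif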
2124
2125
2126 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2127 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2128
2129 int
2130 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2131 {
2132 RF_ComponentLabel_t clabel;
2133 raidread_component_label(dev, b_vp, &clabel);
2134 clabel.mod_counter = mod_counter;
2135 clabel.clean = RF_RAID_CLEAN;
2136 raidwrite_component_label(dev, b_vp, &clabel);
2137 return(0);
2138 }
2139
2140
2141 int
2142 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2143 {
2144 RF_ComponentLabel_t clabel;
2145 raidread_component_label(dev, b_vp, &clabel);
2146 clabel.mod_counter = mod_counter;
2147 clabel.clean = RF_RAID_DIRTY;
2148 raidwrite_component_label(dev, b_vp, &clabel);
2149 return(0);
2150 }
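
/*
 * Illustrative sketch only (not part of the driver): raidmarkdirty() is
 * called (e.g. from rf_markalldirty() below) before the array sees any
 * writes, and raidmarkclean() once parity is known good, as in
 * rf_final_update_component_labels().  component_dev and component_vp
 * are hypothetical stand-ins for a component's dev_t and vnode.
 */
#if 0
	raidmarkdirty(component_dev, component_vp, raidPtr->mod_counter);
	/* ... array active; parity may be stale while writes are out ... */
	raidmarkclean(component_dev, component_vp, raidPtr->mod_counter);
#endif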
2151
2152 /* ARGSUSED */
2153 int
2154 raidread_component_label(dev, b_vp, clabel)
2155 dev_t dev;
2156 struct vnode *b_vp;
2157 RF_ComponentLabel_t *clabel;
2158 {
2159 struct buf *bp;
2160 int error;
2161
2162 /* XXX should probably ensure that we don't try to do this if
2163 someone has changed rf_protected_sectors. */
2164
2165 /* get a block of the appropriate size... */
2166 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2167 bp->b_dev = dev;
2168
2169 /* get our ducks in a row for the read */
2170 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2171 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2172 bp->b_flags = B_BUSY | B_READ;
2173 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2174
2175 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2176
2177 error = biowait(bp);
2178
2179 if (!error) {
2180 memcpy(clabel, bp->b_data,
2181 sizeof(RF_ComponentLabel_t));
2182 #if 0
2183 rf_print_component_label( clabel );
2184 #endif
2185 } else {
2186 #if 0
2187 printf("Failed to read RAID component label!\n");
2188 #endif
2189 }
2190
2191 bp->b_flags = B_INVAL | B_AGE;
2192 brelse(bp);
2193 return(error);
2194 }
2195 /* ARGSUSED */
2196 int
2197 raidwrite_component_label(dev, b_vp, clabel)
2198 dev_t dev;
2199 struct vnode *b_vp;
2200 RF_ComponentLabel_t *clabel;
2201 {
2202 struct buf *bp;
2203 int error;
2204
2205 /* get a block of the appropriate size... */
2206 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2207 bp->b_dev = dev;
2208
2209 /* get our ducks in a row for the write */
2210 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2211 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2212 bp->b_flags = B_BUSY | B_WRITE;
2213 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2214
2215 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2216
2217 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2218
2219 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2220 error = biowait(bp);
2221 bp->b_flags = B_INVAL | B_AGE;
2222 brelse(bp);
2223 if (error) {
2224 #if 1
2225 printf("Failed to write RAID component info!\n");
2226 #endif
2227 }
2228
2229 return(error);
2230 }
2231
2232 void
2233 rf_markalldirty(raidPtr)
2234 RF_Raid_t *raidPtr;
2235 {
2236 RF_ComponentLabel_t clabel;
2237 int r,c;
2238
2239 raidPtr->mod_counter++;
2240 for (r = 0; r < raidPtr->numRow; r++) {
2241 for (c = 0; c < raidPtr->numCol; c++) {
2242 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2243 raidread_component_label(
2244 raidPtr->Disks[r][c].dev,
2245 raidPtr->raid_cinfo[r][c].ci_vp,
2246 &clabel);
2247 if (clabel.status == rf_ds_spared) {
2248 /* XXX do something special...
2249 but whatever you do, don't
2250 try to access it!! */
2251 } else {
2252 #if 0
2253 clabel.status =
2254 raidPtr->Disks[r][c].status;
2255 raidwrite_component_label(
2256 raidPtr->Disks[r][c].dev,
2257 raidPtr->raid_cinfo[r][c].ci_vp,
2258 &clabel);
2259 #endif
2260 raidmarkdirty(
2261 raidPtr->Disks[r][c].dev,
2262 raidPtr->raid_cinfo[r][c].ci_vp,
2263 raidPtr->mod_counter);
2264 }
2265 }
2266 }
2267 }
2268 /* printf("Component labels marked dirty.\n"); */
2269 #if 0
2270 for( c = 0; c < raidPtr->numSpare ; c++) {
2271 sparecol = raidPtr->numCol + c;
2272 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2273 /*
2274
2275 XXX this is where we get fancy and map this spare
2276 into its correct spot in the array.
2277
2278 */
2279 /*
2280
2281 we claim this disk is "optimal" if it's
2282 rf_ds_used_spare, as that means it should be
2283 directly substitutable for the disk it replaced.
2284 We note that too...
2285
2286 */
2287
2288 for(i=0;i<raidPtr->numRow;i++) {
2289 for(j=0;j<raidPtr->numCol;j++) {
2290 if ((raidPtr->Disks[i][j].spareRow ==
2291 r) &&
2292 (raidPtr->Disks[i][j].spareCol ==
2293 sparecol)) {
2294 srow = r;
2295 scol = sparecol;
2296 break;
2297 }
2298 }
2299 }
2300
2301 raidread_component_label(
2302 raidPtr->Disks[r][sparecol].dev,
2303 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2304 &clabel);
2305 /* make sure status is noted */
2306 clabel.version = RF_COMPONENT_LABEL_VERSION;
2307 clabel.mod_counter = raidPtr->mod_counter;
2308 clabel.serial_number = raidPtr->serial_number;
2309 clabel.row = srow;
2310 clabel.column = scol;
2311 clabel.num_rows = raidPtr->numRow;
2312 clabel.num_columns = raidPtr->numCol;
2313 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2314 clabel.status = rf_ds_optimal;
2315 raidwrite_component_label(
2316 raidPtr->Disks[r][sparecol].dev,
2317 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2318 &clabel);
2319 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2320 raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2321 }
2322 }
2323
2324 #endif
2325 }
2326
2327
2328 void
2329 rf_update_component_labels(raidPtr)
2330 RF_Raid_t *raidPtr;
2331 {
2332 RF_ComponentLabel_t clabel;
2333 int sparecol;
2334 int r,c;
2335 int i,j;
2336 int srow, scol;
2337
2338 srow = -1;
2339 scol = -1;
2340
2341 /* XXX should do extra checks to make sure things really are clean,
2342 rather than blindly setting the clean bit... */
2343
2344 raidPtr->mod_counter++;
2345
2346 for (r = 0; r < raidPtr->numRow; r++) {
2347 for (c = 0; c < raidPtr->numCol; c++) {
2348 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2349 raidread_component_label(
2350 raidPtr->Disks[r][c].dev,
2351 raidPtr->raid_cinfo[r][c].ci_vp,
2352 &clabel);
2353 /* make sure status is noted */
2354 clabel.status = rf_ds_optimal;
2355 /* bump the counter */
2356 clabel.mod_counter = raidPtr->mod_counter;
2357
2358 raidwrite_component_label(
2359 raidPtr->Disks[r][c].dev,
2360 raidPtr->raid_cinfo[r][c].ci_vp,
2361 &clabel);
2362 }
2363 /* else we don't touch it.. */
2364 }
2365 }
2366
2367 for( c = 0; c < raidPtr->numSpare ; c++) {
2368 sparecol = raidPtr->numCol + c;
2369 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2370 /*
2371
2372 we claim this disk is "optimal" if it's
2373 rf_ds_used_spare, as that means it should be
2374 directly substitutable for the disk it replaced.
2375 We note that too...
2376
2377 */
2378
2379 for(i=0;i<raidPtr->numRow;i++) {
2380 for(j=0;j<raidPtr->numCol;j++) {
2381 if ((raidPtr->Disks[i][j].spareRow ==
2382 0) &&
2383 (raidPtr->Disks[i][j].spareCol ==
2384 sparecol)) {
2385 srow = i;
2386 scol = j;
2387 break;
2388 }
2389 }
2390 }
2391
2392 /* XXX shouldn't *really* need this... */
2393 raidread_component_label(
2394 raidPtr->Disks[0][sparecol].dev,
2395 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2396 &clabel);
2397 /* make sure status is noted */
2398
2399 raid_init_component_label(raidPtr, &clabel);
2400
2401 clabel.mod_counter = raidPtr->mod_counter;
2402 clabel.row = srow;
2403 clabel.column = scol;
2404 clabel.status = rf_ds_optimal;
2405
2406 raidwrite_component_label(
2407 raidPtr->Disks[0][sparecol].dev,
2408 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2409 &clabel);
2410 }
2411 }
2412 /* printf("Component labels updated\n"); */
2413 }
2414
2415
2416 void
2417 rf_final_update_component_labels(raidPtr)
2418 RF_Raid_t *raidPtr;
2419 {
2420 RF_ComponentLabel_t clabel;
2421 int sparecol;
2422 int r,c;
2423 int i,j;
2424 int srow, scol;
2425
2426 srow = -1;
2427 scol = -1;
2428
2429 /* XXX should do extra checks to make sure things really are clean,
2430 rather than blindly setting the clean bit... */
2431
2432 raidPtr->mod_counter++;
2433
2434 for (r = 0; r < raidPtr->numRow; r++) {
2435 for (c = 0; c < raidPtr->numCol; c++) {
2436 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2437 raidread_component_label(
2438 raidPtr->Disks[r][c].dev,
2439 raidPtr->raid_cinfo[r][c].ci_vp,
2440 &clabel);
2441 /* make sure status is noted */
2442 clabel.status = rf_ds_optimal;
2443 /* bump the counter */
2444 clabel.mod_counter = raidPtr->mod_counter;
2445
2446 raidwrite_component_label(
2447 raidPtr->Disks[r][c].dev,
2448 raidPtr->raid_cinfo[r][c].ci_vp,
2449 &clabel);
2450 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2451 raidmarkclean(
2452 raidPtr->Disks[r][c].dev,
2453 raidPtr->raid_cinfo[r][c].ci_vp,
2454 raidPtr->mod_counter);
2455 }
2456 }
2457 /* else we don't touch it.. */
2458 }
2459 }
2460
2461 for( c = 0; c < raidPtr->numSpare ; c++) {
2462 sparecol = raidPtr->numCol + c;
2463 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2464 /*
2465
2466 we claim this disk is "optimal" if it's
2467 rf_ds_used_spare, as that means it should be
2468 directly substitutable for the disk it replaced.
2469 We note that too...
2470
2471 */
2472
2473 for(i=0;i<raidPtr->numRow;i++) {
2474 for(j=0;j<raidPtr->numCol;j++) {
2475 if ((raidPtr->Disks[i][j].spareRow ==
2476 0) &&
2477 (raidPtr->Disks[i][j].spareCol ==
2478 sparecol)) {
2479 srow = i;
2480 scol = j;
2481 break;
2482 }
2483 }
2484 }
2485
2486 /* XXX shouldn't *really* need this... */
2487 raidread_component_label(
2488 raidPtr->Disks[0][sparecol].dev,
2489 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2490 &clabel);
2491 /* make sure status is noted */
2492
2493 raid_init_component_label(raidPtr, &clabel);
2494
2495 clabel.mod_counter = raidPtr->mod_counter;
2496 clabel.row = srow;
2497 clabel.column = scol;
2498 clabel.status = rf_ds_optimal;
2499
2500 raidwrite_component_label(
2501 raidPtr->Disks[0][sparecol].dev,
2502 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2503 &clabel);
2504 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2505 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2506 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2507 raidPtr->mod_counter);
2508 }
2509 }
2510 }
2511 /* printf("Component labels updated\n"); */
2512 }
2513
2514 void
2515 rf_close_component(raidPtr, vp, auto_configured)
2516 RF_Raid_t *raidPtr;
2517 struct vnode *vp;
2518 int auto_configured;
2519 {
2520 struct proc *p;
2521
2522 p = raidPtr->engine_thread;
2523
2524 if (vp != NULL) {
2525 if (auto_configured == 1) {
2526 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2527 vput(vp);
2528
2529 } else {
2530 VOP_UNLOCK(vp, 0);
2531 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2532 }
2533 } else {
2534 printf("vnode was NULL\n");
2535 }
2536 }
2537
2538
2539 void
2540 rf_UnconfigureVnodes(raidPtr)
2541 RF_Raid_t *raidPtr;
2542 {
2543 int r,c;
2544 struct proc *p;
2545 struct vnode *vp;
2546 int acd;
2547
2548
2549 /* We take this opportunity to close the vnodes like we should.. */
2550
2551 p = raidPtr->engine_thread;
2552
2553 for (r = 0; r < raidPtr->numRow; r++) {
2554 for (c = 0; c < raidPtr->numCol; c++) {
2555 printf("Closing vnode for row: %d col: %d\n", r, c);
2556 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2557 acd = raidPtr->Disks[r][c].auto_configured;
2558 rf_close_component(raidPtr, vp, acd);
2559 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2560 raidPtr->Disks[r][c].auto_configured = 0;
2561 }
2562 }
2563 for (r = 0; r < raidPtr->numSpare; r++) {
2564 printf("Closing vnode for spare: %d\n", r);
2565 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2566 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2567 rf_close_component(raidPtr, vp, acd);
2568 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2569 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2570 }
2571 }
2572
2573
2574 void
2575 rf_ReconThread(req)
2576 struct rf_recon_req *req;
2577 {
2578 int s;
2579 RF_Raid_t *raidPtr;
2580
2581 s = splbio();
2582 raidPtr = (RF_Raid_t *) req->raidPtr;
2583 raidPtr->recon_in_progress = 1;
2584
2585 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2586 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2587
2588 /* XXX get rid of this! we don't need it at all.. */
2589 RF_Free(req, sizeof(*req));
2590
2591 raidPtr->recon_in_progress = 0;
2592 splx(s);
2593
2594 /* That's all... */
2595 kthread_exit(0); /* does not return */
2596 }
2597
2598 void
2599 rf_RewriteParityThread(raidPtr)
2600 RF_Raid_t *raidPtr;
2601 {
2602 int retcode;
2603 int s;
2604
2605 raidPtr->parity_rewrite_in_progress = 1;
2606 s = splbio();
2607 retcode = rf_RewriteParity(raidPtr);
2608 splx(s);
2609 if (retcode) {
2610 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2611 } else {
2612 /* set the clean bit! If we shutdown correctly,
2613 the clean bit on each component label will get
2614 set */
2615 raidPtr->parity_good = RF_RAID_CLEAN;
2616 }
2617 raidPtr->parity_rewrite_in_progress = 0;
2618
2619 /* Anyone waiting for us to stop? If so, inform them... */
2620 if (raidPtr->waitShutdown) {
2621 wakeup(&raidPtr->parity_rewrite_in_progress);
2622 }
2623
2624 /* That's all... */
2625 kthread_exit(0); /* does not return */
2626 }
2627
2628
2629 void
2630 rf_CopybackThread(raidPtr)
2631 RF_Raid_t *raidPtr;
2632 {
2633 int s;
2634
2635 raidPtr->copyback_in_progress = 1;
2636 s = splbio();
2637 rf_CopybackReconstructedData(raidPtr);
2638 splx(s);
2639 raidPtr->copyback_in_progress = 0;
2640
2641 /* That's all... */
2642 kthread_exit(0); /* does not return */
2643 }
2644
2645
2646 void
2647 rf_ReconstructInPlaceThread(req)
2648 struct rf_recon_req *req;
2649 {
2650 int retcode;
2651 int s;
2652 RF_Raid_t *raidPtr;
2653
2654 s = splbio();
2655 raidPtr = req->raidPtr;
2656 raidPtr->recon_in_progress = 1;
2657 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2658 RF_Free(req, sizeof(*req));
2659 raidPtr->recon_in_progress = 0;
2660 splx(s);
2661
2662 /* That's all... */
2663 kthread_exit(0); /* does not return */
2664 }
2665
2666 void
2667 rf_mountroot_hook(dev)
2668 struct device *dev;
2669 {
2670
2671 }
2672
2673
2674 RF_AutoConfig_t *
2675 rf_find_raid_components()
2676 {
2677 struct devnametobdevmaj *dtobdm;
2678 struct vnode *vp;
2679 struct disklabel label;
2680 struct device *dv;
2681 char *cd_name;
2682 dev_t dev;
2683 int error;
2684 int i;
2685 int good_one;
2686 RF_ComponentLabel_t *clabel;
2687 RF_AutoConfig_t *ac_list;
2688 RF_AutoConfig_t *ac;
2689
2690
2691 /* initialize the AutoConfig list */
2692 ac_list = NULL;
2693
2694 if (raidautoconfig) {
2695
2696 /* we begin by trolling through *all* the devices on the system */
2697
2698 for (dv = alldevs.tqh_first; dv != NULL;
2699 dv = dv->dv_list.tqe_next) {
2700
2701 /* we are only interested in disks... */
2702 if (dv->dv_class != DV_DISK)
2703 continue;
2704
2705 /* we don't care about floppies... */
2706 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2707 continue;
2708 }
2709
2710 /* need to find the device_name_to_block_device_major stuff */
2711 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2712 dtobdm = dev_name2blk;
2713 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2714 dtobdm++;
2715 }
2716
2717 /* get a vnode for the raw partition of this disk */
2718
2719 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2720 if (bdevvp(dev, &vp))
2721 panic("RAID can't alloc vnode");
2722
2723 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2724
2725 if (error) {
2726 /* "Who cares." Continue looking
2727 for something that exists. */
2728 vput(vp);
2729 continue;
2730 }
2731
2732 /* Ok, the disk exists. Go get the disklabel. */
2733 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2734 FREAD, NOCRED, 0);
2735 if (error) {
2736 /*
2737 * XXX can't happen - open() would
2738 * have errored out (or faked up one)
2739 */
2740 printf("can't get label for dev %s%c (%d)!?!?\n",
2741 dv->dv_xname, 'a' + RAW_PART, error);
2742 }
2743
2744 /* don't need this any more. We'll allocate it again
2745 a little later if we really do... */
2746 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2747 vput(vp);
2748
2749 for (i=0; i < label.d_npartitions; i++) {
2750 /* We only support partitions marked as RAID */
2751 if (label.d_partitions[i].p_fstype != FS_RAID)
2752 continue;
2753
2754 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2755 if (bdevvp(dev, &vp))
2756 panic("RAID can't alloc vnode");
2757
2758 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2759 if (error) {
2760 /* Whatever... */
2761 vput(vp);
2762 continue;
2763 }
2764
2765 good_one = 0;
2766
2767 clabel = (RF_ComponentLabel_t *)
2768 malloc(sizeof(RF_ComponentLabel_t),
2769 M_RAIDFRAME, M_NOWAIT);
2770 if (clabel == NULL) {
2771 /* XXX CLEANUP HERE */
2772 printf("RAID auto config: out of memory!\n");
2773 return(NULL); /* XXX probably should panic? */
2774 }
2775
2776 if (!raidread_component_label(dev, vp, clabel)) {
2777 /* Got the label. Does it look reasonable? */
2778 if (rf_reasonable_label(clabel) &&
2779 (clabel->partitionSize <=
2780 label.d_partitions[i].p_size)) {
2781 #if DEBUG
2782 printf("Component on: %s%c: %d\n",
2783 dv->dv_xname, 'a'+i,
2784 label.d_partitions[i].p_size);
2785 rf_print_component_label(clabel);
2786 #endif
2787 /* if it's reasonable, add it,
2788 else ignore it. */
2789 ac = (RF_AutoConfig_t *)
2790 malloc(sizeof(RF_AutoConfig_t),
2791 M_RAIDFRAME,
2792 M_NOWAIT);
2793 if (ac == NULL) {
2794 /* XXX should panic?? */
2795 return(NULL);
2796 }
2797
2798 sprintf(ac->devname, "%s%c",
2799 dv->dv_xname, 'a'+i);
2800 ac->dev = dev;
2801 ac->vp = vp;
2802 ac->clabel = clabel;
2803 ac->next = ac_list;
2804 ac_list = ac;
2805 good_one = 1;
2806 }
2807 }
2808 if (!good_one) {
2809 /* cleanup */
2810 free(clabel, M_RAIDFRAME);
2811 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2812 vput(vp);
2813 }
2814 }
2815 }
2816 }
2817 return(ac_list);
2818 }
2819
2820 static int
2821 rf_reasonable_label(clabel)
2822 RF_ComponentLabel_t *clabel;
2823 {
2824
2825 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2826 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2827 ((clabel->clean == RF_RAID_CLEAN) ||
2828 (clabel->clean == RF_RAID_DIRTY)) &&
2829 clabel->row >=0 &&
2830 clabel->column >= 0 &&
2831 clabel->num_rows > 0 &&
2832 clabel->num_columns > 0 &&
2833 clabel->row < clabel->num_rows &&
2834 clabel->column < clabel->num_columns &&
2835 clabel->blockSize > 0 &&
2836 clabel->numBlocks > 0) {
2837 /* label looks reasonable enough... */
2838 return(1);
2839 }
2840 return(0);
2841 }
2842
2843
2844 void
2845 rf_print_component_label(clabel)
2846 RF_ComponentLabel_t *clabel;
2847 {
2848 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2849 clabel->row, clabel->column,
2850 clabel->num_rows, clabel->num_columns);
2851 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2852 clabel->version, clabel->serial_number,
2853 clabel->mod_counter);
2854 printf(" Clean: %s Status: %d\n",
2855 clabel->clean ? "Yes" : "No", clabel->status );
2856 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2857 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2858 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2859 (char) clabel->parityConfig, clabel->blockSize,
2860 clabel->numBlocks);
2861 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2862 printf(" Contains root partition: %s\n",
2863 clabel->root_partition ? "Yes" : "No" );
2864 printf(" Last configured as: raid%d\n", clabel->last_unit );
2865 #if 0
2866 printf(" Config order: %d\n", clabel->config_order);
2867 #endif
2868
2869 }
2870
2871 RF_ConfigSet_t *
2872 rf_create_auto_sets(ac_list)
2873 RF_AutoConfig_t *ac_list;
2874 {
2875 RF_AutoConfig_t *ac;
2876 RF_ConfigSet_t *config_sets;
2877 RF_ConfigSet_t *cset;
2878 RF_AutoConfig_t *ac_next;
2879
2880
2881 config_sets = NULL;
2882
2883 /* Go through the AutoConfig list, and figure out which components
2884 belong to what sets. */
2885 ac = ac_list;
2886 while(ac!=NULL) {
2887 /* we're going to putz with ac->next, so save it here
2888 for use at the end of the loop */
2889 ac_next = ac->next;
2890
2891 if (config_sets == NULL) {
2892 /* will need at least this one... */
2893 config_sets = (RF_ConfigSet_t *)
2894 malloc(sizeof(RF_ConfigSet_t),
2895 M_RAIDFRAME, M_NOWAIT);
2896 if (config_sets == NULL) {
2897 panic("rf_create_auto_sets: No memory!\n");
2898 }
2899 /* this one is easy :) */
2900 config_sets->ac = ac;
2901 config_sets->next = NULL;
2902 config_sets->rootable = 0;
2903 ac->next = NULL;
2904 } else {
2905 /* which set does this component fit into? */
2906 cset = config_sets;
2907 while(cset!=NULL) {
2908 if (rf_does_it_fit(cset, ac)) {
2909 /* looks like it matches... */
2910 ac->next = cset->ac;
2911 cset->ac = ac;
2912 break;
2913 }
2914 cset = cset->next;
2915 }
2916 if (cset==NULL) {
2917 /* didn't find a match above... new set..*/
2918 cset = (RF_ConfigSet_t *)
2919 malloc(sizeof(RF_ConfigSet_t),
2920 M_RAIDFRAME, M_NOWAIT);
2921 if (cset == NULL) {
2922 panic("rf_create_auto_sets: No memory!\n");
2923 }
2924 cset->ac = ac;
2925 ac->next = NULL;
2926 cset->next = config_sets;
2927 cset->rootable = 0;
2928 config_sets = cset;
2929 }
2930 }
2931 ac = ac_next;
2932 }
2933
2934
2935 return(config_sets);
2936 }
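
/*
 * Illustrative sketch only (not part of the driver): walking the list
 * returned by rf_create_auto_sets() and configuring each set that looks
 * viable, releasing and cleaning up the rest.  This mirrors the shape of
 * the autoconfiguration code elsewhere; variable names are hypothetical.
 */
#if 0
	RF_ConfigSet_t *cset, *next_cset;
	int unit;

	cset = rf_create_auto_sets(ac_list);
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			if (rf_auto_config_set(cset, &unit) != 0)
				rf_release_all_vps(cset);
		} else {
			/* not configuring this one; give back the vnodes */
			rf_release_all_vps(cset);
		}
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}
#endif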
2937
2938 static int
2939 rf_does_it_fit(cset, ac)
2940 RF_ConfigSet_t *cset;
2941 RF_AutoConfig_t *ac;
2942 {
2943 RF_ComponentLabel_t *clabel1, *clabel2;
2944
2945 /* If this one matches the *first* one in the set, that's good
2946 enough, since the other members of the set would have been
2947 through here too... */
2948 /* note that we are not checking partitionSize here..
2949
2950 Note that we are also not checking the mod_counters here.
2951 If everything else matches except the mod_counter, that's
2952 good enough for this test. We will deal with the mod_counters
2953 a little later in the autoconfiguration process.
2954
2955 (clabel1->mod_counter == clabel2->mod_counter) &&
2956
2957 The reason we don't check for this is that failed disks
2958 will have lower modification counts. If those disks are
2959 not added to the set they used to belong to, then they will
2960 form their own set, which may result in 2 different sets,
2961 for example, competing to be configured at raid0, and
2962 perhaps competing to be the root filesystem set. If the
2963 wrong ones get configured, or both attempt to become /,
2964 weird behaviour and/or serious lossage will occur. Thus we
2965 need to bring them into the fold here, and kick them out at
2966 a later point.
2967
2968 */
2969
2970 clabel1 = cset->ac->clabel;
2971 clabel2 = ac->clabel;
2972 if ((clabel1->version == clabel2->version) &&
2973 (clabel1->serial_number == clabel2->serial_number) &&
2974 (clabel1->num_rows == clabel2->num_rows) &&
2975 (clabel1->num_columns == clabel2->num_columns) &&
2976 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2977 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2978 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2979 (clabel1->parityConfig == clabel2->parityConfig) &&
2980 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2981 (clabel1->blockSize == clabel2->blockSize) &&
2982 (clabel1->numBlocks == clabel2->numBlocks) &&
2983 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2984 (clabel1->root_partition == clabel2->root_partition) &&
2985 (clabel1->last_unit == clabel2->last_unit) &&
2986 (clabel1->config_order == clabel2->config_order)) {
2987 /* if it gets here, it almost *has* to be a match */
2988 } else {
2989 /* it's not consistent with somebody in the set..
2990 punt */
2991 return(0);
2992 }
2993 /* all was fine.. it must fit... */
2994 return(1);
2995 }
2996
2997 int
2998 rf_have_enough_components(cset)
2999 RF_ConfigSet_t *cset;
3000 {
3001 RF_AutoConfig_t *ac;
3002 RF_AutoConfig_t *auto_config;
3003 RF_ComponentLabel_t *clabel;
3004 int r,c;
3005 int num_rows;
3006 int num_cols;
3007 int num_missing;
3008 int mod_counter;
3009 int mod_counter_found;
3010 int even_pair_failed;
3011 char parity_type;
3012
3013
3014 /* check to see that we have enough 'live' components
3015 of this set. If so, we can configure it if necessary */
3016
3017 num_rows = cset->ac->clabel->num_rows;
3018 num_cols = cset->ac->clabel->num_columns;
3019 parity_type = cset->ac->clabel->parityConfig;
3020
3021 /* XXX Check for duplicate components!?!?!? */
3022
3023 /* Determine what the mod_counter is supposed to be for this set. */
3024
3025 mod_counter_found = 0;
3026 ac = cset->ac;
3027 while(ac!=NULL) {
3028 if (mod_counter_found==0) {
3029 mod_counter = ac->clabel->mod_counter;
3030 mod_counter_found = 1;
3031 } else {
3032 if (ac->clabel->mod_counter > mod_counter) {
3033 mod_counter = ac->clabel->mod_counter;
3034 }
3035 }
3036 ac = ac->next;
3037 }
3038
3039 num_missing = 0;
3040 auto_config = cset->ac;
3041
3042 for(r=0; r<num_rows; r++) {
3043 even_pair_failed = 0;
3044 for(c=0; c<num_cols; c++) {
3045 ac = auto_config;
3046 while(ac!=NULL) {
3047 if ((ac->clabel->row == r) &&
3048 (ac->clabel->column == c) &&
3049 (ac->clabel->mod_counter == mod_counter)) {
3050 /* it's this one... */
3051 #if DEBUG
3052 printf("Found: %s at %d,%d\n",
3053 ac->devname,r,c);
3054 #endif
3055 break;
3056 }
3057 ac=ac->next;
3058 }
3059 if (ac==NULL) {
3060 /* Didn't find one here! */
3061 /* special case for RAID 1, especially
3062 where there are more than 2
3063 components (where RAIDframe treats
3064 things a little differently :( ) */
3065 if (parity_type == '1') {
3066 if (c%2 == 0) { /* even component */
3067 even_pair_failed = 1;
3068 } else { /* odd component. If
3069 we're failed, and
3070 so is the even
3071 component, it's
3072 "Good Night, Charlie" */
3073 if (even_pair_failed == 1) {
3074 return(0);
3075 }
3076 }
3077 } else {
3078 /* normal accounting */
3079 num_missing++;
3080 }
3081 }
3082 if ((parity_type == '1') && (c%2 == 1)) {
3083 /* Just did the odd component of a pair, and
3084 we didn't bail.. reset the even_pair_failed
3085 flag, and go on to the next pair.... */
3086 even_pair_failed = 0;
3087 }
3088 }
3089 }
3090
3091 clabel = cset->ac->clabel;
3092
3093 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3094 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3095 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3096 /* XXX this needs to be made *much* more general */
3097 /* Too many failures */
3098 return(0);
3099 }
3100 /* otherwise, all is well, and we've got enough to take a kick
3101 at autoconfiguring this set */
3102 return(1);
3103 }
3104
3105 void
3106 rf_create_configuration(ac,config,raidPtr)
3107 RF_AutoConfig_t *ac;
3108 RF_Config_t *config;
3109 RF_Raid_t *raidPtr;
3110 {
3111 RF_ComponentLabel_t *clabel;
3112 int i;
3113
3114 clabel = ac->clabel;
3115
3116 /* 1. Fill in the common stuff */
3117 config->numRow = clabel->num_rows;
3118 config->numCol = clabel->num_columns;
3119 config->numSpare = 0; /* XXX should this be set here? */
3120 config->sectPerSU = clabel->sectPerSU;
3121 config->SUsPerPU = clabel->SUsPerPU;
3122 config->SUsPerRU = clabel->SUsPerRU;
3123 config->parityConfig = clabel->parityConfig;
3124 /* XXX... */
3125 strcpy(config->diskQueueType,"fifo");
3126 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3127 config->layoutSpecificSize = 0; /* XXX ?? */
3128
3129 while(ac!=NULL) {
3130 /* row/col values will be in range due to the checks
3131 in reasonable_label() */
3132 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3133 ac->devname);
3134 ac = ac->next;
3135 }
3136
3137 for(i=0;i<RF_MAXDBGV;i++) {
3138 config->debugVars[i][0] = '\0';
3139 }
3140 }
3141
3142 int
3143 rf_set_autoconfig(raidPtr, new_value)
3144 RF_Raid_t *raidPtr;
3145 int new_value;
3146 {
3147 RF_ComponentLabel_t clabel;
3148 struct vnode *vp;
3149 dev_t dev;
3150 int row, column;
3151
3152 raidPtr->autoconfigure = new_value;
3153 for(row=0; row<raidPtr->numRow; row++) {
3154 for(column=0; column<raidPtr->numCol; column++) {
3155 if (raidPtr->Disks[row][column].status ==
3156 rf_ds_optimal) {
3157 dev = raidPtr->Disks[row][column].dev;
3158 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3159 raidread_component_label(dev, vp, &clabel);
3160 clabel.autoconfigure = new_value;
3161 raidwrite_component_label(dev, vp, &clabel);
3162 }
3163 }
3164 }
3165 return(new_value);
3166 }
3167
3168 int
3169 rf_set_rootpartition(raidPtr, new_value)
3170 RF_Raid_t *raidPtr;
3171 int new_value;
3172 {
3173 RF_ComponentLabel_t clabel;
3174 struct vnode *vp;
3175 dev_t dev;
3176 int row, column;
3177
3178 raidPtr->root_partition = new_value;
3179 for(row=0; row<raidPtr->numRow; row++) {
3180 for(column=0; column<raidPtr->numCol; column++) {
3181 if (raidPtr->Disks[row][column].status ==
3182 rf_ds_optimal) {
3183 dev = raidPtr->Disks[row][column].dev;
3184 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3185 raidread_component_label(dev, vp, &clabel);
3186 clabel.root_partition = new_value;
3187 raidwrite_component_label(dev, vp, &clabel);
3188 }
3189 }
3190 }
3191 return(new_value);
3192 }
3193
3194 void
3195 rf_release_all_vps(cset)
3196 RF_ConfigSet_t *cset;
3197 {
3198 RF_AutoConfig_t *ac;
3199
3200 ac = cset->ac;
3201 while(ac!=NULL) {
3202 /* Close the vp, and give it back */
3203 if (ac->vp) {
3204 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3205 vput(ac->vp);
3206 ac->vp = NULL;
3207 }
3208 ac = ac->next;
3209 }
3210 }
3211
3212
3213 void
3214 rf_cleanup_config_set(cset)
3215 RF_ConfigSet_t *cset;
3216 {
3217 RF_AutoConfig_t *ac;
3218 RF_AutoConfig_t *next_ac;
3219
3220 ac = cset->ac;
3221 while(ac!=NULL) {
3222 next_ac = ac->next;
3223 /* nuke the label */
3224 free(ac->clabel, M_RAIDFRAME);
3225 /* cleanup the config structure */
3226 free(ac, M_RAIDFRAME);
3227 /* "next.." */
3228 ac = next_ac;
3229 }
3230 /* and, finally, nuke the config set */
3231 free(cset, M_RAIDFRAME);
3232 }
3233
3234
3235 void
3236 raid_init_component_label(raidPtr, clabel)
3237 RF_Raid_t *raidPtr;
3238 RF_ComponentLabel_t *clabel;
3239 {
3240 /* current version number */
3241 clabel->version = RF_COMPONENT_LABEL_VERSION;
3242 clabel->serial_number = raidPtr->serial_number;
3243 clabel->mod_counter = raidPtr->mod_counter;
3244 clabel->num_rows = raidPtr->numRow;
3245 clabel->num_columns = raidPtr->numCol;
3246 clabel->clean = RF_RAID_DIRTY; /* not clean */
3247 clabel->status = rf_ds_optimal; /* "It's good!" */
3248
3249 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3250 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3251 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3252
3253 clabel->blockSize = raidPtr->bytesPerSector;
3254 clabel->numBlocks = raidPtr->sectorsPerDisk;
3255
3256 /* XXX not portable */
3257 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3258 clabel->maxOutstanding = raidPtr->maxOutstanding;
3259 clabel->autoconfigure = raidPtr->autoconfigure;
3260 clabel->root_partition = raidPtr->root_partition;
3261 clabel->last_unit = raidPtr->raidid;
3262 clabel->config_order = raidPtr->config_order;
3263 }
3264
3265 int
3266 rf_auto_config_set(cset,unit)
3267 RF_ConfigSet_t *cset;
3268 int *unit;
3269 {
3270 RF_Raid_t *raidPtr;
3271 RF_Config_t *config;
3272 int raidID;
3273 int retcode;
3274
3275 printf("RAID autoconfigure\n");
3276
3277 retcode = 0;
3278 *unit = -1;
3279
3280 /* 1. Create a config structure */
3281
3282 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3283 M_RAIDFRAME,
3284 M_NOWAIT);
3285 if (config==NULL) {
3286 printf("Out of mem!?!?\n");
3287 /* XXX do something more intelligent here. */
3288 return(1);
3289 }
3290
3291 memset(config, 0, sizeof(RF_Config_t));
3292
3293 /* XXX raidID needs to be set correctly.. */
3294
3295 /*
3296 2. Figure out what RAID ID this one is supposed to live at
3297 See if we can get the same RAID dev that it was configured
3298 on last time..
3299 */
3300
3301 raidID = cset->ac->clabel->last_unit;
3302 if ((raidID < 0) || (raidID >= numraid)) {
3303 /* let's not wander off into lala land. */
3304 raidID = numraid - 1;
3305 }
3306 if (raidPtrs[raidID]->valid != 0) {
3307
3308 /*
3309 Nope... Go looking for an alternative...
3310 Start high so we don't immediately use raid0 if that's
3311 not taken.
3312 */
3313
3314 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3315 if (raidPtrs[raidID]->valid == 0) {
3316 /* can use this one! */
3317 break;
3318 }
3319 }
3320 }
3321
3322 if (raidID < 0) {
3323 /* punt... */
3324 printf("Unable to auto configure this set!\n");
3325 printf("(Out of RAID devs!)\n");
3326 return(1);
3327 }
3328 printf("Configuring raid%d:\n",raidID);
3329 raidPtr = raidPtrs[raidID];
3330
3331 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3332 raidPtr->raidid = raidID;
3333 raidPtr->openings = RAIDOUTSTANDING;
3334
3335 /* 3. Build the configuration structure */
3336 rf_create_configuration(cset->ac, config, raidPtr);
3337
3338 /* 4. Do the configuration */
3339 retcode = rf_Configure(raidPtr, config, cset->ac);
3340
3341 if (retcode == 0) {
3342
3343 raidinit(raidPtrs[raidID]);
3344
3345 rf_markalldirty(raidPtrs[raidID]);
3346 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3347 if (cset->ac->clabel->root_partition==1) {
3348 /* everything configured just fine. Make a note
3349 that this set is eligible to be root. */
3350 cset->rootable = 1;
3351 /* XXX do this here? */
3352 raidPtrs[raidID]->root_partition = 1;
3353 }
3354 }
3355
3356 /* 5. Cleanup */
3357 free(config, M_RAIDFRAME);
3358
3359 *unit = raidID;
3360 return(retcode);
3361 }
3362