1 /* $NetBSD: rf_netbsdkintf.c,v 1.90 2000/06/03 16:44:43 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include "raid.h"
139 #include "opt_raid_autoconfig.h"
140 #include "rf_raid.h"
141 #include "rf_raidframe.h"
142 #include "rf_copyback.h"
143 #include "rf_dag.h"
144 #include "rf_dagflags.h"
145 #include "rf_diskqueue.h"
146 #include "rf_acctrace.h"
147 #include "rf_etimer.h"
148 #include "rf_general.h"
149 #include "rf_debugMem.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_debugprint.h"
155 #include "rf_threadstuff.h"
156 #include "rf_configure.h"
157
158 int rf_kdebug_level = 0;
159
160 #ifdef DEBUG
161 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
162 #else /* DEBUG */
163 #define db1_printf(a) { }
164 #endif /* DEBUG */
165
166 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
167
168 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
169
170 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
171 * spare table */
172 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
173 * installation process */
174
175 /* prototypes */
176 static void KernelWakeupFunc(struct buf * bp);
177 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
178 dev_t dev, RF_SectorNum_t startSect,
179 RF_SectorCount_t numSect, caddr_t buf,
180 void (*cbFunc) (struct buf *), void *cbArg,
181 int logBytesPerSector, struct proc * b_proc);
182 static void raidinit __P((RF_Raid_t *));
183
184 void raidattach __P((int));
185 int raidsize __P((dev_t));
186 int raidopen __P((dev_t, int, int, struct proc *));
187 int raidclose __P((dev_t, int, int, struct proc *));
188 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
189 int raidwrite __P((dev_t, struct uio *, int));
190 int raidread __P((dev_t, struct uio *, int));
191 void raidstrategy __P((struct buf *));
192 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
193
194 /*
195 * Pilfered from ccd.c
196 */
197
198 struct raidbuf {
199 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
200 struct buf *rf_obp; /* ptr. to original I/O buf */
201 int rf_flags; /* misc. flags */
202 RF_DiskQueueData_t *req;/* the request that this was part of.. */
203 };
204
205
206 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
207 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
208
209 /* XXX Not sure if the following should be replacing the raidPtrs above,
210 or if it should be used in conjunction with that...
211 */
212
213 struct raid_softc {
214 int sc_flags; /* flags */
215 int sc_cflags; /* configuration flags */
216 size_t sc_size; /* size of the raid device */
217 char sc_xname[20]; /* XXX external name */
218 struct disk sc_dkdev; /* generic disk device info */
219 struct pool sc_cbufpool; /* component buffer pool */
220 struct buf_queue buf_queue; /* used for the device queue */
221 };
222 /* sc_flags */
223 #define RAIDF_INITED 0x01 /* unit has been initialized */
224 #define RAIDF_WLABEL 0x02 /* label area is writable */
225 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
226 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
227 #define RAIDF_LOCKED 0x80 /* unit is locked */
228
229 #define raidunit(x) DISKUNIT(x)
230 int numraid = 0;
231
232 /*
233 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
234 * Be aware that large numbers can allow the driver to consume a lot of
235 * kernel memory, especially on writes, and in degraded mode reads.
236 *
237 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
238 * a single 64K write will typically require 64K for the old data,
239 * 64K for the old parity, and 64K for the new parity, for a total
240 * of 192K (if the parity buffer is not re-used immediately).
241 * Even if it is used immediately, that's still 128K, which when multiplied
242 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
243 *
244 * Now in degraded mode, for example, a 64K read on the above setup may
245 * require data reconstruction, which will require *all* of the 4 remaining
246 * disks to participate -- 4 * 32K/disk == 128K again.
247 */
248
249 #ifndef RAIDOUTSTANDING
250 #define RAIDOUTSTANDING 6
251 #endif
252
253 #define RAIDLABELDEV(dev) \
254 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
255
256 /* declared here, and made public, for the benefit of KVM stuff.. */
257 struct raid_softc *raid_softc;
258
259 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
260 struct disklabel *));
261 static void raidgetdisklabel __P((dev_t));
262 static void raidmakedisklabel __P((struct raid_softc *));
263
264 static int raidlock __P((struct raid_softc *));
265 static void raidunlock __P((struct raid_softc *));
266
267 static void rf_markalldirty __P((RF_Raid_t *));
268 void rf_mountroot_hook __P((struct device *));
269
270 struct device *raidrootdev;
271
272 void rf_ReconThread __P((struct rf_recon_req *));
273 /* XXX what I want is: */
274 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
275 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
276 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
277 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
278 void rf_buildroothack __P((void *));
279
280 RF_AutoConfig_t *rf_find_raid_components __P((void));
281 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
282 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
283 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
284 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
285 RF_Raid_t *));
286 int rf_set_autoconfig __P((RF_Raid_t *, int));
287 int rf_set_rootpartition __P((RF_Raid_t *, int));
288 void rf_release_all_vps __P((RF_ConfigSet_t *));
289 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
290 int rf_have_enough_components __P((RF_ConfigSet_t *));
291 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
292
293 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
294 allow autoconfig to take place.
295 Note that this is overridden by having
296 RAID_AUTOCONFIG as an option in the
297 kernel config file. */
298
299 void
300 raidattach(num)
301 int num;
302 {
303 int raidID;
304 int i, rc;
305 RF_AutoConfig_t *ac_list; /* autoconfig list */
306 RF_ConfigSet_t *config_sets;
307
308 #ifdef DEBUG
309 printf("raidattach: Asked for %d units\n", num);
310 #endif
311
312 if (num <= 0) {
313 #ifdef DIAGNOSTIC
314 panic("raidattach: count <= 0");
315 #endif
316 return;
317 }
318 /* This is where all the initialization stuff gets done. */
319
320 numraid = num;
321
322 /* Make some space for requested number of units... */
323
324 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
325 if (raidPtrs == NULL) {
326 panic("raidPtrs is NULL!!\n");
327 }
328
329 rc = rf_mutex_init(&rf_sparet_wait_mutex);
330 if (rc) {
331 RF_PANIC();
332 }
333
334 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
335
336 for (i = 0; i < num; i++)
337 raidPtrs[i] = NULL;
338 rc = rf_BootRaidframe();
339 if (rc == 0)
340 printf("Kernelized RAIDframe activated\n");
341 else
342 panic("Serious error booting RAID!!\n");
343
344 /* put together some data structures like the CCD device does.. This
345 * lets us lock the device and what-not when it gets opened. */
346
347 raid_softc = (struct raid_softc *)
348 malloc(num * sizeof(struct raid_softc),
349 M_RAIDFRAME, M_NOWAIT);
350 if (raid_softc == NULL) {
351 printf("WARNING: no memory for RAIDframe driver\n");
352 return;
353 }
354
355 bzero(raid_softc, num * sizeof(struct raid_softc));
356
357 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
358 M_RAIDFRAME, M_NOWAIT);
359 if (raidrootdev == NULL) {
360 panic("No memory for RAIDframe driver!!?!?!\n");
361 }
362
363 for (raidID = 0; raidID < num; raidID++) {
364 BUFQ_INIT(&raid_softc[raidID].buf_queue);
365
366 raidrootdev[raidID].dv_class = DV_DISK;
367 raidrootdev[raidID].dv_cfdata = NULL;
368 raidrootdev[raidID].dv_unit = raidID;
369 raidrootdev[raidID].dv_parent = NULL;
370 raidrootdev[raidID].dv_flags = 0;
371 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
372
373 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
374 (RF_Raid_t *));
375 if (raidPtrs[raidID] == NULL) {
376 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
377 numraid = raidID;
378 return;
379 }
380 }
381
382 #if RAID_AUTOCONFIG
383 raidautoconfig = 1;
384 #endif
385
386 if (raidautoconfig) {
387 /* 1. locate all RAID components on the system */
388
389 #if DEBUG
390 printf("Searching for raid components...\n");
391 #endif
392 ac_list = rf_find_raid_components();
393
394 /* 2. sort them into their respective sets */
395
396 config_sets = rf_create_auto_sets(ac_list);
397
398 /* 3. evaluate each set and configure the valid ones
399 This gets done in rf_buildroothack() */
400
401 /* schedule the creation of the thread to do the
402 "/ on RAID" stuff */
403
404 kthread_create(rf_buildroothack,config_sets);
405
406 #if 0
407 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
408 #endif
409 }
410
411 }
412
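/*
 * rf_buildroothack -- body of the kernel thread created from raidattach().
 * Walks the list of auto-detected configuration sets, auto-configures any
 * set that has enough components and is marked for autoconfiguration, and
 * releases the resources of the rest.  If exactly one configured set is
 * rootable (and the user did not boot with RB_ASKNAME), nominate that unit
 * as the root device; if more than one qualifies, fall back to asking.
 */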
413 void
414 rf_buildroothack(arg)
415 void *arg;
416 {
417 RF_ConfigSet_t *config_sets = arg;
418 RF_ConfigSet_t *cset;
419 RF_ConfigSet_t *next_cset;
420 int retcode;
421 int raidID;
422 int rootID;
423 int num_root;
424
425 num_root = 0;
426 cset = config_sets;
427 while(cset != NULL ) {
428 next_cset = cset->next;
429 if (rf_have_enough_components(cset) &&
430 cset->ac->clabel->autoconfigure==1) {
431 retcode = rf_auto_config_set(cset,&raidID);
432 if (!retcode) {
433 if (cset->rootable) {
434 rootID = raidID;
435 num_root++;
436 }
437 } else {
438 /* The autoconfig didn't work :( */
439 #if DEBUG
440 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
441 #endif
442 rf_release_all_vps(cset);
443 }
444 } else {
445 /* we're not autoconfiguring this set...
446 release the associated resources */
447 rf_release_all_vps(cset);
448 }
449 /* cleanup */
450 rf_cleanup_config_set(cset);
451 cset = next_cset;
452 }
453 if (boothowto & RB_ASKNAME) {
454 /* We don't auto-config... */
455 } else {
456 /* They didn't ask, and we found something bootable... */
457
458 if (num_root == 1) {
459 booted_device = &raidrootdev[rootID];
460 } else if (num_root > 1) {
461 /* we can't guess.. require the user to answer... */
462 boothowto |= RB_ASKNAME;
463 }
464 }
465 }
466
467
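/*
 * raidsize -- return the size, in DEV_BSIZE units, of the indicated
 * partition, for the benefit of the kernel dump code.  Returns -1 if
 * the unit is not configured or the partition is not of type FS_SWAP.
 */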
468 int
469 raidsize(dev)
470 dev_t dev;
471 {
472 struct raid_softc *rs;
473 struct disklabel *lp;
474 int part, unit, omask, size;
475
476 unit = raidunit(dev);
477 if (unit >= numraid)
478 return (-1);
479 rs = &raid_softc[unit];
480
481 if ((rs->sc_flags & RAIDF_INITED) == 0)
482 return (-1);
483
484 part = DISKPART(dev);
485 omask = rs->sc_dkdev.dk_openmask & (1 << part);
486 lp = rs->sc_dkdev.dk_label;
487
488 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
489 return (-1);
490
491 if (lp->d_partitions[part].p_fstype != FS_SWAP)
492 size = -1;
493 else
494 size = lp->d_partitions[part].p_size *
495 (lp->d_secsize / DEV_BSIZE);
496
497 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
498 return (-1);
499
500 return (size);
501
502 }
503
504 int
505 raiddump(dev, blkno, va, size)
506 dev_t dev;
507 daddr_t blkno;
508 caddr_t va;
509 size_t size;
510 {
511 /* Not implemented. */
512 return ENXIO;
513 }
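/*
 * raidopen -- open a partition on the unit.  Reads (or fakes up) the
 * disklabel on the first open, verifies that the requested partition
 * exists, and marks the component labels dirty on the first open of a
 * configured set.
 */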
514 /* ARGSUSED */
515 int
516 raidopen(dev, flags, fmt, p)
517 dev_t dev;
518 int flags, fmt;
519 struct proc *p;
520 {
521 int unit = raidunit(dev);
522 struct raid_softc *rs;
523 struct disklabel *lp;
524 int part, pmask;
525 int error = 0;
526
527 if (unit >= numraid)
528 return (ENXIO);
529 rs = &raid_softc[unit];
530
531 if ((error = raidlock(rs)) != 0)
532 return (error);
533 lp = rs->sc_dkdev.dk_label;
534
535 part = DISKPART(dev);
536 pmask = (1 << part);
537
538 db1_printf(("Opening raid device number: %d partition: %d\n",
539 unit, part));
540
541
542 if ((rs->sc_flags & RAIDF_INITED) &&
543 (rs->sc_dkdev.dk_openmask == 0))
544 raidgetdisklabel(dev);
545
546 /* make sure that this partition exists */
547
548 if (part != RAW_PART) {
549 db1_printf(("Not a raw partition..\n"));
550 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
551 ((part >= lp->d_npartitions) ||
552 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
553 error = ENXIO;
554 raidunlock(rs);
555 db1_printf(("Bailing out...\n"));
556 return (error);
557 }
558 }
559 /* Prevent this unit from being unconfigured while open. */
560 switch (fmt) {
561 case S_IFCHR:
562 rs->sc_dkdev.dk_copenmask |= pmask;
563 break;
564
565 case S_IFBLK:
566 rs->sc_dkdev.dk_bopenmask |= pmask;
567 break;
568 }
569
570 if ((rs->sc_dkdev.dk_openmask == 0) &&
571 ((rs->sc_flags & RAIDF_INITED) != 0)) {
572 /* First one... mark things as dirty... Note that we *MUST*
573 have done a configure before this. I DO NOT WANT TO BE
574 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
575 THAT THEY BELONG TOGETHER!!!!! */
576 /* XXX should check to see if we're only open for reading
577 here... If so, we needn't do this, but then need some
578 other way of keeping track of what's happened.. */
579
580 rf_markalldirty( raidPtrs[unit] );
581 }
582
583
584 rs->sc_dkdev.dk_openmask =
585 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
586
587 raidunlock(rs);
588
589 return (error);
590
591
592 }
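/*
 * raidclose -- drop this partition from the open masks; on the last
 * close of a configured unit, write out the final (clean) component
 * labels.
 */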
593 /* ARGSUSED */
594 int
595 raidclose(dev, flags, fmt, p)
596 dev_t dev;
597 int flags, fmt;
598 struct proc *p;
599 {
600 int unit = raidunit(dev);
601 struct raid_softc *rs;
602 int error = 0;
603 int part;
604
605 if (unit >= numraid)
606 return (ENXIO);
607 rs = &raid_softc[unit];
608
609 if ((error = raidlock(rs)) != 0)
610 return (error);
611
612 part = DISKPART(dev);
613
614 /* ...that much closer to allowing unconfiguration... */
615 switch (fmt) {
616 case S_IFCHR:
617 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
618 break;
619
620 case S_IFBLK:
621 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
622 break;
623 }
624 rs->sc_dkdev.dk_openmask =
625 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
626
627 if ((rs->sc_dkdev.dk_openmask == 0) &&
628 ((rs->sc_flags & RAIDF_INITED) != 0)) {
629 /* Last one... the device is not being unconfigured yet,
630 so mark things as clean. (If RAIDF_INITED is not set,
631 device shutdown has already taken care of setting the
632 clean bits.) */
633 #if 0
634 printf("Last one on raid%d. Updating status.\n",unit);
635 #endif
636 rf_final_update_component_labels( raidPtrs[unit] );
637 }
638
639 raidunlock(rs);
640 return (0);
641
642 }
643
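/*
 * raidstrategy -- sanity- and bounds-check the incoming buffer, queue
 * it on the unit's buffer queue, and call raidstart() to push queued
 * requests into RAIDframe.
 */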
644 void
645 raidstrategy(bp)
646 struct buf *bp;
647 {
648 int s;
649
650 unsigned int raidID = raidunit(bp->b_dev);
651 RF_Raid_t *raidPtr;
652 struct raid_softc *rs = &raid_softc[raidID];
653 struct disklabel *lp;
654 int wlabel;
655
656 if ((rs->sc_flags & RAIDF_INITED) == 0) {
657 bp->b_error = ENXIO;
658 bp->b_flags |= B_ERROR;
659 bp->b_resid = bp->b_bcount;
660 biodone(bp);
661 return;
662 }
663 if (raidID >= numraid || !raidPtrs[raidID]) {
664 bp->b_error = ENODEV;
665 bp->b_flags |= B_ERROR;
666 bp->b_resid = bp->b_bcount;
667 biodone(bp);
668 return;
669 }
670 raidPtr = raidPtrs[raidID];
671 if (!raidPtr->valid) {
672 bp->b_error = ENODEV;
673 bp->b_flags |= B_ERROR;
674 bp->b_resid = bp->b_bcount;
675 biodone(bp);
676 return;
677 }
678 if (bp->b_bcount == 0) {
679 db1_printf(("b_bcount is zero..\n"));
680 biodone(bp);
681 return;
682 }
683 lp = rs->sc_dkdev.dk_label;
684
685 /*
686 * Do bounds checking and adjust transfer. If there's an
687 * error, the bounds check will flag that for us.
688 */
689
690 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
691 if (DISKPART(bp->b_dev) != RAW_PART)
692 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
693 db1_printf(("Bounds check failed!!:%d %d\n",
694 (int) bp->b_blkno, (int) wlabel));
695 biodone(bp);
696 return;
697 }
698 s = splbio();
699
700 bp->b_resid = 0;
701
702 /* stuff it onto our queue */
703 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
704
705 raidstart(raidPtrs[raidID]);
706
707 splx(s);
708 }
709 /* ARGSUSED */
710 int
711 raidread(dev, uio, flags)
712 dev_t dev;
713 struct uio *uio;
714 int flags;
715 {
716 int unit = raidunit(dev);
717 struct raid_softc *rs;
718 int part;
719
720 if (unit >= numraid)
721 return (ENXIO);
722 rs = &raid_softc[unit];
723
724 if ((rs->sc_flags & RAIDF_INITED) == 0)
725 return (ENXIO);
726 part = DISKPART(dev);
727
728 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
729
730 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
731
732 }
733 /* ARGSUSED */
734 int
735 raidwrite(dev, uio, flags)
736 dev_t dev;
737 struct uio *uio;
738 int flags;
739 {
740 int unit = raidunit(dev);
741 struct raid_softc *rs;
742
743 if (unit >= numraid)
744 return (ENXIO);
745 rs = &raid_softc[unit];
746
747 if ((rs->sc_flags & RAIDF_INITED) == 0)
748 return (ENXIO);
749 db1_printf(("raidwrite\n"));
750 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
751
752 }
753
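/*
 * raidioctl -- handle both the RAIDframe-specific ioctls (configure,
 * shutdown, component label manipulation, rebuilds, status queries)
 * and the standard disk ioctls (disklabel handling).
 */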
754 int
755 raidioctl(dev, cmd, data, flag, p)
756 dev_t dev;
757 u_long cmd;
758 caddr_t data;
759 int flag;
760 struct proc *p;
761 {
762 int unit = raidunit(dev);
763 int error = 0;
764 int part, pmask;
765 struct raid_softc *rs;
766 RF_Config_t *k_cfg, *u_cfg;
767 RF_Raid_t *raidPtr;
768 RF_RaidDisk_t *diskPtr;
769 RF_AccTotals_t *totals;
770 RF_DeviceConfig_t *d_cfg, **ucfgp;
771 u_char *specific_buf;
772 int retcode = 0;
773 int row;
774 int column;
775 struct rf_recon_req *rrcopy, *rr;
776 RF_ComponentLabel_t *clabel;
777 RF_ComponentLabel_t ci_label;
778 RF_ComponentLabel_t **clabel_ptr;
779 RF_SingleComponent_t *sparePtr,*componentPtr;
780 RF_SingleComponent_t hot_spare;
781 RF_SingleComponent_t component;
782 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
783 int i, j, d;
784
785 if (unit >= numraid)
786 return (ENXIO);
787 rs = &raid_softc[unit];
788 raidPtr = raidPtrs[unit];
789
790 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
791 (int) DISKPART(dev), (int) unit, (int) cmd));
792
793 /* Must be open for writes for these commands... */
794 switch (cmd) {
795 case DIOCSDINFO:
796 case DIOCWDINFO:
797 case DIOCWLABEL:
798 if ((flag & FWRITE) == 0)
799 return (EBADF);
800 }
801
802 /* Must be initialized for these... */
803 switch (cmd) {
804 case DIOCGDINFO:
805 case DIOCSDINFO:
806 case DIOCWDINFO:
807 case DIOCGPART:
808 case DIOCWLABEL:
809 case DIOCGDEFLABEL:
810 case RAIDFRAME_SHUTDOWN:
811 case RAIDFRAME_REWRITEPARITY:
812 case RAIDFRAME_GET_INFO:
813 case RAIDFRAME_RESET_ACCTOTALS:
814 case RAIDFRAME_GET_ACCTOTALS:
815 case RAIDFRAME_KEEP_ACCTOTALS:
816 case RAIDFRAME_GET_SIZE:
817 case RAIDFRAME_FAIL_DISK:
818 case RAIDFRAME_COPYBACK:
819 case RAIDFRAME_CHECK_RECON_STATUS:
820 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
821 case RAIDFRAME_GET_COMPONENT_LABEL:
822 case RAIDFRAME_SET_COMPONENT_LABEL:
823 case RAIDFRAME_ADD_HOT_SPARE:
824 case RAIDFRAME_REMOVE_HOT_SPARE:
825 case RAIDFRAME_INIT_LABELS:
826 case RAIDFRAME_REBUILD_IN_PLACE:
827 case RAIDFRAME_CHECK_PARITY:
828 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
829 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
830 case RAIDFRAME_CHECK_COPYBACK_STATUS:
831 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
832 case RAIDFRAME_SET_AUTOCONFIG:
833 case RAIDFRAME_SET_ROOT:
834 case RAIDFRAME_DELETE_COMPONENT:
835 case RAIDFRAME_INCORPORATE_HOT_SPARE:
836 if ((rs->sc_flags & RAIDF_INITED) == 0)
837 return (ENXIO);
838 }
839
840 switch (cmd) {
841
842 /* configure the system */
843 case RAIDFRAME_CONFIGURE:
844
845 if (raidPtr->valid) {
846 /* There is a valid RAID set running on this unit! */
847 printf("raid%d: Device already configured!\n",unit);
848 return(EINVAL);
849 }
850
851 /* copy-in the configuration information */
852 /* data points to a pointer to the configuration structure */
853
854 u_cfg = *((RF_Config_t **) data);
855 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
856 if (k_cfg == NULL) {
857 return (ENOMEM);
858 }
859 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
860 sizeof(RF_Config_t));
861 if (retcode) {
862 RF_Free(k_cfg, sizeof(RF_Config_t));
863 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
864 retcode));
865 return (retcode);
866 }
867 /* allocate a buffer for the layout-specific data, and copy it
868 * in */
869 if (k_cfg->layoutSpecificSize) {
870 if (k_cfg->layoutSpecificSize > 10000) {
871 /* sanity check */
872 RF_Free(k_cfg, sizeof(RF_Config_t));
873 return (EINVAL);
874 }
875 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
876 (u_char *));
877 if (specific_buf == NULL) {
878 RF_Free(k_cfg, sizeof(RF_Config_t));
879 return (ENOMEM);
880 }
881 retcode = copyin(k_cfg->layoutSpecific,
882 (caddr_t) specific_buf,
883 k_cfg->layoutSpecificSize);
884 if (retcode) {
885 RF_Free(k_cfg, sizeof(RF_Config_t));
886 RF_Free(specific_buf,
887 k_cfg->layoutSpecificSize);
888 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
889 retcode));
890 return (retcode);
891 }
892 } else
893 specific_buf = NULL;
894 k_cfg->layoutSpecific = specific_buf;
895
896 /* should do some kind of sanity check on the configuration.
897 * Store the sum of all the bytes in the last byte? */
898
899 /* configure the system */
900
901 /*
902 * Clear the entire RAID descriptor, just to make sure
903 * there is no stale data left in the case of a
904 * reconfiguration
905 */
906 bzero((char *) raidPtr, sizeof(RF_Raid_t));
907 raidPtr->raidid = unit;
908
909 retcode = rf_Configure(raidPtr, k_cfg, NULL);
910
911 if (retcode == 0) {
912
913 /* allow this many simultaneous IO's to
914 this RAID device */
915 raidPtr->openings = RAIDOUTSTANDING;
916
917 raidinit(raidPtr);
918 rf_markalldirty(raidPtr);
919 }
920 /* free the buffers. No return code here. */
921 if (k_cfg->layoutSpecificSize) {
922 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
923 }
924 RF_Free(k_cfg, sizeof(RF_Config_t));
925
926 return (retcode);
927
928 /* shutdown the system */
929 case RAIDFRAME_SHUTDOWN:
930
931 if ((error = raidlock(rs)) != 0)
932 return (error);
933
934 /*
935 * If somebody has a partition mounted, we shouldn't
936 * shutdown.
937 */
938
939 part = DISKPART(dev);
940 pmask = (1 << part);
941 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
942 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
943 (rs->sc_dkdev.dk_copenmask & pmask))) {
944 raidunlock(rs);
945 return (EBUSY);
946 }
947
948 retcode = rf_Shutdown(raidPtr);
949
950 pool_destroy(&rs->sc_cbufpool);
951
952 /* It's no longer initialized... */
953 rs->sc_flags &= ~RAIDF_INITED;
954
955 /* Detach the disk. */
956 disk_detach(&rs->sc_dkdev);
957
958 raidunlock(rs);
959
960 return (retcode);
961 case RAIDFRAME_GET_COMPONENT_LABEL:
962 clabel_ptr = (RF_ComponentLabel_t **) data;
963 /* need to read the component label for the disk indicated
964 by row,column in clabel */
965
966 /* For practice, let's get it directly from disk, rather
967 than from the in-core copy */
968 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
969 (RF_ComponentLabel_t *));
970 if (clabel == NULL)
971 return (ENOMEM);
972
973 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
974
975 retcode = copyin( *clabel_ptr, clabel,
976 sizeof(RF_ComponentLabel_t));
977
978 if (retcode) {
979 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
980 return(retcode);
981 }
982
983 row = clabel->row;
984 column = clabel->column;
985
986 if ((row < 0) || (row >= raidPtr->numRow) ||
987 (column < 0) || (column >= raidPtr->numCol +
988 raidPtr->numSpare)) {
989 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
990 return(EINVAL);
991 }
992
993 raidread_component_label(raidPtr->Disks[row][column].dev,
994 raidPtr->raid_cinfo[row][column].ci_vp,
995 clabel );
996
997 retcode = copyout((caddr_t) clabel,
998 (caddr_t) *clabel_ptr,
999 sizeof(RF_ComponentLabel_t));
1000 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1001 return (retcode);
1002
1003 case RAIDFRAME_SET_COMPONENT_LABEL:
1004 clabel = (RF_ComponentLabel_t *) data;
1005
1006 /* XXX check the label for valid stuff... */
1007 /* Note that some things *should not* get modified --
1008 the user should be re-initing the labels instead of
1009 trying to patch things.
1010 */
1011
1012 printf("Got component label:\n");
1013 printf("Version: %d\n",clabel->version);
1014 printf("Serial Number: %d\n",clabel->serial_number);
1015 printf("Mod counter: %d\n",clabel->mod_counter);
1016 printf("Row: %d\n", clabel->row);
1017 printf("Column: %d\n", clabel->column);
1018 printf("Num Rows: %d\n", clabel->num_rows);
1019 printf("Num Columns: %d\n", clabel->num_columns);
1020 printf("Clean: %d\n", clabel->clean);
1021 printf("Status: %d\n", clabel->status);
1022
1023 row = clabel->row;
1024 column = clabel->column;
1025
1026 if ((row < 0) || (row >= raidPtr->numRow) ||
1027 (column < 0) || (column >= raidPtr->numCol)) {
1028 return(EINVAL);
1029 }
1030
1031 /* XXX this isn't allowed to do anything for now :-) */
1032
1033 /* XXX and before it is, we need to fill in the rest
1034 of the fields!?!?!?! */
1035 #if 0
1036 raidwrite_component_label(
1037 raidPtr->Disks[row][column].dev,
1038 raidPtr->raid_cinfo[row][column].ci_vp,
1039 clabel );
1040 #endif
1041 return (0);
1042
1043 case RAIDFRAME_INIT_LABELS:
1044 clabel = (RF_ComponentLabel_t *) data;
1045 /*
1046 we only want the serial number from
1047 the above. We get all the rest of the information
1048 from the config that was used to create this RAID
1049 set.
1050 */
1051
1052 raidPtr->serial_number = clabel->serial_number;
1053
1054 raid_init_component_label(raidPtr, &ci_label);
1055 ci_label.serial_number = clabel->serial_number;
1056
1057 for(row=0;row<raidPtr->numRow;row++) {
1058 ci_label.row = row;
1059 for(column=0;column<raidPtr->numCol;column++) {
1060 diskPtr = &raidPtr->Disks[row][column];
1061 ci_label.partitionSize = diskPtr->partitionSize;
1062 ci_label.column = column;
1063 raidwrite_component_label(
1064 raidPtr->Disks[row][column].dev,
1065 raidPtr->raid_cinfo[row][column].ci_vp,
1066 &ci_label );
1067 }
1068 }
1069
1070 return (retcode);
1071 case RAIDFRAME_SET_AUTOCONFIG:
1072 d = rf_set_autoconfig(raidPtr, *(int *) data);
1073 printf("New autoconfig value is: %d\n", d);
1074 *(int *) data = d;
1075 return (retcode);
1076
1077 case RAIDFRAME_SET_ROOT:
1078 d = rf_set_rootpartition(raidPtr, *(int *) data);
1079 printf("New rootpartition value is: %d\n", d);
1080 *(int *) data = d;
1081 return (retcode);
1082
1083 /* initialize all parity */
1084 case RAIDFRAME_REWRITEPARITY:
1085
1086 if (raidPtr->Layout.map->faultsTolerated == 0) {
1087 /* Parity for RAID 0 is trivially correct */
1088 raidPtr->parity_good = RF_RAID_CLEAN;
1089 return(0);
1090 }
1091
1092 if (raidPtr->parity_rewrite_in_progress == 1) {
1093 /* Re-write is already in progress! */
1094 return(EINVAL);
1095 }
1096
1097 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1098 rf_RewriteParityThread,
1099 raidPtr,"raid_parity");
1100 return (retcode);
1101
1102
1103 case RAIDFRAME_ADD_HOT_SPARE:
1104 sparePtr = (RF_SingleComponent_t *) data;
1105 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1106 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1107 return(retcode);
1108
1109 case RAIDFRAME_REMOVE_HOT_SPARE:
1110 return(retcode);
1111
1112 case RAIDFRAME_DELETE_COMPONENT:
1113 componentPtr = (RF_SingleComponent_t *)data;
1114 memcpy( &component, componentPtr,
1115 sizeof(RF_SingleComponent_t));
1116 retcode = rf_delete_component(raidPtr, &component);
1117 return(retcode);
1118
1119 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1120 componentPtr = (RF_SingleComponent_t *)data;
1121 memcpy( &component, componentPtr,
1122 sizeof(RF_SingleComponent_t));
1123 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1124 return(retcode);
1125
1126 case RAIDFRAME_REBUILD_IN_PLACE:
1127
1128 if (raidPtr->Layout.map->faultsTolerated == 0) {
1129 /* Can't do this on a RAID 0!! */
1130 return(EINVAL);
1131 }
1132
1133 if (raidPtr->recon_in_progress == 1) {
1134 /* a reconstruct is already in progress! */
1135 return(EINVAL);
1136 }
1137
1138 componentPtr = (RF_SingleComponent_t *) data;
1139 memcpy( &component, componentPtr,
1140 sizeof(RF_SingleComponent_t));
1141 row = component.row;
1142 column = component.column;
1143 printf("Rebuild: %d %d\n",row, column);
1144 if ((row < 0) || (row >= raidPtr->numRow) ||
1145 (column < 0) || (column >= raidPtr->numCol)) {
1146 return(EINVAL);
1147 }
1148
1149 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1150 if (rrcopy == NULL)
1151 return(ENOMEM);
1152
1153 rrcopy->raidPtr = (void *) raidPtr;
1154 rrcopy->row = row;
1155 rrcopy->col = column;
1156
1157 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1158 rf_ReconstructInPlaceThread,
1159 rrcopy,"raid_reconip");
1160 return(retcode);
1161
1162 case RAIDFRAME_GET_INFO:
1163 if (!raidPtr->valid)
1164 return (ENODEV);
1165 ucfgp = (RF_DeviceConfig_t **) data;
1166 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1167 (RF_DeviceConfig_t *));
1168 if (d_cfg == NULL)
1169 return (ENOMEM);
1170 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1171 d_cfg->rows = raidPtr->numRow;
1172 d_cfg->cols = raidPtr->numCol;
1173 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1174 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1175 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1176 return (ENOMEM);
1177 }
1178 d_cfg->nspares = raidPtr->numSpare;
1179 if (d_cfg->nspares >= RF_MAX_DISKS) {
1180 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1181 return (ENOMEM);
1182 }
1183 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1184 d = 0;
1185 for (i = 0; i < d_cfg->rows; i++) {
1186 for (j = 0; j < d_cfg->cols; j++) {
1187 d_cfg->devs[d] = raidPtr->Disks[i][j];
1188 d++;
1189 }
1190 }
1191 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1192 d_cfg->spares[i] = raidPtr->Disks[0][j];
1193 }
1194 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1195 sizeof(RF_DeviceConfig_t));
1196 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1197
1198 return (retcode);
1199
1200 case RAIDFRAME_CHECK_PARITY:
1201 *(int *) data = raidPtr->parity_good;
1202 return (0);
1203
1204 case RAIDFRAME_RESET_ACCTOTALS:
1205 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1206 return (0);
1207
1208 case RAIDFRAME_GET_ACCTOTALS:
1209 totals = (RF_AccTotals_t *) data;
1210 *totals = raidPtr->acc_totals;
1211 return (0);
1212
1213 case RAIDFRAME_KEEP_ACCTOTALS:
1214 raidPtr->keep_acc_totals = *(int *)data;
1215 return (0);
1216
1217 case RAIDFRAME_GET_SIZE:
1218 *(int *) data = raidPtr->totalSectors;
1219 return (0);
1220
1221 /* fail a disk & optionally start reconstruction */
1222 case RAIDFRAME_FAIL_DISK:
1223
1224 if (raidPtr->Layout.map->faultsTolerated == 0) {
1225 /* Can't do this on a RAID 0!! */
1226 return(EINVAL);
1227 }
1228
1229 rr = (struct rf_recon_req *) data;
1230
1231 if (rr->row < 0 || rr->row >= raidPtr->numRow
1232 || rr->col < 0 || rr->col >= raidPtr->numCol)
1233 return (EINVAL);
1234
1235 printf("raid%d: Failing the disk: row: %d col: %d\n",
1236 unit, rr->row, rr->col);
1237
1238 /* make a copy of the recon request so that we don't rely on
1239 * the user's buffer */
1240 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1241 if (rrcopy == NULL)
1242 return(ENOMEM);
1243 bcopy(rr, rrcopy, sizeof(*rr));
1244 rrcopy->raidPtr = (void *) raidPtr;
1245
1246 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1247 rf_ReconThread,
1248 rrcopy,"raid_recon");
1249 return (0);
1250
1251 /* invoke a copyback operation after recon on whatever disk
1252 * needs it, if any */
1253 case RAIDFRAME_COPYBACK:
1254
1255 if (raidPtr->Layout.map->faultsTolerated == 0) {
1256 /* This makes no sense on a RAID 0!! */
1257 return(EINVAL);
1258 }
1259
1260 if (raidPtr->copyback_in_progress == 1) {
1261 /* Copyback is already in progress! */
1262 return(EINVAL);
1263 }
1264
1265 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1266 rf_CopybackThread,
1267 raidPtr,"raid_copyback");
1268 return (retcode);
1269
1270 /* return the percentage completion of reconstruction */
1271 case RAIDFRAME_CHECK_RECON_STATUS:
1272 if (raidPtr->Layout.map->faultsTolerated == 0) {
1273 /* This makes no sense on a RAID 0, so tell the
1274 user it's done. */
1275 *(int *) data = 100;
1276 return(0);
1277 }
1278 row = 0; /* XXX we only consider a single row... */
1279 if (raidPtr->status[row] != rf_rs_reconstructing)
1280 *(int *) data = 100;
1281 else
1282 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1283 return (0);
1284 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1285 progressInfoPtr = (RF_ProgressInfo_t **) data;
1286 row = 0; /* XXX we only consider a single row... */
1287 if (raidPtr->status[row] != rf_rs_reconstructing) {
1288 progressInfo.remaining = 0;
1289 progressInfo.completed = 100;
1290 progressInfo.total = 100;
1291 } else {
1292 progressInfo.total =
1293 raidPtr->reconControl[row]->numRUsTotal;
1294 progressInfo.completed =
1295 raidPtr->reconControl[row]->numRUsComplete;
1296 progressInfo.remaining = progressInfo.total -
1297 progressInfo.completed;
1298 }
1299 retcode = copyout((caddr_t) &progressInfo,
1300 (caddr_t) *progressInfoPtr,
1301 sizeof(RF_ProgressInfo_t));
1302 return (retcode);
1303
1304 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1305 if (raidPtr->Layout.map->faultsTolerated == 0) {
1306 /* This makes no sense on a RAID 0, so tell the
1307 user it's done. */
1308 *(int *) data = 100;
1309 return(0);
1310 }
1311 if (raidPtr->parity_rewrite_in_progress == 1) {
1312 *(int *) data = 100 *
1313 raidPtr->parity_rewrite_stripes_done /
1314 raidPtr->Layout.numStripe;
1315 } else {
1316 *(int *) data = 100;
1317 }
1318 return (0);
1319
1320 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1321 progressInfoPtr = (RF_ProgressInfo_t **) data;
1322 if (raidPtr->parity_rewrite_in_progress == 1) {
1323 progressInfo.total = raidPtr->Layout.numStripe;
1324 progressInfo.completed =
1325 raidPtr->parity_rewrite_stripes_done;
1326 progressInfo.remaining = progressInfo.total -
1327 progressInfo.completed;
1328 } else {
1329 progressInfo.remaining = 0;
1330 progressInfo.completed = 100;
1331 progressInfo.total = 100;
1332 }
1333 retcode = copyout((caddr_t) &progressInfo,
1334 (caddr_t) *progressInfoPtr,
1335 sizeof(RF_ProgressInfo_t));
1336 return (retcode);
1337
1338 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1339 if (raidPtr->Layout.map->faultsTolerated == 0) {
1340 /* This makes no sense on a RAID 0 */
1341 *(int *) data = 100;
1342 return(0);
1343 }
1344 if (raidPtr->copyback_in_progress == 1) {
1345 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1346 raidPtr->Layout.numStripe;
1347 } else {
1348 *(int *) data = 100;
1349 }
1350 return (0);
1351
1352 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
progressInfoPtr = (RF_ProgressInfo_t **) data;
1353 if (raidPtr->copyback_in_progress == 1) {
1354 progressInfo.total = raidPtr->Layout.numStripe;
1355 progressInfo.completed =
1356 raidPtr->copyback_stripes_done;
1357 progressInfo.remaining = progressInfo.total -
1358 progressInfo.completed;
1359 } else {
1360 progressInfo.remaining = 0;
1361 progressInfo.completed = 100;
1362 progressInfo.total = 100;
1363 }
1364 retcode = copyout((caddr_t) &progressInfo,
1365 (caddr_t) *progressInfoPtr,
1366 sizeof(RF_ProgressInfo_t));
1367 return (retcode);
1368
1369 /* the sparetable daemon calls this to wait for the kernel to
1370 * need a spare table. this ioctl does not return until a
1371 * spare table is needed. XXX -- calling mpsleep here in the
1372 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1373 * -- I should either compute the spare table in the kernel,
1374 * or have a different -- XXX XXX -- interface (a different
1375 * character device) for delivering the table -- XXX */
1376 #if 0
1377 case RAIDFRAME_SPARET_WAIT:
1378 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1379 while (!rf_sparet_wait_queue)
1380 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1381 waitreq = rf_sparet_wait_queue;
1382 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1383 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1384
1385 /* structure assignment */
1386 *((RF_SparetWait_t *) data) = *waitreq;
1387
1388 RF_Free(waitreq, sizeof(*waitreq));
1389 return (0);
1390
1391 /* wakes up a process waiting on SPARET_WAIT and puts an error
1392 * code in it that will cause the daemon to exit */
1393 case RAIDFRAME_ABORT_SPARET_WAIT:
1394 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1395 waitreq->fcol = -1;
1396 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1397 waitreq->next = rf_sparet_wait_queue;
1398 rf_sparet_wait_queue = waitreq;
1399 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1400 wakeup(&rf_sparet_wait_queue);
1401 return (0);
1402
1403 /* used by the spare table daemon to deliver a spare table
1404 * into the kernel */
1405 case RAIDFRAME_SEND_SPARET:
1406
1407 /* install the spare table */
1408 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1409
1410 /* respond to the requestor. the return status of the spare
1411 * table installation is passed in the "fcol" field */
1412 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1413 waitreq->fcol = retcode;
1414 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1415 waitreq->next = rf_sparet_resp_queue;
1416 rf_sparet_resp_queue = waitreq;
1417 wakeup(&rf_sparet_resp_queue);
1418 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1419
1420 return (retcode);
1421 #endif
1422
1423 default:
1424 break; /* fall through to the os-specific code below */
1425
1426 }
1427
1428 if (!raidPtr->valid)
1429 return (EINVAL);
1430
1431 /*
1432 * Add support for "regular" device ioctls here.
1433 */
1434
1435 switch (cmd) {
1436 case DIOCGDINFO:
1437 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1438 break;
1439
1440 case DIOCGPART:
1441 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1442 ((struct partinfo *) data)->part =
1443 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1444 break;
1445
1446 case DIOCWDINFO:
1447 case DIOCSDINFO:
1448 if ((error = raidlock(rs)) != 0)
1449 return (error);
1450
1451 rs->sc_flags |= RAIDF_LABELLING;
1452
1453 error = setdisklabel(rs->sc_dkdev.dk_label,
1454 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1455 if (error == 0) {
1456 if (cmd == DIOCWDINFO)
1457 error = writedisklabel(RAIDLABELDEV(dev),
1458 raidstrategy, rs->sc_dkdev.dk_label,
1459 rs->sc_dkdev.dk_cpulabel);
1460 }
1461 rs->sc_flags &= ~RAIDF_LABELLING;
1462
1463 raidunlock(rs);
1464
1465 if (error)
1466 return (error);
1467 break;
1468
1469 case DIOCWLABEL:
1470 if (*(int *) data != 0)
1471 rs->sc_flags |= RAIDF_WLABEL;
1472 else
1473 rs->sc_flags &= ~RAIDF_WLABEL;
1474 break;
1475
1476 case DIOCGDEFLABEL:
1477 raidgetdefaultlabel(raidPtr, rs,
1478 (struct disklabel *) data);
1479 break;
1480
1481 default:
1482 retcode = ENOTTY;
1483 }
1484 return (retcode);
1485
1486 }
1487
1488
1489 /* raidinit -- complete the rest of the initialization for the
1490 RAIDframe device. */
1491
1492
1493 static void
1494 raidinit(raidPtr)
1495 RF_Raid_t *raidPtr;
1496 {
1497 struct raid_softc *rs;
1498 int unit;
1499
1500 unit = raidPtr->raidid;
1501
1502 rs = &raid_softc[unit];
1503 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1504 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1505
1506
1507 /* XXX should check return code first... */
1508 rs->sc_flags |= RAIDF_INITED;
1509
1510 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1511
1512 rs->sc_dkdev.dk_name = rs->sc_xname;
1513
1514 /* disk_attach actually creates space for the CPU disklabel, among
1515 * other things, so it's critical to call this *BEFORE* we try putzing
1516 * with disklabels. */
1517
1518 disk_attach(&rs->sc_dkdev);
1519
1520 /* XXX There may be a weird interaction here between this, and
1521 * protectedSectors, as used in RAIDframe. */
1522
1523 rs->sc_size = raidPtr->totalSectors;
1524
1525 }
1526
1527 /* wake up the daemon & tell it to get us a spare table
1528 * XXX
1529 * the entries in the queues should be tagged with the raidPtr
1530 * so that in the extremely rare case that two recons happen at once,
1531 * we know for which device we're requesting a spare table
1532 * XXX
1533 *
1534 * XXX This code is not currently used. GO
1535 */
1536 int
1537 rf_GetSpareTableFromDaemon(req)
1538 RF_SparetWait_t *req;
1539 {
1540 int retcode;
1541
1542 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1543 req->next = rf_sparet_wait_queue;
1544 rf_sparet_wait_queue = req;
1545 wakeup(&rf_sparet_wait_queue);
1546
1547 /* mpsleep unlocks the mutex */
1548 while (!rf_sparet_resp_queue) {
1549 tsleep(&rf_sparet_resp_queue, PRIBIO,
1550 "raidframe getsparetable", 0);
1551 }
1552 req = rf_sparet_resp_queue;
1553 rf_sparet_resp_queue = req->next;
1554 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1555
1556 retcode = req->fcol;
1557 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1558 * alloc'd */
1559 return (retcode);
1560 }
1561
1562 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1563 * bp & passes it down.
1564 * any calls originating in the kernel must use non-blocking I/O;
1565 * do some extra sanity checking to return "appropriate" error values for
1566 * certain conditions (to make some standard utilities work)
1567 *
1568 * Formerly known as: rf_DoAccessKernel
1569 */
1570 void
1571 raidstart(raidPtr)
1572 RF_Raid_t *raidPtr;
1573 {
1574 RF_SectorCount_t num_blocks, pb, sum;
1575 RF_RaidAddr_t raid_addr;
1576 int retcode;
1577 struct partition *pp;
1578 daddr_t blocknum;
1579 int unit;
1580 struct raid_softc *rs;
1581 int do_async;
1582 struct buf *bp;
1583
1584 unit = raidPtr->raidid;
1585 rs = &raid_softc[unit];
1586
1587 /* quick check to see if anything has died recently */
1588 RF_LOCK_MUTEX(raidPtr->mutex);
1589 if (raidPtr->numNewFailures > 0) {
1590 rf_update_component_labels(raidPtr);
1591 raidPtr->numNewFailures--;
1592 }
1593 RF_UNLOCK_MUTEX(raidPtr->mutex);
1594
1595 /* Check to see if we're at the limit... */
1596 RF_LOCK_MUTEX(raidPtr->mutex);
1597 while (raidPtr->openings > 0) {
1598 RF_UNLOCK_MUTEX(raidPtr->mutex);
1599
1600 /* get the next item, if any, from the queue */
1601 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1602 /* nothing more to do */
1603 return;
1604 }
1605 BUFQ_REMOVE(&rs->buf_queue, bp);
1606
1607 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1608 * partition.. Need to make it absolute to the underlying
1609 * device.. */
1610
1611 blocknum = bp->b_blkno;
1612 if (DISKPART(bp->b_dev) != RAW_PART) {
1613 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1614 blocknum += pp->p_offset;
1615 }
1616
1617 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1618 (int) blocknum));
1619
1620 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1621 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1622
1623 /* *THIS* is where we adjust what block we're going to...
1624 * but DO NOT TOUCH bp->b_blkno!!! */
1625 raid_addr = blocknum;
1626
1627 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1628 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1629 sum = raid_addr + num_blocks + pb;
1630 if (1 || rf_debugKernelAccess) {
1631 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1632 (int) raid_addr, (int) sum, (int) num_blocks,
1633 (int) pb, (int) bp->b_resid));
1634 }
1635 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1636 || (sum < num_blocks) || (sum < pb)) {
1637 bp->b_error = ENOSPC;
1638 bp->b_flags |= B_ERROR;
1639 bp->b_resid = bp->b_bcount;
1640 biodone(bp);
1641 RF_LOCK_MUTEX(raidPtr->mutex);
1642 continue;
1643 }
1644 /*
1645 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1646 */
1647
1648 if (bp->b_bcount & raidPtr->sectorMask) {
1649 bp->b_error = EINVAL;
1650 bp->b_flags |= B_ERROR;
1651 bp->b_resid = bp->b_bcount;
1652 biodone(bp);
1653 RF_LOCK_MUTEX(raidPtr->mutex);
1654 continue;
1655
1656 }
1657 db1_printf(("Calling DoAccess..\n"));
1658
1659
1660 RF_LOCK_MUTEX(raidPtr->mutex);
1661 raidPtr->openings--;
1662 RF_UNLOCK_MUTEX(raidPtr->mutex);
1663
1664 /*
1665 * Everything is async.
1666 */
1667 do_async = 1;
1668
1669 /* don't ever condition on bp->b_flags & B_WRITE.
1670 * always condition on B_READ instead */
1671
1672 /* XXX we're still at splbio() here... do we *really*
1673 need to be? */
1674
1675
1676 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1677 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1678 do_async, raid_addr, num_blocks,
1679 bp->b_data, bp, NULL, NULL,
1680 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1681
1682
1683 RF_LOCK_MUTEX(raidPtr->mutex);
1684 }
1685 RF_UNLOCK_MUTEX(raidPtr->mutex);
1686 }
1687
1688
1689
1690
1691 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1692
1693 int
1694 rf_DispatchKernelIO(queue, req)
1695 RF_DiskQueue_t *queue;
1696 RF_DiskQueueData_t *req;
1697 {
1698 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1699 struct buf *bp;
1700 struct raidbuf *raidbp = NULL;
1701 struct raid_softc *rs;
1702 int unit;
1703 int s;
1704
1705 s=0;
1706 /* s = splbio();*/ /* want to test this */
1707 /* XXX along with the vnode, we also need the softc associated with
1708 * this device.. */
1709
1710 req->queue = queue;
1711
1712 unit = queue->raidPtr->raidid;
1713
1714 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1715
1716 if (unit >= numraid) {
1717 printf("Invalid unit number: %d %d\n", unit, numraid);
1718 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1719 }
1720 rs = &raid_softc[unit];
1721
1722 /* XXX is this the right place? */
1723 disk_busy(&rs->sc_dkdev);
1724
1725 bp = req->bp;
1726 #if 1
1727 /* XXX when there is a physical disk failure, someone is passing us a
1728 * buffer that contains old stuff!! Attempt to deal with this problem
1729 * without taking a performance hit... (not sure where the real bug
1730 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1731
1732 if (bp->b_flags & B_ERROR) {
1733 bp->b_flags &= ~B_ERROR;
1734 }
1735 if (bp->b_error != 0) {
1736 bp->b_error = 0;
1737 }
1738 #endif
1739 raidbp = RAIDGETBUF(rs);
1740
1741 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1742
1743 /*
1744 * context for raidiodone
1745 */
1746 raidbp->rf_obp = bp;
1747 raidbp->req = req;
1748
1749 LIST_INIT(&raidbp->rf_buf.b_dep);
1750
1751 switch (req->type) {
1752 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1753 /* XXX need to do something extra here.. */
1754 /* I'm leaving this in, as I've never actually seen it used,
1755 * and I'd like folks to report it... GO */
1756 printf("WAKEUP CALLED\n");
1757 queue->numOutstanding++;
1758
1759 /* XXX need to glue the original buffer into this?? */
1760
1761 KernelWakeupFunc(&raidbp->rf_buf);
1762 break;
1763
1764 case RF_IO_TYPE_READ:
1765 case RF_IO_TYPE_WRITE:
1766
1767 if (req->tracerec) {
1768 RF_ETIMER_START(req->tracerec->timer);
1769 }
1770 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1771 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1772 req->sectorOffset, req->numSector,
1773 req->buf, KernelWakeupFunc, (void *) req,
1774 queue->raidPtr->logBytesPerSector, req->b_proc);
1775
1776 if (rf_debugKernelAccess) {
1777 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1778 (long) bp->b_blkno));
1779 }
1780 queue->numOutstanding++;
1781 queue->last_deq_sector = req->sectorOffset;
1782 /* acc wouldn't have been let in if there were any pending
1783 * reqs at any other priority */
1784 queue->curPriority = req->priority;
1785
1786 db1_printf(("Going for %c to unit %d row %d col %d\n",
1787 req->type, unit, queue->row, queue->col));
1788 db1_printf(("sector %d count %d (%d bytes) %d\n",
1789 (int) req->sectorOffset, (int) req->numSector,
1790 (int) (req->numSector <<
1791 queue->raidPtr->logBytesPerSector),
1792 (int) queue->raidPtr->logBytesPerSector));
1793 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1794 raidbp->rf_buf.b_vp->v_numoutput++;
1795 }
1796 VOP_STRATEGY(&raidbp->rf_buf);
1797
1798 break;
1799
1800 default:
1801 panic("bad req->type in rf_DispatchKernelIO");
1802 }
1803 db1_printf(("Exiting from DispatchKernelIO\n"));
1804 /* splx(s); */ /* want to test this */
1805 return (0);
1806 }
1807 /* this is the callback function associated with an I/O invoked from
1808 kernel code.
1809 */
1810 static void
1811 KernelWakeupFunc(vbp)
1812 struct buf *vbp;
1813 {
1814 RF_DiskQueueData_t *req = NULL;
1815 RF_DiskQueue_t *queue;
1816 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1817 struct buf *bp;
1818 struct raid_softc *rs;
1819 int unit;
1820 int s;
1821
1822 s = splbio();
1823 db1_printf(("recovering the request queue:\n"));
1824 req = raidbp->req;
1825
1826 bp = raidbp->rf_obp;
1827
1828 queue = (RF_DiskQueue_t *) req->queue;
1829
1830 if (raidbp->rf_buf.b_flags & B_ERROR) {
1831 bp->b_flags |= B_ERROR;
1832 bp->b_error = raidbp->rf_buf.b_error ?
1833 raidbp->rf_buf.b_error : EIO;
1834 }
1835
1836 /* XXX methinks this could be wrong... */
1837 #if 1
1838 bp->b_resid = raidbp->rf_buf.b_resid;
1839 #endif
1840
1841 if (req->tracerec) {
1842 RF_ETIMER_STOP(req->tracerec->timer);
1843 RF_ETIMER_EVAL(req->tracerec->timer);
1844 RF_LOCK_MUTEX(rf_tracing_mutex);
1845 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1846 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1847 req->tracerec->num_phys_ios++;
1848 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1849 }
1850 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1851
1852 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1853
1854
1855 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1856 * ballistic, and mark the component as hosed... */
1857
1858 if (bp->b_flags & B_ERROR) {
1859 /* Mark the disk as dead */
1860 /* but only mark it once... */
1861 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1862 rf_ds_optimal) {
1863 printf("raid%d: IO Error. Marking %s as failed.\n",
1864 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1865 queue->raidPtr->Disks[queue->row][queue->col].status =
1866 rf_ds_failed;
1867 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1868 queue->raidPtr->numFailures++;
1869 queue->raidPtr->numNewFailures++;
1870 /* XXX here we should bump the version number for each component, and write that data out */
1871 } else { /* Disk is already dead... */
1872 /* printf("Disk already marked as dead!\n"); */
1873 }
1874
1875 }
1876
1877 rs = &raid_softc[unit];
1878 RAIDPUTBUF(rs, raidbp);
1879
1880
1881 if (bp->b_resid == 0) {
1882 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1883 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1884 }
1885
1886 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1887 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1888
1889 splx(s);
1890 }
1891
1892
1893
1894 /*
1895 * initialize a buf structure for doing an I/O in the kernel.
1896 */
1897 static void
1898 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1899 logBytesPerSector, b_proc)
1900 struct buf *bp;
1901 struct vnode *b_vp;
1902 unsigned rw_flag;
1903 dev_t dev;
1904 RF_SectorNum_t startSect;
1905 RF_SectorCount_t numSect;
1906 caddr_t buf;
1907 void (*cbFunc) (struct buf *);
1908 void *cbArg;
1909 int logBytesPerSector;
1910 struct proc *b_proc;
1911 {
1912 /* bp->b_flags = B_PHYS | rw_flag; */
1913 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1914 bp->b_bcount = numSect << logBytesPerSector;
1915 bp->b_bufsize = bp->b_bcount;
1916 bp->b_error = 0;
1917 bp->b_dev = dev;
1918 bp->b_data = buf;
1919 bp->b_blkno = startSect;
1920 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1921 if (bp->b_bcount == 0) {
1922 panic("bp->b_bcount is zero in InitBP!!\n");
1923 }
1924 bp->b_proc = b_proc;
1925 bp->b_iodone = cbFunc;
1926 bp->b_vp = b_vp;
1927
1928 }
1929
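/*
 * Synthesize a default disklabel for the RAID device from the RAIDframe
 * parameters: the sector size and total size come from the array, the
 * fabricated geometry uses one data stripe per track and one track per
 * cylinder, and only the raw partition is filled in.
 */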
1930 static void
1931 raidgetdefaultlabel(raidPtr, rs, lp)
1932 RF_Raid_t *raidPtr;
1933 struct raid_softc *rs;
1934 struct disklabel *lp;
1935 {
1936 db1_printf(("Building a default label...\n"));
1937 bzero(lp, sizeof(*lp));
1938
1939 /* fabricate a label... */
1940 lp->d_secperunit = raidPtr->totalSectors;
1941 lp->d_secsize = raidPtr->bytesPerSector;
1942 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1943 lp->d_ntracks = 1;
1944 lp->d_ncylinders = raidPtr->totalSectors /
1945 (lp->d_nsectors * lp->d_ntracks);
1946 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1947
1948 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1949 lp->d_type = DTYPE_RAID;
1950 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1951 lp->d_rpm = 3600;
1952 lp->d_interleave = 1;
1953 lp->d_flags = 0;
1954
1955 lp->d_partitions[RAW_PART].p_offset = 0;
1956 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1957 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1958 lp->d_npartitions = RAW_PART + 1;
1959
1960 lp->d_magic = DISKMAGIC;
1961 lp->d_magic2 = DISKMAGIC;
1962 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1963
1964 }
1965 /*
1966 * Read the disklabel from the raid device. If one is not present, fake one
1967 * up.
1968 */
1969 static void
1970 raidgetdisklabel(dev)
1971 dev_t dev;
1972 {
1973 int unit = raidunit(dev);
1974 struct raid_softc *rs = &raid_softc[unit];
1975 char *errstring;
1976 struct disklabel *lp = rs->sc_dkdev.dk_label;
1977 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1978 RF_Raid_t *raidPtr;
1979
1980 db1_printf(("Getting the disklabel...\n"));
1981
1982 bzero(clp, sizeof(*clp));
1983
1984 raidPtr = raidPtrs[unit];
1985
1986 raidgetdefaultlabel(raidPtr, rs, lp);
1987
1988 /*
1989 * Call the generic disklabel extraction routine.
1990 */
1991 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1992 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1993 if (errstring)
1994 raidmakedisklabel(rs);
1995 else {
1996 int i;
1997 struct partition *pp;
1998
1999 /*
2000 * Sanity check whether the found disklabel is valid.
2001 *
2002 * This is necessary since the total size of the raid device
2003 * may vary when the interleave is changed even though exactly
2004 * the same components are used, and an old disklabel may be used
2005 * if one is found.
2006 */
2007 if (lp->d_secperunit != rs->sc_size)
2008 printf("WARNING: %s: "
2009 "total sector size in disklabel (%d) != "
2010 "the size of raid (%ld)\n", rs->sc_xname,
2011 lp->d_secperunit, (long) rs->sc_size);
2012 for (i = 0; i < lp->d_npartitions; i++) {
2013 pp = &lp->d_partitions[i];
2014 if (pp->p_offset + pp->p_size > rs->sc_size)
2015 printf("WARNING: %s: end of partition `%c' "
2016 "exceeds the size of raid (%ld)\n",
2017 rs->sc_xname, 'a' + i, (long) rs->sc_size);
2018 }
2019 }
2020
2021 }
2022 /*
2023 * Take care of things one might want to take care of in the event
2024 * that a disklabel isn't present.
2025 */
2026 static void
2027 raidmakedisklabel(rs)
2028 struct raid_softc *rs;
2029 {
2030 struct disklabel *lp = rs->sc_dkdev.dk_label;
2031 db1_printf(("Making a label..\n"));
2032
2033 /*
2034 * For historical reasons, if there's no disklabel present
2035 * the raw partition must be marked FS_BSDFFS.
2036 */
2037
2038 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2039
2040 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2041
2042 lp->d_checksum = dkcksum(lp);
2043 }
2044 /*
2045 * Lookup the provided name in the filesystem. If the file exists,
2046 * is a valid block device, and isn't being used by anyone else,
2047 * set *vpp to the file's vnode.
2048 * You'll find the original of this in ccd.c
2049 */
2050 int
2051 raidlookup(path, p, vpp)
2052 char *path;
2053 struct proc *p;
2054 struct vnode **vpp; /* result */
2055 {
2056 struct nameidata nd;
2057 struct vnode *vp;
2058 struct vattr va;
2059 int error;
2060
2061 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2062 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2063 #ifdef DEBUG
2064 printf("RAIDframe: vn_open returned %d\n", error);
2065 #endif
2066 return (error);
2067 }
2068 vp = nd.ni_vp;
2069 if (vp->v_usecount > 1) {
2070 VOP_UNLOCK(vp, 0);
2071 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2072 return (EBUSY);
2073 }
2074 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2075 VOP_UNLOCK(vp, 0);
2076 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2077 return (error);
2078 }
2079 /* XXX: eventually we should handle VREG, too. */
2080 if (va.va_type != VBLK) {
2081 VOP_UNLOCK(vp, 0);
2082 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2083 return (ENOTBLK);
2084 }
2085 VOP_UNLOCK(vp, 0);
2086 *vpp = vp;
2087 return (0);
2088 }
2089 /*
2090 * Wait interruptibly for an exclusive lock.
2091 *
2092 * XXX
2093 * Several drivers do this; it should be abstracted and made MP-safe.
2094 * (Hmm... where have we seen this warning before :-> GO )
2095 */
2096 static int
2097 raidlock(rs)
2098 struct raid_softc *rs;
2099 {
2100 int error;
2101
2102 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2103 rs->sc_flags |= RAIDF_WANTED;
2104 if ((error =
2105 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2106 return (error);
2107 }
2108 rs->sc_flags |= RAIDF_LOCKED;
2109 return (0);
2110 }
2111 /*
2112 * Unlock and wake up any waiters.
2113 */
2114 static void
2115 raidunlock(rs)
2116 struct raid_softc *rs;
2117 {
2118
2119 rs->sc_flags &= ~RAIDF_LOCKED;
2120 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2121 rs->sc_flags &= ~RAIDF_WANTED;
2122 wakeup(rs);
2123 }
2124 }
2125
2126
2127 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2128 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2129
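/*
 * raidmarkclean() and raidmarkdirty() rewrite a single component label
 * in place, updating only the mod_counter and the clean/dirty flag.
 */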
2130 int
2131 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2132 {
2133 RF_ComponentLabel_t clabel;
2134 raidread_component_label(dev, b_vp, &clabel);
2135 clabel.mod_counter = mod_counter;
2136 clabel.clean = RF_RAID_CLEAN;
2137 raidwrite_component_label(dev, b_vp, &clabel);
2138 return(0);
2139 }
2140
2141
2142 int
2143 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2144 {
2145 RF_ComponentLabel_t clabel;
2146 raidread_component_label(dev, b_vp, &clabel);
2147 clabel.mod_counter = mod_counter;
2148 clabel.clean = RF_RAID_DIRTY;
2149 raidwrite_component_label(dev, b_vp, &clabel);
2150 return(0);
2151 }
2152
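/*
 * Read the component label of the given component into *clabel.
 * The label is RF_COMPONENT_INFO_SIZE bytes starting at
 * RF_COMPONENT_INFO_OFFSET, read into a throw-away buffer via the
 * underlying device's strategy routine.
 */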
2153 /* ARGSUSED */
2154 int
2155 raidread_component_label(dev, b_vp, clabel)
2156 dev_t dev;
2157 struct vnode *b_vp;
2158 RF_ComponentLabel_t *clabel;
2159 {
2160 struct buf *bp;
2161 int error;
2162
2163 /* XXX should probably ensure that we don't try to do this if
2164 someone has changed rf_protected_sectors. */
2165
2166 /* get a block of the appropriate size... */
2167 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2168 bp->b_dev = dev;
2169
2170 /* get our ducks in a row for the read */
2171 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2172 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2173 bp->b_flags = B_BUSY | B_READ;
2174 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2175
2176 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2177
2178 error = biowait(bp);
2179
2180 if (!error) {
2181 memcpy(clabel, bp->b_data,
2182 sizeof(RF_ComponentLabel_t));
2183 #if 0
2184 rf_print_component_label( clabel );
2185 #endif
2186 } else {
2187 #if 0
2188 printf("Failed to read RAID component label!\n");
2189 #endif
2190 }
2191
2192 bp->b_flags = B_INVAL | B_AGE;
2193 brelse(bp);
2194 return(error);
2195 }
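/*
 * Write *clabel out as the component label of the given component,
 * zero-padding the rest of the RF_COMPONENT_INFO_SIZE byte area.
 */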
2196 /* ARGSUSED */
2197 int
2198 raidwrite_component_label(dev, b_vp, clabel)
2199 dev_t dev;
2200 struct vnode *b_vp;
2201 RF_ComponentLabel_t *clabel;
2202 {
2203 struct buf *bp;
2204 int error;
2205
2206 /* get a block of the appropriate size... */
2207 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2208 bp->b_dev = dev;
2209
2210 /* get our ducks in a row for the write */
2211 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2212 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2213 bp->b_flags = B_BUSY | B_WRITE;
2214 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2215
2216 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2217
2218 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2219
2220 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2221 error = biowait(bp);
2222 bp->b_flags = B_INVAL | B_AGE;
2223 brelse(bp);
2224 if (error) {
2225 #if 1
2226 printf("Failed to write RAID component info!\n");
2227 #endif
2228 }
2229
2230 return(error);
2231 }
2232
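/*
 * Bump the array's mod_counter and mark the component label of every
 * non-failed component as dirty. Spared components are left alone
 * for now.
 */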
2233 void
2234 rf_markalldirty(raidPtr)
2235 RF_Raid_t *raidPtr;
2236 {
2237 RF_ComponentLabel_t clabel;
2238 int r,c;
2239
2240 raidPtr->mod_counter++;
2241 for (r = 0; r < raidPtr->numRow; r++) {
2242 for (c = 0; c < raidPtr->numCol; c++) {
2243 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2244 raidread_component_label(
2245 raidPtr->Disks[r][c].dev,
2246 raidPtr->raid_cinfo[r][c].ci_vp,
2247 &clabel);
2248 if (clabel.status == rf_ds_spared) {
2249 /* XXX do something special...
2250 but whatever you do, don't
2251 try to access it!! */
2252 } else {
2253 #if 0
2254 clabel.status =
2255 raidPtr->Disks[r][c].status;
2256 raidwrite_component_label(
2257 raidPtr->Disks[r][c].dev,
2258 raidPtr->raid_cinfo[r][c].ci_vp,
2259 &clabel);
2260 #endif
2261 raidmarkdirty(
2262 raidPtr->Disks[r][c].dev,
2263 raidPtr->raid_cinfo[r][c].ci_vp,
2264 raidPtr->mod_counter);
2265 }
2266 }
2267 }
2268 }
2269 /* printf("Component labels marked dirty.\n"); */
2270 #if 0
2271 for( c = 0; c < raidPtr->numSpare ; c++) {
2272 sparecol = raidPtr->numCol + c;
2273 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2274 /*
2275
2276 XXX this is where we get fancy and map this spare
2277 into its correct spot in the array.
2278
2279 */
2280 /*
2281
2282 we claim this disk is "optimal" if it's
2283 rf_ds_used_spare, as that means it should be
2284 directly substitutable for the disk it replaced.
2285 We note that too...
2286
2287 */
2288
2289 for(i=0;i<raidPtr->numRow;i++) {
2290 for(j=0;j<raidPtr->numCol;j++) {
2291 if ((raidPtr->Disks[i][j].spareRow ==
2292 r) &&
2293 (raidPtr->Disks[i][j].spareCol ==
2294 sparecol)) {
2295 srow = r;
2296 scol = sparecol;
2297 break;
2298 }
2299 }
2300 }
2301
2302 raidread_component_label(
2303 raidPtr->Disks[r][sparecol].dev,
2304 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2305 &clabel);
2306 /* make sure status is noted */
2307 clabel.version = RF_COMPONENT_LABEL_VERSION;
2308 clabel.mod_counter = raidPtr->mod_counter;
2309 clabel.serial_number = raidPtr->serial_number;
2310 clabel.row = srow;
2311 clabel.column = scol;
2312 clabel.num_rows = raidPtr->numRow;
2313 clabel.num_columns = raidPtr->numCol;
2314 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2315 clabel.status = rf_ds_optimal;
2316 raidwrite_component_label(
2317 raidPtr->Disks[r][sparecol].dev,
2318 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2319 &clabel);
2320 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2321 raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2322 }
2323 }
2324
2325 #endif
2326 }
2327
2328
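/*
 * Rewrite the component labels of all optimal components (and any
 * in-use spares) with the current mod_counter and an optimal status.
 * Unlike rf_final_update_component_labels() below, this never marks
 * the labels clean.
 */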
2329 void
2330 rf_update_component_labels(raidPtr)
2331 RF_Raid_t *raidPtr;
2332 {
2333 RF_ComponentLabel_t clabel;
2334 int sparecol;
2335 int r,c;
2336 int i,j;
2337 int srow, scol;
2338
2339 srow = -1;
2340 scol = -1;
2341
2342 /* XXX should do extra checks to make sure things really are clean,
2343 rather than blindly setting the clean bit... */
2344
2345 raidPtr->mod_counter++;
2346
2347 for (r = 0; r < raidPtr->numRow; r++) {
2348 for (c = 0; c < raidPtr->numCol; c++) {
2349 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2350 raidread_component_label(
2351 raidPtr->Disks[r][c].dev,
2352 raidPtr->raid_cinfo[r][c].ci_vp,
2353 &clabel);
2354 /* make sure status is noted */
2355 clabel.status = rf_ds_optimal;
2356 /* bump the counter */
2357 clabel.mod_counter = raidPtr->mod_counter;
2358
2359 raidwrite_component_label(
2360 raidPtr->Disks[r][c].dev,
2361 raidPtr->raid_cinfo[r][c].ci_vp,
2362 &clabel);
2363 }
2364 /* else we don't touch it.. */
2365 }
2366 }
2367
2368 for( c = 0; c < raidPtr->numSpare ; c++) {
2369 sparecol = raidPtr->numCol + c;
2370 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2371 /*
2372
2373 we claim this disk is "optimal" if it's
2374 rf_ds_used_spare, as that means it should be
2375 directly substitutable for the disk it replaced.
2376 We note that too...
2377
2378 */
2379
2380 for(i=0;i<raidPtr->numRow;i++) {
2381 for(j=0;j<raidPtr->numCol;j++) {
2382 if ((raidPtr->Disks[i][j].spareRow ==
2383 0) &&
2384 (raidPtr->Disks[i][j].spareCol ==
2385 sparecol)) {
2386 srow = i;
2387 scol = j;
2388 break;
2389 }
2390 }
2391 }
2392
2393 /* XXX shouldn't *really* need this... */
2394 raidread_component_label(
2395 raidPtr->Disks[0][sparecol].dev,
2396 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2397 &clabel);
2398 /* make sure status is noted */
2399
2400 raid_init_component_label(raidPtr, &clabel);
2401
2402 clabel.mod_counter = raidPtr->mod_counter;
2403 clabel.row = srow;
2404 clabel.column = scol;
2405 clabel.status = rf_ds_optimal;
2406
2407 raidwrite_component_label(
2408 raidPtr->Disks[0][sparecol].dev,
2409 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2410 &clabel);
2411 }
2412 }
2413 /* printf("Component labels updated\n"); */
2414 }
2415
2416
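/*
 * Same as rf_update_component_labels(), but additionally marks each
 * label clean when the parity is known to be good
 * (raidPtr->parity_good == RF_RAID_CLEAN).
 */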
2417 void
2418 rf_final_update_component_labels(raidPtr)
2419 RF_Raid_t *raidPtr;
2420 {
2421 RF_ComponentLabel_t clabel;
2422 int sparecol;
2423 int r,c;
2424 int i,j;
2425 int srow, scol;
2426
2427 srow = -1;
2428 scol = -1;
2429
2430 /* XXX should do extra checks to make sure things really are clean,
2431 rather than blindly setting the clean bit... */
2432
2433 raidPtr->mod_counter++;
2434
2435 for (r = 0; r < raidPtr->numRow; r++) {
2436 for (c = 0; c < raidPtr->numCol; c++) {
2437 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2438 raidread_component_label(
2439 raidPtr->Disks[r][c].dev,
2440 raidPtr->raid_cinfo[r][c].ci_vp,
2441 &clabel);
2442 /* make sure status is noted */
2443 clabel.status = rf_ds_optimal;
2444 /* bump the counter */
2445 clabel.mod_counter = raidPtr->mod_counter;
2446
2447 raidwrite_component_label(
2448 raidPtr->Disks[r][c].dev,
2449 raidPtr->raid_cinfo[r][c].ci_vp,
2450 &clabel);
2451 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2452 raidmarkclean(
2453 raidPtr->Disks[r][c].dev,
2454 raidPtr->raid_cinfo[r][c].ci_vp,
2455 raidPtr->mod_counter);
2456 }
2457 }
2458 /* else we don't touch it.. */
2459 }
2460 }
2461
2462 for( c = 0; c < raidPtr->numSpare ; c++) {
2463 sparecol = raidPtr->numCol + c;
2464 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2465 /*
2466
2467 we claim this disk is "optimal" if it's
2468 rf_ds_used_spare, as that means it should be
2469 directly substitutable for the disk it replaced.
2470 We note that too...
2471
2472 */
2473
2474 for(i=0;i<raidPtr->numRow;i++) {
2475 for(j=0;j<raidPtr->numCol;j++) {
2476 if ((raidPtr->Disks[i][j].spareRow ==
2477 0) &&
2478 (raidPtr->Disks[i][j].spareCol ==
2479 sparecol)) {
2480 srow = i;
2481 scol = j;
2482 break;
2483 }
2484 }
2485 }
2486
2487 /* XXX shouldn't *really* need this... */
2488 raidread_component_label(
2489 raidPtr->Disks[0][sparecol].dev,
2490 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2491 &clabel);
2492 /* make sure status is noted */
2493
2494 raid_init_component_label(raidPtr, &clabel);
2495
2496 clabel.mod_counter = raidPtr->mod_counter;
2497 clabel.row = srow;
2498 clabel.column = scol;
2499 clabel.status = rf_ds_optimal;
2500
2501 raidwrite_component_label(
2502 raidPtr->Disks[0][sparecol].dev,
2503 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2504 &clabel);
2505 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2506 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2507 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2508 raidPtr->mod_counter);
2509 }
2510 }
2511 }
2512 /* printf("Component labels updated\n"); */
2513 }
2514
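/*
 * Close the vnode for a component. Components that were autoconfigured
 * were opened read-only and are released with VOP_CLOSE()/vput();
 * everything else goes back via vn_close().
 */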
2515 void
2516 rf_close_component(raidPtr, vp, auto_configured)
2517 RF_Raid_t *raidPtr;
2518 struct vnode *vp;
2519 int auto_configured;
2520 {
2521 struct proc *p;
2522
2523 p = raidPtr->engine_thread;
2524
2525 if (vp != NULL) {
2526 if (auto_configured == 1) {
2527 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2528 vput(vp);
2529
2530 } else {
2531 VOP_UNLOCK(vp, 0);
2532 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2533 }
2534 } else {
2535 printf("vnode was NULL\n");
2536 }
2537 }
2538
2539
2540 void
2541 rf_UnconfigureVnodes(raidPtr)
2542 RF_Raid_t *raidPtr;
2543 {
2544 int r,c;
2545 struct proc *p;
2546 struct vnode *vp;
2547 int acd;
2548
2549
2550 /* We take this opportunity to close the vnodes like we should.. */
2551
2552 p = raidPtr->engine_thread;
2553
2554 for (r = 0; r < raidPtr->numRow; r++) {
2555 for (c = 0; c < raidPtr->numCol; c++) {
2556 printf("Closing vnode for row: %d col: %d\n", r, c);
2557 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2558 acd = raidPtr->Disks[r][c].auto_configured;
2559 rf_close_component(raidPtr, vp, acd);
2560 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2561 raidPtr->Disks[r][c].auto_configured = 0;
2562 }
2563 }
2564 for (r = 0; r < raidPtr->numSpare; r++) {
2565 printf("Closing vnode for spare: %d\n", r);
2566 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2567 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2568 rf_close_component(raidPtr, vp, acd);
2569 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2570 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2571 }
2572 }
2573
2574
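/*
 * The following rf_*Thread() functions are the bodies of the kernel
 * threads used for reconstruction, parity rewriting, copyback and
 * in-place reconstruction. Each sets the corresponding *_in_progress
 * flag, does its work at splbio(), clears the flag, and exits via
 * kthread_exit().
 */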
2575 void
2576 rf_ReconThread(req)
2577 struct rf_recon_req *req;
2578 {
2579 int s;
2580 RF_Raid_t *raidPtr;
2581
2582 s = splbio();
2583 raidPtr = (RF_Raid_t *) req->raidPtr;
2584 raidPtr->recon_in_progress = 1;
2585
2586 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2587 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2588
2589 /* XXX get rid of this! we don't need it at all.. */
2590 RF_Free(req, sizeof(*req));
2591
2592 raidPtr->recon_in_progress = 0;
2593 splx(s);
2594
2595 /* That's all... */
2596 kthread_exit(0); /* does not return */
2597 }
2598
2599 void
2600 rf_RewriteParityThread(raidPtr)
2601 RF_Raid_t *raidPtr;
2602 {
2603 int retcode;
2604 int s;
2605
2606 raidPtr->parity_rewrite_in_progress = 1;
2607 s = splbio();
2608 retcode = rf_RewriteParity(raidPtr);
2609 splx(s);
2610 if (retcode) {
2611 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2612 } else {
2613 /* set the clean bit! If we shut down correctly,
2614 the clean bit on each component label will get
2615 set */
2616 raidPtr->parity_good = RF_RAID_CLEAN;
2617 }
2618 raidPtr->parity_rewrite_in_progress = 0;
2619
2620 /* Anyone waiting for us to stop? If so, inform them... */
2621 if (raidPtr->waitShutdown) {
2622 wakeup(&raidPtr->parity_rewrite_in_progress);
2623 }
2624
2625 /* That's all... */
2626 kthread_exit(0); /* does not return */
2627 }
2628
2629
2630 void
2631 rf_CopybackThread(raidPtr)
2632 RF_Raid_t *raidPtr;
2633 {
2634 int s;
2635
2636 raidPtr->copyback_in_progress = 1;
2637 s = splbio();
2638 rf_CopybackReconstructedData(raidPtr);
2639 splx(s);
2640 raidPtr->copyback_in_progress = 0;
2641
2642 /* That's all... */
2643 kthread_exit(0); /* does not return */
2644 }
2645
2646
2647 void
2648 rf_ReconstructInPlaceThread(req)
2649 struct rf_recon_req *req;
2650 {
2651 int retcode;
2652 int s;
2653 RF_Raid_t *raidPtr;
2654
2655 s = splbio();
2656 raidPtr = req->raidPtr;
2657 raidPtr->recon_in_progress = 1;
2658 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2659 RF_Free(req, sizeof(*req));
2660 raidPtr->recon_in_progress = 0;
2661 splx(s);
2662
2663 /* That's all... */
2664 kthread_exit(0); /* does not return */
2665 }
2666
2667 void
2668 rf_mountroot_hook(dev)
2669 struct device *dev;
2670 {
2671
2672 }
2673
2674
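/*
 * Scan all disk devices in the system looking for RAID components.
 * For every partition marked FS_RAID we read the component label and,
 * if it looks reasonable, add an RF_AutoConfig_t entry (with the
 * vnode still open) to the list that is returned.
 */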
2675 RF_AutoConfig_t *
2676 rf_find_raid_components()
2677 {
2678 struct devnametobdevmaj *dtobdm;
2679 struct vnode *vp;
2680 struct disklabel label;
2681 struct device *dv;
2682 char *cd_name;
2683 dev_t dev;
2684 int error;
2685 int i;
2686 int good_one;
2687 RF_ComponentLabel_t *clabel;
2688 RF_AutoConfig_t *ac_list;
2689 RF_AutoConfig_t *ac;
2690
2691
2692 /* initialize the AutoConfig list */
2693 ac_list = NULL;
2694
2695 if (raidautoconfig) {
2696
2697 /* we begin by trolling through *all* the devices on the system */
2698
2699 for (dv = alldevs.tqh_first; dv != NULL;
2700 dv = dv->dv_list.tqe_next) {
2701
2702 /* we are only interested in disks... */
2703 if (dv->dv_class != DV_DISK)
2704 continue;
2705
2706 /* we don't care about floppies... */
2707 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2708 continue;
2709 }
2710
2711 /* need to find the device_name_to_block_device_major stuff */
2712 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2713 dtobdm = dev_name2blk;
2714 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2715 dtobdm++;
2716 }
2717
2718 /* get a vnode for the raw partition of this disk */
2719
2720 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2721 if (bdevvp(dev, &vp))
2722 panic("RAID can't alloc vnode");
2723
2724 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2725
2726 if (error) {
2727 /* "Who cares." Continue looking
2728 for something that exists */
2729 vput(vp);
2730 continue;
2731 }
2732
2733 /* Ok, the disk exists. Go get the disklabel. */
2734 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2735 FREAD, NOCRED, 0);
2736 if (error) {
2737 /*
2738 * XXX can't happen - open() would
2739 * have errored out (or faked up one)
2740 */
2741 printf("can't get label for dev %s%c (%d)!?!?\n",
2742 dv->dv_xname, 'a' + RAW_PART, error);
2743 }
2744
2745 /* don't need this any more. We'll allocate it again
2746 a little later if we really do... */
2747 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2748 vput(vp);
2749
2750 for (i=0; i < label.d_npartitions; i++) {
2751 /* We only support partitions marked as RAID */
2752 if (label.d_partitions[i].p_fstype != FS_RAID)
2753 continue;
2754
2755 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2756 if (bdevvp(dev, &vp))
2757 panic("RAID can't alloc vnode");
2758
2759 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2760 if (error) {
2761 /* Whatever... */
2762 vput(vp);
2763 continue;
2764 }
2765
2766 good_one = 0;
2767
2768 clabel = (RF_ComponentLabel_t *)
2769 malloc(sizeof(RF_ComponentLabel_t),
2770 M_RAIDFRAME, M_NOWAIT);
2771 if (clabel == NULL) {
2772 /* XXX CLEANUP HERE */
2773 printf("RAID auto config: out of memory!\n");
2774 return(NULL); /* XXX probably should panic? */
2775 }
2776
2777 if (!raidread_component_label(dev, vp, clabel)) {
2778 /* Got the label. Does it look reasonable? */
2779 if (rf_reasonable_label(clabel) &&
2780 (clabel->partitionSize <=
2781 label.d_partitions[i].p_size)) {
2782 #if DEBUG
2783 printf("Component on: %s%c: %d\n",
2784 dv->dv_xname, 'a'+i,
2785 label.d_partitions[i].p_size);
2786 rf_print_component_label(clabel);
2787 #endif
2788 /* if it's reasonable, add it,
2789 else ignore it. */
2790 ac = (RF_AutoConfig_t *)
2791 malloc(sizeof(RF_AutoConfig_t),
2792 M_RAIDFRAME,
2793 M_NOWAIT);
2794 if (ac == NULL) {
2795 /* XXX should panic?? */
2796 return(NULL);
2797 }
2798
2799 sprintf(ac->devname, "%s%c",
2800 dv->dv_xname, 'a'+i);
2801 ac->dev = dev;
2802 ac->vp = vp;
2803 ac->clabel = clabel;
2804 ac->next = ac_list;
2805 ac_list = ac;
2806 good_one = 1;
2807 }
2808 }
2809 if (!good_one) {
2810 /* cleanup */
2811 free(clabel, M_RAIDFRAME);
2812 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2813 vput(vp);
2814 }
2815 }
2816 }
2817 }
2818 return(ac_list);
2819 }
2820
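/*
 * Sanity-check a component label: known version, sane clean flag,
 * row/column within the claimed geometry, and non-zero block size
 * and block count.
 */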
2821 static int
2822 rf_reasonable_label(clabel)
2823 RF_ComponentLabel_t *clabel;
2824 {
2825
2826 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2827 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2828 ((clabel->clean == RF_RAID_CLEAN) ||
2829 (clabel->clean == RF_RAID_DIRTY)) &&
2830 clabel->row >= 0 &&
2831 clabel->column >= 0 &&
2832 clabel->num_rows > 0 &&
2833 clabel->num_columns > 0 &&
2834 clabel->row < clabel->num_rows &&
2835 clabel->column < clabel->num_columns &&
2836 clabel->blockSize > 0 &&
2837 clabel->numBlocks > 0) {
2838 /* label looks reasonable enough... */
2839 return(1);
2840 }
2841 return(0);
2842 }
2843
2844
2845 void
2846 rf_print_component_label(clabel)
2847 RF_ComponentLabel_t *clabel;
2848 {
2849 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2850 clabel->row, clabel->column,
2851 clabel->num_rows, clabel->num_columns);
2852 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2853 clabel->version, clabel->serial_number,
2854 clabel->mod_counter);
2855 printf(" Clean: %s Status: %d\n",
2856 clabel->clean ? "Yes" : "No", clabel->status );
2857 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2858 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2859 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2860 (char) clabel->parityConfig, clabel->blockSize,
2861 clabel->numBlocks);
2862 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2863 printf(" Contains root partition: %s\n",
2864 clabel->root_partition ? "Yes" : "No" );
2865 printf(" Last configured as: raid%d\n", clabel->last_unit );
2866 #if 0
2867 printf(" Config order: %d\n", clabel->config_order);
2868 #endif
2869
2870 }
2871
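/*
 * Partition the AutoConfig list into configuration sets: components
 * whose labels match (as decided by rf_does_it_fit()) end up on the
 * same RF_ConfigSet_t.
 */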
2872 RF_ConfigSet_t *
2873 rf_create_auto_sets(ac_list)
2874 RF_AutoConfig_t *ac_list;
2875 {
2876 RF_AutoConfig_t *ac;
2877 RF_ConfigSet_t *config_sets;
2878 RF_ConfigSet_t *cset;
2879 RF_AutoConfig_t *ac_next;
2880
2881
2882 config_sets = NULL;
2883
2884 /* Go through the AutoConfig list, and figure out which components
2885 belong to what sets. */
2886 ac = ac_list;
2887 while(ac!=NULL) {
2888 /* we're going to putz with ac->next, so save it here
2889 for use at the end of the loop */
2890 ac_next = ac->next;
2891
2892 if (config_sets == NULL) {
2893 /* will need at least this one... */
2894 config_sets = (RF_ConfigSet_t *)
2895 malloc(sizeof(RF_ConfigSet_t),
2896 M_RAIDFRAME, M_NOWAIT);
2897 if (config_sets == NULL) {
2898 panic("rf_create_auto_sets: No memory!\n");
2899 }
2900 /* this one is easy :) */
2901 config_sets->ac = ac;
2902 config_sets->next = NULL;
2903 config_sets->rootable = 0;
2904 ac->next = NULL;
2905 } else {
2906 /* which set does this component fit into? */
2907 cset = config_sets;
2908 while(cset!=NULL) {
2909 if (rf_does_it_fit(cset, ac)) {
2910 /* looks like it matches... */
2911 ac->next = cset->ac;
2912 cset->ac = ac;
2913 break;
2914 }
2915 cset = cset->next;
2916 }
2917 if (cset==NULL) {
2918 /* didn't find a match above... new set.. */
2919 cset = (RF_ConfigSet_t *)
2920 malloc(sizeof(RF_ConfigSet_t),
2921 M_RAIDFRAME, M_NOWAIT);
2922 if (cset == NULL) {
2923 panic("rf_create_auto_sets: No memory!\n");
2924 }
2925 cset->ac = ac;
2926 ac->next = NULL;
2927 cset->next = config_sets;
2928 cset->rootable = 0;
2929 config_sets = cset;
2930 }
2931 }
2932 ac = ac_next;
2933 }
2934
2935
2936 return(config_sets);
2937 }
2938
2939 static int
2940 rf_does_it_fit(cset, ac)
2941 RF_ConfigSet_t *cset;
2942 RF_AutoConfig_t *ac;
2943 {
2944 RF_ComponentLabel_t *clabel1, *clabel2;
2945
2946 /* If this one matches the *first* one in the set, that's good
2947 enough, since the other members of the set would have been
2948 through here too... */
2949 /* note that we are not checking partitionSize here..
2950
2951 Note that we are also not checking the mod_counters here.
2952 If everything else matches except the mod_counter, that's
2953 good enough for this test. We will deal with the mod_counters
2954 a little later in the autoconfiguration process.
2955
2956 (clabel1->mod_counter == clabel2->mod_counter) &&
2957
2958 The reason we don't check for this is that failed disks
2959 will have lower modification counts. If those disks are
2960 not added to the set they used to belong to, then they will
2961 form their own set, which may result in 2 different sets,
2962 for example, competing to be configured at raid0, and
2963 perhaps competing to be the root filesystem set. If the
2964 wrong ones get configured, or both attempt to become /,
2965 weird behaviour and/or serious lossage will occur. Thus we
2966 need to bring them into the fold here, and kick them out at
2967 a later point.
2968
2969 */
2970
2971 clabel1 = cset->ac->clabel;
2972 clabel2 = ac->clabel;
2973 if ((clabel1->version == clabel2->version) &&
2974 (clabel1->serial_number == clabel2->serial_number) &&
2975 (clabel1->num_rows == clabel2->num_rows) &&
2976 (clabel1->num_columns == clabel2->num_columns) &&
2977 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2978 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2979 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2980 (clabel1->parityConfig == clabel2->parityConfig) &&
2981 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2982 (clabel1->blockSize == clabel2->blockSize) &&
2983 (clabel1->numBlocks == clabel2->numBlocks) &&
2984 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2985 (clabel1->root_partition == clabel2->root_partition) &&
2986 (clabel1->last_unit == clabel2->last_unit) &&
2987 (clabel1->config_order == clabel2->config_order)) {
2988 /* if it gets here, it almost *has* to be a match */
2989 } else {
2990 /* it's not consistent with somebody in the set..
2991 punt */
2992 return(0);
2993 }
2994 /* all was fine.. it must fit... */
2995 return(1);
2996 }
2997
2998 int
2999 rf_have_enough_components(cset)
3000 RF_ConfigSet_t *cset;
3001 {
3002 RF_AutoConfig_t *ac;
3003 RF_AutoConfig_t *auto_config;
3004 RF_ComponentLabel_t *clabel;
3005 int r,c;
3006 int num_rows;
3007 int num_cols;
3008 int num_missing;
3009 int mod_counter;
3010 int mod_counter_found;
3011 int even_pair_failed;
3012 char parity_type;
3013
3014
3015 /* check to see that we have enough 'live' components
3016 of this set. If so, we can configure it if necessary */
3017
3018 num_rows = cset->ac->clabel->num_rows;
3019 num_cols = cset->ac->clabel->num_columns;
3020 parity_type = cset->ac->clabel->parityConfig;
3021
3022 /* XXX Check for duplicate components!?!?!? */
3023
3024 /* Determine what the mod_counter is supposed to be for this set. */
3025
3026 mod_counter_found = 0;
3027 ac = cset->ac;
3028 while(ac!=NULL) {
3029 if (mod_counter_found==0) {
3030 mod_counter = ac->clabel->mod_counter;
3031 mod_counter_found = 1;
3032 } else {
3033 if (ac->clabel->mod_counter > mod_counter) {
3034 mod_counter = ac->clabel->mod_counter;
3035 }
3036 }
3037 ac = ac->next;
3038 }
3039
3040 num_missing = 0;
3041 auto_config = cset->ac;
3042
3043 for(r=0; r<num_rows; r++) {
3044 even_pair_failed = 0;
3045 for(c=0; c<num_cols; c++) {
3046 ac = auto_config;
3047 while(ac!=NULL) {
3048 if ((ac->clabel->row == r) &&
3049 (ac->clabel->column == c) &&
3050 (ac->clabel->mod_counter == mod_counter)) {
3051 /* it's this one... */
3052 #if DEBUG
3053 printf("Found: %s at %d,%d\n",
3054 ac->devname,r,c);
3055 #endif
3056 break;
3057 }
3058 ac=ac->next;
3059 }
3060 if (ac==NULL) {
3061 /* Didn't find one here! */
3062 /* special case for RAID 1, especially
3063 where there are more than 2
3064 components (where RAIDframe treats
3065 things a little differently :( ) */
3066 if (parity_type == '1') {
3067 if (c%2 == 0) { /* even component */
3068 even_pair_failed = 1;
3069 } else { /* odd component. If
3070 we're failed, and
3071 so is the even
3072 component, it's
3073 "Good Night, Charlie" */
3074 if (even_pair_failed == 1) {
3075 return(0);
3076 }
3077 }
3078 } else {
3079 /* normal accounting */
3080 num_missing++;
3081 }
3082 }
3083 if ((parity_type == '1') && (c%2 == 1)) {
3084 /* Just did the second component of a mirror pair,
3085 and we didn't bail.. reset the even_pair_failed flag,
3086 and go on to the next pair.... */
3087 even_pair_failed = 0;
3088 }
3089 }
3090 }
3091
3092 clabel = cset->ac->clabel;
3093
3094 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3095 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3096 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3097 /* XXX this needs to be made *much* more general */
3098 /* Too many failures */
3099 return(0);
3100 }
3101 /* otherwise, all is well, and we've got enough to take a kick
3102 at autoconfiguring this set */
3103 return(1);
3104 }
3105
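/*
 * Fill in an RF_Config_t from the component labels of an autoconfigured
 * set: geometry and layout parameters come from the first label, and the
 * device names of all members are copied into their row/column slots.
 */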
3106 void
3107 rf_create_configuration(ac,config,raidPtr)
3108 RF_AutoConfig_t *ac;
3109 RF_Config_t *config;
3110 RF_Raid_t *raidPtr;
3111 {
3112 RF_ComponentLabel_t *clabel;
3113 int i;
3114
3115 clabel = ac->clabel;
3116
3117 /* 1. Fill in the common stuff */
3118 config->numRow = clabel->num_rows;
3119 config->numCol = clabel->num_columns;
3120 config->numSpare = 0; /* XXX should this be set here? */
3121 config->sectPerSU = clabel->sectPerSU;
3122 config->SUsPerPU = clabel->SUsPerPU;
3123 config->SUsPerRU = clabel->SUsPerRU;
3124 config->parityConfig = clabel->parityConfig;
3125 /* XXX... */
3126 strcpy(config->diskQueueType,"fifo");
3127 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3128 config->layoutSpecificSize = 0; /* XXX ?? */
3129
3130 while(ac!=NULL) {
3131 /* row/col values will be in range due to the checks
3132 in rf_reasonable_label() */
3133 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3134 ac->devname);
3135 ac = ac->next;
3136 }
3137
3138 for(i=0;i<RF_MAXDBGV;i++) {
3139 config->debugVars[i][0] = '\0';
3140 }
3141 }
3142
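/*
 * Record the new autoconfigure setting in the RAID structure and push
 * it into the component label of every optimal component.
 * rf_set_rootpartition() below does the same for the root_partition
 * flag.
 */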
3143 int
3144 rf_set_autoconfig(raidPtr, new_value)
3145 RF_Raid_t *raidPtr;
3146 int new_value;
3147 {
3148 RF_ComponentLabel_t clabel;
3149 struct vnode *vp;
3150 dev_t dev;
3151 int row, column;
3152
3153 raidPtr->autoconfigure = new_value;
3154 for(row=0; row<raidPtr->numRow; row++) {
3155 for(column=0; column<raidPtr->numCol; column++) {
3156 if (raidPtr->Disks[row][column].status ==
3157 rf_ds_optimal) {
3158 dev = raidPtr->Disks[row][column].dev;
3159 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3160 raidread_component_label(dev, vp, &clabel);
3161 clabel.autoconfigure = new_value;
3162 raidwrite_component_label(dev, vp, &clabel);
3163 }
3164 }
3165 }
3166 return(new_value);
3167 }
3168
3169 int
3170 rf_set_rootpartition(raidPtr, new_value)
3171 RF_Raid_t *raidPtr;
3172 int new_value;
3173 {
3174 RF_ComponentLabel_t clabel;
3175 struct vnode *vp;
3176 dev_t dev;
3177 int row, column;
3178
3179 raidPtr->root_partition = new_value;
3180 for(row=0; row<raidPtr->numRow; row++) {
3181 for(column=0; column<raidPtr->numCol; column++) {
3182 if (raidPtr->Disks[row][column].status ==
3183 rf_ds_optimal) {
3184 dev = raidPtr->Disks[row][column].dev;
3185 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3186 raidread_component_label(dev, vp, &clabel);
3187 clabel.root_partition = new_value;
3188 raidwrite_component_label(dev, vp, &clabel);
3189 }
3190 }
3191 }
3192 return(new_value);
3193 }
3194
3195 void
3196 rf_release_all_vps(cset)
3197 RF_ConfigSet_t *cset;
3198 {
3199 RF_AutoConfig_t *ac;
3200
3201 ac = cset->ac;
3202 while(ac!=NULL) {
3203 /* Close the vp, and give it back */
3204 if (ac->vp) {
3205 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3206 vput(ac->vp);
3207 ac->vp = NULL;
3208 }
3209 ac = ac->next;
3210 }
3211 }
3212
3213
3214 void
3215 rf_cleanup_config_set(cset)
3216 RF_ConfigSet_t *cset;
3217 {
3218 RF_AutoConfig_t *ac;
3219 RF_AutoConfig_t *next_ac;
3220
3221 ac = cset->ac;
3222 while(ac!=NULL) {
3223 next_ac = ac->next;
3224 /* nuke the label */
3225 free(ac->clabel, M_RAIDFRAME);
3226 /* cleanup the config structure */
3227 free(ac, M_RAIDFRAME);
3228 /* "next.." */
3229 ac = next_ac;
3230 }
3231 /* and, finally, nuke the config set */
3232 free(cset, M_RAIDFRAME);
3233 }
3234
3235
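/*
 * Initialize a component label from the current state of the array:
 * version, serial number, mod counter, geometry and layout parameters.
 * The label is marked dirty and optimal; per-component fields such as
 * row and column are left for the caller to fill in.
 */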
3236 void
3237 raid_init_component_label(raidPtr, clabel)
3238 RF_Raid_t *raidPtr;
3239 RF_ComponentLabel_t *clabel;
3240 {
3241 /* current version number */
3242 clabel->version = RF_COMPONENT_LABEL_VERSION;
3243 clabel->serial_number = raidPtr->serial_number;
3244 clabel->mod_counter = raidPtr->mod_counter;
3245 clabel->num_rows = raidPtr->numRow;
3246 clabel->num_columns = raidPtr->numCol;
3247 clabel->clean = RF_RAID_DIRTY; /* not clean */
3248 clabel->status = rf_ds_optimal; /* "It's good!" */
3249
3250 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3251 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3252 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3253
3254 clabel->blockSize = raidPtr->bytesPerSector;
3255 clabel->numBlocks = raidPtr->sectorsPerDisk;
3256
3257 /* XXX not portable */
3258 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3259 clabel->maxOutstanding = raidPtr->maxOutstanding;
3260 clabel->autoconfigure = raidPtr->autoconfigure;
3261 clabel->root_partition = raidPtr->root_partition;
3262 clabel->last_unit = raidPtr->raidid;
3263 clabel->config_order = raidPtr->config_order;
3264 }
3265
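/*
 * Configure one autoconfigured set. Try to reuse the unit number the
 * set was last configured at; if that is taken, pick a free one. Build
 * the configuration, call rf_Configure(), mark the components dirty,
 * and return the unit number used via *unit.
 */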
3266 int
3267 rf_auto_config_set(cset,unit)
3268 RF_ConfigSet_t *cset;
3269 int *unit;
3270 {
3271 RF_Raid_t *raidPtr;
3272 RF_Config_t *config;
3273 int raidID;
3274 int retcode;
3275
3276 printf("RAID autoconfigure\n");
3277
3278 retcode = 0;
3279 *unit = -1;
3280
3281 /* 1. Create a config structure */
3282
3283 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3284 M_RAIDFRAME,
3285 M_NOWAIT);
3286 if (config==NULL) {
3287 printf("Out of mem!?!?\n");
3288 /* XXX do something more intelligent here. */
3289 return(1);
3290 }
3291
3292 memset(config, 0, sizeof(RF_Config_t));
3293
3294 /* XXX raidID needs to be set correctly.. */
3295
3296 /*
3297 2. Figure out what RAID ID this one is supposed to live at
3298 See if we can get the same RAID dev that it was configured
3299 on last time..
3300 */
3301
3302 raidID = cset->ac->clabel->last_unit;
3303 if ((raidID < 0) || (raidID >= numraid)) {
3304 /* let's not wander off into lala land. */
3305 raidID = numraid - 1;
3306 }
3307 if (raidPtrs[raidID]->valid != 0) {
3308
3309 /*
3310 Nope... Go looking for an alternative...
3311 Start high so we don't immediately use raid0 if that's
3312 not taken.
3313 */
3314
3315 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3316 if (raidPtrs[raidID]->valid == 0) {
3317 /* can use this one! */
3318 break;
3319 }
3320 }
3321 }
3322
3323 if (raidID < 0) {
3324 /* punt... */
3325 printf("Unable to auto configure this set!\n");
3326 printf("(Out of RAID devs!)\n");
3327 return(1);
3328 }
3329 printf("Configuring raid%d:\n",raidID);
3330 raidPtr = raidPtrs[raidID];
3331
3332 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3333 raidPtr->raidid = raidID;
3334 raidPtr->openings = RAIDOUTSTANDING;
3335
3336 /* 3. Build the configuration structure */
3337 rf_create_configuration(cset->ac, config, raidPtr);
3338
3339 /* 4. Do the configuration */
3340 retcode = rf_Configure(raidPtr, config, cset->ac);
3341
3342 if (retcode == 0) {
3343
3344 raidinit(raidPtrs[raidID]);
3345
3346 rf_markalldirty(raidPtrs[raidID]);
3347 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3348 if (cset->ac->clabel->root_partition==1) {
3349 /* everything configured just fine. Make a note
3350 that this set is eligible to be root. */
3351 cset->rootable = 1;
3352 /* XXX do this here? */
3353 raidPtrs[raidID]->root_partition = 1;
3354 }
3355 }
3356
3357 /* 5. Cleanup */
3358 free(config, M_RAIDFRAME);
3359
3360 *unit = raidID;
3361 return(retcode);
3362 }
3363