1 /*	$NetBSD: rf_netbsdkintf.c,v 1.109.2.5 2001/10/11 00:02:20 fvdl Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80 /*
81 * Copyright (c) 1995 Carnegie-Mellon University.
82 * All rights reserved.
83 *
84 * Authors: Mark Holland, Jim Zelenka
85 *
86 * Permission to use, copy, modify and distribute this software and
87 * its documentation is hereby granted, provided that both the copyright
88 * notice and this permission notice appear in all copies of the
89 * software, derivative works or modified versions, and any portions
90 * thereof, and that both notices appear in supporting documentation.
91 *
92 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
93 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
94 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
95 *
96 * Carnegie Mellon requests users of this software to return to
97 *
98 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
99 * School of Computer Science
100 * Carnegie Mellon University
101 * Pittsburgh PA 15213-3890
102 *
103 * any improvements or extensions that they make and grant Carnegie the
104 * rights to redistribute these changes.
105 */
106
107 /***********************************************************
108 *
109 * rf_kintf.c -- the kernel interface routines for RAIDframe
110 *
111 ***********************************************************/
112
113 #include <sys/errno.h>
114 #include <sys/param.h>
115 #include <sys/pool.h>
116 #include <sys/queue.h>
117 #include <sys/disk.h>
118 #include <sys/device.h>
119 #include <sys/stat.h>
120 #include <sys/ioctl.h>
121 #include <sys/fcntl.h>
122 #include <sys/systm.h>
123 #include <sys/namei.h>
124 #include <sys/vnode.h>
125 #include <sys/param.h>
126 #include <sys/types.h>
127 #include <machine/types.h>
128 #include <sys/disklabel.h>
129 #include <sys/conf.h>
130 #include <sys/lock.h>
131 #include <sys/buf.h>
132 #include <sys/user.h>
133 #include <sys/reboot.h>
134
135 #include <dev/raidframe/raidframevar.h>
136 #include <dev/raidframe/raidframeio.h>
137 #include "raid.h"
138 #include "opt_raid_autoconfig.h"
139 #include "rf_raid.h"
140 #include "rf_copyback.h"
141 #include "rf_dag.h"
142 #include "rf_dagflags.h"
143 #include "rf_desc.h"
144 #include "rf_diskqueue.h"
145 #include "rf_acctrace.h"
146 #include "rf_etimer.h"
147 #include "rf_general.h"
148 #include "rf_debugMem.h"
149 #include "rf_kintf.h"
150 #include "rf_options.h"
151 #include "rf_driver.h"
152 #include "rf_parityscan.h"
153 #include "rf_debugprint.h"
154 #include "rf_threadstuff.h"
155
156 int rf_kdebug_level = 0;
157
158 #ifdef DEBUG
159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
160 #else /* DEBUG */
161 #define db1_printf(a) { }
162 #endif /* DEBUG */
163
164 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
165
166 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
167
168 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
169 * spare table */
170 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
171 * installation process */
172
173 /* prototypes */
174 static void KernelWakeupFunc(struct buf * bp);
175 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
176 RF_SectorNum_t startSect, RF_SectorCount_t numSect,
177 caddr_t buf, void (*cbFunc) (struct buf *), void *cbArg,
178 int logBytesPerSector, struct proc * b_proc);
179 static void raidinit(RF_Raid_t *);
180
181 void raidattach(int);
182
183 /*
184 * Pilfered from ccd.c
185 */
186
187 struct raidbuf {
188 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
189 struct buf *rf_obp; /* ptr. to original I/O buf */
190 int rf_flags; /* misc. flags */
191 RF_DiskQueueData_t *req;/* the request that this was part of.. */
192 };
193
194
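/*
 * Shadow buffers (struct raidbuf) are drawn from the per-unit pool that
 * raidinit() sets up.  PR_NOWAIT means pool_get() may return NULL under
 * memory pressure rather than sleeping.
 */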
195 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
196 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
197
198 /* XXX Not sure if the following should be replacing the raidPtrs above,
199 or if it should be used in conjunction with that...
200 */
201
202 struct raid_softc {
203 int sc_flags; /* flags */
204 int sc_cflags; /* configuration flags */
205 size_t sc_size; /* size of the raid device */
206 char sc_xname[20]; /* XXX external name */
207 struct disk sc_dkdev; /* generic disk device info */
208 struct pool sc_cbufpool; /* component buffer pool */
209 struct buf_queue buf_queue; /* used for the device queue */
210 };
211 /* sc_flags */
212 #define RAIDF_INITED 0x01 /* unit has been initialized */
213 #define RAIDF_WLABEL 0x02 /* label area is writable */
214 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
215 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
216 #define RAIDF_LOCKED 0x80 /* unit is locked */
217
218 #define raidunit(x) DISKUNIT(x)
219 int numraid = 0;
220
221 /*
222 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
223 * Be aware that large numbers can allow the driver to consume a lot of
224 * kernel memory, especially on writes, and in degraded mode reads.
225 *
226 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
227 * a single 64K write will typically require 64K for the old data,
228 * 64K for the old parity, and 64K for the new parity, for a total
229 * of 192K (if the parity buffer is not re-used immediately).
230 * Even if it is used immediately, that's still 128K, which when multiplied
231 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
232 *
233 * Now in degraded mode, for example, a 64K read on the above setup may
234 * require data reconstruction, which will require *all* of the 4 remaining
235 * disks to participate -- 4 * 32K/disk == 128K again.
236 */
237
238 #ifndef RAIDOUTSTANDING
239 #define RAIDOUTSTANDING 6
240 #endif
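/*
 * Because of the #ifndef guard above, the default of 6 can be overridden
 * at kernel build time (for example by adding -DRAIDOUTSTANDING=10 to the
 * compile flags -- the value 10 is just an illustration).
 */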
241
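/* The raw-partition device number corresponding to a given raid device. */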
242 #define RAIDLABELDEV(dev) \
243 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
244
245 /* declared here, and made public, for the benefit of KVM stuff.. */
246 struct raid_softc *raid_softc;
247
248 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
249 struct disklabel *);
250 static void raidgetdisklabel(struct vnode *);
251 static void raidmakedisklabel(struct raid_softc *);
252
253 static int raidlock(struct raid_softc *);
254 static void raidunlock(struct raid_softc *);
255
256 static void rf_markalldirty(RF_Raid_t *);
257 void rf_mountroot_hook(struct device *);
258
259 struct device *raidrootdev;
260
261 void rf_ReconThread(struct rf_recon_req *);
262 /* XXX what I want is: */
263 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
264 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
265 void rf_CopybackThread(RF_Raid_t *raidPtr);
266 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
267 void rf_buildroothack(void *);
268
269 RF_AutoConfig_t *rf_find_raid_components(void);
270 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
271 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
272 static int rf_reasonable_label(RF_ComponentLabel_t *);
273 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
274 int rf_set_autoconfig(RF_Raid_t *, int);
275 int rf_set_rootpartition(RF_Raid_t *, int);
276 void rf_release_all_vps(RF_ConfigSet_t *);
277 void rf_cleanup_config_set(RF_ConfigSet_t *);
278 int rf_have_enough_components(RF_ConfigSet_t *);
279 int rf_auto_config_set(RF_ConfigSet_t *, int *);
280
281 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
282 allow autoconfig to take place.
283 Note that this is overridden by having
284 RAID_AUTOCONFIG as an option in the
285 kernel config file. */
286
287 void
288 raidattach(num)
289 int num;
290 {
291 int raidID;
292 int i, rc;
293 RF_AutoConfig_t *ac_list; /* autoconfig list */
294 RF_ConfigSet_t *config_sets;
295
296 #ifdef DEBUG
297 printf("raidattach: Asked for %d units\n", num);
298 #endif
299
300 if (num <= 0) {
301 #ifdef DIAGNOSTIC
302 panic("raidattach: count <= 0");
303 #endif
304 return;
305 }
306 /* This is where all the initialization stuff gets done. */
307
308 numraid = num;
309
310 /* Make some space for requested number of units... */
311
312 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
313 if (raidPtrs == NULL) {
314 panic("raidPtrs is NULL!!\n");
315 }
316
317 rc = rf_mutex_init(&rf_sparet_wait_mutex);
318 if (rc) {
319 RF_PANIC();
320 }
321
322 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
323
324 for (i = 0; i < num; i++)
325 raidPtrs[i] = NULL;
326 rc = rf_BootRaidframe();
327 if (rc == 0)
328 printf("Kernelized RAIDframe activated\n");
329 else
330 panic("Serious error booting RAID!!\n");
331
332 /* put together some data structures like the CCD device does. This
333 * lets us lock the device and what-not when it gets opened. */
334
335 raid_softc = (struct raid_softc *)
336 malloc(num * sizeof(struct raid_softc),
337 M_RAIDFRAME, M_NOWAIT);
338 if (raid_softc == NULL) {
339 printf("WARNING: no memory for RAIDframe driver\n");
340 return;
341 }
342
343 memset(raid_softc, 0, num * sizeof(struct raid_softc));
344
345 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
346 M_RAIDFRAME, M_NOWAIT);
347 if (raidrootdev == NULL) {
348 panic("No memory for RAIDframe driver!!?!?!\n");
349 }
350
351 for (raidID = 0; raidID < num; raidID++) {
352 BUFQ_INIT(&raid_softc[raidID].buf_queue);
353
354 raidrootdev[raidID].dv_class = DV_DISK;
355 raidrootdev[raidID].dv_cfdata = NULL;
356 raidrootdev[raidID].dv_unit = raidID;
357 raidrootdev[raidID].dv_parent = NULL;
358 raidrootdev[raidID].dv_flags = 0;
359 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
360
361 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
362 (RF_Raid_t *));
363 if (raidPtrs[raidID] == NULL) {
364 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
365 numraid = raidID;
366 return;
367 }
368 }
369
370 #if RAID_AUTOCONFIG
371 raidautoconfig = 1;
372 #endif
373
374 if (raidautoconfig) {
375 /* 1. locate all RAID components on the system */
376
377 #if DEBUG
378 printf("Searching for raid components...\n");
379 #endif
380 ac_list = rf_find_raid_components();
381
382 /* 2. sort them into their respective sets */
383
384 config_sets = rf_create_auto_sets(ac_list);
385
386 /* 3. evaluate each set and configure the valid ones
387 This gets done in rf_buildroothack() */
388
389 /* schedule the creation of the thread to do the
390 "/ on RAID" stuff */
391
392 kthread_create(rf_buildroothack,config_sets);
393
394 #if 0
395 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
396 #endif
397 }
398
399 }
400
401 void
402 rf_buildroothack(arg)
403 void *arg;
404 {
405 RF_ConfigSet_t *config_sets = arg;
406 RF_ConfigSet_t *cset;
407 RF_ConfigSet_t *next_cset;
408 int retcode;
409 int raidID;
410 int rootID;
411 int num_root;
412
413 rootID = 0;
414 num_root = 0;
415 cset = config_sets;
416 while(cset != NULL ) {
417 next_cset = cset->next;
418 if (rf_have_enough_components(cset) &&
419 cset->ac->clabel->autoconfigure==1) {
420 retcode = rf_auto_config_set(cset,&raidID);
421 if (!retcode) {
422 if (cset->rootable) {
423 rootID = raidID;
424 num_root++;
425 }
426 } else {
427 /* The autoconfig didn't work :( */
428 #if DEBUG
429 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
430 #endif
431 rf_release_all_vps(cset);
432 }
433 } else {
434 /* we're not autoconfiguring this set...
435 release the associated resources */
436 rf_release_all_vps(cset);
437 }
438 /* cleanup */
439 rf_cleanup_config_set(cset);
440 cset = next_cset;
441 }
442 if (boothowto & RB_ASKNAME) {
443 /* We don't auto-config... */
444 } else {
445 /* They didn't ask, and we found something bootable... */
446
447 if (num_root == 1) {
448 booted_device = &raidrootdev[rootID];
449 } else if (num_root > 1) {
450 /* we can't guess.. require the user to answer... */
451 boothowto |= RB_ASKNAME;
452 }
453 }
454 }
455
456
457 int
458 raidsize(dev)
459 dev_t dev;
460 {
461 #if 1 /* XXXthorpej */
462 return (-1);
463 #else
464 struct raid_softc *rs;
465 struct disklabel *lp;
466 int part, unit, omask, size;
467
468 unit = raidunit(dev);
469 if (unit >= numraid)
470 return (-1);
471 rs = &raid_softc[unit];
472
473 if ((rs->sc_flags & RAIDF_INITED) == 0)
474 return (-1);
475
476 part = DISKPART(dev);
477 omask = rs->sc_dkdev.dk_openmask & (1 << part);
478 lp = rs->sc_dkdev.dk_label;
479
480 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
481 return (-1);
482
483 if (lp->d_partitions[part].p_fstype != FS_SWAP)
484 size = -1;
485 else
486 size = lp->d_partitions[part].p_size *
487 (lp->d_secsize / DEV_BSIZE);
488
489 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
490 return (-1);
491
492 return (size);
493 #endif
494 }
495
496 int
497 raiddump(dev, blkno, va, size)
498 dev_t dev;
499 daddr_t blkno;
500 caddr_t va;
501 size_t size;
502 {
503 /* Not implemented. */
504 return ENXIO;
505 }
506
507 /* ARGSUSED */
508 int
509 raidopen(devvp, flags, fmt, p)
510 struct vnode *devvp;
511 int flags, fmt;
512 struct proc *p;
513 {
514 int unit = raidunit(vdev_rdev(devvp));
515 struct raid_softc *rs;
516 struct disklabel *lp;
517 int part, pmask;
518 int error = 0;
519
520 if (unit >= numraid)
521 return (ENXIO);
522 rs = &raid_softc[unit];
523
524 vdev_setprivdata(devvp, rs);
525
526 if ((error = raidlock(rs)) != 0)
527 return (error);
528 lp = rs->sc_dkdev.dk_label;
529
530 part = DISKPART(vdev_rdev(devvp));
531 pmask = (1 << part);
532
533 db1_printf(("Opening raid device number: %d partition: %d\n",
534 unit, part));
535
536
537 if ((rs->sc_flags & RAIDF_INITED) &&
538 (rs->sc_dkdev.dk_openmask == 0))
539 raidgetdisklabel(devvp);
540
541 /* make sure that this partition exists */
542
543 if (part != RAW_PART) {
544 db1_printf(("Not a raw partition..\n"));
545 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
546 ((part >= lp->d_npartitions) ||
547 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
548 error = ENXIO;
549 raidunlock(rs);
550 db1_printf(("Bailing out...\n"));
551 return (error);
552 }
553 }
554 /* Prevent this unit from being unconfigured while open. */
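	/* dk_bopenmask and dk_copenmask record which partitions are open via
	   the block and character devices respectively; dk_openmask is kept
	   as the union of the two (see below). */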
555 switch (fmt) {
556 case S_IFCHR:
557 rs->sc_dkdev.dk_copenmask |= pmask;
558 break;
559
560 case S_IFBLK:
561 rs->sc_dkdev.dk_bopenmask |= pmask;
562 break;
563 }
564
565 if ((rs->sc_dkdev.dk_openmask == 0) &&
566 ((rs->sc_flags & RAIDF_INITED) != 0)) {
567 /* First one... mark things as dirty... Note that we *MUST*
568 have done a configure before this. I DO NOT WANT TO BE
569 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
570 THAT THEY BELONG TOGETHER!!!!! */
571 /* XXX should check to see if we're only open for reading
572 here... If so, we needn't do this, but then need some
573 other way of keeping track of what's happened.. */
574
575 rf_markalldirty( raidPtrs[unit] );
576 }
577
578
579 rs->sc_dkdev.dk_openmask =
580 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
581
582 raidunlock(rs);
583
584 return (error);
585
586
587 }
588
589 /* ARGSUSED */
590 int
591 raidclose(devvp, flags, fmt, p)
592 struct vnode *devvp;
593 int flags, fmt;
594 struct proc *p;
595 {
596 struct raid_softc *rs;
597 int error = 0;
598 int part;
599 dev_t rdev;
600
601 rs = vdev_privdata(devvp);
602 rdev = vdev_rdev(devvp);
603
604 if ((error = raidlock(rs)) != 0)
605 return (error);
606
607 part = DISKPART(rdev);
608
609 /* ...that much closer to allowing unconfiguration... */
610 switch (fmt) {
611 case S_IFCHR:
612 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
613 break;
614
615 case S_IFBLK:
616 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
617 break;
618 }
619 rs->sc_dkdev.dk_openmask =
620 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
621
622 if ((rs->sc_dkdev.dk_openmask == 0) &&
623 ((rs->sc_flags & RAIDF_INITED) != 0)) {
624 /* Last one... device is not unconfigured yet.
625 If RAIDF_INITED is not set, device shutdown has
626 already taken care of setting the clean bits;
627 otherwise mark things as clean here... */
628 #if 0
629 printf("Last one on raid%d. Updating status.\n",
630 DISKUNIT(vdev_rdev(devvp)));
631 #endif
632 rf_update_component_labels(raidPtrs[DISKUNIT(rdev)],
633 RF_FINAL_COMPONENT_UPDATE);
634 if (doing_shutdown) {
635 /* last one, and we're going down, so
636 lights out for this RAID set too. */
637 error = rf_Shutdown(raidPtrs[DISKUNIT(rdev)]);
638 pool_destroy(&rs->sc_cbufpool);
639
640 /* It's no longer initialized... */
641 rs->sc_flags &= ~RAIDF_INITED;
642
643 /* Detach the disk. */
644 disk_detach(&rs->sc_dkdev);
645 }
646 }
647
648 raidunlock(rs);
649 return (0);
650 }
651
652 void
653 raidstrategy(bp)
654 struct buf *bp;
655 {
656 int s;
657
658 unsigned int raidID;
659 struct raid_softc *rs;
660 RF_Raid_t *raidPtr;
661 struct disklabel *lp;
662 int wlabel;
663 dev_t rdev;
664
665 rdev = vdev_rdev(bp->b_devvp);
666 rs = vdev_privdata(bp->b_devvp);
667
668 raidID = DISKUNIT(rdev);
669
670 if ((rs->sc_flags & RAIDF_INITED) ==0) {
671 bp->b_error = ENXIO;
672 bp->b_flags |= B_ERROR;
673 bp->b_resid = bp->b_bcount;
674 biodone(bp);
675 return;
676 }
677 raidPtr = raidPtrs[raidID];
678 if (raidPtr == NULL || raidPtr->valid == 0) {
679 bp->b_error = ENODEV;
680 bp->b_flags |= B_ERROR;
681 bp->b_resid = bp->b_bcount;
682 biodone(bp);
683 return;
684 }
685 if (bp->b_bcount == 0) {
686 db1_printf(("b_bcount is zero..\n"));
687 biodone(bp);
688 return;
689 }
690 lp = rs->sc_dkdev.dk_label;
691
692 /*
693 * Do bounds checking and adjust transfer. If there's an
694 * error, the bounds check will flag that for us.
695 */
696
697 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
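	/* If bounds_check_with_label() returns 0 or less, the transfer has
	   either been completed (it started at the end of the partition) or
	   flagged with an error, so all that is left is to biodone() it. */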
698 if (DISKPART(rdev) != RAW_PART &&
699 (bp->b_flags & B_DKLABEL) == 0) {
700 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
701 db1_printf(("Bounds check failed!!:%d %d\n",
702 (int) bp->b_blkno, (int) wlabel));
703 biodone(bp);
704 return;
705 }
706 }
707 s = splbio();
708
709 bp->b_resid = 0;
710
711 /* stuff it onto our queue */
712 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
713
714 raidstart(raidPtrs[raidID]);
715
716 splx(s);
717 }
718
719 /* ARGSUSED */
720 int
721 raidread(devvp, uio, flags)
722 struct vnode *devvp;
723 struct uio *uio;
724 int flags;
725 {
726 struct raid_softc *rs;
727
728 rs = vdev_privdata(devvp);
729
730 if ((rs->sc_flags & RAIDF_INITED) == 0)
731 return (ENXIO);
732
733 db1_printf(("raidread: unit: %d partition: %d\n",
734 DISKUNIT(vdev_rdev(devvp)), DISKPART(vdev_rdev(devvp))));
735
736 return (physio(raidstrategy, NULL, devvp, B_READ, minphys, uio));
737 }
738
739 /* ARGSUSED */
740 int
741 raidwrite(devvp, uio, flags)
742 struct vnode *devvp;
743 struct uio *uio;
744 int flags;
745 {
746 struct raid_softc *rs;
747
748 rs = vdev_privdata(devvp);
749
750 if ((rs->sc_flags & RAIDF_INITED) == 0)
751 return (ENXIO);
752
753 db1_printf(("raidwrite\n"));
754
755 return (physio(raidstrategy, NULL, devvp, B_WRITE, minphys, uio));
756 }
757
758 int
759 raidioctl(devvp, cmd, data, flag, p)
760 struct vnode *devvp;
761 u_long cmd;
762 caddr_t data;
763 int flag;
764 struct proc *p;
765 {
766 struct raid_softc *rs;
767 int error = 0;
768 int part, pmask;
769 RF_Config_t *k_cfg, *u_cfg;
770 RF_Raid_t *raidPtr;
771 RF_RaidDisk_t *diskPtr;
772 RF_AccTotals_t *totals;
773 RF_DeviceConfig_t *d_cfg, **ucfgp;
774 u_char *specific_buf;
775 int retcode = 0;
776 int row;
777 int column;
778 struct rf_recon_req *rrcopy, *rr;
779 RF_ComponentLabel_t *clabel;
780 RF_ComponentLabel_t ci_label;
781 RF_ComponentLabel_t **clabel_ptr;
782 RF_SingleComponent_t *sparePtr,*componentPtr;
783 RF_SingleComponent_t hot_spare;
784 RF_SingleComponent_t component;
785 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
786 int i, j, d;
787 dev_t rdev;
788 #ifdef __HAVE_OLD_DISKLABEL
789 struct disklabel newlabel;
790 #endif
791
792 rdev = vdev_rdev(devvp);
793 rs = vdev_privdata(devvp);
794
795 raidPtr = raidPtrs[DISKUNIT(rdev)];
796
797 db1_printf(("raidioctl: 0x%x %d %d %ld\n", rdev,
798 DISKPART(rdev), DISKUNIT(rdev), cmd));
799
800 /* Must be open for writes for these commands... */
801 switch (cmd) {
802 case DIOCSDINFO:
803 case DIOCWDINFO:
804 #ifdef __HAVE_OLD_DISKLABEL
805 case ODIOCWDINFO:
806 case ODIOCSDINFO:
807 #endif
808 case DIOCWLABEL:
809 if ((flag & FWRITE) == 0)
810 return (EBADF);
811 }
812
813 /* Must be initialized for these... */
814 switch (cmd) {
815 case DIOCGDINFO:
816 case DIOCSDINFO:
817 case DIOCWDINFO:
818 #ifdef __HAVE_OLD_DISKLABEL
819 case ODIOCGDINFO:
820 case ODIOCWDINFO:
821 case ODIOCSDINFO:
822 case ODIOCGDEFLABEL:
823 #endif
824 case DIOCGPART:
825 case DIOCWLABEL:
826 case DIOCGDEFLABEL:
827 case RAIDFRAME_SHUTDOWN:
828 case RAIDFRAME_REWRITEPARITY:
829 case RAIDFRAME_GET_INFO:
830 case RAIDFRAME_RESET_ACCTOTALS:
831 case RAIDFRAME_GET_ACCTOTALS:
832 case RAIDFRAME_KEEP_ACCTOTALS:
833 case RAIDFRAME_GET_SIZE:
834 case RAIDFRAME_FAIL_DISK:
835 case RAIDFRAME_COPYBACK:
836 case RAIDFRAME_CHECK_RECON_STATUS:
837 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
838 case RAIDFRAME_GET_COMPONENT_LABEL:
839 case RAIDFRAME_SET_COMPONENT_LABEL:
840 case RAIDFRAME_ADD_HOT_SPARE:
841 case RAIDFRAME_REMOVE_HOT_SPARE:
842 case RAIDFRAME_INIT_LABELS:
843 case RAIDFRAME_REBUILD_IN_PLACE:
844 case RAIDFRAME_CHECK_PARITY:
845 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
846 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
847 case RAIDFRAME_CHECK_COPYBACK_STATUS:
848 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
849 case RAIDFRAME_SET_AUTOCONFIG:
850 case RAIDFRAME_SET_ROOT:
851 case RAIDFRAME_DELETE_COMPONENT:
852 case RAIDFRAME_INCORPORATE_HOT_SPARE:
853 if ((rs->sc_flags & RAIDF_INITED) == 0)
854 return (ENXIO);
855 }
856
857 switch (cmd) {
858
859 /* configure the system */
860 case RAIDFRAME_CONFIGURE:
861
862 if (raidPtr->valid) {
863 /* There is a valid RAID set running on this unit! */
864 printf("raid%d: Device already configured!\n",
865 DISKUNIT(rdev));
866 return(EINVAL);
867 }
868
869 /* copy-in the configuration information */
870 /* data points to a pointer to the configuration structure */
871
872 u_cfg = *((RF_Config_t **) data);
873 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
874 if (k_cfg == NULL) {
875 return (ENOMEM);
876 }
877 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
878 sizeof(RF_Config_t));
879 if (retcode) {
880 RF_Free(k_cfg, sizeof(RF_Config_t));
881 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
882 retcode));
883 return (retcode);
884 }
885 /* allocate a buffer for the layout-specific data, and copy it
886 * in */
887 if (k_cfg->layoutSpecificSize) {
888 if (k_cfg->layoutSpecificSize > 10000) {
889 /* sanity check */
890 RF_Free(k_cfg, sizeof(RF_Config_t));
891 return (EINVAL);
892 }
893 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
894 (u_char *));
895 if (specific_buf == NULL) {
896 RF_Free(k_cfg, sizeof(RF_Config_t));
897 return (ENOMEM);
898 }
899 retcode = copyin(k_cfg->layoutSpecific,
900 (caddr_t) specific_buf,
901 k_cfg->layoutSpecificSize);
902 if (retcode) {
903 RF_Free(k_cfg, sizeof(RF_Config_t));
904 RF_Free(specific_buf,
905 k_cfg->layoutSpecificSize);
906 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
907 retcode));
908 return (retcode);
909 }
910 } else
911 specific_buf = NULL;
912 k_cfg->layoutSpecific = specific_buf;
913
914 /* should do some kind of sanity check on the configuration.
915 * Store the sum of all the bytes in the last byte? */
916
917 /* configure the system */
918
919 /*
920 * Clear the entire RAID descriptor, just to make sure
921 * there is no stale data left in the case of a
922 * reconfiguration
923 */
924 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
925 raidPtr->raidid = DISKUNIT(rdev);
926
927 retcode = rf_Configure(raidPtr, k_cfg, NULL);
928
929 if (retcode == 0) {
930
931 /* allow this many simultaneous IO's to
932 this RAID device */
933 raidPtr->openings = RAIDOUTSTANDING;
934
935 raidinit(raidPtr);
936 rf_markalldirty(raidPtr);
937 }
938 /* free the buffers. No return code here. */
939 if (k_cfg->layoutSpecificSize) {
940 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
941 }
942 RF_Free(k_cfg, sizeof(RF_Config_t));
943
944 return (retcode);
945
946 /* shutdown the system */
947 case RAIDFRAME_SHUTDOWN:
948
949 if ((error = raidlock(rs)) != 0)
950 return (error);
951
952 /*
953 * If somebody has a partition mounted, we shouldn't
954 * shutdown.
955 */
956
957 part = DISKPART(rdev);
958 pmask = (1 << part);
959 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
960 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
961 (rs->sc_dkdev.dk_copenmask & pmask))) {
962 raidunlock(rs);
963 return (EBUSY);
964 }
965
966 retcode = rf_Shutdown(raidPtr);
967
968 pool_destroy(&rs->sc_cbufpool);
969
970 /* It's no longer initialized... */
971 rs->sc_flags &= ~RAIDF_INITED;
972
973 /* Detach the disk. */
974 disk_detach(&rs->sc_dkdev);
975
976 raidunlock(rs);
977
978 return (retcode);
979 case RAIDFRAME_GET_COMPONENT_LABEL:
980 clabel_ptr = (RF_ComponentLabel_t **) data;
981 /* need to read the component label for the disk indicated
982 by row,column in clabel */
983
984 /* For practice, let's get it directly from disk, rather
985 than from the in-core copy */
986 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
987 (RF_ComponentLabel_t *));
988 if (clabel == NULL)
989 return (ENOMEM);
990
991 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
992
993 retcode = copyin( *clabel_ptr, clabel,
994 sizeof(RF_ComponentLabel_t));
995
996 if (retcode) {
997 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
998 return(retcode);
999 }
1000
1001 row = clabel->row;
1002 column = clabel->column;
1003
1004 if ((row < 0) || (row >= raidPtr->numRow) ||
1005 (column < 0) || (column >= raidPtr->numCol +
1006 raidPtr->numSpare)) {
1007 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1008 return(EINVAL);
1009 }
1010
1011 raidread_component_label(raidPtr->raid_cinfo[row][column].ci_vp,
1012 clabel);
1013
1014 retcode = copyout((caddr_t) clabel,
1015 (caddr_t) *clabel_ptr,
1016 sizeof(RF_ComponentLabel_t));
1017 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1018 return (retcode);
1019
1020 case RAIDFRAME_SET_COMPONENT_LABEL:
1021 clabel = (RF_ComponentLabel_t *) data;
1022
1023 /* XXX check the label for valid stuff... */
1024 /* Note that some things *should not* get modified --
1025 the user should be re-initing the labels instead of
1026 trying to patch things.
1027 */
1028
1029 printf("Got component label:\n");
1030 printf("Version: %d\n",clabel->version);
1031 printf("Serial Number: %d\n",clabel->serial_number);
1032 printf("Mod counter: %d\n",clabel->mod_counter);
1033 printf("Row: %d\n", clabel->row);
1034 printf("Column: %d\n", clabel->column);
1035 printf("Num Rows: %d\n", clabel->num_rows);
1036 printf("Num Columns: %d\n", clabel->num_columns);
1037 printf("Clean: %d\n", clabel->clean);
1038 printf("Status: %d\n", clabel->status);
1039
1040 row = clabel->row;
1041 column = clabel->column;
1042
1043 if ((row < 0) || (row >= raidPtr->numRow) ||
1044 (column < 0) || (column >= raidPtr->numCol)) {
1045 return(EINVAL);
1046 }
1047
1048 /* XXX this isn't allowed to do anything for now :-) */
1049
1050 /* XXX and before it is, we need to fill in the rest
1051 of the fields!?!?!?! */
1052 #if 0
1053 raidwrite_component_label(
1054 raidPtr->raid_cinfo[row][column].ci_vp, clabel);
1055 #endif
1056 return (0);
1057
1058 case RAIDFRAME_INIT_LABELS:
1059 clabel = (RF_ComponentLabel_t *) data;
1060 /*
1061 we only want the serial number from
1062 the above. We get all the rest of the information
1063 from the config that was used to create this RAID
1064 set.
1065 */
1066
1067 raidPtr->serial_number = clabel->serial_number;
1068
1069 raid_init_component_label(raidPtr, &ci_label);
1070 ci_label.serial_number = clabel->serial_number;
1071
1072 for(row=0;row<raidPtr->numRow;row++) {
1073 ci_label.row = row;
1074 for(column=0;column<raidPtr->numCol;column++) {
1075 diskPtr = &raidPtr->Disks[row][column];
1076 if (!RF_DEAD_DISK(diskPtr->status)) {
1077 ci_label.partitionSize = diskPtr->partitionSize;
1078 ci_label.column = column;
1079 raidwrite_component_label(
1080 raidPtr->raid_cinfo[row][column].ci_vp,
1081 &ci_label );
1082 }
1083 }
1084 }
1085
1086 return (retcode);
1087 case RAIDFRAME_SET_AUTOCONFIG:
1088 d = rf_set_autoconfig(raidPtr, *(int *) data);
1089 printf("New autoconfig value is: %d\n", d);
1090 *(int *) data = d;
1091 return (retcode);
1092
1093 case RAIDFRAME_SET_ROOT:
1094 d = rf_set_rootpartition(raidPtr, *(int *) data);
1095 printf("New rootpartition value is: %d\n", d);
1096 *(int *) data = d;
1097 return (retcode);
1098
1099 /* initialize all parity */
1100 case RAIDFRAME_REWRITEPARITY:
1101
1102 if (raidPtr->Layout.map->faultsTolerated == 0) {
1103 /* Parity for RAID 0 is trivially correct */
1104 raidPtr->parity_good = RF_RAID_CLEAN;
1105 return(0);
1106 }
1107
1108 if (raidPtr->parity_rewrite_in_progress == 1) {
1109 /* Re-write is already in progress! */
1110 return(EINVAL);
1111 }
1112
1113 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1114 rf_RewriteParityThread,
1115 raidPtr,"raid_parity");
1116 return (retcode);
1117
1118
1119 case RAIDFRAME_ADD_HOT_SPARE:
1120 sparePtr = (RF_SingleComponent_t *) data;
1121 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1122 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1123 return(retcode);
1124
1125 case RAIDFRAME_REMOVE_HOT_SPARE:
1126 return(retcode);
1127
1128 case RAIDFRAME_DELETE_COMPONENT:
1129 componentPtr = (RF_SingleComponent_t *)data;
1130 memcpy( &component, componentPtr,
1131 sizeof(RF_SingleComponent_t));
1132 retcode = rf_delete_component(raidPtr, &component);
1133 return(retcode);
1134
1135 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1136 componentPtr = (RF_SingleComponent_t *)data;
1137 memcpy( &component, componentPtr,
1138 sizeof(RF_SingleComponent_t));
1139 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1140 return(retcode);
1141
1142 case RAIDFRAME_REBUILD_IN_PLACE:
1143
1144 if (raidPtr->Layout.map->faultsTolerated == 0) {
1145 /* Can't do this on a RAID 0!! */
1146 return(EINVAL);
1147 }
1148
1149 if (raidPtr->recon_in_progress == 1) {
1150 /* a reconstruct is already in progress! */
1151 return(EINVAL);
1152 }
1153
1154 componentPtr = (RF_SingleComponent_t *) data;
1155 memcpy( &component, componentPtr,
1156 sizeof(RF_SingleComponent_t));
1157 row = component.row;
1158 column = component.column;
1159 printf("Rebuild: %d %d\n",row, column);
1160 if ((row < 0) || (row >= raidPtr->numRow) ||
1161 (column < 0) || (column >= raidPtr->numCol)) {
1162 return(EINVAL);
1163 }
1164
1165 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1166 if (rrcopy == NULL)
1167 return(ENOMEM);
1168
1169 rrcopy->raidPtr = (void *) raidPtr;
1170 rrcopy->row = row;
1171 rrcopy->col = column;
1172
1173 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1174 rf_ReconstructInPlaceThread,
1175 rrcopy,"raid_reconip");
1176 return(retcode);
1177
1178 case RAIDFRAME_GET_INFO:
1179 if (!raidPtr->valid)
1180 return (ENODEV);
1181 ucfgp = (RF_DeviceConfig_t **) data;
1182 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1183 (RF_DeviceConfig_t *));
1184 if (d_cfg == NULL)
1185 return (ENOMEM);
1186 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1187 d_cfg->rows = raidPtr->numRow;
1188 d_cfg->cols = raidPtr->numCol;
1189 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1190 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1191 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1192 return (ENOMEM);
1193 }
1194 d_cfg->nspares = raidPtr->numSpare;
1195 if (d_cfg->nspares >= RF_MAX_DISKS) {
1196 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1197 return (ENOMEM);
1198 }
1199 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1200 d = 0;
1201 for (i = 0; i < d_cfg->rows; i++) {
1202 for (j = 0; j < d_cfg->cols; j++) {
1203 d_cfg->devs[d] = raidPtr->Disks[i][j];
1204 d++;
1205 }
1206 }
1207 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1208 d_cfg->spares[i] = raidPtr->Disks[0][j];
1209 }
1210 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1211 sizeof(RF_DeviceConfig_t));
1212 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1213
1214 return (retcode);
1215
1216 case RAIDFRAME_CHECK_PARITY:
1217 *(int *) data = raidPtr->parity_good;
1218 return (0);
1219
1220 case RAIDFRAME_RESET_ACCTOTALS:
1221 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1222 return (0);
1223
1224 case RAIDFRAME_GET_ACCTOTALS:
1225 totals = (RF_AccTotals_t *) data;
1226 *totals = raidPtr->acc_totals;
1227 return (0);
1228
1229 case RAIDFRAME_KEEP_ACCTOTALS:
1230 raidPtr->keep_acc_totals = *(int *)data;
1231 return (0);
1232
1233 case RAIDFRAME_GET_SIZE:
1234 *(int *) data = raidPtr->totalSectors;
1235 return (0);
1236
1237 /* fail a disk & optionally start reconstruction */
1238 case RAIDFRAME_FAIL_DISK:
1239
1240 if (raidPtr->Layout.map->faultsTolerated == 0) {
1241 /* Can't do this on a RAID 0!! */
1242 return(EINVAL);
1243 }
1244
1245 rr = (struct rf_recon_req *) data;
1246
1247 if (rr->row < 0 || rr->row >= raidPtr->numRow
1248 || rr->col < 0 || rr->col >= raidPtr->numCol)
1249 return (EINVAL);
1250
1251 printf("raid%d: Failing the disk: row: %d col: %d\n",
1252 DISKUNIT(rdev), rr->row, rr->col);
1253
1254 /* make a copy of the recon request so that we don't rely on
1255 * the user's buffer */
1256 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1257 if (rrcopy == NULL)
1258 return(ENOMEM);
1259 bcopy(rr, rrcopy, sizeof(*rr));
1260 rrcopy->raidPtr = (void *) raidPtr;
1261
1262 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1263 rf_ReconThread,
1264 rrcopy,"raid_recon");
1265 return (0);
1266
1267 /* invoke a copyback operation after recon on whatever disk
1268 * needs it, if any */
1269 case RAIDFRAME_COPYBACK:
1270
1271 if (raidPtr->Layout.map->faultsTolerated == 0) {
1272 /* This makes no sense on a RAID 0!! */
1273 return(EINVAL);
1274 }
1275
1276 if (raidPtr->copyback_in_progress == 1) {
1277 /* Copyback is already in progress! */
1278 return(EINVAL);
1279 }
1280
1281 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1282 rf_CopybackThread,
1283 raidPtr,"raid_copyback");
1284 return (retcode);
1285
1286 /* return the percentage completion of reconstruction */
1287 case RAIDFRAME_CHECK_RECON_STATUS:
1288 if (raidPtr->Layout.map->faultsTolerated == 0) {
1289 /* This makes no sense on a RAID 0, so tell the
1290 user it's done. */
1291 *(int *) data = 100;
1292 return(0);
1293 }
1294 row = 0; /* XXX we only consider a single row... */
1295 if (raidPtr->status[row] != rf_rs_reconstructing)
1296 *(int *) data = 100;
1297 else
1298 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1299 return (0);
1300 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1301 progressInfoPtr = (RF_ProgressInfo_t **) data;
1302 row = 0; /* XXX we only consider a single row... */
1303 if (raidPtr->status[row] != rf_rs_reconstructing) {
1304 progressInfo.remaining = 0;
1305 progressInfo.completed = 100;
1306 progressInfo.total = 100;
1307 } else {
1308 progressInfo.total =
1309 raidPtr->reconControl[row]->numRUsTotal;
1310 progressInfo.completed =
1311 raidPtr->reconControl[row]->numRUsComplete;
1312 progressInfo.remaining = progressInfo.total -
1313 progressInfo.completed;
1314 }
1315 retcode = copyout((caddr_t) &progressInfo,
1316 (caddr_t) *progressInfoPtr,
1317 sizeof(RF_ProgressInfo_t));
1318 return (retcode);
1319
1320 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1321 if (raidPtr->Layout.map->faultsTolerated == 0) {
1322 /* This makes no sense on a RAID 0, so tell the
1323 user it's done. */
1324 *(int *) data = 100;
1325 return(0);
1326 }
1327 if (raidPtr->parity_rewrite_in_progress == 1) {
1328 *(int *) data = 100 *
1329 raidPtr->parity_rewrite_stripes_done /
1330 raidPtr->Layout.numStripe;
1331 } else {
1332 *(int *) data = 100;
1333 }
1334 return (0);
1335
1336 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1337 progressInfoPtr = (RF_ProgressInfo_t **) data;
1338 if (raidPtr->parity_rewrite_in_progress == 1) {
1339 progressInfo.total = raidPtr->Layout.numStripe;
1340 progressInfo.completed =
1341 raidPtr->parity_rewrite_stripes_done;
1342 progressInfo.remaining = progressInfo.total -
1343 progressInfo.completed;
1344 } else {
1345 progressInfo.remaining = 0;
1346 progressInfo.completed = 100;
1347 progressInfo.total = 100;
1348 }
1349 retcode = copyout((caddr_t) &progressInfo,
1350 (caddr_t) *progressInfoPtr,
1351 sizeof(RF_ProgressInfo_t));
1352 return (retcode);
1353
1354 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1355 if (raidPtr->Layout.map->faultsTolerated == 0) {
1356 /* This makes no sense on a RAID 0 */
1357 *(int *) data = 100;
1358 return(0);
1359 }
1360 if (raidPtr->copyback_in_progress == 1) {
1361 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1362 raidPtr->Layout.numStripe;
1363 } else {
1364 *(int *) data = 100;
1365 }
1366 return (0);
1367
1368 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1369 progressInfoPtr = (RF_ProgressInfo_t **) data;
1370 if (raidPtr->copyback_in_progress == 1) {
1371 progressInfo.total = raidPtr->Layout.numStripe;
1372 progressInfo.completed =
1373 raidPtr->copyback_stripes_done;
1374 progressInfo.remaining = progressInfo.total -
1375 progressInfo.completed;
1376 } else {
1377 progressInfo.remaining = 0;
1378 progressInfo.completed = 100;
1379 progressInfo.total = 100;
1380 }
1381 retcode = copyout((caddr_t) &progressInfo,
1382 (caddr_t) *progressInfoPtr,
1383 sizeof(RF_ProgressInfo_t));
1384 return (retcode);
1385
1386 /* the sparetable daemon calls this to wait for the kernel to
1387 * need a spare table. this ioctl does not return until a
1388 * spare table is needed. XXX -- calling mpsleep here in the
1389 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1390 * -- I should either compute the spare table in the kernel,
1391 * or have a different -- XXX XXX -- interface (a different
1392 * character device) for delivering the table -- XXX */
1393 #if 0
1394 case RAIDFRAME_SPARET_WAIT:
1395 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1396 while (!rf_sparet_wait_queue)
1397 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1398 waitreq = rf_sparet_wait_queue;
1399 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1400 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1401
1402 /* structure assignment */
1403 *((RF_SparetWait_t *) data) = *waitreq;
1404
1405 RF_Free(waitreq, sizeof(*waitreq));
1406 return (0);
1407
1408 /* wakes up a process waiting on SPARET_WAIT and puts an error
1409 * code in it that will cause the daemon to exit */
1410 case RAIDFRAME_ABORT_SPARET_WAIT:
1411 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1412 waitreq->fcol = -1;
1413 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1414 waitreq->next = rf_sparet_wait_queue;
1415 rf_sparet_wait_queue = waitreq;
1416 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1417 wakeup(&rf_sparet_wait_queue);
1418 return (0);
1419
1420 /* used by the spare table daemon to deliver a spare table
1421 * into the kernel */
1422 case RAIDFRAME_SEND_SPARET:
1423
1424 /* install the spare table */
1425 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1426
1427 /* respond to the requestor. the return status of the spare
1428 * table installation is passed in the "fcol" field */
1429 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1430 waitreq->fcol = retcode;
1431 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1432 waitreq->next = rf_sparet_resp_queue;
1433 rf_sparet_resp_queue = waitreq;
1434 wakeup(&rf_sparet_resp_queue);
1435 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1436
1437 return (retcode);
1438 #endif
1439
1440 default:
1441 break; /* fall through to the os-specific code below */
1442
1443 }
1444
1445 if (!raidPtr->valid)
1446 return (EINVAL);
1447
1448 /*
1449 * Add support for "regular" device ioctls here.
1450 */
1451
1452 switch (cmd) {
1453 case DIOCGDINFO:
1454 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1455 break;
1456 #ifdef __HAVE_OLD_DISKLABEL
1457 case ODIOCGDINFO:
1458 newlabel = *(rs->sc_dkdev.dk_label);
1459 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1460 return ENOTTY;
1461 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1462 break;
1463 #endif
1464
1465 case DIOCGPART:
1466 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1467 ((struct partinfo *) data)->part =
1468 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(rdev)];
1469 break;
1470
1471 case DIOCWDINFO:
1472 case DIOCSDINFO:
1473 #ifdef __HAVE_OLD_DISKLABEL
1474 case ODIOCWDINFO:
1475 case ODIOCSDINFO:
1476 #endif
1477 {
1478 struct disklabel *lp;
1479 #ifdef __HAVE_OLD_DISKLABEL
1480 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1481 memset(&newlabel, 0, sizeof newlabel);
1482 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1483 lp = &newlabel;
1484 } else
1485 #endif
1486 lp = (struct disklabel *)data;
1487
1488 if ((error = raidlock(rs)) != 0)
1489 return (error);
1490
1491 rs->sc_flags |= RAIDF_LABELLING;
1492
1493 error = setdisklabel(rs->sc_dkdev.dk_label,
1494 lp, 0, rs->sc_dkdev.dk_cpulabel);
1495 if (error == 0) {
1496 if (cmd == DIOCWDINFO
1497 #ifdef __HAVE_OLD_DISKLABEL
1498 || cmd == ODIOCWDINFO
1499 #endif
1500 )
1501 error = writedisklabel(devvp, raidstrategy,
1502 rs->sc_dkdev.dk_label,
1503 rs->sc_dkdev.dk_cpulabel);
1504 }
1505 rs->sc_flags &= ~RAIDF_LABELLING;
1506
1507 raidunlock(rs);
1508
1509 if (error)
1510 return (error);
1511 break;
1512 }
1513
1514 case DIOCWLABEL:
1515 if (*(int *) data != 0)
1516 rs->sc_flags |= RAIDF_WLABEL;
1517 else
1518 rs->sc_flags &= ~RAIDF_WLABEL;
1519 break;
1520
1521 case DIOCGDEFLABEL:
1522 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1523 break;
1524
1525 #ifdef __HAVE_OLD_DISKLABEL
1526 case ODIOCGDEFLABEL:
1527 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1528 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1529 return ENOTTY;
1530 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1531 break;
1532 #endif
1533
1534 default:
1535 retcode = ENOTTY;
1536 }
1537 return (retcode);
1538 }
1539
1540
1541 /* raidinit -- complete the rest of the initialization for the
1542 RAIDframe device. */
1543
1544
1545 static void
1546 raidinit(raidPtr)
1547 RF_Raid_t *raidPtr;
1548 {
1549 struct raid_softc *rs;
1550 int unit;
1551
1552 unit = raidPtr->raidid;
1553
1554 rs = &raid_softc[unit];
1555 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1556 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1557
1558
1559 /* XXX should check return code first... */
1560 rs->sc_flags |= RAIDF_INITED;
1561
1562 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1563
1564 rs->sc_dkdev.dk_name = rs->sc_xname;
1565
1566 /* disk_attach actually creates space for the CPU disklabel, among
1567 * other things, so it's critical to call this *BEFORE* we try putzing
1568 * with disklabels. */
1569
1570 disk_attach(&rs->sc_dkdev);
1571
1572 /* XXX There may be a weird interaction here between this, and
1573 * protectedSectors, as used in RAIDframe. */
1574
1575 rs->sc_size = raidPtr->totalSectors;
1576
1577 }
1578
1579 /* wake up the daemon & tell it to get us a spare table
1580 * XXX
1581 * the entries in the queues should be tagged with the raidPtr
1582 * so that in the extremely rare case that two recons happen at once,
1583 * we know for which device we're requesting a spare table
1584 * XXX
1585 *
1586 * XXX This code is not currently used. GO
1587 */
1588 int
1589 rf_GetSpareTableFromDaemon(req)
1590 RF_SparetWait_t *req;
1591 {
1592 int retcode;
1593
1594 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1595 req->next = rf_sparet_wait_queue;
1596 rf_sparet_wait_queue = req;
1597 wakeup(&rf_sparet_wait_queue);
1598
1599 /* mpsleep unlocks the mutex */
1600 while (!rf_sparet_resp_queue) {
1601 tsleep(&rf_sparet_resp_queue, PRIBIO,
1602 "raidframe getsparetable", 0);
1603 }
1604 req = rf_sparet_resp_queue;
1605 rf_sparet_resp_queue = req->next;
1606 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1607
1608 retcode = req->fcol;
1609 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1610 * alloc'd */
1611 return (retcode);
1612 }
1613
1614 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1615 * bp & passes it down.
1616 * any calls originating in the kernel must use non-blocking I/O
1617 * do some extra sanity checking to return "appropriate" error values for
1618 * certain conditions (to make some standard utilities work)
1619 *
1620 * Formerly known as: rf_DoAccessKernel
1621 */
1622 void
1623 raidstart(raidPtr)
1624 RF_Raid_t *raidPtr;
1625 {
1626 RF_SectorCount_t num_blocks, pb, sum;
1627 RF_RaidAddr_t raid_addr;
1628 int retcode;
1629 struct partition *pp;
1630 daddr_t blocknum;
1631 int unit;
1632 struct raid_softc *rs;
1633 int do_async;
1634 struct buf *bp;
1635 dev_t rdev;
1636
1637 unit = raidPtr->raidid;
1638 rs = &raid_softc[unit];
1639
1640 /* quick check to see if anything has died recently */
1641 RF_LOCK_MUTEX(raidPtr->mutex);
1642 if (raidPtr->numNewFailures > 0) {
1643 rf_update_component_labels(raidPtr,
1644 RF_NORMAL_COMPONENT_UPDATE);
1645 raidPtr->numNewFailures--;
1646 }
1647 RF_UNLOCK_MUTEX(raidPtr->mutex);
1648
1649 /* Check to see if we're at the limit... */
1650 RF_LOCK_MUTEX(raidPtr->mutex);
1651 while (raidPtr->openings > 0) {
1652 RF_UNLOCK_MUTEX(raidPtr->mutex);
1653
1654 /* get the next item, if any, from the queue */
1655 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1656 /* nothing more to do */
1657 return;
1658 }
1659 rdev = vdev_rdev(bp->b_devvp);
1660 BUFQ_REMOVE(&rs->buf_queue, bp);
1661
1662 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1663 * partition.. Need to make it absolute to the underlying
1664 * device.. */
1665
1666 blocknum = bp->b_blkno;
1667 if (DISKPART(rdev) != RAW_PART &&
1668 (bp->b_flags & B_DKLABEL) == 0) {
1669 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(rdev)];
1670 blocknum += pp->p_offset;
1671 }
1672
1673 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1674 (int) blocknum));
1675
1676 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1677 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1678
1679 /* *THIS* is where we adjust what block we're going to...
1680 * but DO NOT TOUCH bp->b_blkno!!! */
1681 raid_addr = blocknum;
1682
1683 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1684 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1685 sum = raid_addr + num_blocks + pb;
1686 if (1 || rf_debugKernelAccess) {
1687 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1688 (int) raid_addr, (int) sum, (int) num_blocks,
1689 (int) pb, (int) bp->b_resid));
1690 }
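		/* Reject requests that run past the end of the RAID device;
		   the "sum < ..." comparisons also catch integer wrap-around
		   in the address arithmetic above. */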
1691 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1692 || (sum < num_blocks) || (sum < pb)) {
1693 bp->b_error = ENOSPC;
1694 bp->b_flags |= B_ERROR;
1695 bp->b_resid = bp->b_bcount;
1696 biodone(bp);
1697 RF_LOCK_MUTEX(raidPtr->mutex);
1698 continue;
1699 }
1700 /*
1701 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1702 */
1703
1704 if (bp->b_bcount & raidPtr->sectorMask) {
1705 bp->b_error = EINVAL;
1706 bp->b_flags |= B_ERROR;
1707 bp->b_resid = bp->b_bcount;
1708 biodone(bp);
1709 RF_LOCK_MUTEX(raidPtr->mutex);
1710 continue;
1711
1712 }
1713 db1_printf(("Calling DoAccess..\n"));
1714
1715
1716 RF_LOCK_MUTEX(raidPtr->mutex);
1717 raidPtr->openings--;
1718 RF_UNLOCK_MUTEX(raidPtr->mutex);
1719
1720 /*
1721 * Everything is async.
1722 */
1723 do_async = 1;
1724
1725 disk_busy(&rs->sc_dkdev);
1726
1727 /* XXX we're still at splbio() here... do we *really*
1728 need to be? */
1729
1730 /* don't ever condition on bp->b_flags & B_WRITE.
1731 * always condition on B_READ instead */
1732
1733 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1734 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1735 do_async, raid_addr, num_blocks,
1736 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1737
1738 RF_LOCK_MUTEX(raidPtr->mutex);
1739 }
1740 RF_UNLOCK_MUTEX(raidPtr->mutex);
1741 }
1742
1743 /*
1744 * invoke an I/O from kernel mode. Disk queue should be
1745 * locked upon entry
1746 */
1747 int
1748 rf_DispatchKernelIO(queue, req)
1749 RF_DiskQueue_t *queue;
1750 RF_DiskQueueData_t *req;
1751 {
1752 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1753 struct buf *bp;
1754 struct raidbuf *raidbp = NULL;
1755 struct raid_softc *rs;
1756 int unit;
1757 int s;
1758
1759 s=0;
1760 /* s = splbio();*/ /* want to test this */
1761 /* XXX along with the vnode, we also need the softc associated with
1762 * this device.. */
1763
1764 req->queue = queue;
1765
1766 unit = queue->raidPtr->raidid;
1767
1768 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1769
1770 if (unit >= numraid) {
1771 printf("Invalid unit number: %d %d\n", unit, numraid);
1772 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1773 }
1774 rs = &raid_softc[unit];
1775
1776 bp = req->bp;
1777 #if 1
1778 /* XXX when there is a physical disk failure, someone is passing us a
1779 * buffer that contains old stuff!! Attempt to deal with this problem
1780 * without taking a performance hit... (not sure where the real bug
1781 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1782
1783 if (bp->b_flags & B_ERROR) {
1784 bp->b_flags &= ~B_ERROR;
1785 }
1786 if (bp->b_error != 0) {
1787 bp->b_error = 0;
1788 }
1789 #endif
1790 raidbp = RAIDGETBUF(rs);
1791
1792 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1793
1794 /*
1795 * context for raidiodone
1796 */
1797 raidbp->rf_obp = bp;
1798 raidbp->req = req;
1799
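	/* Start the new buffer with an empty dependency list (b_dep is
	   used by soft updates). */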
1800 LIST_INIT(&raidbp->rf_buf.b_dep);
1801
1802 switch (req->type) {
1803 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1804 /* XXX need to do something extra here.. */
1805 /* I'm leaving this in, as I've never actually seen it used,
1806 * and I'd like folks to report it... GO */
1807 printf("WAKEUP CALLED\n");
1808 queue->numOutstanding++;
1809
1810 /* XXX need to glue the original buffer into this?? */
1811
1812 KernelWakeupFunc(&raidbp->rf_buf);
1813 break;
1814
1815 case RF_IO_TYPE_READ:
1816 case RF_IO_TYPE_WRITE:
1817
1818 if (req->tracerec) {
1819 RF_ETIMER_START(req->tracerec->timer);
1820 }
1821 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1822 op | bp->b_flags, req->sectorOffset, req->numSector,
1823 req->buf, KernelWakeupFunc, (void *) req,
1824 queue->raidPtr->logBytesPerSector, req->b_proc);
1825
1826 if (rf_debugKernelAccess) {
1827 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1828 (long) bp->b_blkno));
1829 }
1830 queue->numOutstanding++;
1831 queue->last_deq_sector = req->sectorOffset;
1832 /* acc wouldn't have been let in if there were any pending
1833 * reqs at any other priority */
1834 queue->curPriority = req->priority;
1835
1836 db1_printf(("Going for %c to unit %d row %d col %d\n",
1837 req->type, unit, queue->row, queue->col));
1838 db1_printf(("sector %d count %d (%d bytes) %d\n",
1839 (int) req->sectorOffset, (int) req->numSector,
1840 (int) (req->numSector <<
1841 queue->raidPtr->logBytesPerSector),
1842 (int) queue->raidPtr->logBytesPerSector));
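		/* Writes are accounted on the component vnode (v_numoutput)
		   before being handed to VOP_STRATEGY(). */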
1843 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1844 raidbp->rf_buf.b_vp->v_numoutput++;
1845 }
1846 VOP_STRATEGY(&raidbp->rf_buf);
1847
1848 break;
1849
1850 default:
1851 panic("bad req->type in rf_DispatchKernelIO");
1852 }
1853 db1_printf(("Exiting from DispatchKernelIO\n"));
1854 /* splx(s); */ /* want to test this */
1855 return (0);
1856 }
1857 /* this is the callback function associated with an I/O invoked from
1858 kernel code.
1859 */
1860 static void
1861 KernelWakeupFunc(vbp)
1862 struct buf *vbp;
1863 {
1864 RF_DiskQueueData_t *req = NULL;
1865 RF_DiskQueue_t *queue;
1866 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1867 struct buf *bp;
1868 struct raid_softc *rs;
1869 int unit;
1870 int s;
1871
1872 s = splbio();
1873 db1_printf(("recovering the request queue:\n"));
1874 req = raidbp->req;
1875
1876 bp = raidbp->rf_obp;
1877
1878 queue = (RF_DiskQueue_t *) req->queue;
1879
1880 if (raidbp->rf_buf.b_flags & B_ERROR) {
1881 bp->b_flags |= B_ERROR;
1882 bp->b_error = raidbp->rf_buf.b_error ?
1883 raidbp->rf_buf.b_error : EIO;
1884 }
1885
1886 /* XXX methinks this could be wrong... */
1887 #if 1
1888 bp->b_resid = raidbp->rf_buf.b_resid;
1889 #endif
1890
1891 if (req->tracerec) {
1892 RF_ETIMER_STOP(req->tracerec->timer);
1893 RF_ETIMER_EVAL(req->tracerec->timer);
1894 RF_LOCK_MUTEX(rf_tracing_mutex);
1895 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1896 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1897 req->tracerec->num_phys_ios++;
1898 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1899 }
1900 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1901
1902 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1903
1904
1905 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1906 * ballistic, and mark the component as hosed... */
1907
1908 if (bp->b_flags & B_ERROR) {
1909 /* Mark the disk as dead */
1910 /* but only mark it once... */
1911 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1912 rf_ds_optimal) {
1913 printf("raid%d: IO Error. Marking %s as failed.\n",
1914 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1915 queue->raidPtr->Disks[queue->row][queue->col].status =
1916 rf_ds_failed;
1917 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1918 queue->raidPtr->numFailures++;
1919 queue->raidPtr->numNewFailures++;
1920 } else { /* Disk is already dead... */
1921 /* printf("Disk already marked as dead!\n"); */
1922 }
1923
1924 }
1925
1926 rs = &raid_softc[unit];
1927 RAIDPUTBUF(rs, raidbp);
1928
1929 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1930 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1931
1932 splx(s);
1933 }
1934
1935 /*
1936 * initialize a buf structure for doing an I/O in the kernel.
1937 */
1938 static void
1939 InitBP(bp, b_vp, rw_flag, startSect, numSect, buf, cbFunc, cbArg,
1940 logBytesPerSector, b_proc)
1941 struct buf *bp;
1942 struct vnode *b_vp;
1943 unsigned rw_flag;
1944 RF_SectorNum_t startSect;
1945 RF_SectorCount_t numSect;
1946 caddr_t buf;
1947 void (*cbFunc) (struct buf *);
1948 void *cbArg;
1949 int logBytesPerSector;
1950 struct proc *b_proc;
1951 {
1952 /* bp->b_flags = B_PHYS | rw_flag; */
1953 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1954 bp->b_bcount = numSect << logBytesPerSector;
1955 bp->b_bufsize = bp->b_bcount;
1956 bp->b_error = 0;
1957 bp->b_devvp = b_vp;
1958 bp->b_data = buf;
1959 bp->b_blkno = startSect;
1960 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1961 if (bp->b_bcount == 0) {
1962 panic("bp->b_bcount is zero in InitBP!!\n");
1963 }
1964 bp->b_proc = b_proc;
1965 bp->b_iodone = cbFunc;
1966 bp->b_vp = b_vp;
1967
1968 }
1969
1970 static void
1971 raidgetdefaultlabel(raidPtr, rs, lp)
1972 RF_Raid_t *raidPtr;
1973 struct raid_softc *rs;
1974 struct disklabel *lp;
1975 {
1976 db1_printf(("Building a default label...\n"));
1977 memset(lp, 0, sizeof(*lp));
1978
1979 /* fabricate a label... */
1980 lp->d_secperunit = raidPtr->totalSectors;
1981 lp->d_secsize = raidPtr->bytesPerSector;
1982 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1983 lp->d_ntracks = 4 * raidPtr->numCol;
1984 lp->d_ncylinders = raidPtr->totalSectors /
1985 (lp->d_nsectors * lp->d_ntracks);
1986 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1987
1988 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1989 lp->d_type = DTYPE_RAID;
1990 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1991 lp->d_rpm = 3600;
1992 lp->d_interleave = 1;
1993 lp->d_flags = 0;
1994
1995 lp->d_partitions[RAW_PART].p_offset = 0;
1996 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1997 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1998 lp->d_npartitions = RAW_PART + 1;
1999
2000 lp->d_magic = DISKMAGIC;
2001 lp->d_magic2 = DISKMAGIC;
2002 lp->d_checksum = dkcksum(lp);
2003
2004 }
2005 /*
2006 * Read the disklabel from the raid device. If one is not present, fake one
2007 * up.
2008 */
2009 static void
2010 raidgetdisklabel(devvp)
2011 struct vnode *devvp;
2012 {
2013 struct raid_softc *rs;
2014 char *errstring;
2015 struct disklabel *lp;
2016 struct cpu_disklabel *clp;
2017 RF_Raid_t *raidPtr;
2018
2019 rs = vdev_privdata(devvp);
2020 lp = rs->sc_dkdev.dk_label;
2021 clp = rs->sc_dkdev.dk_cpulabel;
2022
2023 db1_printf(("Getting the disklabel...\n"));
2024
2025 memset(clp, 0, sizeof(*clp));
2026
2027 raidPtr = raidPtrs[DISKUNIT(vdev_rdev(devvp))];
2028
2029 raidgetdefaultlabel(raidPtr, rs, lp);
2030
2031 /*
2032 * Call the generic disklabel extraction routine.
2033 */
2034 errstring = readdisklabel(devvp, raidstrategy,
2035 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2036 if (errstring)
2037 raidmakedisklabel(rs);
2038 else {
2039 int i;
2040 struct partition *pp;
2041
2042 /*
2043 * Sanity check whether the found disklabel is valid.
2044 *
2045 * This is necessary since the total size of the raid device
2046 * may vary when the interleave is changed, even though exactly
2047 * the same components are used, and an old disklabel may be used
2048 * if one is found.
2049 */
2050 if (lp->d_secperunit != rs->sc_size)
2051 printf("WARNING: %s: "
2052 "total sector size in disklabel (%d) != "
2053 "the size of raid (%ld)\n", rs->sc_xname,
2054 lp->d_secperunit, (long) rs->sc_size);
2055 for (i = 0; i < lp->d_npartitions; i++) {
2056 pp = &lp->d_partitions[i];
2057 if (pp->p_offset + pp->p_size > rs->sc_size)
2058 printf("WARNING: %s: end of partition `%c' "
2059 "exceeds the size of raid (%ld)\n",
2060 rs->sc_xname, 'a' + i, (long) rs->sc_size);
2061 }
2062 }
2063 }
2064 /*
2065 * Take care of things one might want to take care of in the event
2066 * that a disklabel isn't present.
2067 */
2068 static void
2069 raidmakedisklabel(rs)
2070 struct raid_softc *rs;
2071 {
2072 struct disklabel *lp = rs->sc_dkdev.dk_label;
2073 db1_printf(("Making a label..\n"));
2074
2075 /*
2076 * For historical reasons, if there's no disklabel present
2077 * the raw partition must be marked FS_BSDFFS.
2078 */
2079
2080 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2081
2082 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2083
2084 lp->d_checksum = dkcksum(lp);
2085 }
2086 /*
2087 * Lookup the provided name in the filesystem. If the file exists,
2088 * is a valid block device, and isn't being used by anyone else,
2089 * set *vpp to the file's vnode.
2090 * You'll find the original of this in ccd.c
2091 */
2092 int
2093 raidlookup(path, p, vpp)
2094 char *path;
2095 struct proc *p;
2096 struct vnode **vpp; /* result */
2097 {
2098 struct nameidata nd;
2099 struct vnode *vp;
2100 struct vattr va;
2101 int error;
2102
2103 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2104 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2105 #ifdef DEBUG
2106 printf("RAIDframe: vn_open returned %d\n", error);
2107 #endif
2108 return (error);
2109 }
2110 vp = nd.ni_vp;
2111 if (vp->v_usecount > 1) {
2112 VOP_UNLOCK(vp, 0);
2113 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2114 return (EBUSY);
2115 }
2116 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2117 VOP_UNLOCK(vp, 0);
2118 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2119 return (error);
2120 }
2121 /* XXX: eventually we should handle VREG, too. */
2122 if (va.va_type != VBLK) {
2123 VOP_UNLOCK(vp, 0);
2124 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2125 return (ENOTBLK);
2126 }
2127 VOP_UNLOCK(vp, 0);
2128 *vpp = vp;
2129 return (0);
2130 }
2131 /*
2132 * Wait interruptibly for an exclusive lock.
2133 *
2134 * XXX
2135 * Several drivers do this; it should be abstracted and made MP-safe.
2136 * (Hmm... where have we seen this warning before :-> GO )
2137 */
2138 static int
2139 raidlock(rs)
2140 struct raid_softc *rs;
2141 {
2142 int error;
2143
2144 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2145 rs->sc_flags |= RAIDF_WANTED;
2146 if ((error =
2147 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2148 return (error);
2149 }
2150 rs->sc_flags |= RAIDF_LOCKED;
2151 return (0);
2152 }
2153 /*
2154 * Unlock and wake up any waiters.
2155 */
2156 static void
2157 raidunlock(rs)
2158 struct raid_softc *rs;
2159 {
2160
2161 rs->sc_flags &= ~RAIDF_LOCKED;
2162 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2163 rs->sc_flags &= ~RAIDF_WANTED;
2164 wakeup(rs);
2165 }
2166 }
2167
2168
2169 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2170 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2171
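/*
 * Mark the component label on the given component as clean, stamping
 * it with the supplied modification counter.
 */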
2172 int
2173 raidmarkclean(struct vnode *b_vp, int mod_counter)
2174 {
2175 RF_ComponentLabel_t clabel;
2176 raidread_component_label(b_vp, &clabel);
2177 clabel.mod_counter = mod_counter;
2178 clabel.clean = RF_RAID_CLEAN;
2179 raidwrite_component_label(b_vp, &clabel);
2180 return(0);
2181 }
2182
2183
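/*
 * Mark the component label on the given component as dirty, stamping
 * it with the supplied modification counter.
 */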
2184 int
2185 raidmarkdirty(struct vnode *b_vp, int mod_counter)
2186 {
2187 RF_ComponentLabel_t clabel;
2188 raidread_component_label(b_vp, &clabel);
2189 clabel.mod_counter = mod_counter;
2190 clabel.clean = RF_RAID_DIRTY;
2191 raidwrite_component_label(b_vp, &clabel);
2192 return(0);
2193 }
2194
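/*
 * Read the component label from the given component.  The label lives
 * RF_COMPONENT_INFO_OFFSET bytes into the component and occupies
 * RF_COMPONENT_INFO_SIZE bytes.
 */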
2195 /* ARGSUSED */
2196 int
2197 raidread_component_label(b_vp, clabel)
2198 struct vnode *b_vp;
2199 RF_ComponentLabel_t *clabel;
2200 {
2201 struct buf *bp;
2202 int error;
2203
2204 /* XXX should probably ensure that we don't try to do this if
2205 someone has changed rf_protected_sectors. */
2206
2207 if (b_vp == NULL) {
2208 /* For whatever reason, this component is not valid.
2209 Don't try to read a component label from it. */
2210 return(EINVAL);
2211 }
2212
2213 /* get a block of the appropriate size... */
2214 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2215 bp->b_devvp = b_vp;
2216
2217 /* get our ducks in a row for the read */
2218 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2219 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2220 bp->b_flags |= B_READ;
2221 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2222
2223 (*bdevsw[major(vdev_rdev(b_vp))].d_strategy)(bp);
2224
2225 error = biowait(bp);
2226
2227 if (!error) {
2228 memcpy(clabel, bp->b_data,
2229 sizeof(RF_ComponentLabel_t));
2230 #if 0
2231 rf_print_component_label( clabel );
2232 #endif
2233 } else {
2234 #if 0
2235 printf("Failed to read RAID component label!\n");
2236 #endif
2237 }
2238
2239 brelse(bp);
2240 return(error);
2241 }
2242
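/*
 * Write the given component label to the fixed component label area
 * of the component.
 */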
2243 /* ARGSUSED */
2244 int
2245 raidwrite_component_label(b_vp, clabel)
2246 struct vnode *b_vp;
2247 RF_ComponentLabel_t *clabel;
2248 {
2249 struct buf *bp;
2250 int error;
2251
2252 /* get a block of the appropriate size... */
2253 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2254 bgetdevvp(b_vp, bp);
2255
2256 /* get our ducks in a row for the write */
2257 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2258 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2259 bp->b_flags |= B_WRITE;
2260 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2261
2262 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2263
2264 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2265
2266 (*bdevsw[major(vdev_rdev(b_vp))].d_strategy)(bp);
2267 error = biowait(bp);
2268 bp->b_flags |= B_INVAL;
2269 brelse(bp);
2270 if (error) {
2271 #if 1
2272 printf("Failed to write RAID component info!\n");
2273 #endif
2274 }
2275
2276 return(error);
2277 }
2278
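/*
 * Bump the modification counter and mark the component labels of all
 * non-failed components as dirty.
 */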
2279 void
2280 rf_markalldirty(raidPtr)
2281 RF_Raid_t *raidPtr;
2282 {
2283 RF_ComponentLabel_t clabel;
2284 int r,c;
2285
2286 raidPtr->mod_counter++;
2287 for (r = 0; r < raidPtr->numRow; r++) {
2288 for (c = 0; c < raidPtr->numCol; c++) {
2289 /* we don't want to touch (at all) a disk that has
2290 failed */
2291 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2292 raidread_component_label(
2293 raidPtr->raid_cinfo[r][c].ci_vp,
2294 &clabel);
2295 if (clabel.status == rf_ds_spared) {
2296 /* XXX do something special...
2297 but whatever you do, don't
2298 try to access it!! */
2299 } else {
2300 #if 0
2301 clabel.status =
2302 raidPtr->Disks[r][c].status;
2303 raidwrite_component_label(
2304 raidPtr->raid_cinfo[r][c].ci_vp,
2305 &clabel);
2306 #endif
2307 raidmarkdirty(
2308 raidPtr->raid_cinfo[r][c].ci_vp,
2309 raidPtr->mod_counter);
2310 }
2311 }
2312 }
2313 }
2314 /* printf("Component labels marked dirty.\n"); */
2315 #if 0
2316 for( c = 0; c < raidPtr->numSpare ; c++) {
2317 sparecol = raidPtr->numCol + c;
2318 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2319 /*
2320
2321 XXX this is where we get fancy and map this spare
2322 into its correct spot in the array.
2323
2324 */
2325 /*
2326
2327 we claim this disk is "optimal" if it's
2328 rf_ds_used_spare, as that means it should be
2329 directly substitutable for the disk it replaced.
2330 We note that too...
2331
2332 */
2333
2334 for(i=0;i<raidPtr->numRow;i++) {
2335 for(j=0;j<raidPtr->numCol;j++) {
2336 if ((raidPtr->Disks[i][j].spareRow ==
2337 r) &&
2338 (raidPtr->Disks[i][j].spareCol ==
2339 sparecol)) {
2340 srow = r;
2341 scol = sparecol;
2342 break;
2343 }
2344 }
2345 }
2346
2347 raidread_component_label(
2348 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2349 &clabel);
2350 /* make sure status is noted */
2351 clabel.version = RF_COMPONENT_LABEL_VERSION;
2352 clabel.mod_counter = raidPtr->mod_counter;
2353 clabel.serial_number = raidPtr->serial_number;
2354 clabel.row = srow;
2355 clabel.column = scol;
2356 clabel.num_rows = raidPtr->numRow;
2357 clabel.num_columns = raidPtr->numCol;
2358 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2359 clabel.status = rf_ds_optimal;
2360 raidwrite_component_label(
2361 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2362 &clabel);
2363 raidmarkclean(raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2364 }
2365 }
2366 #endif
2367 }
2368
2369
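/*
 * Re-write the component labels of all optimal components (and any
 * used spares), bumping the modification counter.  On a final update
 * with clean parity, the labels are also marked clean.
 */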
2370 void
2371 rf_update_component_labels(raidPtr, final)
2372 RF_Raid_t *raidPtr;
2373 int final;
2374 {
2375 RF_ComponentLabel_t clabel;
2376 int sparecol;
2377 int r,c;
2378 int i,j;
2379 int srow, scol;
2380
2381 srow = -1;
2382 scol = -1;
2383
2384 /* XXX should do extra checks to make sure things really are clean,
2385 rather than blindly setting the clean bit... */
2386
2387 raidPtr->mod_counter++;
2388
2389 for (r = 0; r < raidPtr->numRow; r++) {
2390 for (c = 0; c < raidPtr->numCol; c++) {
2391 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2392 raidread_component_label(
2393 raidPtr->raid_cinfo[r][c].ci_vp,
2394 &clabel);
2395 /* make sure status is noted */
2396 clabel.status = rf_ds_optimal;
2397 /* bump the counter */
2398 clabel.mod_counter = raidPtr->mod_counter;
2399
2400 raidwrite_component_label(
2401 raidPtr->raid_cinfo[r][c].ci_vp,
2402 &clabel);
2403 if (final == RF_FINAL_COMPONENT_UPDATE) {
2404 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2405 raidmarkclean(
2406 raidPtr->raid_cinfo[r][c].ci_vp,
2407 raidPtr->mod_counter);
2408 }
2409 }
2410 }
2411 /* else we don't touch it.. */
2412 }
2413 }
2414
2415 for( c = 0; c < raidPtr->numSpare ; c++) {
2416 sparecol = raidPtr->numCol + c;
2417 /* Need to ensure that the reconstruct actually completed! */
2418 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2419 /*
2420
2421 we claim this disk is "optimal" if it's
2422 rf_ds_used_spare, as that means it should be
2423 directly substitutable for the disk it replaced.
2424 We note that too...
2425
2426 */
2427
2428 for(i=0;i<raidPtr->numRow;i++) {
2429 for(j=0;j<raidPtr->numCol;j++) {
2430 if ((raidPtr->Disks[i][j].spareRow ==
2431 0) &&
2432 (raidPtr->Disks[i][j].spareCol ==
2433 sparecol)) {
2434 srow = i;
2435 scol = j;
2436 break;
2437 }
2438 }
2439 }
2440
2441 /* XXX shouldn't *really* need this... */
2442 raidread_component_label(
2443 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2444 &clabel);
2445 /* make sure status is noted */
2446
2447 raid_init_component_label(raidPtr, &clabel);
2448
2449 clabel.mod_counter = raidPtr->mod_counter;
2450 clabel.row = srow;
2451 clabel.column = scol;
2452 clabel.status = rf_ds_optimal;
2453
2454 raidwrite_component_label(
2455 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2456 &clabel);
2457 if (final == RF_FINAL_COMPONENT_UPDATE) {
2458 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2459 raidmarkclean(
2460 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2461 raidPtr->mod_counter);
2462 }
2463 }
2464 }
2465 }
2466 /* printf("Component labels updated\n"); */
2467 }
2468
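/*
 * Close the vnode associated with a component.  Auto-configured
 * components are closed via VOP_CLOSE()/vput(); others via vn_close()
 * with the engine thread's credentials.
 */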
2469 void
2470 rf_close_component(raidPtr, vp, auto_configured)
2471 RF_Raid_t *raidPtr;
2472 struct vnode *vp;
2473 int auto_configured;
2474 {
2475 struct proc *p;
2476
2477 p = raidPtr->engine_thread;
2478
2479 if (vp != NULL) {
2480 if (auto_configured == 1) {
2481 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2482 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2483 vput(vp);
2484
2485 } else {
2486 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2487 }
2488 } else {
2489 printf("vnode was NULL\n");
2490 }
2491 }
2492
2493
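/*
 * Close and forget the vnodes for all components and spares of the
 * given RAID set.
 */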
2494 void
2495 rf_UnconfigureVnodes(raidPtr)
2496 RF_Raid_t *raidPtr;
2497 {
2498 int r,c;
2499 struct proc *p;
2500 struct vnode *vp;
2501 int acd;
2502
2503
2504 /* We take this opportunity to close the vnodes like we should.. */
2505
2506 p = raidPtr->engine_thread;
2507
2508 for (r = 0; r < raidPtr->numRow; r++) {
2509 for (c = 0; c < raidPtr->numCol; c++) {
2510 printf("Closing vnode for row: %d col: %d\n", r, c);
2511 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2512 acd = raidPtr->Disks[r][c].auto_configured;
2513 rf_close_component(raidPtr, vp, acd);
2514 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2515 raidPtr->Disks[r][c].auto_configured = 0;
2516 }
2517 }
2518 for (r = 0; r < raidPtr->numSpare; r++) {
2519 printf("Closing vnode for spare: %d\n", r);
2520 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2521 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2522 rf_close_component(raidPtr, vp, acd);
2523 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2524 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2525 }
2526 }
2527
2528
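/*
 * Kernel thread body: fail the requested component and optionally
 * start reconstruction (per RF_FDFLAGS_RECON), then exit.
 */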
2529 void
2530 rf_ReconThread(req)
2531 struct rf_recon_req *req;
2532 {
2533 int s;
2534 RF_Raid_t *raidPtr;
2535
2536 s = splbio();
2537 raidPtr = (RF_Raid_t *) req->raidPtr;
2538 raidPtr->recon_in_progress = 1;
2539
2540 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2541 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2542
2543 /* XXX get rid of this! we don't need it at all.. */
2544 RF_Free(req, sizeof(*req));
2545
2546 raidPtr->recon_in_progress = 0;
2547 splx(s);
2548
2549 /* That's all... */
2550 kthread_exit(0); /* does not return */
2551 }
2552
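/*
 * Kernel thread body: re-write the parity for the given RAID set,
 * marking it clean on success, then exit.
 */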
2553 void
2554 rf_RewriteParityThread(raidPtr)
2555 RF_Raid_t *raidPtr;
2556 {
2557 int retcode;
2558 int s;
2559
2560 raidPtr->parity_rewrite_in_progress = 1;
2561 s = splbio();
2562 retcode = rf_RewriteParity(raidPtr);
2563 splx(s);
2564 if (retcode) {
2565 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2566 } else {
2567 /* set the clean bit! If we shutdown correctly,
2568 the clean bit on each component label will get
2569 set */
2570 raidPtr->parity_good = RF_RAID_CLEAN;
2571 }
2572 raidPtr->parity_rewrite_in_progress = 0;
2573
2574 /* Anyone waiting for us to stop? If so, inform them... */
2575 if (raidPtr->waitShutdown) {
2576 wakeup(&raidPtr->parity_rewrite_in_progress);
2577 }
2578
2579 /* That's all... */
2580 kthread_exit(0); /* does not return */
2581 }
2582
2583
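/*
 * Kernel thread body: run the copyback of reconstructed data for the
 * given RAID set, then exit.
 */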
2584 void
2585 rf_CopybackThread(raidPtr)
2586 RF_Raid_t *raidPtr;
2587 {
2588 int s;
2589
2590 raidPtr->copyback_in_progress = 1;
2591 s = splbio();
2592 rf_CopybackReconstructedData(raidPtr);
2593 splx(s);
2594 raidPtr->copyback_in_progress = 0;
2595
2596 /* That's all... */
2597 kthread_exit(0); /* does not return */
2598 }
2599
2600
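/*
 * Kernel thread body: reconstruct a failed component in place, then
 * exit.
 */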
2601 void
2602 rf_ReconstructInPlaceThread(req)
2603 struct rf_recon_req *req;
2604 {
2605 int retcode;
2606 int s;
2607 RF_Raid_t *raidPtr;
2608
2609 s = splbio();
2610 raidPtr = req->raidPtr;
2611 raidPtr->recon_in_progress = 1;
2612 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2613 RF_Free(req, sizeof(*req));
2614 raidPtr->recon_in_progress = 0;
2615 splx(s);
2616
2617 /* That's all... */
2618 kthread_exit(0); /* does not return */
2619 }
2620
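/* Mount-root hook; currently a no-op. */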
2621 void
2622 rf_mountroot_hook(dev)
2623 struct device *dev;
2624 {
2625
2626 }
2627
2628
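/*
 * Scan all disk devices in the system for partitions of type FS_RAID,
 * read their component labels, and return a list of RF_AutoConfig_t
 * entries describing the plausible RAID components found.
 */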
2629 RF_AutoConfig_t *
2630 rf_find_raid_components()
2631 {
2632 struct devnametobdevmaj *dtobdm;
2633 struct vnode *vp;
2634 struct disklabel label;
2635 struct device *dv;
2636 char *cd_name;
2637 dev_t dev;
2638 int error;
2639 int i;
2640 int good_one;
2641 RF_ComponentLabel_t *clabel;
2642 RF_AutoConfig_t *ac_list;
2643 RF_AutoConfig_t *ac;
2644
2645
2646 /* initialize the AutoConfig list */
2647 ac_list = NULL;
2648
2649 /* we begin by trolling through *all* the devices on the system */
2650
2651 for (dv = alldevs.tqh_first; dv != NULL;
2652 dv = dv->dv_list.tqe_next) {
2653
2654 /* we are only interested in disks... */
2655 if (dv->dv_class != DV_DISK)
2656 continue;
2657
2658 /* we don't care about floppies... */
2659 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2660 continue;
2661 }
2662
2663 /* need to find the device_name_to_block_device_major stuff */
2664 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2665 dtobdm = dev_name2blk;
2666 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2667 dtobdm++;
2668 }
2669
2670 /* get a vnode for the raw partition of this disk */
2671
2672 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2673 if (bdevvp(dev, &vp))
2674 panic("RAID can't alloc vnode");
2675
2676 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2677
2678 error = VOP_OPEN(vp, FREAD, NOCRED, 0, NULL);
2679
2680 if (error) {
2681 /* "Who cares." Continue looking
2682 for something that exists */
2683 vput(vp);
2684 continue;
2685 }
2686
2687 /* Ok, the disk exists. Go get the disklabel. */
2688 VOP_UNLOCK(vp, 0);
2689 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2690 FREAD, NOCRED, 0);
2691 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2692 if (error) {
2693 /*
2694 * XXX can't happen - open() would
2695 * have errored out (or faked up one)
2696 */
2697 printf("can't get label for dev %s%c (%d)!?!?\n",
2698 dv->dv_xname, 'a' + RAW_PART, error);
2699 }
2700
2701 /* don't need this any more. We'll allocate it again
2702 a little later if we really do... */
2703 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2704 vput(vp);
2705
2706 for (i=0; i < label.d_npartitions; i++) {
2707 /* We only support partitions marked as RAID */
2708 if (label.d_partitions[i].p_fstype != FS_RAID)
2709 continue;
2710
2711 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2712 if (bdevvp(dev, &vp))
2713 panic("RAID can't alloc vnode");
2714
2715 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2716
2717 error = VOP_OPEN(vp, FREAD, NOCRED, 0, NULL);
2718 if (error) {
2719 /* Whatever... */
2720 vput(vp);
2721 continue;
2722 }
2723
2724 good_one = 0;
2725
2726 clabel = (RF_ComponentLabel_t *)
2727 malloc(sizeof(RF_ComponentLabel_t),
2728 M_RAIDFRAME, M_NOWAIT);
2729 if (clabel == NULL) {
2730 /* XXX CLEANUP HERE */
2731 vput(vp);
2732 printf("RAID auto config: out of memory!\n");
2733 return(NULL); /* XXX probably should panic? */
2734 }
2735
2736 if (!raidread_component_label(vp, clabel)) {
2737 /* Got the label. Does it look reasonable? */
2738 if (rf_reasonable_label(clabel) &&
2739 (clabel->partitionSize <=
2740 label.d_partitions[i].p_size)) {
2741 #if DEBUG
2742 printf("Component on: %s%c: %d\n",
2743 dv->dv_xname, 'a'+i,
2744 label.d_partitions[i].p_size);
2745 rf_print_component_label(clabel);
2746 #endif
2747 /* if it's reasonable, add it,
2748 else ignore it. */
2749 ac = (RF_AutoConfig_t *)
2750 malloc(sizeof(RF_AutoConfig_t),
2751 M_RAIDFRAME,
2752 M_NOWAIT);
2753 if (ac == NULL) {
2754 /* XXX should panic?? */
2755 vput(vp);
2756 return(NULL);
2757 }
2758
2759 sprintf(ac->devname, "%s%c",
2760 dv->dv_xname, 'a'+i);
2761 ac->vp = vp;
2762 ac->clabel = clabel;
2763 ac->next = ac_list;
2764 ac_list = ac;
2765 good_one = 1;
2766 }
2767 }
2768 if (!good_one) {
2769 /* cleanup */
2770 free(clabel, M_RAIDFRAME);
2771 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2772 vput(vp);
2773 } else
2774 VOP_UNLOCK(vp, 0);
2775 }
2776 }
2777 return(ac_list);
2778 }
2779
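/*
 * Perform basic sanity checks on a component label: known version,
 * sane clean flag, and row/column/size values that are in range.
 */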
2780 static int
2781 rf_reasonable_label(clabel)
2782 RF_ComponentLabel_t *clabel;
2783 {
2784
2785 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2786 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2787 ((clabel->clean == RF_RAID_CLEAN) ||
2788 (clabel->clean == RF_RAID_DIRTY)) &&
2789 clabel->row >=0 &&
2790 clabel->column >= 0 &&
2791 clabel->num_rows > 0 &&
2792 clabel->num_columns > 0 &&
2793 clabel->row < clabel->num_rows &&
2794 clabel->column < clabel->num_columns &&
2795 clabel->blockSize > 0 &&
2796 clabel->numBlocks > 0) {
2797 /* label looks reasonable enough... */
2798 return(1);
2799 }
2800 return(0);
2801 }
2802
2803
2804 void
2805 rf_print_component_label(clabel)
2806 RF_ComponentLabel_t *clabel;
2807 {
2808 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2809 clabel->row, clabel->column,
2810 clabel->num_rows, clabel->num_columns);
2811 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2812 clabel->version, clabel->serial_number,
2813 clabel->mod_counter);
2814 printf(" Clean: %s Status: %d\n",
2815 clabel->clean ? "Yes" : "No", clabel->status );
2816 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2817 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2818 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2819 (char) clabel->parityConfig, clabel->blockSize,
2820 clabel->numBlocks);
2821 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2822 printf(" Contains root partition: %s\n",
2823 clabel->root_partition ? "Yes" : "No" );
2824 printf(" Last configured as: raid%d\n", clabel->last_unit );
2825 #if 0
2826 printf(" Config order: %d\n", clabel->config_order);
2827 #endif
2828
2829 }
2830
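/*
 * Sort the list of auto-configured components into configuration
 * sets, grouping components whose labels are mutually consistent.
 */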
2831 RF_ConfigSet_t *
2832 rf_create_auto_sets(ac_list)
2833 RF_AutoConfig_t *ac_list;
2834 {
2835 RF_AutoConfig_t *ac;
2836 RF_ConfigSet_t *config_sets;
2837 RF_ConfigSet_t *cset;
2838 RF_AutoConfig_t *ac_next;
2839
2840
2841 config_sets = NULL;
2842
2843 /* Go through the AutoConfig list, and figure out which components
2844 belong to what sets. */
2845 ac = ac_list;
2846 while(ac!=NULL) {
2847 /* we're going to putz with ac->next, so save it here
2848 for use at the end of the loop */
2849 ac_next = ac->next;
2850
2851 if (config_sets == NULL) {
2852 /* will need at least this one... */
2853 config_sets = (RF_ConfigSet_t *)
2854 malloc(sizeof(RF_ConfigSet_t),
2855 M_RAIDFRAME, M_NOWAIT);
2856 if (config_sets == NULL) {
2857 panic("rf_create_auto_sets: No memory!\n");
2858 }
2859 /* this one is easy :) */
2860 config_sets->ac = ac;
2861 config_sets->next = NULL;
2862 config_sets->rootable = 0;
2863 ac->next = NULL;
2864 } else {
2865 /* which set does this component fit into? */
2866 cset = config_sets;
2867 while(cset!=NULL) {
2868 if (rf_does_it_fit(cset, ac)) {
2869 /* looks like it matches... */
2870 ac->next = cset->ac;
2871 cset->ac = ac;
2872 break;
2873 }
2874 cset = cset->next;
2875 }
2876 if (cset==NULL) {
2877 /* didn't find a match above... new set..*/
2878 cset = (RF_ConfigSet_t *)
2879 malloc(sizeof(RF_ConfigSet_t),
2880 M_RAIDFRAME, M_NOWAIT);
2881 if (cset == NULL) {
2882 panic("rf_create_auto_sets: No memory!\n");
2883 }
2884 cset->ac = ac;
2885 ac->next = NULL;
2886 cset->next = config_sets;
2887 cset->rootable = 0;
2888 config_sets = cset;
2889 }
2890 }
2891 ac = ac_next;
2892 }
2893
2894
2895 return(config_sets);
2896 }
2897
2898 static int
2899 rf_does_it_fit(cset, ac)
2900 RF_ConfigSet_t *cset;
2901 RF_AutoConfig_t *ac;
2902 {
2903 RF_ComponentLabel_t *clabel1, *clabel2;
2904
2905 /* If this one matches the *first* one in the set, that's good
2906 enough, since the other members of the set would have been
2907 through here too... */
2908 /* note that we are not checking partitionSize here..
2909
2910 Note that we are also not checking the mod_counters here.
2911 If everything else matches except the mod_counter, that's
2912 good enough for this test. We will deal with the mod_counters
2913 a little later in the autoconfiguration process.
2914
2915 (clabel1->mod_counter == clabel2->mod_counter) &&
2916
2917 The reason we don't check for this is that failed disks
2918 will have lower modification counts. If those disks are
2919 not added to the set they used to belong to, then they will
2920 form their own set, which may result in 2 different sets,
2921 for example, competing to be configured at raid0, and
2922 perhaps competing to be the root filesystem set. If the
2923 wrong ones get configured, or both attempt to become /,
2924 weird behaviour and/or serious lossage will occur. Thus we
2925 need to bring them into the fold here, and kick them out at
2926 a later point.
2927
2928 */
2929
2930 clabel1 = cset->ac->clabel;
2931 clabel2 = ac->clabel;
2932 if ((clabel1->version == clabel2->version) &&
2933 (clabel1->serial_number == clabel2->serial_number) &&
2934 (clabel1->num_rows == clabel2->num_rows) &&
2935 (clabel1->num_columns == clabel2->num_columns) &&
2936 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2937 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2938 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2939 (clabel1->parityConfig == clabel2->parityConfig) &&
2940 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2941 (clabel1->blockSize == clabel2->blockSize) &&
2942 (clabel1->numBlocks == clabel2->numBlocks) &&
2943 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2944 (clabel1->root_partition == clabel2->root_partition) &&
2945 (clabel1->last_unit == clabel2->last_unit) &&
2946 (clabel1->config_order == clabel2->config_order)) {
2947 /* if it gets here, it almost *has* to be a match */
2948 } else {
2949 /* it's not consistent with somebody in the set..
2950 punt */
2951 return(0);
2952 }
2953 /* all was fine.. it must fit... */
2954 return(1);
2955 }
2956
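/*
 * Decide whether a configuration set has enough live components to be
 * configured, given its parity type (with special handling for RAID 1
 * pairs).
 */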
2957 int
2958 rf_have_enough_components(cset)
2959 RF_ConfigSet_t *cset;
2960 {
2961 RF_AutoConfig_t *ac;
2962 RF_AutoConfig_t *auto_config;
2963 RF_ComponentLabel_t *clabel;
2964 int r,c;
2965 int num_rows;
2966 int num_cols;
2967 int num_missing;
2968 int mod_counter;
2969 int mod_counter_found;
2970 int even_pair_failed;
2971 char parity_type;
2972
2973
2974 /* check to see that we have enough 'live' components
2975 of this set. If so, we can configure it if necessary */
2976
2977 num_rows = cset->ac->clabel->num_rows;
2978 num_cols = cset->ac->clabel->num_columns;
2979 parity_type = cset->ac->clabel->parityConfig;
2980
2981 /* XXX Check for duplicate components!?!?!? */
2982
2983 /* Determine what the mod_counter is supposed to be for this set. */
2984
2985 mod_counter_found = 0;
2986 mod_counter = 0;
2987 ac = cset->ac;
2988 while(ac!=NULL) {
2989 if (mod_counter_found==0) {
2990 mod_counter = ac->clabel->mod_counter;
2991 mod_counter_found = 1;
2992 } else {
2993 if (ac->clabel->mod_counter > mod_counter) {
2994 mod_counter = ac->clabel->mod_counter;
2995 }
2996 }
2997 ac = ac->next;
2998 }
2999
3000 num_missing = 0;
3001 auto_config = cset->ac;
3002
3003 for(r=0; r<num_rows; r++) {
3004 even_pair_failed = 0;
3005 for(c=0; c<num_cols; c++) {
3006 ac = auto_config;
3007 while(ac!=NULL) {
3008 if ((ac->clabel->row == r) &&
3009 (ac->clabel->column == c) &&
3010 (ac->clabel->mod_counter == mod_counter)) {
3011 /* it's this one... */
3012 #if DEBUG
3013 printf("Found: %s at %d,%d\n",
3014 ac->devname,r,c);
3015 #endif
3016 break;
3017 }
3018 ac=ac->next;
3019 }
3020 if (ac==NULL) {
3021 /* Didn't find one here! */
3022 /* special case for RAID 1, especially
3023 where there are more than 2
3024 components (where RAIDframe treats
3025 things a little differently :( ) */
3026 if (parity_type == '1') {
3027 if (c%2 == 0) { /* even component */
3028 even_pair_failed = 1;
3029 } else { /* odd component. If
3030 we're failed, and
3031 so is the even
3032 component, it's
3033 "Good Night, Charlie" */
3034 if (even_pair_failed == 1) {
3035 return(0);
3036 }
3037 }
3038 } else {
3039 /* normal accounting */
3040 num_missing++;
3041 }
3042 }
3043 if ((parity_type == '1') && (c%2 == 1)) {
3044 /* Just did an even component, and we didn't
3045 bail.. reset the even_pair_failed flag,
3046 and go on to the next component.... */
3047 even_pair_failed = 0;
3048 }
3049 }
3050 }
3051
3052 clabel = cset->ac->clabel;
3053
3054 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3055 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3056 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3057 /* XXX this needs to be made *much* more general */
3058 /* Too many failures */
3059 return(0);
3060 }
3061 /* otherwise, all is well, and we've got enough to take a kick
3062 at autoconfiguring this set */
3063 return(1);
3064 }
3065
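/*
 * Build an RF_Config_t for the given set of auto-configured
 * components, based on the values in the first component label.
 */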
3066 void
3067 rf_create_configuration(ac,config,raidPtr)
3068 RF_AutoConfig_t *ac;
3069 RF_Config_t *config;
3070 RF_Raid_t *raidPtr;
3071 {
3072 RF_ComponentLabel_t *clabel;
3073 int i;
3074
3075 clabel = ac->clabel;
3076
3077 /* 1. Fill in the common stuff */
3078 config->numRow = clabel->num_rows;
3079 config->numCol = clabel->num_columns;
3080 config->numSpare = 0; /* XXX should this be set here? */
3081 config->sectPerSU = clabel->sectPerSU;
3082 config->SUsPerPU = clabel->SUsPerPU;
3083 config->SUsPerRU = clabel->SUsPerRU;
3084 config->parityConfig = clabel->parityConfig;
3085 /* XXX... */
3086 strcpy(config->diskQueueType,"fifo");
3087 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3088 config->layoutSpecificSize = 0; /* XXX ?? */
3089
3090 while(ac!=NULL) {
3091 /* row/col values will be in range due to the checks
3092 in rf_reasonable_label() */
3093 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3094 ac->devname);
3095 ac = ac->next;
3096 }
3097
3098 for(i=0;i<RF_MAXDBGV;i++) {
3099 config->debugVars[i][0] = '\0';
3100 }
3101 }
3102
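/*
 * Set the autoconfigure flag for the set and propagate it to the
 * component labels of all optimal components.  Returns the new value.
 */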
3103 int
3104 rf_set_autoconfig(raidPtr, new_value)
3105 RF_Raid_t *raidPtr;
3106 int new_value;
3107 {
3108 RF_ComponentLabel_t clabel;
3109 struct vnode *vp;
3110 int row, column;
3111
3112 raidPtr->autoconfigure = new_value;
3113 for(row=0; row<raidPtr->numRow; row++) {
3114 for(column=0; column<raidPtr->numCol; column++) {
3115 if (raidPtr->Disks[row][column].status ==
3116 rf_ds_optimal) {
3117 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3118 raidread_component_label(vp, &clabel);
3119 clabel.autoconfigure = new_value;
3120 raidwrite_component_label(vp, &clabel);
3121 }
3122 }
3123 }
3124 return(new_value);
3125 }
3126
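/*
 * Set the root_partition flag for the set and propagate it to the
 * component labels of all optimal components.  Returns the new value.
 */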
3127 int
3128 rf_set_rootpartition(raidPtr, new_value)
3129 RF_Raid_t *raidPtr;
3130 int new_value;
3131 {
3132 RF_ComponentLabel_t clabel;
3133 struct vnode *vp;
3134 int row, column;
3135
3136 raidPtr->root_partition = new_value;
3137 for(row=0; row<raidPtr->numRow; row++) {
3138 for(column=0; column<raidPtr->numCol; column++) {
3139 if (raidPtr->Disks[row][column].status ==
3140 rf_ds_optimal) {
3141 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3142 raidread_component_label(vp, &clabel);
3143 clabel.root_partition = new_value;
3144 raidwrite_component_label(vp, &clabel);
3145 }
3146 }
3147 }
3148 return(new_value);
3149 }
3150
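/*
 * Close and release the vnodes held by the components of a
 * configuration set.
 */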
3151 void
3152 rf_release_all_vps(cset)
3153 RF_ConfigSet_t *cset;
3154 {
3155 RF_AutoConfig_t *ac;
3156
3157 ac = cset->ac;
3158 while(ac!=NULL) {
3159 /* Close the vp, and give it back */
3160 if (ac->vp) {
3161 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3162 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3163 vput(ac->vp);
3164 ac->vp = NULL;
3165 }
3166 ac = ac->next;
3167 }
3168 }
3169
3170
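/*
 * Free the component labels, the auto-config entries, and the
 * configuration set itself.
 */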
3171 void
3172 rf_cleanup_config_set(cset)
3173 RF_ConfigSet_t *cset;
3174 {
3175 RF_AutoConfig_t *ac;
3176 RF_AutoConfig_t *next_ac;
3177
3178 ac = cset->ac;
3179 while(ac!=NULL) {
3180 next_ac = ac->next;
3181 /* nuke the label */
3182 free(ac->clabel, M_RAIDFRAME);
3183 /* cleanup the config structure */
3184 free(ac, M_RAIDFRAME);
3185 /* "next.." */
3186 ac = next_ac;
3187 }
3188 /* and, finally, nuke the config set */
3189 free(cset, M_RAIDFRAME);
3190 }
3191
3192
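/*
 * Fill in a component label from the current state of the RAID set.
 */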
3193 void
3194 raid_init_component_label(raidPtr, clabel)
3195 RF_Raid_t *raidPtr;
3196 RF_ComponentLabel_t *clabel;
3197 {
3198 /* current version number */
3199 clabel->version = RF_COMPONENT_LABEL_VERSION;
3200 clabel->serial_number = raidPtr->serial_number;
3201 clabel->mod_counter = raidPtr->mod_counter;
3202 clabel->num_rows = raidPtr->numRow;
3203 clabel->num_columns = raidPtr->numCol;
3204 clabel->clean = RF_RAID_DIRTY; /* not clean */
3205 clabel->status = rf_ds_optimal; /* "It's good!" */
3206
3207 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3208 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3209 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3210
3211 clabel->blockSize = raidPtr->bytesPerSector;
3212 clabel->numBlocks = raidPtr->sectorsPerDisk;
3213
3214 /* XXX not portable */
3215 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3216 clabel->maxOutstanding = raidPtr->maxOutstanding;
3217 clabel->autoconfigure = raidPtr->autoconfigure;
3218 clabel->root_partition = raidPtr->root_partition;
3219 clabel->last_unit = raidPtr->raidid;
3220 clabel->config_order = raidPtr->config_order;
3221 }
3222
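/*
 * Configure a RAID set from an auto-configuration set.  Picks a unit
 * number (preferring the one recorded in the component labels), builds
 * the configuration, and brings the set up.  Returns 0 on success and
 * stores the unit number in *unit.
 */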
3223 int
3224 rf_auto_config_set(cset,unit)
3225 RF_ConfigSet_t *cset;
3226 int *unit;
3227 {
3228 RF_Raid_t *raidPtr;
3229 RF_Config_t *config;
3230 int raidID;
3231 int retcode;
3232
3233 printf("RAID autoconfigure\n");
3234
3235 retcode = 0;
3236 *unit = -1;
3237
3238 /* 1. Create a config structure */
3239
3240 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3241 M_RAIDFRAME,
3242 M_NOWAIT);
3243 if (config==NULL) {
3244 printf("Out of mem!?!?\n");
3245 /* XXX do something more intelligent here. */
3246 return(1);
3247 }
3248
3249 memset(config, 0, sizeof(RF_Config_t));
3250
3251 /* XXX raidID needs to be set correctly.. */
3252
3253 /*
3254 2. Figure out what RAID ID this one is supposed to live at
3255 See if we can get the same RAID dev that it was configured
3256 on last time..
3257 */
3258
3259 raidID = cset->ac->clabel->last_unit;
3260 if ((raidID < 0) || (raidID >= numraid)) {
3261 /* let's not wander off into lala land. */
3262 raidID = numraid - 1;
3263 }
3264 if (raidPtrs[raidID]->valid != 0) {
3265
3266 /*
3267 Nope... Go looking for an alternative...
3268 Start high so we don't immediately use raid0 if that's
3269 not taken.
3270 */
3271
3272 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3273 if (raidPtrs[raidID]->valid == 0) {
3274 /* can use this one! */
3275 break;
3276 }
3277 }
3278 }
3279
3280 if (raidID < 0) {
3281 /* punt... */
3282 printf("Unable to auto configure this set!\n");
3283 printf("(Out of RAID devs!)\n");
3284 return(1);
3285 }
3286 printf("Configuring raid%d:\n",raidID);
3287 raidPtr = raidPtrs[raidID];
3288
3289 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3290 raidPtr->raidid = raidID;
3291 raidPtr->openings = RAIDOUTSTANDING;
3292
3293 /* 3. Build the configuration structure */
3294 rf_create_configuration(cset->ac, config, raidPtr);
3295
3296 /* 4. Do the configuration */
3297 retcode = rf_Configure(raidPtr, config, cset->ac);
3298
3299 if (retcode == 0) {
3300
3301 raidinit(raidPtrs[raidID]);
3302
3303 rf_markalldirty(raidPtrs[raidID]);
3304 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3305 if (cset->ac->clabel->root_partition==1) {
3306 /* everything configured just fine. Make a note
3307 that this set is eligible to be root. */
3308 cset->rootable = 1;
3309 /* XXX do this here? */
3310 raidPtrs[raidID]->root_partition = 1;
3311 }
3312 }
3313
3314 /* 5. Cleanup */
3315 free(config, M_RAIDFRAME);
3316
3317 *unit = raidID;
3318 return(retcode);
3319 }
3320
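/*
 * Account a completed access against the disk statistics for this
 * RAID device.
 */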
3321 void
3322 rf_disk_unbusy(desc)
3323 RF_RaidAccessDesc_t *desc;
3324 {
3325 struct buf *bp;
3326
3327 bp = (struct buf *)desc->bp;
3328 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3329 (bp->b_bcount - bp->b_resid));
3330 }
3331