1 /* $NetBSD: rf_netbsdkintf.c,v 1.109.2.3 2001/09/26 15:28:15 fvdl Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80 /*
81 * Copyright (c) 1995 Carnegie-Mellon University.
82 * All rights reserved.
83 *
84 * Authors: Mark Holland, Jim Zelenka
85 *
86 * Permission to use, copy, modify and distribute this software and
87 * its documentation is hereby granted, provided that both the copyright
88 * notice and this permission notice appear in all copies of the
89 * software, derivative works or modified versions, and any portions
90 * thereof, and that both notices appear in supporting documentation.
91 *
92 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
93 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
94 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
95 *
96 * Carnegie Mellon requests users of this software to return to
97 *
98 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
99 * School of Computer Science
100 * Carnegie Mellon University
101 * Pittsburgh PA 15213-3890
102 *
103 * any improvements or extensions that they make and grant Carnegie the
104 * rights to redistribute these changes.
105 */
106
107 /***********************************************************
108 *
109 * rf_netbsdkintf.c -- the kernel interface routines for RAIDframe
110 *
111 ***********************************************************/
112
113 #include <sys/errno.h>
114 #include <sys/param.h>
115 #include <sys/pool.h>
116 #include <sys/queue.h>
117 #include <sys/disk.h>
118 #include <sys/device.h>
119 #include <sys/stat.h>
120 #include <sys/ioctl.h>
121 #include <sys/fcntl.h>
122 #include <sys/systm.h>
123 #include <sys/namei.h>
124 #include <sys/vnode.h>
125 #include <sys/param.h>
126 #include <sys/types.h>
127 #include <machine/types.h>
128 #include <sys/disklabel.h>
129 #include <sys/conf.h>
130 #include <sys/lock.h>
131 #include <sys/buf.h>
132 #include <sys/user.h>
133 #include <sys/reboot.h>
134
135 #include <miscfs/specfs/specdev.h>
136
137 #include "raid.h"
138 #include "opt_raid_autoconfig.h"
139 #include "rf_raid.h"
140 #include "rf_raidframe.h"
141 #include "rf_copyback.h"
142 #include "rf_dag.h"
143 #include "rf_dagflags.h"
144 #include "rf_desc.h"
145 #include "rf_diskqueue.h"
146 #include "rf_acctrace.h"
147 #include "rf_etimer.h"
148 #include "rf_general.h"
149 #include "rf_debugMem.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_debugprint.h"
155 #include "rf_threadstuff.h"
156 #include "rf_configure.h"
157
158 int rf_kdebug_level = 0;
159
160 #ifdef DEBUG
161 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
162 #else /* DEBUG */
163 #define db1_printf(a) { }
164 #endif /* DEBUG */
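/*
 * Note the double parentheses at db1_printf() call sites throughout
 * this file, e.g.
 *
 *	db1_printf(("Opening raid device number: %d partition: %d\n",
 *	    unit, part));
 *
 * The extra parentheses let a variable-length argument list be passed
 * through the single-argument macro above to printf().
 */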
165
166 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
167
168 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
169
170 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
171 * spare table */
172 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
173 * installation process */
174
175 /* prototypes */
176 static void KernelWakeupFunc(struct buf * bp);
177 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
178 RF_SectorNum_t startSect, RF_SectorCount_t numSect,
179 caddr_t buf, void (*cbFunc) (struct buf *), void *cbArg,
180 int logBytesPerSector, struct proc * b_proc);
181 static void raidinit(RF_Raid_t *);
182
183 void raidattach(int);
184
185 /*
186 * Pilfered from ccd.c
187 */
188
189 struct raidbuf {
190 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
191 struct buf *rf_obp; /* ptr. to original I/O buf */
192 int rf_flags; /* misc. flags */
193 RF_DiskQueueData_t *req;/* the request that this was part of.. */
194 };
195
196
197 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
198 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
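/*
 * RAIDGETBUF() and RAIDPUTBUF() are thin wrappers around pool_get(9)
 * and pool_put(9) on the per-unit component buffer pool (sc_cbufpool).
 * Roughly, the per-I/O usage in rf_DispatchKernelIO() and
 * KernelWakeupFunc() below looks like:
 *
 *	raidbp = RAIDGETBUF(rs);	(PR_NOWAIT, so this may fail)
 *	raidbp->rf_obp = bp;		(remember the original buf)
 *	raidbp->req = req;
 *	VOP_STRATEGY(&raidbp->rf_buf);
 *	...
 *	RAIDPUTBUF(rs, raidbp);		(from the completion callback)
 */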
199
200 /* XXX Not sure if the following should be replacing the raidPtrs above,
201 or if it should be used in conjunction with that...
202 */
203
204 struct raid_softc {
205 int sc_flags; /* flags */
206 int sc_cflags; /* configuration flags */
207 size_t sc_size; /* size of the raid device */
208 char sc_xname[20]; /* XXX external name */
209 struct disk sc_dkdev; /* generic disk device info */
210 struct pool sc_cbufpool; /* component buffer pool */
211 struct buf_queue buf_queue; /* used for the device queue */
212 };
213 /* sc_flags */
214 #define RAIDF_INITED 0x01 /* unit has been initialized */
215 #define RAIDF_WLABEL 0x02 /* label area is writable */
216 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
217 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
218 #define RAIDF_LOCKED 0x80 /* unit is locked */
219
220 #define raidunit(x) DISKUNIT(x)
221 int numraid = 0;
222
223 /*
224 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
225 * Be aware that large numbers can allow the driver to consume a lot of
226 * kernel memory, especially on writes, and in degraded mode reads.
227 *
228 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
229 * a single 64K write will typically require 64K for the old data,
230 * 64K for the old parity, and 64K for the new parity, for a total
231 * of 192K (if the parity buffer is not re-used immediately).
232 * Even if it is used immediately, that's still 128K, which when multiplied
233 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
234 *
235 * Now in degraded mode, for example, a 64K read on the above setup may
236 * require data reconstruction, which will require *all* of the 4 remaining
237 * disks to participate -- 4 * 32K/disk == 128K again.
238 */
239
240 #ifndef RAIDOUTSTANDING
241 #define RAIDOUTSTANDING 6
242 #endif
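/*
 * Because of the #ifndef guard above, the default of 6 can be
 * overridden at kernel build time, e.g. by adding -DRAIDOUTSTANDING=10
 * to the compile flags (or via an "options RAIDOUTSTANDING=10" line,
 * assuming RAIDOUTSTANDING is declared as an option for this kernel).
 * Larger values trade kernel memory for more concurrent I/O, per the
 * arithmetic in the comment above.
 */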
243
244 #define RAIDLABELDEV(dev) \
245 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
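/*
 * RAIDLABELDEV() builds the dev_t for the raw partition of the given
 * raid unit, i.e. the "whole disk" device that disklabel operations
 * would normally be directed at.
 */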
246
247 /* declared here, and made public, for the benefit of KVM stuff.. */
248 struct raid_softc *raid_softc;
249
250 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
251 struct disklabel *);
252 static void raidgetdisklabel(struct vnode *);
253 static void raidmakedisklabel(struct raid_softc *);
254
255 static int raidlock(struct raid_softc *);
256 static void raidunlock(struct raid_softc *);
257
258 static void rf_markalldirty(RF_Raid_t *);
259 void rf_mountroot_hook(struct device *);
260
261 struct device *raidrootdev;
262
263 void rf_ReconThread(struct rf_recon_req *);
264 /* XXX what I want is: */
265 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
266 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
267 void rf_CopybackThread(RF_Raid_t *raidPtr);
268 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
269 void rf_buildroothack(void *);
270
271 RF_AutoConfig_t *rf_find_raid_components(void);
272 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
273 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
274 static int rf_reasonable_label(RF_ComponentLabel_t *);
275 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
276 int rf_set_autoconfig(RF_Raid_t *, int);
277 int rf_set_rootpartition(RF_Raid_t *, int);
278 void rf_release_all_vps(RF_ConfigSet_t *);
279 void rf_cleanup_config_set(RF_ConfigSet_t *);
280 int rf_have_enough_components(RF_ConfigSet_t *);
281 int rf_auto_config_set(RF_ConfigSet_t *, int *);
282
283 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
284 allow autoconfig to take place.
285 Note that this is overridden by having
286 RAID_AUTOCONFIG as an option in the
287 kernel config file. */
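/*
 * For example, building a kernel with
 *
 *	options 	RAID_AUTOCONFIG
 *
 * causes opt_raid_autoconfig.h (included above) to define
 * RAID_AUTOCONFIG, which turns raidautoconfig on in raidattach() below.
 */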
288
289 void
290 raidattach(num)
291 int num;
292 {
293 int raidID;
294 int i, rc;
295 RF_AutoConfig_t *ac_list; /* autoconfig list */
296 RF_ConfigSet_t *config_sets;
297
298 #ifdef DEBUG
299 printf("raidattach: Asked for %d units\n", num);
300 #endif
301
302 if (num <= 0) {
303 #ifdef DIAGNOSTIC
304 panic("raidattach: count <= 0");
305 #endif
306 return;
307 }
308 /* This is where all the initialization stuff gets done. */
309
310 numraid = num;
311
312 /* Make some space for requested number of units... */
313
314 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
315 if (raidPtrs == NULL) {
316 panic("raidPtrs is NULL!!\n");
317 }
318
319 rc = rf_mutex_init(&rf_sparet_wait_mutex);
320 if (rc) {
321 RF_PANIC();
322 }
323
324 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
325
326 for (i = 0; i < num; i++)
327 raidPtrs[i] = NULL;
328 rc = rf_BootRaidframe();
329 if (rc == 0)
330 printf("Kernelized RAIDframe activated\n");
331 else
332 panic("Serious error booting RAID!!\n");
333
334 /* put together some datastructures like the CCD device does.. This
335 * lets us lock the device and what-not when it gets opened. */
336
337 raid_softc = (struct raid_softc *)
338 malloc(num * sizeof(struct raid_softc),
339 M_RAIDFRAME, M_NOWAIT);
340 if (raid_softc == NULL) {
341 printf("WARNING: no memory for RAIDframe driver\n");
342 return;
343 }
344
345 memset(raid_softc, 0, num * sizeof(struct raid_softc));
346
347 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
348 M_RAIDFRAME, M_NOWAIT);
349 if (raidrootdev == NULL) {
350 panic("No memory for RAIDframe driver!!?!?!\n");
351 }
352
353 for (raidID = 0; raidID < num; raidID++) {
354 BUFQ_INIT(&raid_softc[raidID].buf_queue);
355
356 raidrootdev[raidID].dv_class = DV_DISK;
357 raidrootdev[raidID].dv_cfdata = NULL;
358 raidrootdev[raidID].dv_unit = raidID;
359 raidrootdev[raidID].dv_parent = NULL;
360 raidrootdev[raidID].dv_flags = 0;
361 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
362
363 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
364 (RF_Raid_t *));
365 if (raidPtrs[raidID] == NULL) {
366 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
367 numraid = raidID;
368 return;
369 }
370 }
371
372 #if RAID_AUTOCONFIG
373 raidautoconfig = 1;
374 #endif
375
376 if (raidautoconfig) {
377 /* 1. locate all RAID components on the system */
378
379 #if DEBUG
380 printf("Searching for raid components...\n");
381 #endif
382 ac_list = rf_find_raid_components();
383
384 /* 2. sort them into their respective sets */
385
386 config_sets = rf_create_auto_sets(ac_list);
387
388 /* 3. evaluate each set and configure the valid ones
389 This gets done in rf_buildroothack() */
390
391 /* schedule the creation of the thread to do the
392 "/ on RAID" stuff */
393
394 kthread_create(rf_buildroothack,config_sets);
395
396 #if 0
397 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
398 #endif
399 }
400
401 }
402
403 void
404 rf_buildroothack(arg)
405 void *arg;
406 {
407 RF_ConfigSet_t *config_sets = arg;
408 RF_ConfigSet_t *cset;
409 RF_ConfigSet_t *next_cset;
410 int retcode;
411 int raidID;
412 int rootID;
413 int num_root;
414
415 rootID = 0;
416 num_root = 0;
417 cset = config_sets;
418 while(cset != NULL ) {
419 next_cset = cset->next;
420 if (rf_have_enough_components(cset) &&
421 cset->ac->clabel->autoconfigure==1) {
422 retcode = rf_auto_config_set(cset,&raidID);
423 if (!retcode) {
424 if (cset->rootable) {
425 rootID = raidID;
426 num_root++;
427 }
428 } else {
429 /* The autoconfig didn't work :( */
430 #if DEBUG
431 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
432 #endif
433 rf_release_all_vps(cset);
434 }
435 } else {
436 /* we're not autoconfiguring this set...
437 release the associated resources */
438 rf_release_all_vps(cset);
439 }
440 /* cleanup */
441 rf_cleanup_config_set(cset);
442 cset = next_cset;
443 }
444 if (boothowto & RB_ASKNAME) {
445 /* We don't auto-config... */
446 } else {
447 /* They didn't ask, and we found something bootable... */
448
449 if (num_root == 1) {
450 booted_device = &raidrootdev[rootID];
451 } else if (num_root > 1) {
452 /* we can't guess.. require the user to answer... */
453 boothowto |= RB_ASKNAME;
454 }
455 }
456 }
457
458
459 int
460 raidsize(dev)
461 dev_t dev;
462 {
463 #if 1 /* XXXthorpej */
464 return (-1);
465 #else
466 struct raid_softc *rs;
467 struct disklabel *lp;
468 int part, unit, omask, size;
469
470 unit = raidunit(dev);
471 if (unit >= numraid)
472 return (-1);
473 rs = &raid_softc[unit];
474
475 if ((rs->sc_flags & RAIDF_INITED) == 0)
476 return (-1);
477
478 part = DISKPART(dev);
479 omask = rs->sc_dkdev.dk_openmask & (1 << part);
480 lp = rs->sc_dkdev.dk_label;
481
482 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
483 return (-1);
484
485 if (lp->d_partitions[part].p_fstype != FS_SWAP)
486 size = -1;
487 else
488 size = lp->d_partitions[part].p_size *
489 (lp->d_secsize / DEV_BSIZE);
490
491 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
492 return (-1);
493
494 return (size);
495 #endif
496 }
497
498 int
499 raiddump(dev, blkno, va, size)
500 dev_t dev;
501 daddr_t blkno;
502 caddr_t va;
503 size_t size;
504 {
505 /* Not implemented. */
506 return ENXIO;
507 }
508
509 /* ARGSUSED */
510 int
511 raidopen(devvp, flags, fmt, p)
512 struct vnode *devvp;
513 int flags, fmt;
514 struct proc *p;
515 {
516 int unit = raidunit(vdev_rdev(devvp));
517 struct raid_softc *rs;
518 struct disklabel *lp;
519 int part, pmask;
520 int error = 0;
521
522 if (unit >= numraid)
523 return (ENXIO);
524 rs = &raid_softc[unit];
525
526 vdev_setprivdata(devvp, rs);
527
528 if ((error = raidlock(rs)) != 0)
529 return (error);
530 lp = rs->sc_dkdev.dk_label;
531
532 part = DISKPART(vdev_rdev(devvp));
533 pmask = (1 << part);
534
535 db1_printf(("Opening raid device number: %d partition: %d\n",
536 unit, part));
537
538
539 if ((rs->sc_flags & RAIDF_INITED) &&
540 (rs->sc_dkdev.dk_openmask == 0))
541 raidgetdisklabel(devvp);
542
543 /* make sure that this partition exists */
544
545 if (part != RAW_PART) {
546 db1_printf(("Not a raw partition..\n"));
547 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
548 ((part >= lp->d_npartitions) ||
549 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
550 error = ENXIO;
551 raidunlock(rs);
552 db1_printf(("Bailing out...\n"));
553 return (error);
554 }
555 }
556 /* Prevent this unit from being unconfigured while open. */
557 switch (fmt) {
558 case S_IFCHR:
559 rs->sc_dkdev.dk_copenmask |= pmask;
560 break;
561
562 case S_IFBLK:
563 rs->sc_dkdev.dk_bopenmask |= pmask;
564 break;
565 }
566
567 if ((rs->sc_dkdev.dk_openmask == 0) &&
568 ((rs->sc_flags & RAIDF_INITED) != 0)) {
569 /* First one... mark things as dirty... Note that we *MUST*
570 have done a configure before this. I DO NOT WANT TO BE
571 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
572 THAT THEY BELONG TOGETHER!!!!! */
573 /* XXX should check to see if we're only open for reading
574 here... If so, we needn't do this, but then need some
575 other way of keeping track of what's happened.. */
576
577 rf_markalldirty( raidPtrs[unit] );
578 }
579
580
581 rs->sc_dkdev.dk_openmask =
582 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
583
584 raidunlock(rs);
585
586 return (error);
587
588
589 }
590
591 /* ARGSUSED */
592 int
593 raidclose(devvp, flags, fmt, p)
594 struct vnode *devvp;
595 int flags, fmt;
596 struct proc *p;
597 {
598 struct raid_softc *rs;
599 int error = 0;
600 int part;
601 dev_t rdev;
602
603 rs = vdev_privdata(devvp);
604 rdev = vdev_rdev(devvp);
605
606 if ((error = raidlock(rs)) != 0)
607 return (error);
608
609 part = DISKPART(rdev);
610
611 /* ...that much closer to allowing unconfiguration... */
612 switch (fmt) {
613 case S_IFCHR:
614 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
615 break;
616
617 case S_IFBLK:
618 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
619 break;
620 }
621 rs->sc_dkdev.dk_openmask =
622 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
623
624 if ((rs->sc_dkdev.dk_openmask == 0) &&
625 ((rs->sc_flags & RAIDF_INITED) != 0)) {
626 /* Last one... device is not unconfigured yet.
627 (Device shutdown has taken care of setting the
628 clean bits if RAIDF_INITED is not set.)
629 Mark things as clean... */
630 #if 0
631 printf("Last one on raid%d. Updating status.\n",
632 DISKUNIT(vdev_rdev(devvp)));
633 #endif
634 rf_update_component_labels(raidPtrs[DISKUNIT(rdev)],
635 RF_FINAL_COMPONENT_UPDATE);
636 if (doing_shutdown) {
637 /* last one, and we're going down, so
638 lights out for this RAID set too. */
639 error = rf_Shutdown(raidPtrs[DISKUNIT(rdev)]);
640 pool_destroy(&rs->sc_cbufpool);
641
642 /* It's no longer initialized... */
643 rs->sc_flags &= ~RAIDF_INITED;
644
645 /* Detach the disk. */
646 disk_detach(&rs->sc_dkdev);
647 }
648 }
649
650 raidunlock(rs);
651 return (0);
652 }
653
654 void
655 raidstrategy(bp)
656 struct buf *bp;
657 {
658 int s;
659
660 unsigned int raidID;
661 struct raid_softc *rs;
662 RF_Raid_t *raidPtr;
663 struct disklabel *lp;
664 int wlabel;
665 dev_t rdev;
666
667 rdev = vdev_rdev(bp->b_devvp);
668 rs = vdev_privdata(bp->b_devvp);
669
670 raidID = DISKUNIT(rdev);
671
672 if ((rs->sc_flags & RAIDF_INITED) ==0) {
673 bp->b_error = ENXIO;
674 bp->b_flags |= B_ERROR;
675 bp->b_resid = bp->b_bcount;
676 biodone(bp);
677 return;
678 }
679 raidPtr = raidPtrs[raidID];
680 if (raidPtr == NULL || raidPtr->valid == 0) {
681 bp->b_error = ENODEV;
682 bp->b_flags |= B_ERROR;
683 bp->b_resid = bp->b_bcount;
684 biodone(bp);
685 return;
686 }
687 if (bp->b_bcount == 0) {
688 db1_printf(("b_bcount is zero..\n"));
689 biodone(bp);
690 return;
691 }
692 lp = rs->sc_dkdev.dk_label;
693
694 /*
695 * Do bounds checking and adjust transfer. If there's an
696 * error, the bounds check will flag that for us.
697 */
698
699 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
700 if (DISKPART(rdev) != RAW_PART &&
701 (bp->b_flags & B_DKLABEL) == 0) {
702 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
703 db1_printf(("Bounds check failed!!:%d %d\n",
704 (int) bp->b_blkno, (int) wlabel));
705 biodone(bp);
706 return;
707 }
708 }
709 s = splbio();
710
711 bp->b_resid = 0;
712
713 /* stuff it onto our queue */
714 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
715
716 raidstart(raidPtrs[raidID]);
717
718 splx(s);
719 }
720
721 /* ARGSUSED */
722 int
723 raidread(devvp, uio, flags)
724 struct vnode *devvp;
725 struct uio *uio;
726 int flags;
727 {
728 struct raid_softc *rs;
729
730 rs = vdev_privdata(devvp);
731
732 if ((rs->sc_flags & RAIDF_INITED) == 0)
733 return (ENXIO);
734
735 db1_printf(("raidread: unit: %d partition: %d\n",
736 DISKUNIT(vdev_rdev(devvp)), DISKPART(vdev_rdev(devvp))));
737
738 return (physio(raidstrategy, NULL, devvp, B_READ, minphys, uio));
739 }
740
741 /* ARGSUSED */
742 int
743 raidwrite(devvp, uio, flags)
744 struct vnode *devvp;
745 struct uio *uio;
746 int flags;
747 {
748 struct raid_softc *rs;
749
750 rs = vdev_privdata(devvp);
751
752 if ((rs->sc_flags & RAIDF_INITED) == 0)
753 return (ENXIO);
754
755 db1_printf(("raidwrite\n"));
756
757 return (physio(raidstrategy, NULL, devvp, B_WRITE, minphys, uio));
758 }
759
760 int
761 raidioctl(devvp, cmd, data, flag, p)
762 struct vnode *devvp;
763 u_long cmd;
764 caddr_t data;
765 int flag;
766 struct proc *p;
767 {
768 struct raid_softc *rs;
769 int error = 0;
770 int part, pmask;
771 RF_Config_t *k_cfg, *u_cfg;
772 RF_Raid_t *raidPtr;
773 RF_RaidDisk_t *diskPtr;
774 RF_AccTotals_t *totals;
775 RF_DeviceConfig_t *d_cfg, **ucfgp;
776 u_char *specific_buf;
777 int retcode = 0;
778 int row;
779 int column;
780 struct rf_recon_req *rrcopy, *rr;
781 RF_ComponentLabel_t *clabel;
782 RF_ComponentLabel_t ci_label;
783 RF_ComponentLabel_t **clabel_ptr;
784 RF_SingleComponent_t *sparePtr,*componentPtr;
785 RF_SingleComponent_t hot_spare;
786 RF_SingleComponent_t component;
787 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
788 int i, j, d;
789 dev_t rdev;
790 #ifdef __HAVE_OLD_DISKLABEL
791 struct disklabel newlabel;
792 #endif
793
794 rdev = vdev_rdev(devvp);
795 rs = vdev_privdata(devvp);
796
797 raidPtr = raidPtrs[DISKUNIT(rdev)];
798
799 db1_printf(("raidioctl: 0x%x %d %d %ld\n", rdev,
800 DISKPART(rdev), DISKUNIT(rdev), cmd));
801
802 /* Must be open for writes for these commands... */
803 switch (cmd) {
804 case DIOCSDINFO:
805 case DIOCWDINFO:
806 #ifdef __HAVE_OLD_DISKLABEL
807 case ODIOCWDINFO:
808 case ODIOCSDINFO:
809 #endif
810 case DIOCWLABEL:
811 if ((flag & FWRITE) == 0)
812 return (EBADF);
813 }
814
815 /* Must be initialized for these... */
816 switch (cmd) {
817 case DIOCGDINFO:
818 case DIOCSDINFO:
819 case DIOCWDINFO:
820 #ifdef __HAVE_OLD_DISKLABEL
821 case ODIOCGDINFO:
822 case ODIOCWDINFO:
823 case ODIOCSDINFO:
824 case ODIOCGDEFLABEL:
825 #endif
826 case DIOCGPART:
827 case DIOCWLABEL:
828 case DIOCGDEFLABEL:
829 case RAIDFRAME_SHUTDOWN:
830 case RAIDFRAME_REWRITEPARITY:
831 case RAIDFRAME_GET_INFO:
832 case RAIDFRAME_RESET_ACCTOTALS:
833 case RAIDFRAME_GET_ACCTOTALS:
834 case RAIDFRAME_KEEP_ACCTOTALS:
835 case RAIDFRAME_GET_SIZE:
836 case RAIDFRAME_FAIL_DISK:
837 case RAIDFRAME_COPYBACK:
838 case RAIDFRAME_CHECK_RECON_STATUS:
839 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
840 case RAIDFRAME_GET_COMPONENT_LABEL:
841 case RAIDFRAME_SET_COMPONENT_LABEL:
842 case RAIDFRAME_ADD_HOT_SPARE:
843 case RAIDFRAME_REMOVE_HOT_SPARE:
844 case RAIDFRAME_INIT_LABELS:
845 case RAIDFRAME_REBUILD_IN_PLACE:
846 case RAIDFRAME_CHECK_PARITY:
847 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
848 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
849 case RAIDFRAME_CHECK_COPYBACK_STATUS:
850 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
851 case RAIDFRAME_SET_AUTOCONFIG:
852 case RAIDFRAME_SET_ROOT:
853 case RAIDFRAME_DELETE_COMPONENT:
854 case RAIDFRAME_INCORPORATE_HOT_SPARE:
855 if ((rs->sc_flags & RAIDF_INITED) == 0)
856 return (ENXIO);
857 }
858
859 switch (cmd) {
860
861 /* configure the system */
862 case RAIDFRAME_CONFIGURE:
863
864 if (raidPtr->valid) {
865 /* There is a valid RAID set running on this unit! */
866 printf("raid%d: Device already configured!\n",
867 DISKUNIT(rdev));
868 return(EINVAL);
869 }
870
871 /* copy-in the configuration information */
872 /* data points to a pointer to the configuration structure */
873
874 u_cfg = *((RF_Config_t **) data);
875 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
876 if (k_cfg == NULL) {
877 return (ENOMEM);
878 }
879 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
880 sizeof(RF_Config_t));
881 if (retcode) {
882 RF_Free(k_cfg, sizeof(RF_Config_t));
883 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
884 retcode));
885 return (retcode);
886 }
887 /* allocate a buffer for the layout-specific data, and copy it
888 * in */
889 if (k_cfg->layoutSpecificSize) {
890 if (k_cfg->layoutSpecificSize > 10000) {
891 /* sanity check */
892 RF_Free(k_cfg, sizeof(RF_Config_t));
893 return (EINVAL);
894 }
895 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
896 (u_char *));
897 if (specific_buf == NULL) {
898 RF_Free(k_cfg, sizeof(RF_Config_t));
899 return (ENOMEM);
900 }
901 retcode = copyin(k_cfg->layoutSpecific,
902 (caddr_t) specific_buf,
903 k_cfg->layoutSpecificSize);
904 if (retcode) {
905 RF_Free(k_cfg, sizeof(RF_Config_t));
906 RF_Free(specific_buf,
907 k_cfg->layoutSpecificSize);
908 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
909 retcode));
910 return (retcode);
911 }
912 } else
913 specific_buf = NULL;
914 k_cfg->layoutSpecific = specific_buf;
915
916 /* should do some kind of sanity check on the configuration.
917 * Store the sum of all the bytes in the last byte? */
918
919 /* configure the system */
920
921 /*
922 * Clear the entire RAID descriptor, just to make sure
923 * there is no stale data left in the case of a
924 * reconfiguration
925 */
926 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
927 raidPtr->raidid = DISKUNIT(rdev);
928
929 retcode = rf_Configure(raidPtr, k_cfg, NULL);
930
931 if (retcode == 0) {
932
933 /* allow this many simultaneous IO's to
934 this RAID device */
935 raidPtr->openings = RAIDOUTSTANDING;
936
937 raidinit(raidPtr);
938 rf_markalldirty(raidPtr);
939 }
940 /* free the buffers. No return code here. */
941 if (k_cfg->layoutSpecificSize) {
942 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
943 }
944 RF_Free(k_cfg, sizeof(RF_Config_t));
945
946 return (retcode);
947
948 /* shutdown the system */
949 case RAIDFRAME_SHUTDOWN:
950
951 if ((error = raidlock(rs)) != 0)
952 return (error);
953
954 /*
955 * If somebody has a partition mounted, we shouldn't
956 * shutdown.
957 */
958
959 part = DISKPART(rdev);
960 pmask = (1 << part);
961 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
962 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
963 (rs->sc_dkdev.dk_copenmask & pmask))) {
964 raidunlock(rs);
965 return (EBUSY);
966 }
967
968 retcode = rf_Shutdown(raidPtr);
969
970 pool_destroy(&rs->sc_cbufpool);
971
972 /* It's no longer initialized... */
973 rs->sc_flags &= ~RAIDF_INITED;
974
975 /* Detach the disk. */
976 disk_detach(&rs->sc_dkdev);
977
978 raidunlock(rs);
979
980 return (retcode);
981 case RAIDFRAME_GET_COMPONENT_LABEL:
982 clabel_ptr = (RF_ComponentLabel_t **) data;
983 /* need to read the component label for the disk indicated
984 by row,column in clabel */
985
986 /* For practice, let's get it directly from disk, rather
987 than from the in-core copy */
988 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
989 (RF_ComponentLabel_t *));
990 if (clabel == NULL)
991 return (ENOMEM);
992
993 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
994
995 retcode = copyin( *clabel_ptr, clabel,
996 sizeof(RF_ComponentLabel_t));
997
998 if (retcode) {
999 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1000 return(retcode);
1001 }
1002
1003 row = clabel->row;
1004 column = clabel->column;
1005
1006 if ((row < 0) || (row >= raidPtr->numRow) ||
1007 (column < 0) || (column >= raidPtr->numCol +
1008 raidPtr->numSpare)) {
1009 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1010 return(EINVAL);
1011 }
1012
1013 raidread_component_label(raidPtr->raid_cinfo[row][column].ci_vp,
1014 clabel);
1015
1016 retcode = copyout((caddr_t) clabel,
1017 (caddr_t) *clabel_ptr,
1018 sizeof(RF_ComponentLabel_t));
1019 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1020 return (retcode);
1021
1022 case RAIDFRAME_SET_COMPONENT_LABEL:
1023 clabel = (RF_ComponentLabel_t *) data;
1024
1025 /* XXX check the label for valid stuff... */
1026 /* Note that some things *should not* get modified --
1027 the user should be re-initing the labels instead of
1028 trying to patch things.
1029 */
1030
1031 printf("Got component label:\n");
1032 printf("Version: %d\n",clabel->version);
1033 printf("Serial Number: %d\n",clabel->serial_number);
1034 printf("Mod counter: %d\n",clabel->mod_counter);
1035 printf("Row: %d\n", clabel->row);
1036 printf("Column: %d\n", clabel->column);
1037 printf("Num Rows: %d\n", clabel->num_rows);
1038 printf("Num Columns: %d\n", clabel->num_columns);
1039 printf("Clean: %d\n", clabel->clean);
1040 printf("Status: %d\n", clabel->status);
1041
1042 row = clabel->row;
1043 column = clabel->column;
1044
1045 if ((row < 0) || (row >= raidPtr->numRow) ||
1046 (column < 0) || (column >= raidPtr->numCol)) {
1047 return(EINVAL);
1048 }
1049
1050 /* XXX this isn't allowed to do anything for now :-) */
1051
1052 /* XXX and before it is, we need to fill in the rest
1053 of the fields!?!?!?! */
1054 #if 0
1055 raidwrite_component_label(
1056 raidPtr->raid_cinfo[row][column].ci_vp, clabel);
1057 #endif
1058 return (0);
1059
1060 case RAIDFRAME_INIT_LABELS:
1061 clabel = (RF_ComponentLabel_t *) data;
1062 /*
1063 we only want the serial number from
1064 the above. We get all the rest of the information
1065 from the config that was used to create this RAID
1066 set.
1067 */
1068
1069 raidPtr->serial_number = clabel->serial_number;
1070
1071 raid_init_component_label(raidPtr, &ci_label);
1072 ci_label.serial_number = clabel->serial_number;
1073
1074 for(row=0;row<raidPtr->numRow;row++) {
1075 ci_label.row = row;
1076 for(column=0;column<raidPtr->numCol;column++) {
1077 diskPtr = &raidPtr->Disks[row][column];
1078 if (!RF_DEAD_DISK(diskPtr->status)) {
1079 ci_label.partitionSize = diskPtr->partitionSize;
1080 ci_label.column = column;
1081 raidwrite_component_label(
1082 raidPtr->raid_cinfo[row][column].ci_vp,
1083 &ci_label );
1084 }
1085 }
1086 }
1087
1088 return (retcode);
1089 case RAIDFRAME_SET_AUTOCONFIG:
1090 d = rf_set_autoconfig(raidPtr, *(int *) data);
1091 printf("New autoconfig value is: %d\n", d);
1092 *(int *) data = d;
1093 return (retcode);
1094
1095 case RAIDFRAME_SET_ROOT:
1096 d = rf_set_rootpartition(raidPtr, *(int *) data);
1097 printf("New rootpartition value is: %d\n", d);
1098 *(int *) data = d;
1099 return (retcode);
1100
1101 /* initialize all parity */
1102 case RAIDFRAME_REWRITEPARITY:
1103
1104 if (raidPtr->Layout.map->faultsTolerated == 0) {
1105 /* Parity for RAID 0 is trivially correct */
1106 raidPtr->parity_good = RF_RAID_CLEAN;
1107 return(0);
1108 }
1109
1110 if (raidPtr->parity_rewrite_in_progress == 1) {
1111 /* Re-write is already in progress! */
1112 return(EINVAL);
1113 }
1114
1115 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1116 rf_RewriteParityThread,
1117 raidPtr,"raid_parity");
1118 return (retcode);
1119
1120
1121 case RAIDFRAME_ADD_HOT_SPARE:
1122 sparePtr = (RF_SingleComponent_t *) data;
1123 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1124 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1125 return(retcode);
1126
1127 case RAIDFRAME_REMOVE_HOT_SPARE:
1128 return(retcode);
1129
1130 case RAIDFRAME_DELETE_COMPONENT:
1131 componentPtr = (RF_SingleComponent_t *)data;
1132 memcpy( &component, componentPtr,
1133 sizeof(RF_SingleComponent_t));
1134 retcode = rf_delete_component(raidPtr, &component);
1135 return(retcode);
1136
1137 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1138 componentPtr = (RF_SingleComponent_t *)data;
1139 memcpy( &component, componentPtr,
1140 sizeof(RF_SingleComponent_t));
1141 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1142 return(retcode);
1143
1144 case RAIDFRAME_REBUILD_IN_PLACE:
1145
1146 if (raidPtr->Layout.map->faultsTolerated == 0) {
1147 /* Can't do this on a RAID 0!! */
1148 return(EINVAL);
1149 }
1150
1151 if (raidPtr->recon_in_progress == 1) {
1152 /* a reconstruct is already in progress! */
1153 return(EINVAL);
1154 }
1155
1156 componentPtr = (RF_SingleComponent_t *) data;
1157 memcpy( &component, componentPtr,
1158 sizeof(RF_SingleComponent_t));
1159 row = component.row;
1160 column = component.column;
1161 printf("Rebuild: %d %d\n",row, column);
1162 if ((row < 0) || (row >= raidPtr->numRow) ||
1163 (column < 0) || (column >= raidPtr->numCol)) {
1164 return(EINVAL);
1165 }
1166
1167 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1168 if (rrcopy == NULL)
1169 return(ENOMEM);
1170
1171 rrcopy->raidPtr = (void *) raidPtr;
1172 rrcopy->row = row;
1173 rrcopy->col = column;
1174
1175 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1176 rf_ReconstructInPlaceThread,
1177 rrcopy,"raid_reconip");
1178 return(retcode);
1179
1180 case RAIDFRAME_GET_INFO:
1181 if (!raidPtr->valid)
1182 return (ENODEV);
1183 ucfgp = (RF_DeviceConfig_t **) data;
1184 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1185 (RF_DeviceConfig_t *));
1186 if (d_cfg == NULL)
1187 return (ENOMEM);
1188 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1189 d_cfg->rows = raidPtr->numRow;
1190 d_cfg->cols = raidPtr->numCol;
1191 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1192 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1193 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1194 return (ENOMEM);
1195 }
1196 d_cfg->nspares = raidPtr->numSpare;
1197 if (d_cfg->nspares >= RF_MAX_DISKS) {
1198 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1199 return (ENOMEM);
1200 }
1201 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1202 d = 0;
1203 for (i = 0; i < d_cfg->rows; i++) {
1204 for (j = 0; j < d_cfg->cols; j++) {
1205 d_cfg->devs[d] = raidPtr->Disks[i][j];
1206 d++;
1207 }
1208 }
1209 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1210 d_cfg->spares[i] = raidPtr->Disks[0][j];
1211 }
1212 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1213 sizeof(RF_DeviceConfig_t));
1214 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1215
1216 return (retcode);
1217
1218 case RAIDFRAME_CHECK_PARITY:
1219 *(int *) data = raidPtr->parity_good;
1220 return (0);
1221
1222 case RAIDFRAME_RESET_ACCTOTALS:
1223 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1224 return (0);
1225
1226 case RAIDFRAME_GET_ACCTOTALS:
1227 totals = (RF_AccTotals_t *) data;
1228 *totals = raidPtr->acc_totals;
1229 return (0);
1230
1231 case RAIDFRAME_KEEP_ACCTOTALS:
1232 raidPtr->keep_acc_totals = *(int *)data;
1233 return (0);
1234
1235 case RAIDFRAME_GET_SIZE:
1236 *(int *) data = raidPtr->totalSectors;
1237 return (0);
1238
1239 /* fail a disk & optionally start reconstruction */
1240 case RAIDFRAME_FAIL_DISK:
1241
1242 if (raidPtr->Layout.map->faultsTolerated == 0) {
1243 /* Can't do this on a RAID 0!! */
1244 return(EINVAL);
1245 }
1246
1247 rr = (struct rf_recon_req *) data;
1248
1249 if (rr->row < 0 || rr->row >= raidPtr->numRow
1250 || rr->col < 0 || rr->col >= raidPtr->numCol)
1251 return (EINVAL);
1252
1253 printf("raid%d: Failing the disk: row: %d col: %d\n",
1254 DISKUNIT(rdev), rr->row, rr->col);
1255
1256 /* make a copy of the recon request so that we don't rely on
1257 * the user's buffer */
1258 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1259 if (rrcopy == NULL)
1260 return(ENOMEM);
1261 bcopy(rr, rrcopy, sizeof(*rr));
1262 rrcopy->raidPtr = (void *) raidPtr;
1263
1264 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1265 rf_ReconThread,
1266 rrcopy,"raid_recon");
1267 return (0);
1268
1269 /* invoke a copyback operation after recon on whatever disk
1270 * needs it, if any */
1271 case RAIDFRAME_COPYBACK:
1272
1273 if (raidPtr->Layout.map->faultsTolerated == 0) {
1274 /* This makes no sense on a RAID 0!! */
1275 return(EINVAL);
1276 }
1277
1278 if (raidPtr->copyback_in_progress == 1) {
1279 /* Copyback is already in progress! */
1280 return(EINVAL);
1281 }
1282
1283 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1284 rf_CopybackThread,
1285 raidPtr,"raid_copyback");
1286 return (retcode);
1287
1288 /* return the percentage completion of reconstruction */
1289 case RAIDFRAME_CHECK_RECON_STATUS:
1290 if (raidPtr->Layout.map->faultsTolerated == 0) {
1291 /* This makes no sense on a RAID 0, so tell the
1292 user it's done. */
1293 *(int *) data = 100;
1294 return(0);
1295 }
1296 row = 0; /* XXX we only consider a single row... */
1297 if (raidPtr->status[row] != rf_rs_reconstructing)
1298 *(int *) data = 100;
1299 else
1300 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1301 return (0);
1302 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1303 progressInfoPtr = (RF_ProgressInfo_t **) data;
1304 row = 0; /* XXX we only consider a single row... */
1305 if (raidPtr->status[row] != rf_rs_reconstructing) {
1306 progressInfo.remaining = 0;
1307 progressInfo.completed = 100;
1308 progressInfo.total = 100;
1309 } else {
1310 progressInfo.total =
1311 raidPtr->reconControl[row]->numRUsTotal;
1312 progressInfo.completed =
1313 raidPtr->reconControl[row]->numRUsComplete;
1314 progressInfo.remaining = progressInfo.total -
1315 progressInfo.completed;
1316 }
1317 retcode = copyout((caddr_t) &progressInfo,
1318 (caddr_t) *progressInfoPtr,
1319 sizeof(RF_ProgressInfo_t));
1320 return (retcode);
1321
1322 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1323 if (raidPtr->Layout.map->faultsTolerated == 0) {
1324 /* This makes no sense on a RAID 0, so tell the
1325 user it's done. */
1326 *(int *) data = 100;
1327 return(0);
1328 }
1329 if (raidPtr->parity_rewrite_in_progress == 1) {
1330 *(int *) data = 100 *
1331 raidPtr->parity_rewrite_stripes_done /
1332 raidPtr->Layout.numStripe;
1333 } else {
1334 *(int *) data = 100;
1335 }
1336 return (0);
1337
1338 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1339 progressInfoPtr = (RF_ProgressInfo_t **) data;
1340 if (raidPtr->parity_rewrite_in_progress == 1) {
1341 progressInfo.total = raidPtr->Layout.numStripe;
1342 progressInfo.completed =
1343 raidPtr->parity_rewrite_stripes_done;
1344 progressInfo.remaining = progressInfo.total -
1345 progressInfo.completed;
1346 } else {
1347 progressInfo.remaining = 0;
1348 progressInfo.completed = 100;
1349 progressInfo.total = 100;
1350 }
1351 retcode = copyout((caddr_t) &progressInfo,
1352 (caddr_t) *progressInfoPtr,
1353 sizeof(RF_ProgressInfo_t));
1354 return (retcode);
1355
1356 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1357 if (raidPtr->Layout.map->faultsTolerated == 0) {
1358 /* This makes no sense on a RAID 0 */
1359 *(int *) data = 100;
1360 return(0);
1361 }
1362 if (raidPtr->copyback_in_progress == 1) {
1363 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1364 raidPtr->Layout.numStripe;
1365 } else {
1366 *(int *) data = 100;
1367 }
1368 return (0);
1369
1370 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1371 progressInfoPtr = (RF_ProgressInfo_t **) data;
1372 if (raidPtr->copyback_in_progress == 1) {
1373 progressInfo.total = raidPtr->Layout.numStripe;
1374 progressInfo.completed =
1375 raidPtr->copyback_stripes_done;
1376 progressInfo.remaining = progressInfo.total -
1377 progressInfo.completed;
1378 } else {
1379 progressInfo.remaining = 0;
1380 progressInfo.completed = 100;
1381 progressInfo.total = 100;
1382 }
1383 retcode = copyout((caddr_t) &progressInfo,
1384 (caddr_t) *progressInfoPtr,
1385 sizeof(RF_ProgressInfo_t));
1386 return (retcode);
1387
1388 /* the sparetable daemon calls this to wait for the kernel to
1389 * need a spare table. this ioctl does not return until a
1390 * spare table is needed. XXX -- calling mpsleep here in the
1391 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1392 * -- I should either compute the spare table in the kernel,
1393 * or have a different -- XXX XXX -- interface (a different
1394 * character device) for delivering the table -- XXX */
1395 #if 0
1396 case RAIDFRAME_SPARET_WAIT:
1397 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1398 while (!rf_sparet_wait_queue)
1399 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1400 waitreq = rf_sparet_wait_queue;
1401 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1402 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1403
1404 /* structure assignment */
1405 *((RF_SparetWait_t *) data) = *waitreq;
1406
1407 RF_Free(waitreq, sizeof(*waitreq));
1408 return (0);
1409
1410 /* wakes up a process waiting on SPARET_WAIT and puts an error
1411 * code in it that will cause the daemon to exit */
1412 case RAIDFRAME_ABORT_SPARET_WAIT:
1413 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1414 waitreq->fcol = -1;
1415 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1416 waitreq->next = rf_sparet_wait_queue;
1417 rf_sparet_wait_queue = waitreq;
1418 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1419 wakeup(&rf_sparet_wait_queue);
1420 return (0);
1421
1422 /* used by the spare table daemon to deliver a spare table
1423 * into the kernel */
1424 case RAIDFRAME_SEND_SPARET:
1425
1426 /* install the spare table */
1427 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1428
1429 /* respond to the requestor. the return status of the spare
1430 * table installation is passed in the "fcol" field */
1431 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1432 waitreq->fcol = retcode;
1433 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1434 waitreq->next = rf_sparet_resp_queue;
1435 rf_sparet_resp_queue = waitreq;
1436 wakeup(&rf_sparet_resp_queue);
1437 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1438
1439 return (retcode);
1440 #endif
1441
1442 default:
1443 break; /* fall through to the os-specific code below */
1444
1445 }
1446
1447 if (!raidPtr->valid)
1448 return (EINVAL);
1449
1450 /*
1451 * Add support for "regular" device ioctls here.
1452 */
1453
1454 switch (cmd) {
1455 case DIOCGDINFO:
1456 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1457 break;
1458 #ifdef __HAVE_OLD_DISKLABEL
1459 case ODIOCGDINFO:
1460 newlabel = *(rs->sc_dkdev.dk_label);
1461 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1462 return ENOTTY;
1463 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1464 break;
1465 #endif
1466
1467 case DIOCGPART:
1468 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1469 ((struct partinfo *) data)->part =
1470 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(rdev)];
1471 break;
1472
1473 case DIOCWDINFO:
1474 case DIOCSDINFO:
1475 #ifdef __HAVE_OLD_DISKLABEL
1476 case ODIOCWDINFO:
1477 case ODIOCSDINFO:
1478 #endif
1479 {
1480 struct disklabel *lp;
1481 #ifdef __HAVE_OLD_DISKLABEL
1482 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1483 memset(&newlabel, 0, sizeof newlabel);
1484 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1485 lp = &newlabel;
1486 } else
1487 #endif
1488 lp = (struct disklabel *)data;
1489
1490 if ((error = raidlock(rs)) != 0)
1491 return (error);
1492
1493 rs->sc_flags |= RAIDF_LABELLING;
1494
1495 error = setdisklabel(rs->sc_dkdev.dk_label,
1496 lp, 0, rs->sc_dkdev.dk_cpulabel);
1497 if (error == 0) {
1498 if (cmd == DIOCWDINFO
1499 #ifdef __HAVE_OLD_DISKLABEL
1500 || cmd == ODIOCWDINFO
1501 #endif
1502 )
1503 error = writedisklabel(devvp, raidstrategy,
1504 rs->sc_dkdev.dk_label,
1505 rs->sc_dkdev.dk_cpulabel);
1506 }
1507 rs->sc_flags &= ~RAIDF_LABELLING;
1508
1509 raidunlock(rs);
1510
1511 if (error)
1512 return (error);
1513 break;
1514 }
1515
1516 case DIOCWLABEL:
1517 if (*(int *) data != 0)
1518 rs->sc_flags |= RAIDF_WLABEL;
1519 else
1520 rs->sc_flags &= ~RAIDF_WLABEL;
1521 break;
1522
1523 case DIOCGDEFLABEL:
1524 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1525 break;
1526
1527 #ifdef __HAVE_OLD_DISKLABEL
1528 case ODIOCGDEFLABEL:
1529 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1530 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1531 return ENOTTY;
1532 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1533 break;
1534 #endif
1535
1536 default:
1537 retcode = ENOTTY;
1538 }
1539 return (retcode);
1540 }
1541
1542
1543 /* raidinit -- complete the rest of the initialization for the
1544 RAIDframe device. */
1545
1546
1547 static void
1548 raidinit(raidPtr)
1549 RF_Raid_t *raidPtr;
1550 {
1551 struct raid_softc *rs;
1552 int unit;
1553
1554 unit = raidPtr->raidid;
1555
1556 rs = &raid_softc[unit];
1557 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1558 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1559
1560
1561 /* XXX should check return code first... */
1562 rs->sc_flags |= RAIDF_INITED;
1563
1564 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1565
1566 rs->sc_dkdev.dk_name = rs->sc_xname;
1567
1568 /* disk_attach actually creates space for the CPU disklabel, among
1569 * other things, so it's critical to call this *BEFORE* we try putzing
1570 * with disklabels. */
1571
1572 disk_attach(&rs->sc_dkdev);
1573
1574 /* XXX There may be a weird interaction here between this, and
1575 * protectedSectors, as used in RAIDframe. */
1576
1577 rs->sc_size = raidPtr->totalSectors;
1578
1579 }
1580
1581 /* wake up the daemon & tell it to get us a spare table
1582 * XXX
1583 * the entries in the queues should be tagged with the raidPtr
1584 * so that in the extremely rare case that two recons happen at once,
1585 * we know for which device we're requesting a spare table
1586 * XXX
1587 *
1588 * XXX This code is not currently used. GO
1589 */
1590 int
1591 rf_GetSpareTableFromDaemon(req)
1592 RF_SparetWait_t *req;
1593 {
1594 int retcode;
1595
1596 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1597 req->next = rf_sparet_wait_queue;
1598 rf_sparet_wait_queue = req;
1599 wakeup(&rf_sparet_wait_queue);
1600
1601 /* mpsleep unlocks the mutex */
1602 while (!rf_sparet_resp_queue) {
1603 tsleep(&rf_sparet_resp_queue, PRIBIO,
1604 "raidframe getsparetable", 0);
1605 }
1606 req = rf_sparet_resp_queue;
1607 rf_sparet_resp_queue = req->next;
1608 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1609
1610 retcode = req->fcol;
1611 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1612 * alloc'd */
1613 return (retcode);
1614 }
1615
1616 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1617 * bp & passes it down.
1618 * any calls originating in the kernel must use non-blocking I/O;
1619 * do some extra sanity checking to return "appropriate" error values for
1620 * certain conditions (to make some standard utilities work)
1621 *
1622 * Formerly known as: rf_DoAccessKernel
1623 */
1624 void
1625 raidstart(raidPtr)
1626 RF_Raid_t *raidPtr;
1627 {
1628 RF_SectorCount_t num_blocks, pb, sum;
1629 RF_RaidAddr_t raid_addr;
1630 int retcode;
1631 struct partition *pp;
1632 daddr_t blocknum;
1633 int unit;
1634 struct raid_softc *rs;
1635 int do_async;
1636 struct buf *bp;
1637 dev_t rdev;
1638
1639 unit = raidPtr->raidid;
1640 rs = &raid_softc[unit];
1641
1642 /* quick check to see if anything has died recently */
1643 RF_LOCK_MUTEX(raidPtr->mutex);
1644 if (raidPtr->numNewFailures > 0) {
1645 rf_update_component_labels(raidPtr,
1646 RF_NORMAL_COMPONENT_UPDATE);
1647 raidPtr->numNewFailures--;
1648 }
1649 RF_UNLOCK_MUTEX(raidPtr->mutex);
1650
1651 /* Check to see if we're at the limit... */
1652 RF_LOCK_MUTEX(raidPtr->mutex);
1653 while (raidPtr->openings > 0) {
1654 RF_UNLOCK_MUTEX(raidPtr->mutex);
1655
1656 /* get the next item, if any, from the queue */
1657 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1658 /* nothing more to do */
1659 return;
1660 }
1661 rdev = vdev_rdev(bp->b_devvp);
1662 BUFQ_REMOVE(&rs->buf_queue, bp);
1663
1664 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1665 * partition.. Need to make it absolute to the underlying
1666 * device.. */
1667
1668 blocknum = bp->b_blkno;
1669 if (DISKPART(rdev) != RAW_PART &&
1670 (bp->b_flags & B_DKLABEL) == 0) {
1671 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(rdev)];
1672 blocknum += pp->p_offset;
1673 }
1674
1675 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1676 (int) blocknum));
1677
1678 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1679 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1680
1681 /* *THIS* is where we adjust what block we're going to...
1682 * but DO NOT TOUCH bp->b_blkno!!! */
1683 raid_addr = blocknum;
1684
1685 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1686 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1687 sum = raid_addr + num_blocks + pb;
1688 if (1 || rf_debugKernelAccess) {
1689 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1690 (int) raid_addr, (int) sum, (int) num_blocks,
1691 (int) pb, (int) bp->b_resid));
1692 }
1693 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1694 || (sum < num_blocks) || (sum < pb)) {
1695 bp->b_error = ENOSPC;
1696 bp->b_flags |= B_ERROR;
1697 bp->b_resid = bp->b_bcount;
1698 biodone(bp);
1699 RF_LOCK_MUTEX(raidPtr->mutex);
1700 continue;
1701 }
1702 /*
1703 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1704 */
1705
1706 if (bp->b_bcount & raidPtr->sectorMask) {
1707 bp->b_error = EINVAL;
1708 bp->b_flags |= B_ERROR;
1709 bp->b_resid = bp->b_bcount;
1710 biodone(bp);
1711 RF_LOCK_MUTEX(raidPtr->mutex);
1712 continue;
1713
1714 }
1715 db1_printf(("Calling DoAccess..\n"));
1716
1717
1718 RF_LOCK_MUTEX(raidPtr->mutex);
1719 raidPtr->openings--;
1720 RF_UNLOCK_MUTEX(raidPtr->mutex);
1721
1722 /*
1723 * Everything is async.
1724 */
1725 do_async = 1;
1726
1727 disk_busy(&rs->sc_dkdev);
1728
1729 /* XXX we're still at splbio() here... do we *really*
1730 need to be? */
1731
1732 /* don't ever condition on bp->b_flags & B_WRITE.
1733 * always condition on B_READ instead */
1734
1735 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1736 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1737 do_async, raid_addr, num_blocks,
1738 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1739
1740 RF_LOCK_MUTEX(raidPtr->mutex);
1741 }
1742 RF_UNLOCK_MUTEX(raidPtr->mutex);
1743 }
1744
1745 /*
1746 * invoke an I/O from kernel mode. Disk queue should be
1747 * locked upon entry
1748 */
1749 int
1750 rf_DispatchKernelIO(queue, req)
1751 RF_DiskQueue_t *queue;
1752 RF_DiskQueueData_t *req;
1753 {
1754 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1755 struct buf *bp;
1756 struct raidbuf *raidbp = NULL;
1757 struct raid_softc *rs;
1758 int unit;
1759 int s;
1760
1761 s=0;
1762 /* s = splbio();*/ /* want to test this */
1763 /* XXX along with the vnode, we also need the softc associated with
1764 * this device.. */
1765
1766 req->queue = queue;
1767
1768 unit = queue->raidPtr->raidid;
1769
1770 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1771
1772 if (unit >= numraid) {
1773 printf("Invalid unit number: %d %d\n", unit, numraid);
1774 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1775 }
1776 rs = &raid_softc[unit];
1777
1778 bp = req->bp;
1779 #if 1
1780 /* XXX when there is a physical disk failure, someone is passing us a
1781 * buffer that contains old stuff!! Attempt to deal with this problem
1782 * without taking a performance hit... (not sure where the real bug
1783 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1784
1785 if (bp->b_flags & B_ERROR) {
1786 bp->b_flags &= ~B_ERROR;
1787 }
1788 if (bp->b_error != 0) {
1789 bp->b_error = 0;
1790 }
1791 #endif
1792 raidbp = RAIDGETBUF(rs);
1793
1794 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1795
1796 /*
1797 * context for raidiodone
1798 */
1799 raidbp->rf_obp = bp;
1800 raidbp->req = req;
1801
1802 LIST_INIT(&raidbp->rf_buf.b_dep);
1803
1804 switch (req->type) {
1805 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1806 /* XXX need to do something extra here.. */
1807 /* I'm leaving this in, as I've never actually seen it used,
1808 * and I'd like folks to report it... GO */
1809 printf("WAKEUP CALLED\n");
1810 queue->numOutstanding++;
1811
1812 /* XXX need to glue the original buffer into this?? */
1813
1814 KernelWakeupFunc(&raidbp->rf_buf);
1815 break;
1816
1817 case RF_IO_TYPE_READ:
1818 case RF_IO_TYPE_WRITE:
1819
1820 if (req->tracerec) {
1821 RF_ETIMER_START(req->tracerec->timer);
1822 }
1823 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1824 op | bp->b_flags, req->sectorOffset, req->numSector,
1825 req->buf, KernelWakeupFunc, (void *) req,
1826 queue->raidPtr->logBytesPerSector, req->b_proc);
1827
1828 if (rf_debugKernelAccess) {
1829 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1830 (long) bp->b_blkno));
1831 }
1832 queue->numOutstanding++;
1833 queue->last_deq_sector = req->sectorOffset;
1834 /* acc wouldn't have been let in if there were any pending
1835 * reqs at any other priority */
1836 queue->curPriority = req->priority;
1837
1838 db1_printf(("Going for %c to unit %d row %d col %d\n",
1839 req->type, unit, queue->row, queue->col));
1840 db1_printf(("sector %d count %d (%d bytes) %d\n",
1841 (int) req->sectorOffset, (int) req->numSector,
1842 (int) (req->numSector <<
1843 queue->raidPtr->logBytesPerSector),
1844 (int) queue->raidPtr->logBytesPerSector));
1845 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1846 raidbp->rf_buf.b_vp->v_numoutput++;
1847 }
1848 VOP_STRATEGY(&raidbp->rf_buf);
1849
1850 break;
1851
1852 default:
1853 panic("bad req->type in rf_DispatchKernelIO");
1854 }
1855 db1_printf(("Exiting from DispatchKernelIO\n"));
1856 /* splx(s); */ /* want to test this */
1857 return (0);
1858 }
1859 /* this is the callback function associated with an I/O invoked from
1860 kernel code.
1861 */
1862 static void
1863 KernelWakeupFunc(vbp)
1864 struct buf *vbp;
1865 {
1866 RF_DiskQueueData_t *req = NULL;
1867 RF_DiskQueue_t *queue;
1868 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1869 struct buf *bp;
1870 struct raid_softc *rs;
1871 int unit;
1872 int s;
1873
1874 s = splbio();
1875 db1_printf(("recovering the request queue:\n"));
1876 req = raidbp->req;
1877
1878 bp = raidbp->rf_obp;
1879
1880 queue = (RF_DiskQueue_t *) req->queue;
1881
1882 if (raidbp->rf_buf.b_flags & B_ERROR) {
1883 bp->b_flags |= B_ERROR;
1884 bp->b_error = raidbp->rf_buf.b_error ?
1885 raidbp->rf_buf.b_error : EIO;
1886 }
1887
1888 /* XXX methinks this could be wrong... */
1889 #if 1
1890 bp->b_resid = raidbp->rf_buf.b_resid;
1891 #endif
1892
1893 if (req->tracerec) {
1894 RF_ETIMER_STOP(req->tracerec->timer);
1895 RF_ETIMER_EVAL(req->tracerec->timer);
1896 RF_LOCK_MUTEX(rf_tracing_mutex);
1897 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1898 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1899 req->tracerec->num_phys_ios++;
1900 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1901 }
1902 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1903
1904 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1905
1906
1907 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1908 * ballistic, and mark the component as hosed... */
1909
1910 if (bp->b_flags & B_ERROR) {
1911 /* Mark the disk as dead */
1912 /* but only mark it once... */
1913 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1914 rf_ds_optimal) {
1915 printf("raid%d: IO Error. Marking %s as failed.\n",
1916 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1917 queue->raidPtr->Disks[queue->row][queue->col].status =
1918 rf_ds_failed;
1919 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1920 queue->raidPtr->numFailures++;
1921 queue->raidPtr->numNewFailures++;
1922 } else { /* Disk is already dead... */
1923 /* printf("Disk already marked as dead!\n"); */
1924 }
1925
1926 }
1927
1928 rs = &raid_softc[unit];
1929 RAIDPUTBUF(rs, raidbp);
1930
1931 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1932 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1933
1934 splx(s);
1935 }
1936
1937 /*
1938 * initialize a buf structure for doing an I/O in the kernel.
1939 */
1940 static void
1941 InitBP(bp, b_vp, rw_flag, startSect, numSect, buf, cbFunc, cbArg,
1942 logBytesPerSector, b_proc)
1943 struct buf *bp;
1944 struct vnode *b_vp;
1945 unsigned rw_flag;
1946 RF_SectorNum_t startSect;
1947 RF_SectorCount_t numSect;
1948 caddr_t buf;
1949 void (*cbFunc) (struct buf *);
1950 void *cbArg;
1951 int logBytesPerSector;
1952 struct proc *b_proc;
1953 {
1954 /* bp->b_flags = B_PHYS | rw_flag; */
1955 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1956 bp->b_bcount = numSect << logBytesPerSector;
1957 bp->b_bufsize = bp->b_bcount;
1958 bp->b_error = 0;
1959 bp->b_devvp = b_vp;
1960 bp->b_data = buf;
1961 bp->b_blkno = startSect;
1962 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1963 if (bp->b_bcount == 0) {
1964 panic("bp->b_bcount is zero in InitBP!!\n");
1965 }
1966 bp->b_proc = b_proc;
1967 bp->b_iodone = cbFunc;
1968 bp->b_vp = b_vp;
1969
1970 }
1971
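/*
 * Construct a default disklabel for the RAID device, based on the
 * geometry recorded in raidPtr.
 */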
1972 static void
1973 raidgetdefaultlabel(raidPtr, rs, lp)
1974 RF_Raid_t *raidPtr;
1975 struct raid_softc *rs;
1976 struct disklabel *lp;
1977 {
1978 db1_printf(("Building a default label...\n"));
1979 memset(lp, 0, sizeof(*lp));
1980
1981 /* fabricate a label... */
1982 lp->d_secperunit = raidPtr->totalSectors;
1983 lp->d_secsize = raidPtr->bytesPerSector;
1984 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1985 lp->d_ntracks = 4 * raidPtr->numCol;
1986 lp->d_ncylinders = raidPtr->totalSectors /
1987 (lp->d_nsectors * lp->d_ntracks);
1988 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1989
1990 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1991 lp->d_type = DTYPE_RAID;
1992 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1993 lp->d_rpm = 3600;
1994 lp->d_interleave = 1;
1995 lp->d_flags = 0;
1996
1997 lp->d_partitions[RAW_PART].p_offset = 0;
1998 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1999 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2000 lp->d_npartitions = RAW_PART + 1;
2001
2002 lp->d_magic = DISKMAGIC;
2003 lp->d_magic2 = DISKMAGIC;
2004 	lp->d_checksum = dkcksum(lp);
2005
2006 }
2007 /*
2008 * Read the disklabel from the raid device. If one is not present, fake one
2009 * up.
2010 */
2011 static void
2012 raidgetdisklabel(devvp)
2013 struct vnode *devvp;
2014 {
2015 struct raid_softc *rs;
2016 char *errstring;
2017 struct disklabel *lp;
2018 struct cpu_disklabel *clp;
2019 RF_Raid_t *raidPtr;
2020
2021 rs = vdev_privdata(devvp);
2022 lp = rs->sc_dkdev.dk_label;
2023 clp = rs->sc_dkdev.dk_cpulabel;
2024
2025 db1_printf(("Getting the disklabel...\n"));
2026
2027 memset(clp, 0, sizeof(*clp));
2028
2029 raidPtr = raidPtrs[DISKUNIT(vdev_rdev(devvp))];
2030
2031 raidgetdefaultlabel(raidPtr, rs, lp);
2032
2033 /*
2034 * Call the generic disklabel extraction routine.
2035 */
2036 errstring = readdisklabel(devvp, raidstrategy,
2037 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2038 if (errstring)
2039 raidmakedisklabel(rs);
2040 else {
2041 int i;
2042 struct partition *pp;
2043
2044 /*
2045 * Sanity check whether the found disklabel is valid.
2046 *
2047 		 * This is necessary since the total size of the raid device
2048 		 * may vary when the interleave is changed even though exactly
2049 		 * the same components are used, and an old disklabel may be
2050 		 * used if one is found.
2051 */
2052 if (lp->d_secperunit != rs->sc_size)
2053 printf("WARNING: %s: "
2054 "total sector size in disklabel (%d) != "
2055 "the size of raid (%ld)\n", rs->sc_xname,
2056 lp->d_secperunit, (long) rs->sc_size);
2057 for (i = 0; i < lp->d_npartitions; i++) {
2058 pp = &lp->d_partitions[i];
2059 if (pp->p_offset + pp->p_size > rs->sc_size)
2060 printf("WARNING: %s: end of partition `%c' "
2061 "exceeds the size of raid (%ld)\n",
2062 rs->sc_xname, 'a' + i, (long) rs->sc_size);
2063 }
2064 }
2065 }
2066 /*
2067 * Take care of things one might want to take care of in the event
2068 * that a disklabel isn't present.
2069 */
2070 static void
2071 raidmakedisklabel(rs)
2072 struct raid_softc *rs;
2073 {
2074 struct disklabel *lp = rs->sc_dkdev.dk_label;
2075 db1_printf(("Making a label..\n"));
2076
2077 /*
2078 * For historical reasons, if there's no disklabel present
2079 * the raw partition must be marked FS_BSDFFS.
2080 */
2081
2082 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2083
2084 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2085
2086 lp->d_checksum = dkcksum(lp);
2087 }
2088 /*
2089 * Lookup the provided name in the filesystem. If the file exists,
2090 * is a valid block device, and isn't being used by anyone else,
2091 * set *vpp to the file's vnode.
2092 * You'll find the original of this in ccd.c
2093 */
2094 int
2095 raidlookup(path, p, vpp)
2096 char *path;
2097 struct proc *p;
2098 struct vnode **vpp; /* result */
2099 {
2100 struct nameidata nd;
2101 struct vnode *vp;
2102 struct vattr va;
2103 int error;
2104
2105 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2106 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2107 #ifdef DEBUG
2108 printf("RAIDframe: vn_open returned %d\n", error);
2109 #endif
2110 return (error);
2111 }
2112 vp = nd.ni_vp;
2113 if (vp->v_usecount > 1) {
2114 VOP_UNLOCK(vp, 0);
2115 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2116 return (EBUSY);
2117 }
2118 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2119 VOP_UNLOCK(vp, 0);
2120 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2121 return (error);
2122 }
2123 /* XXX: eventually we should handle VREG, too. */
2124 if (va.va_type != VBLK) {
2125 VOP_UNLOCK(vp, 0);
2126 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2127 return (ENOTBLK);
2128 }
2129 VOP_UNLOCK(vp, 0);
2130 *vpp = vp;
2131 return (0);
2132 }
2133 /*
2134 * Wait interruptibly for an exclusive lock.
2135 *
2136 * XXX
2137 * Several drivers do this; it should be abstracted and made MP-safe.
2138 * (Hmm... where have we seen this warning before :-> GO )
2139 */
2140 static int
2141 raidlock(rs)
2142 struct raid_softc *rs;
2143 {
2144 int error;
2145
2146 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2147 rs->sc_flags |= RAIDF_WANTED;
2148 if ((error =
2149 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2150 return (error);
2151 }
2152 rs->sc_flags |= RAIDF_LOCKED;
2153 return (0);
2154 }
2155 /*
2156 * Unlock and wake up any waiters.
2157 */
2158 static void
2159 raidunlock(rs)
2160 struct raid_softc *rs;
2161 {
2162
2163 rs->sc_flags &= ~RAIDF_LOCKED;
2164 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2165 rs->sc_flags &= ~RAIDF_WANTED;
2166 wakeup(rs);
2167 }
2168 }
2169
2170
2171 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2172 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2173
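/*
 * Re-read the component label from the given component, set its
 * mod_counter and mark it clean, and write it back.
 */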
2174 int
2175 raidmarkclean(struct vnode *b_vp, int mod_counter)
2176 {
2177 RF_ComponentLabel_t clabel;
2178 raidread_component_label(b_vp, &clabel);
2179 clabel.mod_counter = mod_counter;
2180 clabel.clean = RF_RAID_CLEAN;
2181 raidwrite_component_label(b_vp, &clabel);
2182 return(0);
2183 }
2184
2185
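/*
 * As above, but mark the component label as dirty.
 */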
2186 int
2187 raidmarkdirty(struct vnode *b_vp, int mod_counter)
2188 {
2189 RF_ComponentLabel_t clabel;
2190 raidread_component_label(b_vp, &clabel);
2191 clabel.mod_counter = mod_counter;
2192 clabel.clean = RF_RAID_DIRTY;
2193 raidwrite_component_label(b_vp, &clabel);
2194 return(0);
2195 }
2196
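/*
 * Read the component label (at RF_COMPONENT_INFO_OFFSET on the raw
 * component) into *clabel.
 */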
2197 /* ARGSUSED */
2198 int
2199 raidread_component_label(b_vp, clabel)
2200 struct vnode *b_vp;
2201 RF_ComponentLabel_t *clabel;
2202 {
2203 struct buf *bp;
2204 int error;
2205
2206 /* XXX should probably ensure that we don't try to do this if
2207 someone has changed rf_protected_sectors. */
2208
2209 if (b_vp == NULL) {
2210 /* For whatever reason, this component is not valid.
2211 Don't try to read a component label from it. */
2212 return(EINVAL);
2213 }
2214
2215 /* get a block of the appropriate size... */
2216 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2217 bp->b_devvp = b_vp;
2218
2219 /* get our ducks in a row for the read */
2220 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2221 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2222 bp->b_flags |= B_READ;
2223 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2224
2225 (*bdevsw[major(vdev_rdev(b_vp))].d_strategy)(bp);
2226
2227 error = biowait(bp);
2228
2229 if (!error) {
2230 memcpy(clabel, bp->b_data,
2231 sizeof(RF_ComponentLabel_t));
2232 #if 0
2233 rf_print_component_label( clabel );
2234 #endif
2235 } else {
2236 #if 0
2237 printf("Failed to read RAID component label!\n");
2238 #endif
2239 }
2240
2241 brelse(bp);
2242 return(error);
2243 }
2244
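/*
 * Write *clabel to the component label area of the raw component.
 */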
2245 /* ARGSUSED */
2246 int
2247 raidwrite_component_label(b_vp, clabel)
2248 struct vnode *b_vp;
2249 RF_ComponentLabel_t *clabel;
2250 {
2251 struct buf *bp;
2252 int error;
2253
2254 /* get a block of the appropriate size... */
2255 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2256 bgetdevvp(b_vp, bp);
2257
2258 /* get our ducks in a row for the write */
2259 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2260 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2261 bp->b_flags |= B_WRITE;
2262 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2263
2264 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2265
2266 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2267
2268 	(*bdevsw[major(vdev_rdev(b_vp))].d_strategy)(bp);
2269 error = biowait(bp);
2270 bp->b_flags |= B_INVAL;
2271 brelse(bp);
2272 if (error) {
2273 #if 1
2274 printf("Failed to write RAID component info!\n");
2275 #endif
2276 }
2277
2278 return(error);
2279 }
2280
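/*
 * Bump the mod_counter and mark the component labels of all
 * non-failed components as dirty.
 */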
2281 void
2282 rf_markalldirty(raidPtr)
2283 RF_Raid_t *raidPtr;
2284 {
2285 RF_ComponentLabel_t clabel;
2286 int r,c;
2287
2288 raidPtr->mod_counter++;
2289 for (r = 0; r < raidPtr->numRow; r++) {
2290 for (c = 0; c < raidPtr->numCol; c++) {
2291 /* we don't want to touch (at all) a disk that has
2292 failed */
2293 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2294 raidread_component_label(
2295 raidPtr->raid_cinfo[r][c].ci_vp,
2296 &clabel);
2297 if (clabel.status == rf_ds_spared) {
2298 /* XXX do something special...
2299 but whatever you do, don't
2300 try to access it!! */
2301 } else {
2302 #if 0
2303 clabel.status =
2304 raidPtr->Disks[r][c].status;
2305 raidwrite_component_label(
2306 raidPtr->raid_cinfo[r][c].ci_vp,
2307 &clabel);
2308 #endif
2309 raidmarkdirty(
2310 raidPtr->raid_cinfo[r][c].ci_vp,
2311 raidPtr->mod_counter);
2312 }
2313 }
2314 }
2315 }
2316 /* printf("Component labels marked dirty.\n"); */
2317 #if 0
2318 for( c = 0; c < raidPtr->numSpare ; c++) {
2319 sparecol = raidPtr->numCol + c;
2320 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2321 /*
2322
2323 			XXX this is where we get fancy and map this spare
2324 			into its correct spot in the array.
2325
2326 */
2327 /*
2328
2329 we claim this disk is "optimal" if it's
2330 rf_ds_used_spare, as that means it should be
2331 directly substitutable for the disk it replaced.
2332 We note that too...
2333
2334 */
2335
2336 for(i=0;i<raidPtr->numRow;i++) {
2337 for(j=0;j<raidPtr->numCol;j++) {
2338 if ((raidPtr->Disks[i][j].spareRow ==
2339 r) &&
2340 (raidPtr->Disks[i][j].spareCol ==
2341 sparecol)) {
2342 srow = r;
2343 scol = sparecol;
2344 break;
2345 }
2346 }
2347 }
2348
2349 raidread_component_label(
2350 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2351 &clabel);
2352 /* make sure status is noted */
2353 clabel.version = RF_COMPONENT_LABEL_VERSION;
2354 clabel.mod_counter = raidPtr->mod_counter;
2355 clabel.serial_number = raidPtr->serial_number;
2356 clabel.row = srow;
2357 clabel.column = scol;
2358 clabel.num_rows = raidPtr->numRow;
2359 clabel.num_columns = raidPtr->numCol;
2360 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2361 clabel.status = rf_ds_optimal;
2362 raidwrite_component_label(
2363 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2364 &clabel);
2365 			raidmarkclean(raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2366 }
2367 }
2368 #endif
2369 }
2370
2371
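/*
 * Update the component labels of all optimal components (and any
 * in-use spares) with the current mod_counter, and on a final update
 * mark them clean if the parity is known to be good.
 */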
2372 void
2373 rf_update_component_labels(raidPtr, final)
2374 RF_Raid_t *raidPtr;
2375 int final;
2376 {
2377 RF_ComponentLabel_t clabel;
2378 int sparecol;
2379 int r,c;
2380 int i,j;
2381 int srow, scol;
2382
2383 srow = -1;
2384 scol = -1;
2385
2386 /* XXX should do extra checks to make sure things really are clean,
2387 rather than blindly setting the clean bit... */
2388
2389 raidPtr->mod_counter++;
2390
2391 for (r = 0; r < raidPtr->numRow; r++) {
2392 for (c = 0; c < raidPtr->numCol; c++) {
2393 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2394 raidread_component_label(
2395 raidPtr->raid_cinfo[r][c].ci_vp,
2396 &clabel);
2397 /* make sure status is noted */
2398 clabel.status = rf_ds_optimal;
2399 /* bump the counter */
2400 clabel.mod_counter = raidPtr->mod_counter;
2401
2402 raidwrite_component_label(
2403 raidPtr->raid_cinfo[r][c].ci_vp,
2404 &clabel);
2405 if (final == RF_FINAL_COMPONENT_UPDATE) {
2406 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2407 raidmarkclean(
2408 raidPtr->raid_cinfo[r][c].ci_vp,
2409 raidPtr->mod_counter);
2410 }
2411 }
2412 }
2413 /* else we don't touch it.. */
2414 }
2415 }
2416
2417 for( c = 0; c < raidPtr->numSpare ; c++) {
2418 sparecol = raidPtr->numCol + c;
2419 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2420 /*
2421
2422 we claim this disk is "optimal" if it's
2423 rf_ds_used_spare, as that means it should be
2424 directly substitutable for the disk it replaced.
2425 We note that too...
2426
2427 */
2428
2429 for(i=0;i<raidPtr->numRow;i++) {
2430 for(j=0;j<raidPtr->numCol;j++) {
2431 if ((raidPtr->Disks[i][j].spareRow ==
2432 0) &&
2433 (raidPtr->Disks[i][j].spareCol ==
2434 sparecol)) {
2435 srow = i;
2436 scol = j;
2437 break;
2438 }
2439 }
2440 }
2441
2442 /* XXX shouldn't *really* need this... */
2443 raidread_component_label(
2444 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2445 &clabel);
2446 /* make sure status is noted */
2447
2448 raid_init_component_label(raidPtr, &clabel);
2449
2450 clabel.mod_counter = raidPtr->mod_counter;
2451 clabel.row = srow;
2452 clabel.column = scol;
2453 clabel.status = rf_ds_optimal;
2454
2455 raidwrite_component_label(
2456 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2457 &clabel);
2458 if (final == RF_FINAL_COMPONENT_UPDATE) {
2459 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2460 raidmarkclean(
2461 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2462 raidPtr->mod_counter);
2463 }
2464 }
2465 }
2466 }
2467 /* printf("Component labels updated\n"); */
2468 }
2469
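/*
 * Close the vnode of a component, using the close method appropriate
 * to whether or not it was auto-configured.
 */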
2470 void
2471 rf_close_component(raidPtr, vp, auto_configured)
2472 RF_Raid_t *raidPtr;
2473 struct vnode *vp;
2474 int auto_configured;
2475 {
2476 struct proc *p;
2477
2478 p = raidPtr->engine_thread;
2479
2480 if (vp != NULL) {
2481 if (auto_configured == 1) {
2482 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2483 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2484 vput(vp);
2485
2486 } else {
2487 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2488 }
2489 } else {
2490 printf("vnode was NULL\n");
2491 }
2492 }
2493
2494
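/*
 * Close and release the vnodes of all components and spares of the
 * given RAID set.
 */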
2495 void
2496 rf_UnconfigureVnodes(raidPtr)
2497 RF_Raid_t *raidPtr;
2498 {
2499 int r,c;
2500 struct proc *p;
2501 struct vnode *vp;
2502 int acd;
2503
2504
2505 /* We take this opportunity to close the vnodes like we should.. */
2506
2507 p = raidPtr->engine_thread;
2508
2509 for (r = 0; r < raidPtr->numRow; r++) {
2510 for (c = 0; c < raidPtr->numCol; c++) {
2511 printf("Closing vnode for row: %d col: %d\n", r, c);
2512 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2513 acd = raidPtr->Disks[r][c].auto_configured;
2514 rf_close_component(raidPtr, vp, acd);
2515 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2516 raidPtr->Disks[r][c].auto_configured = 0;
2517 }
2518 }
2519 for (r = 0; r < raidPtr->numSpare; r++) {
2520 printf("Closing vnode for spare: %d\n", r);
2521 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2522 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2523 rf_close_component(raidPtr, vp, acd);
2524 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2525 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2526 }
2527 }
2528
2529
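/*
 * Kernel thread body: fail the indicated component and (optionally)
 * reconstruct onto a spare, then exit.
 */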
2530 void
2531 rf_ReconThread(req)
2532 struct rf_recon_req *req;
2533 {
2534 int s;
2535 RF_Raid_t *raidPtr;
2536
2537 s = splbio();
2538 raidPtr = (RF_Raid_t *) req->raidPtr;
2539 raidPtr->recon_in_progress = 1;
2540
2541 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2542 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2543
2544 /* XXX get rid of this! we don't need it at all.. */
2545 RF_Free(req, sizeof(*req));
2546
2547 raidPtr->recon_in_progress = 0;
2548 splx(s);
2549
2550 /* That's all... */
2551 kthread_exit(0); /* does not return */
2552 }
2553
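/*
 * Kernel thread body: rewrite the parity of the array, then exit.
 */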
2554 void
2555 rf_RewriteParityThread(raidPtr)
2556 RF_Raid_t *raidPtr;
2557 {
2558 int retcode;
2559 int s;
2560
2561 raidPtr->parity_rewrite_in_progress = 1;
2562 s = splbio();
2563 retcode = rf_RewriteParity(raidPtr);
2564 splx(s);
2565 if (retcode) {
2566 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2567 } else {
2568 /* set the clean bit! If we shutdown correctly,
2569 the clean bit on each component label will get
2570 set */
2571 raidPtr->parity_good = RF_RAID_CLEAN;
2572 }
2573 raidPtr->parity_rewrite_in_progress = 0;
2574
2575 /* Anyone waiting for us to stop? If so, inform them... */
2576 if (raidPtr->waitShutdown) {
2577 wakeup(&raidPtr->parity_rewrite_in_progress);
2578 }
2579
2580 /* That's all... */
2581 kthread_exit(0); /* does not return */
2582 }
2583
2584
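/*
 * Kernel thread body: copy reconstructed data back from the spare to
 * the replaced component, then exit.
 */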
2585 void
2586 rf_CopybackThread(raidPtr)
2587 RF_Raid_t *raidPtr;
2588 {
2589 int s;
2590
2591 raidPtr->copyback_in_progress = 1;
2592 s = splbio();
2593 rf_CopybackReconstructedData(raidPtr);
2594 splx(s);
2595 raidPtr->copyback_in_progress = 0;
2596
2597 /* That's all... */
2598 kthread_exit(0); /* does not return */
2599 }
2600
2601
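/*
 * Kernel thread body: reconstruct a failed component in place, then
 * exit.
 */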
2602 void
2603 rf_ReconstructInPlaceThread(req)
2604 struct rf_recon_req *req;
2605 {
2606 int retcode;
2607 int s;
2608 RF_Raid_t *raidPtr;
2609
2610 s = splbio();
2611 raidPtr = req->raidPtr;
2612 raidPtr->recon_in_progress = 1;
2613 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2614 RF_Free(req, sizeof(*req));
2615 raidPtr->recon_in_progress = 0;
2616 splx(s);
2617
2618 /* That's all... */
2619 kthread_exit(0); /* does not return */
2620 }
2621
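/*
 * Mountroot hook for the RAID device; currently does nothing.
 */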
2622 void
2623 rf_mountroot_hook(dev)
2624 struct device *dev;
2625 {
2626
2627 }
2628
2629
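/*
 * Scan all disks in the system for FS_RAID partitions carrying a
 * reasonable-looking component label, and return them as a list of
 * RF_AutoConfig_t structures.
 */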
2630 RF_AutoConfig_t *
2631 rf_find_raid_components()
2632 {
2633 struct devnametobdevmaj *dtobdm;
2634 struct vnode *vp;
2635 struct disklabel label;
2636 struct device *dv;
2637 char *cd_name;
2638 dev_t dev;
2639 int error;
2640 int i;
2641 int good_one;
2642 RF_ComponentLabel_t *clabel;
2643 RF_AutoConfig_t *ac_list;
2644 RF_AutoConfig_t *ac;
2645
2646
2647 /* initialize the AutoConfig list */
2648 ac_list = NULL;
2649
2650 /* we begin by trolling through *all* the devices on the system */
2651
2652 for (dv = alldevs.tqh_first; dv != NULL;
2653 dv = dv->dv_list.tqe_next) {
2654
2655 /* we are only interested in disks... */
2656 if (dv->dv_class != DV_DISK)
2657 continue;
2658
2659 /* we don't care about floppies... */
2660 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2661 continue;
2662 }
2663
2664 /* need to find the device_name_to_block_device_major stuff */
2665 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2666 dtobdm = dev_name2blk;
2667 		while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2668 			dtobdm++;
2669 		}
		if (dtobdm->d_name == NULL) {
			/* this driver isn't in dev_name2blk; skip it */
			continue;
		}
2670
2671 /* get a vnode for the raw partition of this disk */
2672
2673 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2674 if (bdevvp(dev, &vp))
2675 panic("RAID can't alloc vnode");
2676
2677 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2678
2679 error = VOP_OPEN(vp, FREAD, NOCRED, 0, NULL);
2680
2681 if (error) {
2682 /* "Who cares." Continue looking
2683 for something that exists*/
2684 vput(vp);
2685 continue;
2686 }
2687
2688 /* Ok, the disk exists. Go get the disklabel. */
2689 VOP_UNLOCK(vp, 0);
2690 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2691 FREAD, NOCRED, 0);
2692 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2693 if (error) {
2694 /*
2695 * XXX can't happen - open() would
2696 * have errored out (or faked up one)
2697 */
2698 printf("can't get label for dev %s%c (%d)!?!?\n",
2699 dv->dv_xname, 'a' + RAW_PART, error);
2700 }
2701
2702 /* don't need this any more. We'll allocate it again
2703 a little later if we really do... */
2704 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2705 vput(vp);
2706
2707 for (i=0; i < label.d_npartitions; i++) {
2708 /* We only support partitions marked as RAID */
2709 if (label.d_partitions[i].p_fstype != FS_RAID)
2710 continue;
2711
2712 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2713 if (bdevvp(dev, &vp))
2714 panic("RAID can't alloc vnode");
2715
2716 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2717
2718 error = VOP_OPEN(vp, FREAD, NOCRED, 0, NULL);
2719 if (error) {
2720 /* Whatever... */
2721 vput(vp);
2722 continue;
2723 }
2724
2725 good_one = 0;
2726
2727 clabel = (RF_ComponentLabel_t *)
2728 malloc(sizeof(RF_ComponentLabel_t),
2729 M_RAIDFRAME, M_NOWAIT);
2730 if (clabel == NULL) {
2731 /* XXX CLEANUP HERE */
2732 				vput(vp);
2733 printf("RAID auto config: out of memory!\n");
2734 return(NULL); /* XXX probably should panic? */
2735 }
2736
2737 if (!raidread_component_label(vp, clabel)) {
2738 /* Got the label. Does it look reasonable? */
2739 if (rf_reasonable_label(clabel) &&
2740 (clabel->partitionSize <=
2741 label.d_partitions[i].p_size)) {
2742 #if DEBUG
2743 printf("Component on: %s%c: %d\n",
2744 dv->dv_xname, 'a'+i,
2745 label.d_partitions[i].p_size);
2746 rf_print_component_label(clabel);
2747 #endif
2748 /* if it's reasonable, add it,
2749 else ignore it. */
2750 ac = (RF_AutoConfig_t *)
2751 malloc(sizeof(RF_AutoConfig_t),
2752 M_RAIDFRAME,
2753 M_NOWAIT);
2754 if (ac == NULL) {
2755 /* XXX should panic?? */
2756 vput(vp);
2757 return(NULL);
2758 }
2759
2760 sprintf(ac->devname, "%s%c",
2761 dv->dv_xname, 'a'+i);
2762 ac->vp = vp;
2763 ac->clabel = clabel;
2764 ac->next = ac_list;
2765 ac_list = ac;
2766 good_one = 1;
2767 }
2768 }
2769 if (!good_one) {
2770 /* cleanup */
2771 free(clabel, M_RAIDFRAME);
2772 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2773 vput(vp);
2774 } else
2775 VOP_UNLOCK(vp, 0);
2776 }
2777 }
2778 return(ac_list);
2779 }
2780
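/*
 * Perform some basic sanity checks on a component label to decide
 * whether it is plausible.
 */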
2781 static int
2782 rf_reasonable_label(clabel)
2783 RF_ComponentLabel_t *clabel;
2784 {
2785
2786 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2787 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2788 ((clabel->clean == RF_RAID_CLEAN) ||
2789 (clabel->clean == RF_RAID_DIRTY)) &&
2790 clabel->row >=0 &&
2791 clabel->column >= 0 &&
2792 clabel->num_rows > 0 &&
2793 clabel->num_columns > 0 &&
2794 clabel->row < clabel->num_rows &&
2795 clabel->column < clabel->num_columns &&
2796 clabel->blockSize > 0 &&
2797 clabel->numBlocks > 0) {
2798 /* label looks reasonable enough... */
2799 return(1);
2800 }
2801 return(0);
2802 }
2803
2804
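/*
 * Print the contents of a component label.
 */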
2805 void
2806 rf_print_component_label(clabel)
2807 RF_ComponentLabel_t *clabel;
2808 {
2809 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2810 clabel->row, clabel->column,
2811 clabel->num_rows, clabel->num_columns);
2812 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2813 clabel->version, clabel->serial_number,
2814 clabel->mod_counter);
2815 printf(" Clean: %s Status: %d\n",
2816 clabel->clean ? "Yes" : "No", clabel->status );
2817 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2818 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2819 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2820 (char) clabel->parityConfig, clabel->blockSize,
2821 clabel->numBlocks);
2822 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2823 printf(" Contains root partition: %s\n",
2824 clabel->root_partition ? "Yes" : "No" );
2825 printf(" Last configured as: raid%d\n", clabel->last_unit );
2826 #if 0
2827 printf(" Config order: %d\n", clabel->config_order);
2828 #endif
2829
2830 }
2831
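/*
 * Sort the components found by rf_find_raid_components() into
 * configuration sets of components that belong together.
 */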
2832 RF_ConfigSet_t *
2833 rf_create_auto_sets(ac_list)
2834 RF_AutoConfig_t *ac_list;
2835 {
2836 RF_AutoConfig_t *ac;
2837 RF_ConfigSet_t *config_sets;
2838 RF_ConfigSet_t *cset;
2839 RF_AutoConfig_t *ac_next;
2840
2841
2842 config_sets = NULL;
2843
2844 /* Go through the AutoConfig list, and figure out which components
2845 belong to what sets. */
2846 ac = ac_list;
2847 while(ac!=NULL) {
2848 /* we're going to putz with ac->next, so save it here
2849 for use at the end of the loop */
2850 ac_next = ac->next;
2851
2852 if (config_sets == NULL) {
2853 /* will need at least this one... */
2854 config_sets = (RF_ConfigSet_t *)
2855 malloc(sizeof(RF_ConfigSet_t),
2856 M_RAIDFRAME, M_NOWAIT);
2857 if (config_sets == NULL) {
2858 panic("rf_create_auto_sets: No memory!\n");
2859 }
2860 /* this one is easy :) */
2861 config_sets->ac = ac;
2862 config_sets->next = NULL;
2863 config_sets->rootable = 0;
2864 ac->next = NULL;
2865 } else {
2866 /* which set does this component fit into? */
2867 cset = config_sets;
2868 while(cset!=NULL) {
2869 if (rf_does_it_fit(cset, ac)) {
2870 /* looks like it matches... */
2871 ac->next = cset->ac;
2872 cset->ac = ac;
2873 break;
2874 }
2875 cset = cset->next;
2876 }
2877 if (cset==NULL) {
2878 /* didn't find a match above... new set..*/
2879 cset = (RF_ConfigSet_t *)
2880 malloc(sizeof(RF_ConfigSet_t),
2881 M_RAIDFRAME, M_NOWAIT);
2882 if (cset == NULL) {
2883 panic("rf_create_auto_sets: No memory!\n");
2884 }
2885 cset->ac = ac;
2886 ac->next = NULL;
2887 cset->next = config_sets;
2888 cset->rootable = 0;
2889 config_sets = cset;
2890 }
2891 }
2892 ac = ac_next;
2893 }
2894
2895
2896 return(config_sets);
2897 }
2898
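/*
 * Decide whether a component belongs to the given configuration set,
 * by comparing its label against the first label in the set.
 */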
2899 static int
2900 rf_does_it_fit(cset, ac)
2901 RF_ConfigSet_t *cset;
2902 RF_AutoConfig_t *ac;
2903 {
2904 RF_ComponentLabel_t *clabel1, *clabel2;
2905
2906 /* If this one matches the *first* one in the set, that's good
2907 enough, since the other members of the set would have been
2908 through here too... */
2909 /* note that we are not checking partitionSize here..
2910
2911 Note that we are also not checking the mod_counters here.
2912 	   If everything else matches except the mod_counter, that's
2913 good enough for this test. We will deal with the mod_counters
2914 a little later in the autoconfiguration process.
2915
2916 (clabel1->mod_counter == clabel2->mod_counter) &&
2917
2918 The reason we don't check for this is that failed disks
2919 will have lower modification counts. If those disks are
2920 not added to the set they used to belong to, then they will
2921 form their own set, which may result in 2 different sets,
2922 for example, competing to be configured at raid0, and
2923 perhaps competing to be the root filesystem set. If the
2924 wrong ones get configured, or both attempt to become /,
2925 	   weird behaviour and/or serious lossage will occur. Thus we
2926 need to bring them into the fold here, and kick them out at
2927 a later point.
2928
2929 */
2930
2931 clabel1 = cset->ac->clabel;
2932 clabel2 = ac->clabel;
2933 if ((clabel1->version == clabel2->version) &&
2934 (clabel1->serial_number == clabel2->serial_number) &&
2935 (clabel1->num_rows == clabel2->num_rows) &&
2936 (clabel1->num_columns == clabel2->num_columns) &&
2937 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2938 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2939 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2940 (clabel1->parityConfig == clabel2->parityConfig) &&
2941 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2942 (clabel1->blockSize == clabel2->blockSize) &&
2943 (clabel1->numBlocks == clabel2->numBlocks) &&
2944 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2945 (clabel1->root_partition == clabel2->root_partition) &&
2946 (clabel1->last_unit == clabel2->last_unit) &&
2947 (clabel1->config_order == clabel2->config_order)) {
2948 		/* if it gets here, it almost *has* to be a match */
2949 } else {
2950 /* it's not consistent with somebody in the set..
2951 punt */
2952 return(0);
2953 }
2954 /* all was fine.. it must fit... */
2955 return(1);
2956 }
2957
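/*
 * Check whether a configuration set has enough live components (with
 * the expected mod_counter) to be configured.
 */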
2958 int
2959 rf_have_enough_components(cset)
2960 RF_ConfigSet_t *cset;
2961 {
2962 RF_AutoConfig_t *ac;
2963 RF_AutoConfig_t *auto_config;
2964 RF_ComponentLabel_t *clabel;
2965 int r,c;
2966 int num_rows;
2967 int num_cols;
2968 int num_missing;
2969 int mod_counter;
2970 int mod_counter_found;
2971 int even_pair_failed;
2972 char parity_type;
2973
2974
2975 /* check to see that we have enough 'live' components
2976 of this set. If so, we can configure it if necessary */
2977
2978 num_rows = cset->ac->clabel->num_rows;
2979 num_cols = cset->ac->clabel->num_columns;
2980 parity_type = cset->ac->clabel->parityConfig;
2981
2982 /* XXX Check for duplicate components!?!?!? */
2983
2984 /* Determine what the mod_counter is supposed to be for this set. */
2985
2986 mod_counter_found = 0;
2987 mod_counter = 0;
2988 ac = cset->ac;
2989 while(ac!=NULL) {
2990 if (mod_counter_found==0) {
2991 mod_counter = ac->clabel->mod_counter;
2992 mod_counter_found = 1;
2993 } else {
2994 if (ac->clabel->mod_counter > mod_counter) {
2995 mod_counter = ac->clabel->mod_counter;
2996 }
2997 }
2998 ac = ac->next;
2999 }
3000
3001 num_missing = 0;
3002 auto_config = cset->ac;
3003
3004 for(r=0; r<num_rows; r++) {
3005 even_pair_failed = 0;
3006 for(c=0; c<num_cols; c++) {
3007 ac = auto_config;
3008 while(ac!=NULL) {
3009 if ((ac->clabel->row == r) &&
3010 (ac->clabel->column == c) &&
3011 (ac->clabel->mod_counter == mod_counter)) {
3012 /* it's this one... */
3013 #if DEBUG
3014 printf("Found: %s at %d,%d\n",
3015 ac->devname,r,c);
3016 #endif
3017 break;
3018 }
3019 ac=ac->next;
3020 }
3021 if (ac==NULL) {
3022 /* Didn't find one here! */
3023 /* special case for RAID 1, especially
3024 where there are more than 2
3025 components (where RAIDframe treats
3026 things a little differently :( ) */
3027 if (parity_type == '1') {
3028 if (c%2 == 0) { /* even component */
3029 even_pair_failed = 1;
3030 } else { /* odd component. If
3031 we're failed, and
3032 so is the even
3033 component, it's
3034 "Good Night, Charlie" */
3035 if (even_pair_failed == 1) {
3036 return(0);
3037 }
3038 }
3039 } else {
3040 /* normal accounting */
3041 num_missing++;
3042 }
3043 }
3044 if ((parity_type == '1') && (c%2 == 1)) {
3045 				/* Just checked the odd component of a pair,
3046 				   and we didn't bail.. reset the
3047 				   even_pair_failed flag and go on to the next pair.... */
3048 even_pair_failed = 0;
3049 }
3050 }
3051 }
3052
3053 clabel = cset->ac->clabel;
3054
3055 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3056 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3057 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3058 /* XXX this needs to be made *much* more general */
3059 /* Too many failures */
3060 return(0);
3061 }
3062 /* otherwise, all is well, and we've got enough to take a kick
3063 at autoconfiguring this set */
3064 return(1);
3065 }
3066
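/*
 * Build an RF_Config_t for this set from the information in its
 * component labels.
 */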
3067 void
3068 rf_create_configuration(ac,config,raidPtr)
3069 RF_AutoConfig_t *ac;
3070 RF_Config_t *config;
3071 RF_Raid_t *raidPtr;
3072 {
3073 RF_ComponentLabel_t *clabel;
3074 int i;
3075
3076 clabel = ac->clabel;
3077
3078 /* 1. Fill in the common stuff */
3079 config->numRow = clabel->num_rows;
3080 config->numCol = clabel->num_columns;
3081 config->numSpare = 0; /* XXX should this be set here? */
3082 config->sectPerSU = clabel->sectPerSU;
3083 config->SUsPerPU = clabel->SUsPerPU;
3084 config->SUsPerRU = clabel->SUsPerRU;
3085 config->parityConfig = clabel->parityConfig;
3086 /* XXX... */
3087 strcpy(config->diskQueueType,"fifo");
3088 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3089 config->layoutSpecificSize = 0; /* XXX ?? */
3090
3091 while(ac!=NULL) {
3092 /* row/col values will be in range due to the checks
3093 in reasonable_label() */
3094 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3095 ac->devname);
3096 ac = ac->next;
3097 }
3098
3099 for(i=0;i<RF_MAXDBGV;i++) {
3100 		config->debugVars[i][0] = '\0';
3101 }
3102 }
3103
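/*
 * Set the autoconfigure flag on the array and in the component label
 * of every optimal component.
 */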
3104 int
3105 rf_set_autoconfig(raidPtr, new_value)
3106 RF_Raid_t *raidPtr;
3107 int new_value;
3108 {
3109 RF_ComponentLabel_t clabel;
3110 struct vnode *vp;
3111 int row, column;
3112
3113 raidPtr->autoconfigure = new_value;
3114 for(row=0; row<raidPtr->numRow; row++) {
3115 for(column=0; column<raidPtr->numCol; column++) {
3116 if (raidPtr->Disks[row][column].status ==
3117 rf_ds_optimal) {
3118 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3119 raidread_component_label(vp, &clabel);
3120 clabel.autoconfigure = new_value;
3121 raidwrite_component_label(vp, &clabel);
3122 }
3123 }
3124 }
3125 return(new_value);
3126 }
3127
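/*
 * Set the root_partition flag on the array and in the component label
 * of every optimal component.
 */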
3128 int
3129 rf_set_rootpartition(raidPtr, new_value)
3130 RF_Raid_t *raidPtr;
3131 int new_value;
3132 {
3133 RF_ComponentLabel_t clabel;
3134 struct vnode *vp;
3135 int row, column;
3136
3137 raidPtr->root_partition = new_value;
3138 for(row=0; row<raidPtr->numRow; row++) {
3139 for(column=0; column<raidPtr->numCol; column++) {
3140 if (raidPtr->Disks[row][column].status ==
3141 rf_ds_optimal) {
3142 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3143 raidread_component_label(vp, &clabel);
3144 clabel.root_partition = new_value;
3145 raidwrite_component_label(vp, &clabel);
3146 }
3147 }
3148 }
3149 return(new_value);
3150 }
3151
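/*
 * Close and release the vnodes held by the components of a
 * configuration set.
 */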
3152 void
3153 rf_release_all_vps(cset)
3154 RF_ConfigSet_t *cset;
3155 {
3156 RF_AutoConfig_t *ac;
3157
3158 ac = cset->ac;
3159 while(ac!=NULL) {
3160 /* Close the vp, and give it back */
3161 if (ac->vp) {
3162 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3163 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3164 vput(ac->vp);
3165 ac->vp = NULL;
3166 }
3167 ac = ac->next;
3168 }
3169 }
3170
3171
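/*
 * Free the component labels, the auto-config structures, and the
 * configuration set itself.
 */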
3172 void
3173 rf_cleanup_config_set(cset)
3174 RF_ConfigSet_t *cset;
3175 {
3176 RF_AutoConfig_t *ac;
3177 RF_AutoConfig_t *next_ac;
3178
3179 ac = cset->ac;
3180 while(ac!=NULL) {
3181 next_ac = ac->next;
3182 /* nuke the label */
3183 free(ac->clabel, M_RAIDFRAME);
3184 /* cleanup the config structure */
3185 free(ac, M_RAIDFRAME);
3186 /* "next.." */
3187 ac = next_ac;
3188 }
3189 /* and, finally, nuke the config set */
3190 free(cset, M_RAIDFRAME);
3191 }
3192
3193
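/*
 * Initialize a component label from the current state of the array.
 */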
3194 void
3195 raid_init_component_label(raidPtr, clabel)
3196 RF_Raid_t *raidPtr;
3197 RF_ComponentLabel_t *clabel;
3198 {
3199 /* current version number */
3200 clabel->version = RF_COMPONENT_LABEL_VERSION;
3201 clabel->serial_number = raidPtr->serial_number;
3202 clabel->mod_counter = raidPtr->mod_counter;
3203 clabel->num_rows = raidPtr->numRow;
3204 clabel->num_columns = raidPtr->numCol;
3205 clabel->clean = RF_RAID_DIRTY; /* not clean */
3206 clabel->status = rf_ds_optimal; /* "It's good!" */
3207
3208 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3209 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3210 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3211
3212 clabel->blockSize = raidPtr->bytesPerSector;
3213 clabel->numBlocks = raidPtr->sectorsPerDisk;
3214
3215 /* XXX not portable */
3216 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3217 clabel->maxOutstanding = raidPtr->maxOutstanding;
3218 clabel->autoconfigure = raidPtr->autoconfigure;
3219 clabel->root_partition = raidPtr->root_partition;
3220 clabel->last_unit = raidPtr->raidid;
3221 clabel->config_order = raidPtr->config_order;
3222 }
3223
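/*
 * Auto-configure the given configuration set: pick a unit number,
 * build a configuration, and bring the array up. The unit used is
 * returned via *unit.
 */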
3224 int
3225 rf_auto_config_set(cset,unit)
3226 RF_ConfigSet_t *cset;
3227 int *unit;
3228 {
3229 RF_Raid_t *raidPtr;
3230 RF_Config_t *config;
3231 int raidID;
3232 int retcode;
3233
3234 printf("RAID autoconfigure\n");
3235
3236 retcode = 0;
3237 *unit = -1;
3238
3239 /* 1. Create a config structure */
3240
3241 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3242 M_RAIDFRAME,
3243 M_NOWAIT);
3244 if (config==NULL) {
3245 printf("Out of mem!?!?\n");
3246 /* XXX do something more intelligent here. */
3247 return(1);
3248 }
3249
3250 memset(config, 0, sizeof(RF_Config_t));
3251
3252 /* XXX raidID needs to be set correctly.. */
3253
3254 /*
3255 2. Figure out what RAID ID this one is supposed to live at
3256 See if we can get the same RAID dev that it was configured
3257 on last time..
3258 */
3259
3260 raidID = cset->ac->clabel->last_unit;
3261 if ((raidID < 0) || (raidID >= numraid)) {
3262 /* let's not wander off into lala land. */
3263 raidID = numraid - 1;
3264 }
3265 if (raidPtrs[raidID]->valid != 0) {
3266
3267 /*
3268 Nope... Go looking for an alternative...
3269 Start high so we don't immediately use raid0 if that's
3270 not taken.
3271 */
3272
3273 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
3274 if (raidPtrs[raidID]->valid == 0) {
3275 /* can use this one! */
3276 break;
3277 }
3278 }
3279 }
3280
3281 if (raidID < 0) {
3282 /* punt... */
3283 printf("Unable to auto configure this set!\n");
3284 printf("(Out of RAID devs!)\n");
3285 return(1);
3286 }
3287 printf("Configuring raid%d:\n",raidID);
3288 raidPtr = raidPtrs[raidID];
3289
3290 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3291 raidPtr->raidid = raidID;
3292 raidPtr->openings = RAIDOUTSTANDING;
3293
3294 /* 3. Build the configuration structure */
3295 rf_create_configuration(cset->ac, config, raidPtr);
3296
3297 /* 4. Do the configuration */
3298 retcode = rf_Configure(raidPtr, config, cset->ac);
3299
3300 if (retcode == 0) {
3301
3302 raidinit(raidPtrs[raidID]);
3303
3304 rf_markalldirty(raidPtrs[raidID]);
3305 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3306 if (cset->ac->clabel->root_partition==1) {
3307 /* everything configured just fine. Make a note
3308 that this set is eligible to be root. */
3309 cset->rootable = 1;
3310 /* XXX do this here? */
3311 raidPtrs[raidID]->root_partition = 1;
3312 }
3313 }
3314
3315 /* 5. Cleanup */
3316 free(config, M_RAIDFRAME);
3317
3318 *unit = raidID;
3319 return(retcode);
3320 }
3321
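/*
 * Account a completed access with the kernel's disk statistics
 * (disk_unbusy()).
 */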
3322 void
3323 rf_disk_unbusy(desc)
3324 RF_RaidAccessDesc_t *desc;
3325 {
3326 struct buf *bp;
3327
3328 bp = (struct buf *)desc->bp;
3329 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3330 (bp->b_bcount - bp->b_resid));
3331 }
3332