rf_netbsdkintf.c revision 1.102 1 /* $NetBSD: rf_netbsdkintf.c,v 1.102 2001/01/07 18:09:02 fvdl Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include "raid.h"
139 #include "opt_raid_autoconfig.h"
140 #include "rf_raid.h"
141 #include "rf_raidframe.h"
142 #include "rf_copyback.h"
143 #include "rf_dag.h"
144 #include "rf_dagflags.h"
145 #include "rf_desc.h"
146 #include "rf_diskqueue.h"
147 #include "rf_acctrace.h"
148 #include "rf_etimer.h"
149 #include "rf_general.h"
150 #include "rf_debugMem.h"
151 #include "rf_kintf.h"
152 #include "rf_options.h"
153 #include "rf_driver.h"
154 #include "rf_parityscan.h"
155 #include "rf_debugprint.h"
156 #include "rf_threadstuff.h"
157 #include "rf_configure.h"
158
/*
 * Verbosity threshold for the driver's debug output.  db1_printf() only
 * prints when this is greater than zero (and only in DEBUG kernels).
 */
int rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- level-1 debug printf.  The argument is a
 * complete, parenthesized printf() argument list.
 *
 * Both variants expand to a single do { } while (0) statement so that the
 * macro can be used as the body of an if/else without capturing the
 * caller's `else' (DEBUG case) or producing a stray empty statement that
 * breaks the `else' (non-DEBUG case).
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
166
/*
 * File-scope driver state.  raidPtrs is allocated in raidattach() with one
 * slot per requested unit and is indexed by unit number.  raidattach() also
 * initializes rf_sparet_wait_mutex and resets both spare-table queues to
 * NULL.
 */
167 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
168
169 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
170
171 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
172 * spare table */
173 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
174 * installation process */
175
176 /* prototypes */
/*
 * Forward declarations.  KernelWakeupFunc(), InitBP() and raidinit() are
 * private to this file; the raid*() functions that follow have external
 * linkage.
 */
177 static void KernelWakeupFunc(struct buf * bp);
178 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
179 dev_t dev, RF_SectorNum_t startSect,
180 RF_SectorCount_t numSect, caddr_t buf,
181 void (*cbFunc) (struct buf *), void *cbArg,
182 int logBytesPerSector, struct proc * b_proc);
183 static void raidinit __P((RF_Raid_t *));
184
185 void raidattach __P((int));
186 int raidsize __P((dev_t));
187 int raidopen __P((dev_t, int, int, struct proc *));
188 int raidclose __P((dev_t, int, int, struct proc *));
189 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
190 int raidwrite __P((dev_t, struct uio *, int));
191 int raidread __P((dev_t, struct uio *, int));
192 void raidstrategy __P((struct buf *));
193 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
194
195 /*
196 * Pilfered from ccd.c
197 */
198
/*
 * Wrapper around a struct buf that also records the original buf, some
 * flags and the RAIDframe disk-queue request it belongs to.
 * NOTE(review): rf_buf is required to be the first member, presumably so a
 * struct buf pointer can be converted back to the enclosing raidbuf --
 * confirm against the code that uses it.
 */
199 struct raidbuf {
200 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
201 struct buf *rf_obp; /* ptr. to original I/O buf */
202 int rf_flags; /* misc. flags */
203 RF_DiskQueueData_t *req;/* the request that this was part of.. */
204 };
205
206
/*
 * Take an item from / return an item to the sc_cbufpool of softc `rs'.
 * RAIDGETBUF passes PR_NOWAIT to pool_get().
 */
207 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
208 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
209
210 /* XXX Not sure if the following should be replacing the raidPtrs above,
211 or if it should be used in conjunction with that...
212 */
213
/*
 * Per-unit software state.  One entry per unit lives in the raid_softc
 * array allocated by raidattach(); the entry points index it with
 * raidunit(dev) after checking the unit against numraid.
 */
214 struct raid_softc {
215 int sc_flags; /* flags */
216 int sc_cflags; /* configuration flags */
217 size_t sc_size; /* size of the raid device */
218 char sc_xname[20]; /* XXX external name */
219 struct disk sc_dkdev; /* generic disk device info */
220 struct pool sc_cbufpool; /* component buffer pool */
221 struct buf_queue buf_queue; /* used for the device queue */
222 };
223 /* sc_flags */
224 #define RAIDF_INITED 0x01 /* unit has been initialized */
225 #define RAIDF_WLABEL 0x02 /* label area is writable */
226 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
227 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
228 #define RAIDF_LOCKED 0x80 /* unit is locked */
229
/* raidunit(): unit number of a dev_t.  numraid: unit count, set by raidattach(). */
230 #define raidunit(x) DISKUNIT(x)
231 int numraid = 0;
232
233 /*
234 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
235 * Be aware that large numbers can allow the driver to consume a lot of
236 * kernel memory, especially on writes, and in degraded mode reads.
237 *
238 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
239 * a single 64K write will typically require 64K for the old data,
240 * 64K for the old parity, and 64K for the new parity, for a total
241 * of 192K (if the parity buffer is not re-used immediately).
242 * Even if it is used immediately, that's still 128K, which when multiplied
243 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
244 *
245 * Now in degraded mode, for example, a 64K read on the above setup may
246 * require data reconstruction, which will require *all* of the 4 remaining
247 * disks to participate -- 4 * 32K/disk == 128K again.
248 */
249
/*
 * RAIDOUTSTANDING defaults to 6 unless it is already defined when this
 * file is compiled.  The RAIDFRAME_CONFIGURE ioctl stores it in
 * raidPtr->openings after a successful configuration.
 */
250 #ifndef RAIDOUTSTANDING
251 #define RAIDOUTSTANDING 6
252 #endif
253
/* RAIDLABELDEV(dev): same major and unit as `dev', but partition RAW_PART. */
254 #define RAIDLABELDEV(dev) \
255 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
256
257 /* declared here, and made public, for the benefit of KVM stuff.. */
/*
 * raid_softc and raidrootdev are arrays with one element per unit; both
 * are allocated in raidattach().  The remaining lines are forward
 * declarations for label handling, unit locking, the worker threads and
 * the autoconfiguration helpers.
 */
258 struct raid_softc *raid_softc;
259
260 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
261 struct disklabel *));
262 static void raidgetdisklabel __P((dev_t));
263 static void raidmakedisklabel __P((struct raid_softc *));
264
265 static int raidlock __P((struct raid_softc *));
266 static void raidunlock __P((struct raid_softc *));
267
268 static void rf_markalldirty __P((RF_Raid_t *));
269 void rf_mountroot_hook __P((struct device *));
270
271 struct device *raidrootdev;
272
273 void rf_ReconThread __P((struct rf_recon_req *));
274 /* XXX what I want is: */
275 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
276 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
277 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
278 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
279 void rf_buildroothack __P((void *));
280
281 RF_AutoConfig_t *rf_find_raid_components __P((void));
282 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
283 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
284 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
285 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
286 RF_Raid_t *));
287 int rf_set_autoconfig __P((RF_Raid_t *, int));
288 int rf_set_rootpartition __P((RF_Raid_t *, int));
289 void rf_release_all_vps __P((RF_ConfigSet_t *));
290 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
291 int rf_have_enough_components __P((RF_ConfigSet_t *));
292 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
293
/*
 * Autoconfiguration switch tested by raidattach(), which forces it to 1
 * when RAID_AUTOCONFIG evaluates to non-zero at compile time.
 */
294 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
295 allow autoconfig to take place.
296 Note that this is overridden by having
297 RAID_AUTOCONFIG as an option in the
298 kernel config file. */
299
300 void
301 raidattach(num)
302 int num;
303 {
304 int raidID;
305 int i, rc;
306 RF_AutoConfig_t *ac_list; /* autoconfig list */
307 RF_ConfigSet_t *config_sets;
308
309 #ifdef DEBUG
310 printf("raidattach: Asked for %d units\n", num);
311 #endif
312
313 if (num <= 0) {
314 #ifdef DIAGNOSTIC
315 panic("raidattach: count <= 0");
316 #endif
317 return;
318 }
319 /* This is where all the initialization stuff gets done. */
320
321 numraid = num;
322
323 /* Make some space for requested number of units... */
324
325 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
326 if (raidPtrs == NULL) {
327 panic("raidPtrs is NULL!!\n");
328 }
329
330 rc = rf_mutex_init(&rf_sparet_wait_mutex);
331 if (rc) {
332 RF_PANIC();
333 }
334
335 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
336
337 for (i = 0; i < num; i++)
338 raidPtrs[i] = NULL;
339 rc = rf_BootRaidframe();
340 if (rc == 0)
341 printf("Kernelized RAIDframe activated\n");
342 else
343 panic("Serious error booting RAID!!\n");
344
345 /* put together some datastructures like the CCD device does.. This
346 * lets us lock the device and what-not when it gets opened. */
347
348 raid_softc = (struct raid_softc *)
349 malloc(num * sizeof(struct raid_softc),
350 M_RAIDFRAME, M_NOWAIT);
351 if (raid_softc == NULL) {
352 printf("WARNING: no memory for RAIDframe driver\n");
353 return;
354 }
355
356 bzero(raid_softc, num * sizeof(struct raid_softc));
357
358 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
359 M_RAIDFRAME, M_NOWAIT);
360 if (raidrootdev == NULL) {
361 panic("No memory for RAIDframe driver!!?!?!\n");
362 }
363
364 for (raidID = 0; raidID < num; raidID++) {
365 BUFQ_INIT(&raid_softc[raidID].buf_queue);
366
367 raidrootdev[raidID].dv_class = DV_DISK;
368 raidrootdev[raidID].dv_cfdata = NULL;
369 raidrootdev[raidID].dv_unit = raidID;
370 raidrootdev[raidID].dv_parent = NULL;
371 raidrootdev[raidID].dv_flags = 0;
372 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
373
374 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
375 (RF_Raid_t *));
376 if (raidPtrs[raidID] == NULL) {
377 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
378 numraid = raidID;
379 return;
380 }
381 }
382
383 #if RAID_AUTOCONFIG
384 raidautoconfig = 1;
385 #endif
386
387 if (raidautoconfig) {
388 /* 1. locate all RAID components on the system */
389
390 #if DEBUG
391 printf("Searching for raid components...\n");
392 #endif
393 ac_list = rf_find_raid_components();
394
395 /* 2. sort them into their respective sets */
396
397 config_sets = rf_create_auto_sets(ac_list);
398
399 /* 3. evaluate each set and configure the valid ones
400 This gets done in rf_buildroothack() */
401
402 /* schedule the creation of the thread to do the
403 "/ on RAID" stuff */
404
405 kthread_create(rf_buildroothack,config_sets);
406
407 #if 0
408 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
409 #endif
410 }
411
412 }
413
414 void
415 rf_buildroothack(arg)
416 void *arg;
417 {
418 RF_ConfigSet_t *config_sets = arg;
419 RF_ConfigSet_t *cset;
420 RF_ConfigSet_t *next_cset;
421 int retcode;
422 int raidID;
423 int rootID;
424 int num_root;
425
426 rootID = 0;
427 num_root = 0;
428 cset = config_sets;
429 while(cset != NULL ) {
430 next_cset = cset->next;
431 if (rf_have_enough_components(cset) &&
432 cset->ac->clabel->autoconfigure==1) {
433 retcode = rf_auto_config_set(cset,&raidID);
434 if (!retcode) {
435 if (cset->rootable) {
436 rootID = raidID;
437 num_root++;
438 }
439 } else {
440 /* The autoconfig didn't work :( */
441 #if DEBUG
442 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
443 #endif
444 rf_release_all_vps(cset);
445 }
446 } else {
447 /* we're not autoconfiguring this set...
448 release the associated resources */
449 rf_release_all_vps(cset);
450 }
451 /* cleanup */
452 rf_cleanup_config_set(cset);
453 cset = next_cset;
454 }
455 if (boothowto & RB_ASKNAME) {
456 /* We don't auto-config... */
457 } else {
458 /* They didn't ask, and we found something bootable... */
459
460 if (num_root == 1) {
461 booted_device = &raidrootdev[rootID];
462 } else if (num_root > 1) {
463 /* we can't guess.. require the user to answer... */
464 boothowto |= RB_ASKNAME;
465 }
466 }
467 }
468
469
470 int
471 raidsize(dev)
472 dev_t dev;
473 {
474 struct raid_softc *rs;
475 struct disklabel *lp;
476 int part, unit, omask, size;
477
478 unit = raidunit(dev);
479 if (unit >= numraid)
480 return (-1);
481 rs = &raid_softc[unit];
482
483 if ((rs->sc_flags & RAIDF_INITED) == 0)
484 return (-1);
485
486 part = DISKPART(dev);
487 omask = rs->sc_dkdev.dk_openmask & (1 << part);
488 lp = rs->sc_dkdev.dk_label;
489
490 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
491 return (-1);
492
493 if (lp->d_partitions[part].p_fstype != FS_SWAP)
494 size = -1;
495 else
496 size = lp->d_partitions[part].p_size *
497 (lp->d_secsize / DEV_BSIZE);
498
499 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
500 return (-1);
501
502 return (size);
503
504 }
505
506 int
507 raiddump(dev, blkno, va, size)
508 dev_t dev;
509 daddr_t blkno;
510 caddr_t va;
511 size_t size;
512 {
513 /* Not implemented. */
514 return ENXIO;
515 }
516 /* ARGSUSED */
517 int
518 raidopen(dev, flags, fmt, p)
519 dev_t dev;
520 int flags, fmt;
521 struct proc *p;
522 {
523 int unit = raidunit(dev);
524 struct raid_softc *rs;
525 struct disklabel *lp;
526 int part, pmask;
527 int error = 0;
528
529 if (unit >= numraid)
530 return (ENXIO);
531 rs = &raid_softc[unit];
532
533 if ((error = raidlock(rs)) != 0)
534 return (error);
535 lp = rs->sc_dkdev.dk_label;
536
537 part = DISKPART(dev);
538 pmask = (1 << part);
539
540 db1_printf(("Opening raid device number: %d partition: %d\n",
541 unit, part));
542
543
544 if ((rs->sc_flags & RAIDF_INITED) &&
545 (rs->sc_dkdev.dk_openmask == 0))
546 raidgetdisklabel(dev);
547
548 /* make sure that this partition exists */
549
550 if (part != RAW_PART) {
551 db1_printf(("Not a raw partition..\n"));
552 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
553 ((part >= lp->d_npartitions) ||
554 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
555 error = ENXIO;
556 raidunlock(rs);
557 db1_printf(("Bailing out...\n"));
558 return (error);
559 }
560 }
561 /* Prevent this unit from being unconfigured while open. */
562 switch (fmt) {
563 case S_IFCHR:
564 rs->sc_dkdev.dk_copenmask |= pmask;
565 break;
566
567 case S_IFBLK:
568 rs->sc_dkdev.dk_bopenmask |= pmask;
569 break;
570 }
571
572 if ((rs->sc_dkdev.dk_openmask == 0) &&
573 ((rs->sc_flags & RAIDF_INITED) != 0)) {
574 /* First one... mark things as dirty... Note that we *MUST*
575 have done a configure before this. I DO NOT WANT TO BE
576 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
577 THAT THEY BELONG TOGETHER!!!!! */
578 /* XXX should check to see if we're only open for reading
579 here... If so, we needn't do this, but then need some
580 other way of keeping track of what's happened.. */
581
582 rf_markalldirty( raidPtrs[unit] );
583 }
584
585
586 rs->sc_dkdev.dk_openmask =
587 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
588
589 raidunlock(rs);
590
591 return (error);
592
593
594 }
595 /* ARGSUSED */
596 int
597 raidclose(dev, flags, fmt, p)
598 dev_t dev;
599 int flags, fmt;
600 struct proc *p;
601 {
602 int unit = raidunit(dev);
603 struct raid_softc *rs;
604 int error = 0;
605 int part;
606
607 if (unit >= numraid)
608 return (ENXIO);
609 rs = &raid_softc[unit];
610
611 if ((error = raidlock(rs)) != 0)
612 return (error);
613
614 part = DISKPART(dev);
615
616 /* ...that much closer to allowing unconfiguration... */
617 switch (fmt) {
618 case S_IFCHR:
619 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
620 break;
621
622 case S_IFBLK:
623 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
624 break;
625 }
626 rs->sc_dkdev.dk_openmask =
627 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
628
629 if ((rs->sc_dkdev.dk_openmask == 0) &&
630 ((rs->sc_flags & RAIDF_INITED) != 0)) {
631 /* Last one... device is not unconfigured yet.
632 Device shutdown has taken care of setting the
633 clean bits if RAIDF_INITED is not set
634 mark things as clean... */
635 #if 0
636 printf("Last one on raid%d. Updating status.\n",unit);
637 #endif
638 rf_update_component_labels(raidPtrs[unit],
639 RF_FINAL_COMPONENT_UPDATE);
640 }
641
642 raidunlock(rs);
643 return (0);
644
645 }
646
647 void
648 raidstrategy(bp)
649 struct buf *bp;
650 {
651 int s;
652
653 unsigned int raidID = raidunit(bp->b_dev);
654 RF_Raid_t *raidPtr;
655 struct raid_softc *rs = &raid_softc[raidID];
656 struct disklabel *lp;
657 int wlabel;
658
659 if ((rs->sc_flags & RAIDF_INITED) ==0) {
660 bp->b_error = ENXIO;
661 bp->b_flags |= B_ERROR;
662 bp->b_resid = bp->b_bcount;
663 biodone(bp);
664 return;
665 }
666 if (raidID >= numraid || !raidPtrs[raidID]) {
667 bp->b_error = ENODEV;
668 bp->b_flags |= B_ERROR;
669 bp->b_resid = bp->b_bcount;
670 biodone(bp);
671 return;
672 }
673 raidPtr = raidPtrs[raidID];
674 if (!raidPtr->valid) {
675 bp->b_error = ENODEV;
676 bp->b_flags |= B_ERROR;
677 bp->b_resid = bp->b_bcount;
678 biodone(bp);
679 return;
680 }
681 if (bp->b_bcount == 0) {
682 db1_printf(("b_bcount is zero..\n"));
683 biodone(bp);
684 return;
685 }
686 lp = rs->sc_dkdev.dk_label;
687
688 /*
689 * Do bounds checking and adjust transfer. If there's an
690 * error, the bounds check will flag that for us.
691 */
692
693 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
694 if (DISKPART(bp->b_dev) != RAW_PART)
695 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
696 db1_printf(("Bounds check failed!!:%d %d\n",
697 (int) bp->b_blkno, (int) wlabel));
698 biodone(bp);
699 return;
700 }
701 s = splbio();
702
703 bp->b_resid = 0;
704
705 /* stuff it onto our queue */
706 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
707
708 raidstart(raidPtrs[raidID]);
709
710 splx(s);
711 }
712 /* ARGSUSED */
713 int
714 raidread(dev, uio, flags)
715 dev_t dev;
716 struct uio *uio;
717 int flags;
718 {
719 int unit = raidunit(dev);
720 struct raid_softc *rs;
721 int part;
722
723 if (unit >= numraid)
724 return (ENXIO);
725 rs = &raid_softc[unit];
726
727 if ((rs->sc_flags & RAIDF_INITED) == 0)
728 return (ENXIO);
729 part = DISKPART(dev);
730
731 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
732
733 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
734
735 }
736 /* ARGSUSED */
737 int
738 raidwrite(dev, uio, flags)
739 dev_t dev;
740 struct uio *uio;
741 int flags;
742 {
743 int unit = raidunit(dev);
744 struct raid_softc *rs;
745
746 if (unit >= numraid)
747 return (ENXIO);
748 rs = &raid_softc[unit];
749
750 if ((rs->sc_flags & RAIDF_INITED) == 0)
751 return (ENXIO);
752 db1_printf(("raidwrite\n"));
753 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
754
755 }
756
757 int
758 raidioctl(dev, cmd, data, flag, p)
759 dev_t dev;
760 u_long cmd;
761 caddr_t data;
762 int flag;
763 struct proc *p;
764 {
765 int unit = raidunit(dev);
766 int error = 0;
767 int part, pmask;
768 struct raid_softc *rs;
769 RF_Config_t *k_cfg, *u_cfg;
770 RF_Raid_t *raidPtr;
771 RF_RaidDisk_t *diskPtr;
772 RF_AccTotals_t *totals;
773 RF_DeviceConfig_t *d_cfg, **ucfgp;
774 u_char *specific_buf;
775 int retcode = 0;
776 int row;
777 int column;
778 struct rf_recon_req *rrcopy, *rr;
779 RF_ComponentLabel_t *clabel;
780 RF_ComponentLabel_t ci_label;
781 RF_ComponentLabel_t **clabel_ptr;
782 RF_SingleComponent_t *sparePtr,*componentPtr;
783 RF_SingleComponent_t hot_spare;
784 RF_SingleComponent_t component;
785 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
786 int i, j, d;
787 #ifdef __HAVE_OLD_DISKLABEL
788 struct disklabel newlabel;
789 #endif
790
791 if (unit >= numraid)
792 return (ENXIO);
793 rs = &raid_softc[unit];
794 raidPtr = raidPtrs[unit];
795
796 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
797 (int) DISKPART(dev), (int) unit, (int) cmd));
798
799 /* Must be open for writes for these commands... */
800 switch (cmd) {
801 case DIOCSDINFO:
802 case DIOCWDINFO:
803 #ifdef __HAVE_OLD_DISKLABEL
804 case ODIOCWDINFO:
805 case ODIOCSDINFO:
806 #endif
807 case DIOCWLABEL:
808 if ((flag & FWRITE) == 0)
809 return (EBADF);
810 }
811
812 /* Must be initialized for these... */
813 switch (cmd) {
814 case DIOCGDINFO:
815 case DIOCSDINFO:
816 case DIOCWDINFO:
817 #ifdef __HAVE_OLD_DISKLABEL
818 case ODIOCGDINFO:
819 case ODIOCWDINFO:
820 case ODIOCSDINFO:
821 case ODIOCGDEFLABEL:
822 #endif
823 case DIOCGPART:
824 case DIOCWLABEL:
825 case DIOCGDEFLABEL:
826 case RAIDFRAME_SHUTDOWN:
827 case RAIDFRAME_REWRITEPARITY:
828 case RAIDFRAME_GET_INFO:
829 case RAIDFRAME_RESET_ACCTOTALS:
830 case RAIDFRAME_GET_ACCTOTALS:
831 case RAIDFRAME_KEEP_ACCTOTALS:
832 case RAIDFRAME_GET_SIZE:
833 case RAIDFRAME_FAIL_DISK:
834 case RAIDFRAME_COPYBACK:
835 case RAIDFRAME_CHECK_RECON_STATUS:
836 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
837 case RAIDFRAME_GET_COMPONENT_LABEL:
838 case RAIDFRAME_SET_COMPONENT_LABEL:
839 case RAIDFRAME_ADD_HOT_SPARE:
840 case RAIDFRAME_REMOVE_HOT_SPARE:
841 case RAIDFRAME_INIT_LABELS:
842 case RAIDFRAME_REBUILD_IN_PLACE:
843 case RAIDFRAME_CHECK_PARITY:
844 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
845 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
846 case RAIDFRAME_CHECK_COPYBACK_STATUS:
847 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
848 case RAIDFRAME_SET_AUTOCONFIG:
849 case RAIDFRAME_SET_ROOT:
850 case RAIDFRAME_DELETE_COMPONENT:
851 case RAIDFRAME_INCORPORATE_HOT_SPARE:
852 if ((rs->sc_flags & RAIDF_INITED) == 0)
853 return (ENXIO);
854 }
855
856 switch (cmd) {
857
858 /* configure the system */
859 case RAIDFRAME_CONFIGURE:
860
861 if (raidPtr->valid) {
862 /* There is a valid RAID set running on this unit! */
863 printf("raid%d: Device already configured!\n",unit);
864 return(EINVAL);
865 }
866
867 /* copy-in the configuration information */
868 /* data points to a pointer to the configuration structure */
869
870 u_cfg = *((RF_Config_t **) data);
871 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
872 if (k_cfg == NULL) {
873 return (ENOMEM);
874 }
875 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
876 sizeof(RF_Config_t));
877 if (retcode) {
878 RF_Free(k_cfg, sizeof(RF_Config_t));
879 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
880 retcode));
881 return (retcode);
882 }
883 /* allocate a buffer for the layout-specific data, and copy it
884 * in */
885 if (k_cfg->layoutSpecificSize) {
886 if (k_cfg->layoutSpecificSize > 10000) {
887 /* sanity check */
888 RF_Free(k_cfg, sizeof(RF_Config_t));
889 return (EINVAL);
890 }
891 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
892 (u_char *));
893 if (specific_buf == NULL) {
894 RF_Free(k_cfg, sizeof(RF_Config_t));
895 return (ENOMEM);
896 }
897 retcode = copyin(k_cfg->layoutSpecific,
898 (caddr_t) specific_buf,
899 k_cfg->layoutSpecificSize);
900 if (retcode) {
901 RF_Free(k_cfg, sizeof(RF_Config_t));
902 RF_Free(specific_buf,
903 k_cfg->layoutSpecificSize);
904 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
905 retcode));
906 return (retcode);
907 }
908 } else
909 specific_buf = NULL;
910 k_cfg->layoutSpecific = specific_buf;
911
912 /* should do some kind of sanity check on the configuration.
913 * Store the sum of all the bytes in the last byte? */
914
915 /* configure the system */
916
917 /*
918 * Clear the entire RAID descriptor, just to make sure
919 * there is no stale data left in the case of a
920 * reconfiguration
921 */
922 bzero((char *) raidPtr, sizeof(RF_Raid_t));
923 raidPtr->raidid = unit;
924
925 retcode = rf_Configure(raidPtr, k_cfg, NULL);
926
927 if (retcode == 0) {
928
929 /* allow this many simultaneous IO's to
930 this RAID device */
931 raidPtr->openings = RAIDOUTSTANDING;
932
933 raidinit(raidPtr);
934 rf_markalldirty(raidPtr);
935 }
936 /* free the buffers. No return code here. */
937 if (k_cfg->layoutSpecificSize) {
938 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
939 }
940 RF_Free(k_cfg, sizeof(RF_Config_t));
941
942 return (retcode);
943
944 /* shutdown the system */
945 case RAIDFRAME_SHUTDOWN:
946
947 if ((error = raidlock(rs)) != 0)
948 return (error);
949
950 /*
951 * If somebody has a partition mounted, we shouldn't
952 * shutdown.
953 */
954
955 part = DISKPART(dev);
956 pmask = (1 << part);
957 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
958 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
959 (rs->sc_dkdev.dk_copenmask & pmask))) {
960 raidunlock(rs);
961 return (EBUSY);
962 }
963
964 retcode = rf_Shutdown(raidPtr);
965
966 pool_destroy(&rs->sc_cbufpool);
967
968 /* It's no longer initialized... */
969 rs->sc_flags &= ~RAIDF_INITED;
970
971 /* Detach the disk. */
972 disk_detach(&rs->sc_dkdev);
973
974 raidunlock(rs);
975
976 return (retcode);
977 case RAIDFRAME_GET_COMPONENT_LABEL:
978 clabel_ptr = (RF_ComponentLabel_t **) data;
979 /* need to read the component label for the disk indicated
980 by row,column in clabel */
981
982 /* For practice, let's get it directly fromdisk, rather
983 than from the in-core copy */
984 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
985 (RF_ComponentLabel_t *));
986 if (clabel == NULL)
987 return (ENOMEM);
988
989 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
990
991 retcode = copyin( *clabel_ptr, clabel,
992 sizeof(RF_ComponentLabel_t));
993
994 if (retcode) {
995 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
996 return(retcode);
997 }
998
999 row = clabel->row;
1000 column = clabel->column;
1001
1002 if ((row < 0) || (row >= raidPtr->numRow) ||
1003 (column < 0) || (column >= raidPtr->numCol +
1004 raidPtr->numSpare)) {
1005 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1006 return(EINVAL);
1007 }
1008
1009 raidread_component_label(raidPtr->Disks[row][column].dev,
1010 raidPtr->raid_cinfo[row][column].ci_vp,
1011 clabel );
1012
1013 retcode = copyout((caddr_t) clabel,
1014 (caddr_t) *clabel_ptr,
1015 sizeof(RF_ComponentLabel_t));
1016 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1017 return (retcode);
1018
1019 case RAIDFRAME_SET_COMPONENT_LABEL:
1020 clabel = (RF_ComponentLabel_t *) data;
1021
1022 /* XXX check the label for valid stuff... */
1023 /* Note that some things *should not* get modified --
1024 the user should be re-initing the labels instead of
1025 trying to patch things.
1026 */
1027
1028 printf("Got component label:\n");
1029 printf("Version: %d\n",clabel->version);
1030 printf("Serial Number: %d\n",clabel->serial_number);
1031 printf("Mod counter: %d\n",clabel->mod_counter);
1032 printf("Row: %d\n", clabel->row);
1033 printf("Column: %d\n", clabel->column);
1034 printf("Num Rows: %d\n", clabel->num_rows);
1035 printf("Num Columns: %d\n", clabel->num_columns);
1036 printf("Clean: %d\n", clabel->clean);
1037 printf("Status: %d\n", clabel->status);
1038
1039 row = clabel->row;
1040 column = clabel->column;
1041
1042 if ((row < 0) || (row >= raidPtr->numRow) ||
1043 (column < 0) || (column >= raidPtr->numCol)) {
1044 return(EINVAL);
1045 }
1046
1047 /* XXX this isn't allowed to do anything for now :-) */
1048
1049 /* XXX and before it is, we need to fill in the rest
1050 of the fields!?!?!?! */
1051 #if 0
1052 raidwrite_component_label(
1053 raidPtr->Disks[row][column].dev,
1054 raidPtr->raid_cinfo[row][column].ci_vp,
1055 clabel );
1056 #endif
1057 return (0);
1058
1059 case RAIDFRAME_INIT_LABELS:
1060 clabel = (RF_ComponentLabel_t *) data;
1061 /*
1062 we only want the serial number from
1063 the above. We get all the rest of the information
1064 from the config that was used to create this RAID
1065 set.
1066 */
1067
1068 raidPtr->serial_number = clabel->serial_number;
1069
1070 raid_init_component_label(raidPtr, &ci_label);
1071 ci_label.serial_number = clabel->serial_number;
1072
1073 for(row=0;row<raidPtr->numRow;row++) {
1074 ci_label.row = row;
1075 for(column=0;column<raidPtr->numCol;column++) {
1076 diskPtr = &raidPtr->Disks[row][column];
1077 if (!RF_DEAD_DISK(diskPtr->status)) {
1078 ci_label.partitionSize = diskPtr->partitionSize;
1079 ci_label.column = column;
1080 raidwrite_component_label(
1081 raidPtr->Disks[row][column].dev,
1082 raidPtr->raid_cinfo[row][column].ci_vp,
1083 &ci_label );
1084 }
1085 }
1086 }
1087
1088 return (retcode);
1089 case RAIDFRAME_SET_AUTOCONFIG:
1090 d = rf_set_autoconfig(raidPtr, *(int *) data);
1091 printf("New autoconfig value is: %d\n", d);
1092 *(int *) data = d;
1093 return (retcode);
1094
1095 case RAIDFRAME_SET_ROOT:
1096 d = rf_set_rootpartition(raidPtr, *(int *) data);
1097 printf("New rootpartition value is: %d\n", d);
1098 *(int *) data = d;
1099 return (retcode);
1100
1101 /* initialize all parity */
1102 case RAIDFRAME_REWRITEPARITY:
1103
1104 if (raidPtr->Layout.map->faultsTolerated == 0) {
1105 /* Parity for RAID 0 is trivially correct */
1106 raidPtr->parity_good = RF_RAID_CLEAN;
1107 return(0);
1108 }
1109
1110 if (raidPtr->parity_rewrite_in_progress == 1) {
1111 /* Re-write is already in progress! */
1112 return(EINVAL);
1113 }
1114
1115 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1116 rf_RewriteParityThread,
1117 raidPtr,"raid_parity");
1118 return (retcode);
1119
1120
1121 case RAIDFRAME_ADD_HOT_SPARE:
1122 sparePtr = (RF_SingleComponent_t *) data;
1123 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1124 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1125 return(retcode);
1126
1127 case RAIDFRAME_REMOVE_HOT_SPARE:
1128 return(retcode);
1129
1130 case RAIDFRAME_DELETE_COMPONENT:
1131 componentPtr = (RF_SingleComponent_t *)data;
1132 memcpy( &component, componentPtr,
1133 sizeof(RF_SingleComponent_t));
1134 retcode = rf_delete_component(raidPtr, &component);
1135 return(retcode);
1136
1137 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1138 componentPtr = (RF_SingleComponent_t *)data;
1139 memcpy( &component, componentPtr,
1140 sizeof(RF_SingleComponent_t));
1141 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1142 return(retcode);
1143
1144 case RAIDFRAME_REBUILD_IN_PLACE:
1145
1146 if (raidPtr->Layout.map->faultsTolerated == 0) {
1147 /* Can't do this on a RAID 0!! */
1148 return(EINVAL);
1149 }
1150
1151 if (raidPtr->recon_in_progress == 1) {
1152 /* a reconstruct is already in progress! */
1153 return(EINVAL);
1154 }
1155
1156 componentPtr = (RF_SingleComponent_t *) data;
1157 memcpy( &component, componentPtr,
1158 sizeof(RF_SingleComponent_t));
1159 row = component.row;
1160 column = component.column;
1161 printf("Rebuild: %d %d\n",row, column);
1162 if ((row < 0) || (row >= raidPtr->numRow) ||
1163 (column < 0) || (column >= raidPtr->numCol)) {
1164 return(EINVAL);
1165 }
1166
1167 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1168 if (rrcopy == NULL)
1169 return(ENOMEM);
1170
1171 rrcopy->raidPtr = (void *) raidPtr;
1172 rrcopy->row = row;
1173 rrcopy->col = column;
1174
1175 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1176 rf_ReconstructInPlaceThread,
1177 rrcopy,"raid_reconip");
1178 return(retcode);
1179
1180 case RAIDFRAME_GET_INFO:
1181 if (!raidPtr->valid)
1182 return (ENODEV);
1183 ucfgp = (RF_DeviceConfig_t **) data;
1184 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1185 (RF_DeviceConfig_t *));
1186 if (d_cfg == NULL)
1187 return (ENOMEM);
1188 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1189 d_cfg->rows = raidPtr->numRow;
1190 d_cfg->cols = raidPtr->numCol;
1191 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1192 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1193 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1194 return (ENOMEM);
1195 }
1196 d_cfg->nspares = raidPtr->numSpare;
1197 if (d_cfg->nspares >= RF_MAX_DISKS) {
1198 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1199 return (ENOMEM);
1200 }
1201 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1202 d = 0;
1203 for (i = 0; i < d_cfg->rows; i++) {
1204 for (j = 0; j < d_cfg->cols; j++) {
1205 d_cfg->devs[d] = raidPtr->Disks[i][j];
1206 d++;
1207 }
1208 }
1209 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1210 d_cfg->spares[i] = raidPtr->Disks[0][j];
1211 }
1212 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1213 sizeof(RF_DeviceConfig_t));
1214 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1215
1216 return (retcode);
1217
1218 case RAIDFRAME_CHECK_PARITY:
1219 *(int *) data = raidPtr->parity_good;
1220 return (0);
1221
1222 case RAIDFRAME_RESET_ACCTOTALS:
1223 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1224 return (0);
1225
1226 case RAIDFRAME_GET_ACCTOTALS:
1227 totals = (RF_AccTotals_t *) data;
1228 *totals = raidPtr->acc_totals;
1229 return (0);
1230
1231 case RAIDFRAME_KEEP_ACCTOTALS:
1232 raidPtr->keep_acc_totals = *(int *)data;
1233 return (0);
1234
1235 case RAIDFRAME_GET_SIZE:
1236 *(int *) data = raidPtr->totalSectors;
1237 return (0);
1238
1239 /* fail a disk & optionally start reconstruction */
1240 case RAIDFRAME_FAIL_DISK:
1241
1242 if (raidPtr->Layout.map->faultsTolerated == 0) {
1243 /* Can't do this on a RAID 0!! */
1244 return(EINVAL);
1245 }
1246
1247 rr = (struct rf_recon_req *) data;
1248
1249 if (rr->row < 0 || rr->row >= raidPtr->numRow
1250 || rr->col < 0 || rr->col >= raidPtr->numCol)
1251 return (EINVAL);
1252
1253 printf("raid%d: Failing the disk: row: %d col: %d\n",
1254 unit, rr->row, rr->col);
1255
1256 /* make a copy of the recon request so that we don't rely on
1257 * the user's buffer */
1258 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1259 if (rrcopy == NULL)
1260 return(ENOMEM);
1261 bcopy(rr, rrcopy, sizeof(*rr));
1262 rrcopy->raidPtr = (void *) raidPtr;
1263
1264 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1265 rf_ReconThread,
1266 rrcopy,"raid_recon");
1267 return (0);
1268
1269 /* invoke a copyback operation after recon on whatever disk
1270 * needs it, if any */
1271 case RAIDFRAME_COPYBACK:
1272
1273 if (raidPtr->Layout.map->faultsTolerated == 0) {
1274 /* This makes no sense on a RAID 0!! */
1275 return(EINVAL);
1276 }
1277
1278 if (raidPtr->copyback_in_progress == 1) {
1279 /* Copyback is already in progress! */
1280 return(EINVAL);
1281 }
1282
1283 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1284 rf_CopybackThread,
1285 raidPtr,"raid_copyback");
1286 return (retcode);
1287
1288 /* return the percentage completion of reconstruction */
1289 case RAIDFRAME_CHECK_RECON_STATUS:
1290 if (raidPtr->Layout.map->faultsTolerated == 0) {
1291 /* This makes no sense on a RAID 0, so tell the
1292 user it's done. */
1293 *(int *) data = 100;
1294 return(0);
1295 }
1296 row = 0; /* XXX we only consider a single row... */
1297 if (raidPtr->status[row] != rf_rs_reconstructing)
1298 *(int *) data = 100;
1299 else
1300 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1301 return (0);
1302 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1303 progressInfoPtr = (RF_ProgressInfo_t **) data;
1304 row = 0; /* XXX we only consider a single row... */
1305 if (raidPtr->status[row] != rf_rs_reconstructing) {
1306 progressInfo.remaining = 0;
1307 progressInfo.completed = 100;
1308 progressInfo.total = 100;
1309 } else {
1310 progressInfo.total =
1311 raidPtr->reconControl[row]->numRUsTotal;
1312 progressInfo.completed =
1313 raidPtr->reconControl[row]->numRUsComplete;
1314 progressInfo.remaining = progressInfo.total -
1315 progressInfo.completed;
1316 }
1317 retcode = copyout((caddr_t) &progressInfo,
1318 (caddr_t) *progressInfoPtr,
1319 sizeof(RF_ProgressInfo_t));
1320 return (retcode);
1321
1322 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1323 if (raidPtr->Layout.map->faultsTolerated == 0) {
1324 /* This makes no sense on a RAID 0, so tell the
1325 user it's done. */
1326 *(int *) data = 100;
1327 return(0);
1328 }
1329 if (raidPtr->parity_rewrite_in_progress == 1) {
1330 *(int *) data = 100 *
1331 raidPtr->parity_rewrite_stripes_done /
1332 raidPtr->Layout.numStripe;
1333 } else {
1334 *(int *) data = 100;
1335 }
1336 return (0);
1337
1338 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1339 progressInfoPtr = (RF_ProgressInfo_t **) data;
1340 if (raidPtr->parity_rewrite_in_progress == 1) {
1341 progressInfo.total = raidPtr->Layout.numStripe;
1342 progressInfo.completed =
1343 raidPtr->parity_rewrite_stripes_done;
1344 progressInfo.remaining = progressInfo.total -
1345 progressInfo.completed;
1346 } else {
1347 progressInfo.remaining = 0;
1348 progressInfo.completed = 100;
1349 progressInfo.total = 100;
1350 }
1351 retcode = copyout((caddr_t) &progressInfo,
1352 (caddr_t) *progressInfoPtr,
1353 sizeof(RF_ProgressInfo_t));
1354 return (retcode);
1355
1356 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1357 if (raidPtr->Layout.map->faultsTolerated == 0) {
1358 /* This makes no sense on a RAID 0 */
1359 *(int *) data = 100;
1360 return(0);
1361 }
1362 if (raidPtr->copyback_in_progress == 1) {
1363 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1364 raidPtr->Layout.numStripe;
1365 } else {
1366 *(int *) data = 100;
1367 }
1368 return (0);
1369
1370 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1371 progressInfoPtr = (RF_ProgressInfo_t **) data;
1372 if (raidPtr->copyback_in_progress == 1) {
1373 progressInfo.total = raidPtr->Layout.numStripe;
1374 progressInfo.completed =
1375 raidPtr->copyback_stripes_done;
1376 progressInfo.remaining = progressInfo.total -
1377 progressInfo.completed;
1378 } else {
1379 progressInfo.remaining = 0;
1380 progressInfo.completed = 100;
1381 progressInfo.total = 100;
1382 }
1383 retcode = copyout((caddr_t) &progressInfo,
1384 (caddr_t) *progressInfoPtr,
1385 sizeof(RF_ProgressInfo_t));
1386 return (retcode);
1387
1388 /* the sparetable daemon calls this to wait for the kernel to
1389 * need a spare table. this ioctl does not return until a
1390 * spare table is needed. XXX -- calling mpsleep here in the
1391 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1392 * -- I should either compute the spare table in the kernel,
1393 * or have a different -- XXX XXX -- interface (a different
1394 * character device) for delivering the table -- XXX */
1395 #if 0
1396 case RAIDFRAME_SPARET_WAIT:
1397 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1398 while (!rf_sparet_wait_queue)
1399 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1400 waitreq = rf_sparet_wait_queue;
1401 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1402 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1403
1404 /* structure assignment */
1405 *((RF_SparetWait_t *) data) = *waitreq;
1406
1407 RF_Free(waitreq, sizeof(*waitreq));
1408 return (0);
1409
1410 /* wakes up a process waiting on SPARET_WAIT and puts an error
1411 * code in it that will cause the dameon to exit */
1412 case RAIDFRAME_ABORT_SPARET_WAIT:
1413 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1414 waitreq->fcol = -1;
1415 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1416 waitreq->next = rf_sparet_wait_queue;
1417 rf_sparet_wait_queue = waitreq;
1418 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1419 wakeup(&rf_sparet_wait_queue);
1420 return (0);
1421
1422 /* used by the spare table daemon to deliver a spare table
1423 * into the kernel */
1424 case RAIDFRAME_SEND_SPARET:
1425
1426 /* install the spare table */
1427 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1428
1429 /* respond to the requestor. the return status of the spare
1430 * table installation is passed in the "fcol" field */
1431 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1432 waitreq->fcol = retcode;
1433 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1434 waitreq->next = rf_sparet_resp_queue;
1435 rf_sparet_resp_queue = waitreq;
1436 wakeup(&rf_sparet_resp_queue);
1437 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1438
1439 return (retcode);
1440 #endif
1441
1442 default:
1443 break; /* fall through to the os-specific code below */
1444
1445 }
1446
1447 if (!raidPtr->valid)
1448 return (EINVAL);
1449
1450 /*
1451 * Add support for "regular" device ioctls here.
1452 */
1453
1454 switch (cmd) {
1455 case DIOCGDINFO:
1456 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1457 break;
1458 #ifdef __HAVE_OLD_DISKLABEL
1459 case ODIOCGDINFO:
1460 newlabel = *(rs->sc_dkdev.dk_label);
1461 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1462 newlabel.d_npartitions = OLDMAXPARTITIONS;
1463 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1464 break;
1465 #endif
1466
1467 case DIOCGPART:
1468 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1469 ((struct partinfo *) data)->part =
1470 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1471 break;
1472
1473 case DIOCWDINFO:
1474 case DIOCSDINFO:
1475 #ifdef __HAVE_OLD_DISKLABEL
1476 case ODIOCWDINFO:
1477 case ODIOCSDINFO:
1478 #endif
1479 {
1480 struct disklabel *lp;
1481 #ifdef __HAVE_OLD_DISKLABEL
1482 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1483 memset(&newlabel, 0, sizeof newlabel);
1484 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1485 lp = &newlabel;
1486 } else
1487 #endif
1488 lp = (struct disklabel *)data;
1489
1490 if ((error = raidlock(rs)) != 0)
1491 return (error);
1492
1493 rs->sc_flags |= RAIDF_LABELLING;
1494
1495 error = setdisklabel(rs->sc_dkdev.dk_label,
1496 lp, 0, rs->sc_dkdev.dk_cpulabel);
1497 if (error == 0) {
1498 if (cmd == DIOCWDINFO
1499 #ifdef __HAVE_OLD_DISKLABEL
1500 || cmd == ODIOCWDINFO
1501 #endif
1502 )
1503 error = writedisklabel(RAIDLABELDEV(dev),
1504 raidstrategy, rs->sc_dkdev.dk_label,
1505 rs->sc_dkdev.dk_cpulabel);
1506 }
1507 rs->sc_flags &= ~RAIDF_LABELLING;
1508
1509 raidunlock(rs);
1510
1511 if (error)
1512 return (error);
1513 break;
1514 }
1515
1516 case DIOCWLABEL:
1517 if (*(int *) data != 0)
1518 rs->sc_flags |= RAIDF_WLABEL;
1519 else
1520 rs->sc_flags &= ~RAIDF_WLABEL;
1521 break;
1522
1523 case DIOCGDEFLABEL:
1524 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1525 break;
1526
1527 #ifdef __HAVE_OLD_DISKLABEL
1528 case ODIOCGDEFLABEL:
1529 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1530 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1531 newlabel.d_npartitions = OLDMAXPARTITIONS;
1532 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1533 break;
1534 #endif
1535
1536 default:
1537 retcode = ENOTTY;
1538 }
1539 return (retcode);
1540
1541 }
1542
1543
1544 /* raidinit -- complete the rest of the initialization for the
1545 RAIDframe device. */
1546
1547
1548 static void
1549 raidinit(raidPtr)
1550 RF_Raid_t *raidPtr;
1551 {
1552 struct raid_softc *rs;
1553 int unit;
1554
1555 unit = raidPtr->raidid;
1556
1557 rs = &raid_softc[unit];
1558 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1559 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1560
1561
1562 /* XXX should check return code first... */
1563 rs->sc_flags |= RAIDF_INITED;
1564
1565 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1566
1567 rs->sc_dkdev.dk_name = rs->sc_xname;
1568
1569 /* disk_attach actually creates space for the CPU disklabel, among
1570 * other things, so it's critical to call this *BEFORE* we try putzing
1571 * with disklabels. */
1572
1573 disk_attach(&rs->sc_dkdev);
1574
1575 /* XXX There may be a weird interaction here between this, and
1576 * protectedSectors, as used in RAIDframe. */
1577
1578 rs->sc_size = raidPtr->totalSectors;
1579
1580 }
1581
1582 /* wake up the daemon & tell it to get us a spare table
1583 * XXX
1584 * the entries in the queues should be tagged with the raidPtr
1585 * so that in the extremely rare case that two recons happen at once,
1586 * we know for which device were requesting a spare table
1587 * XXX
1588 *
1589 * XXX This code is not currently used. GO
1590 */
1591 int
1592 rf_GetSpareTableFromDaemon(req)
1593 RF_SparetWait_t *req;
1594 {
1595 int retcode;
1596
1597 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1598 req->next = rf_sparet_wait_queue;
1599 rf_sparet_wait_queue = req;
1600 wakeup(&rf_sparet_wait_queue);
1601
1602 /* mpsleep unlocks the mutex */
1603 while (!rf_sparet_resp_queue) {
1604 tsleep(&rf_sparet_resp_queue, PRIBIO,
1605 "raidframe getsparetable", 0);
1606 }
1607 req = rf_sparet_resp_queue;
1608 rf_sparet_resp_queue = req->next;
1609 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1610
1611 retcode = req->fcol;
1612 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1613 * alloc'd */
1614 return (retcode);
1615 }
1616
1617 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1618 * bp & passes it down.
1619 * any calls originating in the kernel must use non-blocking I/O
1620 * do some extra sanity checking to return "appropriate" error values for
1621 * certain conditions (to make some standard utilities work)
1622 *
1623 * Formerly known as: rf_DoAccessKernel
1624 */
1625 void
1626 raidstart(raidPtr)
1627 RF_Raid_t *raidPtr;
1628 {
1629 RF_SectorCount_t num_blocks, pb, sum;
1630 RF_RaidAddr_t raid_addr;
1631 int retcode;
1632 struct partition *pp;
1633 daddr_t blocknum;
1634 int unit;
1635 struct raid_softc *rs;
1636 int do_async;
1637 struct buf *bp;
1638
1639 unit = raidPtr->raidid;
1640 rs = &raid_softc[unit];
1641
1642 /* quick check to see if anything has died recently */
1643 RF_LOCK_MUTEX(raidPtr->mutex);
1644 if (raidPtr->numNewFailures > 0) {
1645 rf_update_component_labels(raidPtr,
1646 RF_NORMAL_COMPONENT_UPDATE);
1647 raidPtr->numNewFailures--;
1648 }
1649 RF_UNLOCK_MUTEX(raidPtr->mutex);
1650
1651 /* Check to see if we're at the limit... */
1652 RF_LOCK_MUTEX(raidPtr->mutex);
1653 while (raidPtr->openings > 0) {
1654 RF_UNLOCK_MUTEX(raidPtr->mutex);
1655
1656 /* get the next item, if any, from the queue */
1657 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1658 /* nothing more to do */
1659 return;
1660 }
1661 BUFQ_REMOVE(&rs->buf_queue, bp);
1662
1663 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1664 * partition.. Need to make it absolute to the underlying
1665 * device.. */
1666
1667 blocknum = bp->b_blkno;
1668 if (DISKPART(bp->b_dev) != RAW_PART) {
1669 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1670 blocknum += pp->p_offset;
1671 }
1672
1673 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1674 (int) blocknum));
1675
1676 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1677 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1678
1679 /* *THIS* is where we adjust what block we're going to...
1680 * but DO NOT TOUCH bp->b_blkno!!! */
1681 raid_addr = blocknum;
1682
1683 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1684 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1685 sum = raid_addr + num_blocks + pb;
1686 if (1 || rf_debugKernelAccess) {
1687 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1688 (int) raid_addr, (int) sum, (int) num_blocks,
1689 (int) pb, (int) bp->b_resid));
1690 }
1691 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1692 || (sum < num_blocks) || (sum < pb)) {
1693 bp->b_error = ENOSPC;
1694 bp->b_flags |= B_ERROR;
1695 bp->b_resid = bp->b_bcount;
1696 biodone(bp);
1697 RF_LOCK_MUTEX(raidPtr->mutex);
1698 continue;
1699 }
1700 /*
1701 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1702 */
1703
1704 if (bp->b_bcount & raidPtr->sectorMask) {
1705 bp->b_error = EINVAL;
1706 bp->b_flags |= B_ERROR;
1707 bp->b_resid = bp->b_bcount;
1708 biodone(bp);
1709 RF_LOCK_MUTEX(raidPtr->mutex);
1710 continue;
1711
1712 }
1713 db1_printf(("Calling DoAccess..\n"));
1714
1715
1716 RF_LOCK_MUTEX(raidPtr->mutex);
1717 raidPtr->openings--;
1718 RF_UNLOCK_MUTEX(raidPtr->mutex);
1719
1720 /*
1721 * Everything is async.
1722 */
1723 do_async = 1;
1724
1725 disk_busy(&rs->sc_dkdev);
1726
1727 /* XXX we're still at splbio() here... do we *really*
1728 need to be? */
1729
1730 /* don't ever condition on bp->b_flags & B_WRITE.
1731 * always condition on B_READ instead */
1732
1733 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1734 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1735 do_async, raid_addr, num_blocks,
1736 bp->b_data, bp, NULL, NULL,
1737 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1738
1739
1740 RF_LOCK_MUTEX(raidPtr->mutex);
1741 }
1742 RF_UNLOCK_MUTEX(raidPtr->mutex);
1743 }
1744
1745
1746
1747
1748 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1749
1750 int
1751 rf_DispatchKernelIO(queue, req)
1752 RF_DiskQueue_t *queue;
1753 RF_DiskQueueData_t *req;
1754 {
1755 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1756 struct buf *bp;
1757 struct raidbuf *raidbp = NULL;
1758 struct raid_softc *rs;
1759 int unit;
1760 int s;
1761
1762 s=0;
1763 /* s = splbio();*/ /* want to test this */
1764 /* XXX along with the vnode, we also need the softc associated with
1765 * this device.. */
1766
1767 req->queue = queue;
1768
1769 unit = queue->raidPtr->raidid;
1770
1771 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1772
1773 if (unit >= numraid) {
1774 printf("Invalid unit number: %d %d\n", unit, numraid);
1775 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1776 }
1777 rs = &raid_softc[unit];
1778
1779 bp = req->bp;
1780 #if 1
1781 /* XXX when there is a physical disk failure, someone is passing us a
1782 * buffer that contains old stuff!! Attempt to deal with this problem
1783 * without taking a performance hit... (not sure where the real bug
1784 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1785
1786 if (bp->b_flags & B_ERROR) {
1787 bp->b_flags &= ~B_ERROR;
1788 }
1789 if (bp->b_error != 0) {
1790 bp->b_error = 0;
1791 }
1792 #endif
1793 raidbp = RAIDGETBUF(rs);
1794
1795 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1796
1797 /*
1798 * context for raidiodone
1799 */
1800 raidbp->rf_obp = bp;
1801 raidbp->req = req;
1802
1803 LIST_INIT(&raidbp->rf_buf.b_dep);
1804
1805 switch (req->type) {
1806 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1807 /* XXX need to do something extra here.. */
1808 /* I'm leaving this in, as I've never actually seen it used,
1809 * and I'd like folks to report it... GO */
1810 printf(("WAKEUP CALLED\n"));
1811 queue->numOutstanding++;
1812
1813 /* XXX need to glue the original buffer into this?? */
1814
1815 KernelWakeupFunc(&raidbp->rf_buf);
1816 break;
1817
1818 case RF_IO_TYPE_READ:
1819 case RF_IO_TYPE_WRITE:
1820
1821 if (req->tracerec) {
1822 RF_ETIMER_START(req->tracerec->timer);
1823 }
1824 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1825 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1826 req->sectorOffset, req->numSector,
1827 req->buf, KernelWakeupFunc, (void *) req,
1828 queue->raidPtr->logBytesPerSector, req->b_proc);
1829
1830 if (rf_debugKernelAccess) {
1831 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1832 (long) bp->b_blkno));
1833 }
1834 queue->numOutstanding++;
1835 queue->last_deq_sector = req->sectorOffset;
1836 /* acc wouldn't have been let in if there were any pending
1837 * reqs at any other priority */
1838 queue->curPriority = req->priority;
1839
1840 db1_printf(("Going for %c to unit %d row %d col %d\n",
1841 req->type, unit, queue->row, queue->col));
1842 db1_printf(("sector %d count %d (%d bytes) %d\n",
1843 (int) req->sectorOffset, (int) req->numSector,
1844 (int) (req->numSector <<
1845 queue->raidPtr->logBytesPerSector),
1846 (int) queue->raidPtr->logBytesPerSector));
1847 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1848 raidbp->rf_buf.b_vp->v_numoutput++;
1849 }
1850 VOP_STRATEGY(&raidbp->rf_buf);
1851
1852 break;
1853
1854 default:
1855 panic("bad req->type in rf_DispatchKernelIO");
1856 }
1857 db1_printf(("Exiting from DispatchKernelIO\n"));
1858 /* splx(s); */ /* want to test this */
1859 return (0);
1860 }
1861 /* this is the callback function associated with a I/O invoked from
1862 kernel code.
1863 */
1864 static void
1865 KernelWakeupFunc(vbp)
1866 struct buf *vbp;
1867 {
1868 RF_DiskQueueData_t *req = NULL;
1869 RF_DiskQueue_t *queue;
1870 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1871 struct buf *bp;
1872 struct raid_softc *rs;
1873 int unit;
1874 int s;
1875
1876 s = splbio();
1877 db1_printf(("recovering the request queue:\n"));
1878 req = raidbp->req;
1879
1880 bp = raidbp->rf_obp;
1881
1882 queue = (RF_DiskQueue_t *) req->queue;
1883
1884 if (raidbp->rf_buf.b_flags & B_ERROR) {
1885 bp->b_flags |= B_ERROR;
1886 bp->b_error = raidbp->rf_buf.b_error ?
1887 raidbp->rf_buf.b_error : EIO;
1888 }
1889
1890 /* XXX methinks this could be wrong... */
1891 #if 1
1892 bp->b_resid = raidbp->rf_buf.b_resid;
1893 #endif
1894
1895 if (req->tracerec) {
1896 RF_ETIMER_STOP(req->tracerec->timer);
1897 RF_ETIMER_EVAL(req->tracerec->timer);
1898 RF_LOCK_MUTEX(rf_tracing_mutex);
1899 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1900 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1901 req->tracerec->num_phys_ios++;
1902 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1903 }
1904 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1905
1906 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1907
1908
1909 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1910 * ballistic, and mark the component as hosed... */
1911
1912 if (bp->b_flags & B_ERROR) {
1913 /* Mark the disk as dead */
1914 /* but only mark it once... */
1915 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1916 rf_ds_optimal) {
1917 printf("raid%d: IO Error. Marking %s as failed.\n",
1918 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1919 queue->raidPtr->Disks[queue->row][queue->col].status =
1920 rf_ds_failed;
1921 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1922 queue->raidPtr->numFailures++;
1923 queue->raidPtr->numNewFailures++;
1924 } else { /* Disk is already dead... */
1925 /* printf("Disk already marked as dead!\n"); */
1926 }
1927
1928 }
1929
1930 rs = &raid_softc[unit];
1931 RAIDPUTBUF(rs, raidbp);
1932
1933 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1934 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1935
1936 splx(s);
1937 }
1938
1939
1940
1941 /*
1942 * initialize a buf structure for doing an I/O in the kernel.
1943 */
1944 static void
1945 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1946 logBytesPerSector, b_proc)
1947 struct buf *bp;
1948 struct vnode *b_vp;
1949 unsigned rw_flag;
1950 dev_t dev;
1951 RF_SectorNum_t startSect;
1952 RF_SectorCount_t numSect;
1953 caddr_t buf;
1954 void (*cbFunc) (struct buf *);
1955 void *cbArg;
1956 int logBytesPerSector;
1957 struct proc *b_proc;
1958 {
1959 /* bp->b_flags = B_PHYS | rw_flag; */
1960 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1961 bp->b_bcount = numSect << logBytesPerSector;
1962 bp->b_bufsize = bp->b_bcount;
1963 bp->b_error = 0;
1964 bp->b_dev = dev;
1965 bp->b_data = buf;
1966 bp->b_blkno = startSect;
1967 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1968 if (bp->b_bcount == 0) {
1969 panic("bp->b_bcount is zero in InitBP!!\n");
1970 }
1971 bp->b_proc = b_proc;
1972 bp->b_iodone = cbFunc;
1973 bp->b_vp = b_vp;
1974
1975 }
1976
1977 static void
1978 raidgetdefaultlabel(raidPtr, rs, lp)
1979 RF_Raid_t *raidPtr;
1980 struct raid_softc *rs;
1981 struct disklabel *lp;
1982 {
1983 db1_printf(("Building a default label...\n"));
1984 bzero(lp, sizeof(*lp));
1985
1986 /* fabricate a label... */
1987 lp->d_secperunit = raidPtr->totalSectors;
1988 lp->d_secsize = raidPtr->bytesPerSector;
1989 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1990 lp->d_ntracks = 1;
1991 lp->d_ncylinders = raidPtr->totalSectors /
1992 (lp->d_nsectors * lp->d_ntracks);
1993 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1994
1995 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1996 lp->d_type = DTYPE_RAID;
1997 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1998 lp->d_rpm = 3600;
1999 lp->d_interleave = 1;
2000 lp->d_flags = 0;
2001
2002 lp->d_partitions[RAW_PART].p_offset = 0;
2003 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2004 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2005 lp->d_npartitions = RAW_PART + 1;
2006
2007 lp->d_magic = DISKMAGIC;
2008 lp->d_magic2 = DISKMAGIC;
2009 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2010
2011 }
2012 /*
2013 * Read the disklabel from the raid device. If one is not present, fake one
2014 * up.
2015 */
2016 static void
2017 raidgetdisklabel(dev)
2018 dev_t dev;
2019 {
2020 int unit = raidunit(dev);
2021 struct raid_softc *rs = &raid_softc[unit];
2022 char *errstring;
2023 struct disklabel *lp = rs->sc_dkdev.dk_label;
2024 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2025 RF_Raid_t *raidPtr;
2026
2027 db1_printf(("Getting the disklabel...\n"));
2028
2029 bzero(clp, sizeof(*clp));
2030
2031 raidPtr = raidPtrs[unit];
2032
2033 raidgetdefaultlabel(raidPtr, rs, lp);
2034
2035 /*
2036 * Call the generic disklabel extraction routine.
2037 */
2038 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2039 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2040 if (errstring)
2041 raidmakedisklabel(rs);
2042 else {
2043 int i;
2044 struct partition *pp;
2045
2046 /*
2047 * Sanity check whether the found disklabel is valid.
2048 *
2049 * This is necessary since total size of the raid device
2050 * may vary when an interleave is changed even though exactly
2051 * same componets are used, and old disklabel may used
2052 * if that is found.
2053 */
2054 if (lp->d_secperunit != rs->sc_size)
2055 printf("WARNING: %s: "
2056 "total sector size in disklabel (%d) != "
2057 "the size of raid (%ld)\n", rs->sc_xname,
2058 lp->d_secperunit, (long) rs->sc_size);
2059 for (i = 0; i < lp->d_npartitions; i++) {
2060 pp = &lp->d_partitions[i];
2061 if (pp->p_offset + pp->p_size > rs->sc_size)
2062 printf("WARNING: %s: end of partition `%c' "
2063 "exceeds the size of raid (%ld)\n",
2064 rs->sc_xname, 'a' + i, (long) rs->sc_size);
2065 }
2066 }
2067
2068 }
2069 /*
2070 * Take care of things one might want to take care of in the event
2071 * that a disklabel isn't present.
2072 */
2073 static void
2074 raidmakedisklabel(rs)
2075 struct raid_softc *rs;
2076 {
2077 struct disklabel *lp = rs->sc_dkdev.dk_label;
2078 db1_printf(("Making a label..\n"));
2079
2080 /*
2081 * For historical reasons, if there's no disklabel present
2082 * the raw partition must be marked FS_BSDFFS.
2083 */
2084
2085 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2086
2087 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2088
2089 lp->d_checksum = dkcksum(lp);
2090 }
2091 /*
2092 * Lookup the provided name in the filesystem. If the file exists,
2093 * is a valid block device, and isn't being used by anyone else,
2094 * set *vpp to the file's vnode.
2095 * You'll find the original of this in ccd.c
2096 */
2097 int
2098 raidlookup(path, p, vpp)
2099 char *path;
2100 struct proc *p;
2101 struct vnode **vpp; /* result */
2102 {
2103 struct nameidata nd;
2104 struct vnode *vp;
2105 struct vattr va;
2106 int error;
2107
2108 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2109 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2110 #ifdef DEBUG
2111 printf("RAIDframe: vn_open returned %d\n", error);
2112 #endif
2113 return (error);
2114 }
2115 vp = nd.ni_vp;
2116 if (vp->v_usecount > 1) {
2117 VOP_UNLOCK(vp, 0);
2118 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2119 return (EBUSY);
2120 }
2121 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2122 VOP_UNLOCK(vp, 0);
2123 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2124 return (error);
2125 }
2126 /* XXX: eventually we should handle VREG, too. */
2127 if (va.va_type != VBLK) {
2128 VOP_UNLOCK(vp, 0);
2129 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2130 return (ENOTBLK);
2131 }
2132 VOP_UNLOCK(vp, 0);
2133 *vpp = vp;
2134 return (0);
2135 }
2136 /*
2137 * Wait interruptibly for an exclusive lock.
2138 *
2139 * XXX
2140 * Several drivers do this; it should be abstracted and made MP-safe.
2141 * (Hmm... where have we seen this warning before :-> GO )
2142 */
2143 static int
2144 raidlock(rs)
2145 struct raid_softc *rs;
2146 {
2147 int error;
2148
2149 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2150 rs->sc_flags |= RAIDF_WANTED;
2151 if ((error =
2152 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2153 return (error);
2154 }
2155 rs->sc_flags |= RAIDF_LOCKED;
2156 return (0);
2157 }
2158 /*
2159 * Unlock and wake up any waiters.
2160 */
2161 static void
2162 raidunlock(rs)
2163 struct raid_softc *rs;
2164 {
2165
2166 rs->sc_flags &= ~RAIDF_LOCKED;
2167 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2168 rs->sc_flags &= ~RAIDF_WANTED;
2169 wakeup(rs);
2170 }
2171 }
2172
2173
/*
 * Placement of the component label as used by raidread_component_label()
 * and raidwrite_component_label(): the I/O buffer is
 * RF_COMPONENT_INFO_SIZE bytes long and starts at block
 * RF_COMPONENT_INFO_OFFSET / DEV_BSIZE of the component.
 */
#define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2176
2177 int
2178 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2179 {
2180 RF_ComponentLabel_t clabel;
2181 raidread_component_label(dev, b_vp, &clabel);
2182 clabel.mod_counter = mod_counter;
2183 clabel.clean = RF_RAID_CLEAN;
2184 raidwrite_component_label(dev, b_vp, &clabel);
2185 return(0);
2186 }
2187
2188
2189 int
2190 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2191 {
2192 RF_ComponentLabel_t clabel;
2193 raidread_component_label(dev, b_vp, &clabel);
2194 clabel.mod_counter = mod_counter;
2195 clabel.clean = RF_RAID_DIRTY;
2196 raidwrite_component_label(dev, b_vp, &clabel);
2197 return(0);
2198 }
2199
2200 /* ARGSUSED */
2201 int
2202 raidread_component_label(dev, b_vp, clabel)
2203 dev_t dev;
2204 struct vnode *b_vp;
2205 RF_ComponentLabel_t *clabel;
2206 {
2207 struct buf *bp;
2208 int error;
2209
2210 /* XXX should probably ensure that we don't try to do this if
2211 someone has changed rf_protected_sectors. */
2212
2213 if (b_vp == NULL) {
2214 /* For whatever reason, this component is not valid.
2215 Don't try to read a component label from it. */
2216 return(EINVAL);
2217 }
2218
2219 /* get a block of the appropriate size... */
2220 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2221 bp->b_dev = dev;
2222
2223 /* get our ducks in a row for the read */
2224 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2225 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2226 bp->b_flags |= B_READ;
2227 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2228
2229 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2230
2231 error = biowait(bp);
2232
2233 if (!error) {
2234 memcpy(clabel, bp->b_data,
2235 sizeof(RF_ComponentLabel_t));
2236 #if 0
2237 rf_print_component_label( clabel );
2238 #endif
2239 } else {
2240 #if 0
2241 printf("Failed to read RAID component label!\n");
2242 #endif
2243 }
2244
2245 brelse(bp);
2246 return(error);
2247 }
2248 /* ARGSUSED */
2249 int
2250 raidwrite_component_label(dev, b_vp, clabel)
2251 dev_t dev;
2252 struct vnode *b_vp;
2253 RF_ComponentLabel_t *clabel;
2254 {
2255 struct buf *bp;
2256 int error;
2257
2258 /* get a block of the appropriate size... */
2259 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2260 bp->b_dev = dev;
2261
2262 /* get our ducks in a row for the write */
2263 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2264 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2265 bp->b_flags |= B_WRITE;
2266 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2267
2268 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2269
2270 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2271
2272 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2273 error = biowait(bp);
2274 brelse(bp);
2275 if (error) {
2276 #if 1
2277 printf("Failed to write RAID component info!\n");
2278 #endif
2279 }
2280
2281 return(error);
2282 }
2283
2284 void
2285 rf_markalldirty(raidPtr)
2286 RF_Raid_t *raidPtr;
2287 {
2288 RF_ComponentLabel_t clabel;
2289 int r,c;
2290
2291 raidPtr->mod_counter++;
2292 for (r = 0; r < raidPtr->numRow; r++) {
2293 for (c = 0; c < raidPtr->numCol; c++) {
2294 /* we don't want to touch (at all) a disk that has
2295 failed */
2296 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2297 raidread_component_label(
2298 raidPtr->Disks[r][c].dev,
2299 raidPtr->raid_cinfo[r][c].ci_vp,
2300 &clabel);
2301 if (clabel.status == rf_ds_spared) {
2302 /* XXX do something special...
2303 but whatever you do, don't
2304 try to access it!! */
2305 } else {
2306 #if 0
2307 clabel.status =
2308 raidPtr->Disks[r][c].status;
2309 raidwrite_component_label(
2310 raidPtr->Disks[r][c].dev,
2311 raidPtr->raid_cinfo[r][c].ci_vp,
2312 &clabel);
2313 #endif
2314 raidmarkdirty(
2315 raidPtr->Disks[r][c].dev,
2316 raidPtr->raid_cinfo[r][c].ci_vp,
2317 raidPtr->mod_counter);
2318 }
2319 }
2320 }
2321 }
2322 /* printf("Component labels marked dirty.\n"); */
2323 #if 0
2324 for( c = 0; c < raidPtr->numSpare ; c++) {
2325 sparecol = raidPtr->numCol + c;
2326 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2327 /*
2328
2329 XXX this is where we get fancy and map this spare
2330 into it's correct spot in the array.
2331
2332 */
2333 /*
2334
2335 we claim this disk is "optimal" if it's
2336 rf_ds_used_spare, as that means it should be
2337 directly substitutable for the disk it replaced.
2338 We note that too...
2339
2340 */
2341
2342 for(i=0;i<raidPtr->numRow;i++) {
2343 for(j=0;j<raidPtr->numCol;j++) {
2344 if ((raidPtr->Disks[i][j].spareRow ==
2345 r) &&
2346 (raidPtr->Disks[i][j].spareCol ==
2347 sparecol)) {
2348 srow = r;
2349 scol = sparecol;
2350 break;
2351 }
2352 }
2353 }
2354
2355 raidread_component_label(
2356 raidPtr->Disks[r][sparecol].dev,
2357 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2358 &clabel);
2359 /* make sure status is noted */
2360 clabel.version = RF_COMPONENT_LABEL_VERSION;
2361 clabel.mod_counter = raidPtr->mod_counter;
2362 clabel.serial_number = raidPtr->serial_number;
2363 clabel.row = srow;
2364 clabel.column = scol;
2365 clabel.num_rows = raidPtr->numRow;
2366 clabel.num_columns = raidPtr->numCol;
2367 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2368 clabel.status = rf_ds_optimal;
2369 raidwrite_component_label(
2370 raidPtr->Disks[r][sparecol].dev,
2371 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2372 &clabel);
2373 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2374 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2375 }
2376 }
2377
2378 #endif
2379 }
2380
2381
2382 void
2383 rf_update_component_labels(raidPtr, final)
2384 RF_Raid_t *raidPtr;
2385 int final;
2386 {
2387 RF_ComponentLabel_t clabel;
2388 int sparecol;
2389 int r,c;
2390 int i,j;
2391 int srow, scol;
2392
2393 srow = -1;
2394 scol = -1;
2395
2396 /* XXX should do extra checks to make sure things really are clean,
2397 rather than blindly setting the clean bit... */
2398
2399 raidPtr->mod_counter++;
2400
2401 for (r = 0; r < raidPtr->numRow; r++) {
2402 for (c = 0; c < raidPtr->numCol; c++) {
2403 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2404 raidread_component_label(
2405 raidPtr->Disks[r][c].dev,
2406 raidPtr->raid_cinfo[r][c].ci_vp,
2407 &clabel);
2408 /* make sure status is noted */
2409 clabel.status = rf_ds_optimal;
2410 /* bump the counter */
2411 clabel.mod_counter = raidPtr->mod_counter;
2412
2413 raidwrite_component_label(
2414 raidPtr->Disks[r][c].dev,
2415 raidPtr->raid_cinfo[r][c].ci_vp,
2416 &clabel);
2417 if (final == RF_FINAL_COMPONENT_UPDATE) {
2418 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2419 raidmarkclean(
2420 raidPtr->Disks[r][c].dev,
2421 raidPtr->raid_cinfo[r][c].ci_vp,
2422 raidPtr->mod_counter);
2423 }
2424 }
2425 }
2426 /* else we don't touch it.. */
2427 }
2428 }
2429
2430 for( c = 0; c < raidPtr->numSpare ; c++) {
2431 sparecol = raidPtr->numCol + c;
2432 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2433 /*
2434
2435 we claim this disk is "optimal" if it's
2436 rf_ds_used_spare, as that means it should be
2437 directly substitutable for the disk it replaced.
2438 We note that too...
2439
2440 */
2441
2442 for(i=0;i<raidPtr->numRow;i++) {
2443 for(j=0;j<raidPtr->numCol;j++) {
2444 if ((raidPtr->Disks[i][j].spareRow ==
2445 0) &&
2446 (raidPtr->Disks[i][j].spareCol ==
2447 sparecol)) {
2448 srow = i;
2449 scol = j;
2450 break;
2451 }
2452 }
2453 }
2454
2455 /* XXX shouldn't *really* need this... */
2456 raidread_component_label(
2457 raidPtr->Disks[0][sparecol].dev,
2458 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2459 &clabel);
2460 /* make sure status is noted */
2461
2462 raid_init_component_label(raidPtr, &clabel);
2463
2464 clabel.mod_counter = raidPtr->mod_counter;
2465 clabel.row = srow;
2466 clabel.column = scol;
2467 clabel.status = rf_ds_optimal;
2468
2469 raidwrite_component_label(
2470 raidPtr->Disks[0][sparecol].dev,
2471 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2472 &clabel);
2473 if (final == RF_FINAL_COMPONENT_UPDATE) {
2474 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2475 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2476 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2477 raidPtr->mod_counter);
2478 }
2479 }
2480 }
2481 }
2482 /* printf("Component labels updated\n"); */
2483 }
2484
2485 void
2486 rf_close_component(raidPtr, vp, auto_configured)
2487 RF_Raid_t *raidPtr;
2488 struct vnode *vp;
2489 int auto_configured;
2490 {
2491 struct proc *p;
2492
2493 p = raidPtr->engine_thread;
2494
2495 if (vp != NULL) {
2496 if (auto_configured == 1) {
2497 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2498 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2499 vput(vp);
2500
2501 } else {
2502 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2503 }
2504 } else {
2505 printf("vnode was NULL\n");
2506 }
2507 }
2508
2509
2510 void
2511 rf_UnconfigureVnodes(raidPtr)
2512 RF_Raid_t *raidPtr;
2513 {
2514 int r,c;
2515 struct proc *p;
2516 struct vnode *vp;
2517 int acd;
2518
2519
2520 /* We take this opportunity to close the vnodes like we should.. */
2521
2522 p = raidPtr->engine_thread;
2523
2524 for (r = 0; r < raidPtr->numRow; r++) {
2525 for (c = 0; c < raidPtr->numCol; c++) {
2526 printf("Closing vnode for row: %d col: %d\n", r, c);
2527 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2528 acd = raidPtr->Disks[r][c].auto_configured;
2529 rf_close_component(raidPtr, vp, acd);
2530 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2531 raidPtr->Disks[r][c].auto_configured = 0;
2532 }
2533 }
2534 for (r = 0; r < raidPtr->numSpare; r++) {
2535 printf("Closing vnode for spare: %d\n", r);
2536 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2537 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2538 rf_close_component(raidPtr, vp, acd);
2539 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2540 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2541 }
2542 }
2543
2544
2545 void
2546 rf_ReconThread(req)
2547 struct rf_recon_req *req;
2548 {
2549 int s;
2550 RF_Raid_t *raidPtr;
2551
2552 s = splbio();
2553 raidPtr = (RF_Raid_t *) req->raidPtr;
2554 raidPtr->recon_in_progress = 1;
2555
2556 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2557 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2558
2559 /* XXX get rid of this! we don't need it at all.. */
2560 RF_Free(req, sizeof(*req));
2561
2562 raidPtr->recon_in_progress = 0;
2563 splx(s);
2564
2565 /* That's all... */
2566 kthread_exit(0); /* does not return */
2567 }
2568
2569 void
2570 rf_RewriteParityThread(raidPtr)
2571 RF_Raid_t *raidPtr;
2572 {
2573 int retcode;
2574 int s;
2575
2576 raidPtr->parity_rewrite_in_progress = 1;
2577 s = splbio();
2578 retcode = rf_RewriteParity(raidPtr);
2579 splx(s);
2580 if (retcode) {
2581 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2582 } else {
2583 /* set the clean bit! If we shutdown correctly,
2584 the clean bit on each component label will get
2585 set */
2586 raidPtr->parity_good = RF_RAID_CLEAN;
2587 }
2588 raidPtr->parity_rewrite_in_progress = 0;
2589
2590 /* Anyone waiting for us to stop? If so, inform them... */
2591 if (raidPtr->waitShutdown) {
2592 wakeup(&raidPtr->parity_rewrite_in_progress);
2593 }
2594
2595 /* That's all... */
2596 kthread_exit(0); /* does not return */
2597 }
2598
2599
2600 void
2601 rf_CopybackThread(raidPtr)
2602 RF_Raid_t *raidPtr;
2603 {
2604 int s;
2605
2606 raidPtr->copyback_in_progress = 1;
2607 s = splbio();
2608 rf_CopybackReconstructedData(raidPtr);
2609 splx(s);
2610 raidPtr->copyback_in_progress = 0;
2611
2612 /* That's all... */
2613 kthread_exit(0); /* does not return */
2614 }
2615
2616
2617 void
2618 rf_ReconstructInPlaceThread(req)
2619 struct rf_recon_req *req;
2620 {
2621 int retcode;
2622 int s;
2623 RF_Raid_t *raidPtr;
2624
2625 s = splbio();
2626 raidPtr = req->raidPtr;
2627 raidPtr->recon_in_progress = 1;
2628 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2629 RF_Free(req, sizeof(*req));
2630 raidPtr->recon_in_progress = 0;
2631 splx(s);
2632
2633 /* That's all... */
2634 kthread_exit(0); /* does not return */
2635 }
2636
/*
 * Mount-root hook for the RAID driver.  The body is empty, so calling it
 * has no effect and `dev' is unused.
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{

}
2643
2644
2645 RF_AutoConfig_t *
2646 rf_find_raid_components()
2647 {
2648 struct devnametobdevmaj *dtobdm;
2649 struct vnode *vp;
2650 struct disklabel label;
2651 struct device *dv;
2652 char *cd_name;
2653 dev_t dev;
2654 int error;
2655 int i;
2656 int good_one;
2657 RF_ComponentLabel_t *clabel;
2658 RF_AutoConfig_t *ac_list;
2659 RF_AutoConfig_t *ac;
2660
2661
2662 /* initialize the AutoConfig list */
2663 ac_list = NULL;
2664
2665 if (raidautoconfig) {
2666
2667 /* we begin by trolling through *all* the devices on the system */
2668
2669 for (dv = alldevs.tqh_first; dv != NULL;
2670 dv = dv->dv_list.tqe_next) {
2671
2672 /* we are only interested in disks... */
2673 if (dv->dv_class != DV_DISK)
2674 continue;
2675
2676 /* we don't care about floppies... */
2677 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2678 continue;
2679 }
2680
2681 /* need to find the device_name_to_block_device_major stuff */
2682 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2683 dtobdm = dev_name2blk;
2684 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2685 dtobdm++;
2686 }
2687
2688 /* get a vnode for the raw partition of this disk */
2689
2690 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2691 if (bdevvp(dev, &vp))
2692 panic("RAID can't alloc vnode");
2693
2694 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2695
2696 if (error) {
2697 /* "Who cares." Continue looking
2698 for something that exists*/
2699 vput(vp);
2700 continue;
2701 }
2702
2703 /* Ok, the disk exists. Go get the disklabel. */
2704 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2705 FREAD, NOCRED, 0);
2706 if (error) {
2707 /*
2708 * XXX can't happen - open() would
2709 * have errored out (or faked up one)
2710 */
2711 printf("can't get label for dev %s%c (%d)!?!?\n",
2712 dv->dv_xname, 'a' + RAW_PART, error);
2713 }
2714
2715 /* don't need this any more. We'll allocate it again
2716 a little later if we really do... */
2717 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2718 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2719 vput(vp);
2720
2721 for (i=0; i < label.d_npartitions; i++) {
2722 /* We only support partitions marked as RAID */
2723 if (label.d_partitions[i].p_fstype != FS_RAID)
2724 continue;
2725
2726 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2727 if (bdevvp(dev, &vp))
2728 panic("RAID can't alloc vnode");
2729
2730 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2731 if (error) {
2732 /* Whatever... */
2733 vput(vp);
2734 continue;
2735 }
2736
2737 good_one = 0;
2738
2739 clabel = (RF_ComponentLabel_t *)
2740 malloc(sizeof(RF_ComponentLabel_t),
2741 M_RAIDFRAME, M_NOWAIT);
2742 if (clabel == NULL) {
2743 /* XXX CLEANUP HERE */
2744 printf("RAID auto config: out of memory!\n");
2745 return(NULL); /* XXX probably should panic? */
2746 }
2747
2748 if (!raidread_component_label(dev, vp, clabel)) {
2749 /* Got the label. Does it look reasonable? */
2750 if (rf_reasonable_label(clabel) &&
2751 (clabel->partitionSize <=
2752 label.d_partitions[i].p_size)) {
2753 #if DEBUG
2754 printf("Component on: %s%c: %d\n",
2755 dv->dv_xname, 'a'+i,
2756 label.d_partitions[i].p_size);
2757 rf_print_component_label(clabel);
2758 #endif
2759 /* if it's reasonable, add it,
2760 else ignore it. */
2761 ac = (RF_AutoConfig_t *)
2762 malloc(sizeof(RF_AutoConfig_t),
2763 M_RAIDFRAME,
2764 M_NOWAIT);
2765 if (ac == NULL) {
2766 /* XXX should panic?? */
2767 return(NULL);
2768 }
2769
2770 sprintf(ac->devname, "%s%c",
2771 dv->dv_xname, 'a'+i);
2772 ac->dev = dev;
2773 ac->vp = vp;
2774 ac->clabel = clabel;
2775 ac->next = ac_list;
2776 ac_list = ac;
2777 good_one = 1;
2778 }
2779 }
2780 if (!good_one) {
2781 /* cleanup */
2782 free(clabel, M_RAIDFRAME);
2783 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2784 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2785 vput(vp);
2786 }
2787 }
2788 }
2789 }
2790 return(ac_list);
2791 }
2792
2793 static int
2794 rf_reasonable_label(clabel)
2795 RF_ComponentLabel_t *clabel;
2796 {
2797
2798 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2799 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2800 ((clabel->clean == RF_RAID_CLEAN) ||
2801 (clabel->clean == RF_RAID_DIRTY)) &&
2802 clabel->row >=0 &&
2803 clabel->column >= 0 &&
2804 clabel->num_rows > 0 &&
2805 clabel->num_columns > 0 &&
2806 clabel->row < clabel->num_rows &&
2807 clabel->column < clabel->num_columns &&
2808 clabel->blockSize > 0 &&
2809 clabel->numBlocks > 0) {
2810 /* label looks reasonable enough... */
2811 return(1);
2812 }
2813 return(0);
2814 }
2815
2816
2817 void
2818 rf_print_component_label(clabel)
2819 RF_ComponentLabel_t *clabel;
2820 {
2821 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2822 clabel->row, clabel->column,
2823 clabel->num_rows, clabel->num_columns);
2824 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2825 clabel->version, clabel->serial_number,
2826 clabel->mod_counter);
2827 printf(" Clean: %s Status: %d\n",
2828 clabel->clean ? "Yes" : "No", clabel->status );
2829 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2830 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2831 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2832 (char) clabel->parityConfig, clabel->blockSize,
2833 clabel->numBlocks);
2834 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2835 printf(" Contains root partition: %s\n",
2836 clabel->root_partition ? "Yes" : "No" );
2837 printf(" Last configured as: raid%d\n", clabel->last_unit );
2838 #if 0
2839 printf(" Config order: %d\n", clabel->config_order);
2840 #endif
2841
2842 }
2843
2844 RF_ConfigSet_t *
2845 rf_create_auto_sets(ac_list)
2846 RF_AutoConfig_t *ac_list;
2847 {
2848 RF_AutoConfig_t *ac;
2849 RF_ConfigSet_t *config_sets;
2850 RF_ConfigSet_t *cset;
2851 RF_AutoConfig_t *ac_next;
2852
2853
2854 config_sets = NULL;
2855
2856 /* Go through the AutoConfig list, and figure out which components
2857 belong to what sets. */
2858 ac = ac_list;
2859 while(ac!=NULL) {
2860 /* we're going to putz with ac->next, so save it here
2861 for use at the end of the loop */
2862 ac_next = ac->next;
2863
2864 if (config_sets == NULL) {
2865 /* will need at least this one... */
2866 config_sets = (RF_ConfigSet_t *)
2867 malloc(sizeof(RF_ConfigSet_t),
2868 M_RAIDFRAME, M_NOWAIT);
2869 if (config_sets == NULL) {
2870 panic("rf_create_auto_sets: No memory!\n");
2871 }
2872 /* this one is easy :) */
2873 config_sets->ac = ac;
2874 config_sets->next = NULL;
2875 config_sets->rootable = 0;
2876 ac->next = NULL;
2877 } else {
2878 /* which set does this component fit into? */
2879 cset = config_sets;
2880 while(cset!=NULL) {
2881 if (rf_does_it_fit(cset, ac)) {
2882 /* looks like it matches... */
2883 ac->next = cset->ac;
2884 cset->ac = ac;
2885 break;
2886 }
2887 cset = cset->next;
2888 }
2889 if (cset==NULL) {
2890 /* didn't find a match above... new set..*/
2891 cset = (RF_ConfigSet_t *)
2892 malloc(sizeof(RF_ConfigSet_t),
2893 M_RAIDFRAME, M_NOWAIT);
2894 if (cset == NULL) {
2895 panic("rf_create_auto_sets: No memory!\n");
2896 }
2897 cset->ac = ac;
2898 ac->next = NULL;
2899 cset->next = config_sets;
2900 cset->rootable = 0;
2901 config_sets = cset;
2902 }
2903 }
2904 ac = ac_next;
2905 }
2906
2907
2908 return(config_sets);
2909 }
2910
2911 static int
2912 rf_does_it_fit(cset, ac)
2913 RF_ConfigSet_t *cset;
2914 RF_AutoConfig_t *ac;
2915 {
2916 RF_ComponentLabel_t *clabel1, *clabel2;
2917
2918 /* If this one matches the *first* one in the set, that's good
2919 enough, since the other members of the set would have been
2920 through here too... */
2921 /* note that we are not checking partitionSize here..
2922
2923 Note that we are also not checking the mod_counters here.
2924 If everything else matches execpt the mod_counter, that's
2925 good enough for this test. We will deal with the mod_counters
2926 a little later in the autoconfiguration process.
2927
2928 (clabel1->mod_counter == clabel2->mod_counter) &&
2929
2930 The reason we don't check for this is that failed disks
2931 will have lower modification counts. If those disks are
2932 not added to the set they used to belong to, then they will
2933 form their own set, which may result in 2 different sets,
2934 for example, competing to be configured at raid0, and
2935 perhaps competing to be the root filesystem set. If the
2936 wrong ones get configured, or both attempt to become /,
2937 weird behaviour and or serious lossage will occur. Thus we
2938 need to bring them into the fold here, and kick them out at
2939 a later point.
2940
2941 */
2942
2943 clabel1 = cset->ac->clabel;
2944 clabel2 = ac->clabel;
2945 if ((clabel1->version == clabel2->version) &&
2946 (clabel1->serial_number == clabel2->serial_number) &&
2947 (clabel1->num_rows == clabel2->num_rows) &&
2948 (clabel1->num_columns == clabel2->num_columns) &&
2949 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2950 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2951 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2952 (clabel1->parityConfig == clabel2->parityConfig) &&
2953 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2954 (clabel1->blockSize == clabel2->blockSize) &&
2955 (clabel1->numBlocks == clabel2->numBlocks) &&
2956 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2957 (clabel1->root_partition == clabel2->root_partition) &&
2958 (clabel1->last_unit == clabel2->last_unit) &&
2959 (clabel1->config_order == clabel2->config_order)) {
2960 /* if it get's here, it almost *has* to be a match */
2961 } else {
2962 /* it's not consistent with somebody in the set..
2963 punt */
2964 return(0);
2965 }
2966 /* all was fine.. it must fit... */
2967 return(1);
2968 }
2969
2970 int
2971 rf_have_enough_components(cset)
2972 RF_ConfigSet_t *cset;
2973 {
2974 RF_AutoConfig_t *ac;
2975 RF_AutoConfig_t *auto_config;
2976 RF_ComponentLabel_t *clabel;
2977 int r,c;
2978 int num_rows;
2979 int num_cols;
2980 int num_missing;
2981 int mod_counter;
2982 int mod_counter_found;
2983 int even_pair_failed;
2984 char parity_type;
2985
2986
2987 /* check to see that we have enough 'live' components
2988 of this set. If so, we can configure it if necessary */
2989
2990 num_rows = cset->ac->clabel->num_rows;
2991 num_cols = cset->ac->clabel->num_columns;
2992 parity_type = cset->ac->clabel->parityConfig;
2993
2994 /* XXX Check for duplicate components!?!?!? */
2995
2996 /* Determine what the mod_counter is supposed to be for this set. */
2997
2998 mod_counter_found = 0;
2999 mod_counter = 0;
3000 ac = cset->ac;
3001 while(ac!=NULL) {
3002 if (mod_counter_found==0) {
3003 mod_counter = ac->clabel->mod_counter;
3004 mod_counter_found = 1;
3005 } else {
3006 if (ac->clabel->mod_counter > mod_counter) {
3007 mod_counter = ac->clabel->mod_counter;
3008 }
3009 }
3010 ac = ac->next;
3011 }
3012
3013 num_missing = 0;
3014 auto_config = cset->ac;
3015
3016 for(r=0; r<num_rows; r++) {
3017 even_pair_failed = 0;
3018 for(c=0; c<num_cols; c++) {
3019 ac = auto_config;
3020 while(ac!=NULL) {
3021 if ((ac->clabel->row == r) &&
3022 (ac->clabel->column == c) &&
3023 (ac->clabel->mod_counter == mod_counter)) {
3024 /* it's this one... */
3025 #if DEBUG
3026 printf("Found: %s at %d,%d\n",
3027 ac->devname,r,c);
3028 #endif
3029 break;
3030 }
3031 ac=ac->next;
3032 }
3033 if (ac==NULL) {
3034 /* Didn't find one here! */
3035 /* special case for RAID 1, especially
3036 where there are more than 2
3037 components (where RAIDframe treats
3038 things a little differently :( ) */
3039 if (parity_type == '1') {
3040 if (c%2 == 0) { /* even component */
3041 even_pair_failed = 1;
3042 } else { /* odd component. If
3043 we're failed, and
3044 so is the even
3045 component, it's
3046 "Good Night, Charlie" */
3047 if (even_pair_failed == 1) {
3048 return(0);
3049 }
3050 }
3051 } else {
3052 /* normal accounting */
3053 num_missing++;
3054 }
3055 }
3056 if ((parity_type == '1') && (c%2 == 1)) {
3057 /* Just did an even component, and we didn't
3058 bail.. reset the even_pair_failed flag,
3059 and go on to the next component.... */
3060 even_pair_failed = 0;
3061 }
3062 }
3063 }
3064
3065 clabel = cset->ac->clabel;
3066
3067 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3068 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3069 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3070 /* XXX this needs to be made *much* more general */
3071 /* Too many failures */
3072 return(0);
3073 }
3074 /* otherwise, all is well, and we've got enough to take a kick
3075 at autoconfiguring this set */
3076 return(1);
3077 }
3078
3079 void
3080 rf_create_configuration(ac,config,raidPtr)
3081 RF_AutoConfig_t *ac;
3082 RF_Config_t *config;
3083 RF_Raid_t *raidPtr;
3084 {
3085 RF_ComponentLabel_t *clabel;
3086 int i;
3087
3088 clabel = ac->clabel;
3089
3090 /* 1. Fill in the common stuff */
3091 config->numRow = clabel->num_rows;
3092 config->numCol = clabel->num_columns;
3093 config->numSpare = 0; /* XXX should this be set here? */
3094 config->sectPerSU = clabel->sectPerSU;
3095 config->SUsPerPU = clabel->SUsPerPU;
3096 config->SUsPerRU = clabel->SUsPerRU;
3097 config->parityConfig = clabel->parityConfig;
3098 /* XXX... */
3099 strcpy(config->diskQueueType,"fifo");
3100 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3101 config->layoutSpecificSize = 0; /* XXX ?? */
3102
3103 while(ac!=NULL) {
3104 /* row/col values will be in range due to the checks
3105 in reasonable_label() */
3106 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3107 ac->devname);
3108 ac = ac->next;
3109 }
3110
3111 for(i=0;i<RF_MAXDBGV;i++) {
3112 config->debugVars[i][0] = NULL;
3113 }
3114 }
3115
3116 int
3117 rf_set_autoconfig(raidPtr, new_value)
3118 RF_Raid_t *raidPtr;
3119 int new_value;
3120 {
3121 RF_ComponentLabel_t clabel;
3122 struct vnode *vp;
3123 dev_t dev;
3124 int row, column;
3125
3126 raidPtr->autoconfigure = new_value;
3127 for(row=0; row<raidPtr->numRow; row++) {
3128 for(column=0; column<raidPtr->numCol; column++) {
3129 if (raidPtr->Disks[row][column].status ==
3130 rf_ds_optimal) {
3131 dev = raidPtr->Disks[row][column].dev;
3132 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3133 raidread_component_label(dev, vp, &clabel);
3134 clabel.autoconfigure = new_value;
3135 raidwrite_component_label(dev, vp, &clabel);
3136 }
3137 }
3138 }
3139 return(new_value);
3140 }
3141
3142 int
3143 rf_set_rootpartition(raidPtr, new_value)
3144 RF_Raid_t *raidPtr;
3145 int new_value;
3146 {
3147 RF_ComponentLabel_t clabel;
3148 struct vnode *vp;
3149 dev_t dev;
3150 int row, column;
3151
3152 raidPtr->root_partition = new_value;
3153 for(row=0; row<raidPtr->numRow; row++) {
3154 for(column=0; column<raidPtr->numCol; column++) {
3155 if (raidPtr->Disks[row][column].status ==
3156 rf_ds_optimal) {
3157 dev = raidPtr->Disks[row][column].dev;
3158 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3159 raidread_component_label(dev, vp, &clabel);
3160 clabel.root_partition = new_value;
3161 raidwrite_component_label(dev, vp, &clabel);
3162 }
3163 }
3164 }
3165 return(new_value);
3166 }
3167
3168 void
3169 rf_release_all_vps(cset)
3170 RF_ConfigSet_t *cset;
3171 {
3172 RF_AutoConfig_t *ac;
3173
3174 ac = cset->ac;
3175 while(ac!=NULL) {
3176 /* Close the vp, and give it back */
3177 if (ac->vp) {
3178 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3179 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3180 vput(ac->vp);
3181 ac->vp = NULL;
3182 }
3183 ac = ac->next;
3184 }
3185 }
3186
3187
3188 void
3189 rf_cleanup_config_set(cset)
3190 RF_ConfigSet_t *cset;
3191 {
3192 RF_AutoConfig_t *ac;
3193 RF_AutoConfig_t *next_ac;
3194
3195 ac = cset->ac;
3196 while(ac!=NULL) {
3197 next_ac = ac->next;
3198 /* nuke the label */
3199 free(ac->clabel, M_RAIDFRAME);
3200 /* cleanup the config structure */
3201 free(ac, M_RAIDFRAME);
3202 /* "next.." */
3203 ac = next_ac;
3204 }
3205 /* and, finally, nuke the config set */
3206 free(cset, M_RAIDFRAME);
3207 }
3208
3209
3210 void
3211 raid_init_component_label(raidPtr, clabel)
3212 RF_Raid_t *raidPtr;
3213 RF_ComponentLabel_t *clabel;
3214 {
3215 /* current version number */
3216 clabel->version = RF_COMPONENT_LABEL_VERSION;
3217 clabel->serial_number = raidPtr->serial_number;
3218 clabel->mod_counter = raidPtr->mod_counter;
3219 clabel->num_rows = raidPtr->numRow;
3220 clabel->num_columns = raidPtr->numCol;
3221 clabel->clean = RF_RAID_DIRTY; /* not clean */
3222 clabel->status = rf_ds_optimal; /* "It's good!" */
3223
3224 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3225 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3226 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3227
3228 clabel->blockSize = raidPtr->bytesPerSector;
3229 clabel->numBlocks = raidPtr->sectorsPerDisk;
3230
3231 /* XXX not portable */
3232 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3233 clabel->maxOutstanding = raidPtr->maxOutstanding;
3234 clabel->autoconfigure = raidPtr->autoconfigure;
3235 clabel->root_partition = raidPtr->root_partition;
3236 clabel->last_unit = raidPtr->raidid;
3237 clabel->config_order = raidPtr->config_order;
3238 }
3239
3240 int
3241 rf_auto_config_set(cset,unit)
3242 RF_ConfigSet_t *cset;
3243 int *unit;
3244 {
3245 RF_Raid_t *raidPtr;
3246 RF_Config_t *config;
3247 int raidID;
3248 int retcode;
3249
3250 printf("RAID autoconfigure\n");
3251
3252 retcode = 0;
3253 *unit = -1;
3254
3255 /* 1. Create a config structure */
3256
3257 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3258 M_RAIDFRAME,
3259 M_NOWAIT);
3260 if (config==NULL) {
3261 printf("Out of mem!?!?\n");
3262 /* XXX do something more intelligent here. */
3263 return(1);
3264 }
3265
3266 memset(config, 0, sizeof(RF_Config_t));
3267
3268 /* XXX raidID needs to be set correctly.. */
3269
3270 /*
3271 2. Figure out what RAID ID this one is supposed to live at
3272 See if we can get the same RAID dev that it was configured
3273 on last time..
3274 */
3275
3276 raidID = cset->ac->clabel->last_unit;
3277 if ((raidID < 0) || (raidID >= numraid)) {
3278 /* let's not wander off into lala land. */
3279 raidID = numraid - 1;
3280 }
3281 if (raidPtrs[raidID]->valid != 0) {
3282
3283 /*
3284 Nope... Go looking for an alternative...
3285 Start high so we don't immediately use raid0 if that's
3286 not taken.
3287 */
3288
3289 for(raidID = numraid; raidID >= 0; raidID--) {
3290 if (raidPtrs[raidID]->valid == 0) {
3291 /* can use this one! */
3292 break;
3293 }
3294 }
3295 }
3296
3297 if (raidID < 0) {
3298 /* punt... */
3299 printf("Unable to auto configure this set!\n");
3300 printf("(Out of RAID devs!)\n");
3301 return(1);
3302 }
3303 printf("Configuring raid%d:\n",raidID);
3304 raidPtr = raidPtrs[raidID];
3305
3306 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3307 raidPtr->raidid = raidID;
3308 raidPtr->openings = RAIDOUTSTANDING;
3309
3310 /* 3. Build the configuration structure */
3311 rf_create_configuration(cset->ac, config, raidPtr);
3312
3313 /* 4. Do the configuration */
3314 retcode = rf_Configure(raidPtr, config, cset->ac);
3315
3316 if (retcode == 0) {
3317
3318 raidinit(raidPtrs[raidID]);
3319
3320 rf_markalldirty(raidPtrs[raidID]);
3321 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3322 if (cset->ac->clabel->root_partition==1) {
3323 /* everything configured just fine. Make a note
3324 that this set is eligible to be root. */
3325 cset->rootable = 1;
3326 /* XXX do this here? */
3327 raidPtrs[raidID]->root_partition = 1;
3328 }
3329 }
3330
3331 /* 5. Cleanup */
3332 free(config, M_RAIDFRAME);
3333
3334 *unit = raidID;
3335 return(retcode);
3336 }
3337
3338 void
3339 rf_disk_unbusy(desc)
3340 RF_RaidAccessDesc_t *desc;
3341 {
3342 struct buf *bp;
3343
3344 bp = (struct buf *)desc->bp;
3345 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3346 (bp->b_bcount - bp->b_resid));
3347 }
3348