1 /* $NetBSD: rf_netbsdkintf.c,v 1.92.2.9 2001/05/01 12:27:33 he Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include "raid.h"
139 #include "opt_raid_autoconfig.h"
140 #include "rf_raid.h"
141 #include "rf_raidframe.h"
142 #include "rf_copyback.h"
143 #include "rf_dag.h"
144 #include "rf_dagflags.h"
145 #include "rf_desc.h"
146 #include "rf_diskqueue.h"
147 #include "rf_acctrace.h"
148 #include "rf_etimer.h"
149 #include "rf_general.h"
150 #include "rf_debugMem.h"
151 #include "rf_kintf.h"
152 #include "rf_options.h"
153 #include "rf_driver.h"
154 #include "rf_parityscan.h"
155 #include "rf_debugprint.h"
156 #include "rf_threadstuff.h"
157 #include "rf_configure.h"
158
159 int rf_kdebug_level = 0;
160
161 #ifdef DEBUG
162 #define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
163 #else /* DEBUG */
164 #define db1_printf(a) do { } while (0)
165 #endif /* DEBUG */
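
/*
 * Usage sketch (illustrative, not from the original sources): because
 * db1_printf() pastes its argument directly into a printf call, call
 * sites supply a doubly-parenthesized argument list, e.g.
 *
 *	db1_printf(("raid%d: opening partition %d\n", unit, part));
 */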
166
167 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
168
169 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
170
171 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
172 * spare table */
173 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
174 * installation process */
175
176 /* prototypes */
177 static void KernelWakeupFunc(struct buf * bp);
178 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
179 dev_t dev, RF_SectorNum_t startSect,
180 RF_SectorCount_t numSect, caddr_t buf,
181 void (*cbFunc) (struct buf *), void *cbArg,
182 int logBytesPerSector, struct proc * b_proc);
183 static void raidinit __P((RF_Raid_t *));
184
185 void raidattach __P((int));
186 int raidsize __P((dev_t));
187 int raidopen __P((dev_t, int, int, struct proc *));
188 int raidclose __P((dev_t, int, int, struct proc *));
189 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
190 int raidwrite __P((dev_t, struct uio *, int));
191 int raidread __P((dev_t, struct uio *, int));
192 void raidstrategy __P((struct buf *));
193 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
194
195 /*
196 * Pilfered from ccd.c
197 */
198
199 struct raidbuf {
200 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
201 struct buf *rf_obp; /* ptr. to original I/O buf */
202 int rf_flags; /* misc. flags */
203 RF_DiskQueueData_t *req;/* the request that this was part of.. */
204 };
205
206
207 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
208 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
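
/*
 * Rough usage sketch (illustrative only; see rf_DispatchKernelIO() and
 * KernelWakeupFunc() below for the real thing): a raidbuf is taken from
 * the per-unit pool when a component I/O is dispatched and returned in
 * the completion path, roughly as:
 *
 *	struct raidbuf *raidbp;
 *
 *	raidbp = RAIDGETBUF(rs);	-- may be NULL (PR_NOWAIT)
 *	raidbp->rf_obp = bp;		-- remember the original buf
 *	raidbp->req = req;
 *	... issue the I/O on &raidbp->rf_buf ...
 *	RAIDPUTBUF(rs, raidbp);		-- from the iodone callback
 */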
209
210 /* XXX Not sure if the following should be replacing the raidPtrs above,
211 or if it should be used in conjunction with that...
212 */
213
214 struct raid_softc {
215 int sc_flags; /* flags */
216 int sc_cflags; /* configuration flags */
217 size_t sc_size; /* size of the raid device */
218 char sc_xname[20]; /* XXX external name */
219 struct disk sc_dkdev; /* generic disk device info */
220 struct pool sc_cbufpool; /* component buffer pool */
221 struct buf_queue buf_queue; /* used for the device queue */
222 };
223 /* sc_flags */
224 #define RAIDF_INITED 0x01 /* unit has been initialized */
225 #define RAIDF_WLABEL 0x02 /* label area is writable */
226 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
227 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
228 #define RAIDF_LOCKED 0x80 /* unit is locked */
229
230 #define raidunit(x) DISKUNIT(x)
231 int numraid = 0;
232
233 /*
234 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
235 * Be aware that large numbers can allow the driver to consume a lot of
236 * kernel memory, especially on writes, and in degraded mode reads.
237 *
238 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
239 * a single 64K write will typically require 64K for the old data,
240 * 64K for the old parity, and 64K for the new parity, for a total
241 * of 192K (if the parity buffer is not re-used immediately).
242 * Even if it is used immediately, that's still 128K, which when multiplied
243 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
244 *
245 * Now in degraded mode, for example, a 64K read on the above setup may
246 * require data reconstruction, which will require *all* of the 4 remaining
247 * disks to participate -- 4 * 32K/disk == 128K again.
248 */
249
250 #ifndef RAIDOUTSTANDING
251 #define RAIDOUTSTANDING 6
252 #endif
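
/*
 * Rule of thumb distilled from the example above (illustrative only):
 * each outstanding small write can tie up roughly two to three times
 * its own size in extra buffers (old data, old parity, new parity), so
 * the worst case is on the order of
 *
 *	RAIDOUTSTANDING * 3 * (request size)   plus the incoming data,
 *
 * e.g. 10 outstanding 64K writes: 10 * 192K + 640K, or about 2.5MB.
 */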
253
254 #define RAIDLABELDEV(dev) \
255 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
256
257 /* declared here, and made public, for the benefit of KVM stuff.. */
258 struct raid_softc *raid_softc;
259
260 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
261 struct disklabel *));
262 static void raidgetdisklabel __P((dev_t));
263 static void raidmakedisklabel __P((struct raid_softc *));
264
265 static int raidlock __P((struct raid_softc *));
266 static void raidunlock __P((struct raid_softc *));
267
268 static void rf_markalldirty __P((RF_Raid_t *));
269 void rf_mountroot_hook __P((struct device *));
270
271 struct device *raidrootdev;
272
273 void rf_ReconThread __P((struct rf_recon_req *));
274 /* XXX what I want is: */
275 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
276 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
277 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
278 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
279 void rf_buildroothack __P((void *));
280
281 RF_AutoConfig_t *rf_find_raid_components __P((void));
282 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
283 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
284 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
285 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
286 RF_Raid_t *));
287 int rf_set_autoconfig __P((RF_Raid_t *, int));
288 int rf_set_rootpartition __P((RF_Raid_t *, int));
289 void rf_release_all_vps __P((RF_ConfigSet_t *));
290 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
291 int rf_have_enough_components __P((RF_ConfigSet_t *));
292 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
293
294 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
295 allow autoconfig to take place.
296 Note that this is overridden by having
297 RAID_AUTOCONFIG as an option in the
298 kernel config file. */
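
/*
 * Example (kernel configuration file), using the option named above:
 *
 *	options 	RAID_AUTOCONFIG
 */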
299
300 void
301 raidattach(num)
302 int num;
303 {
304 int raidID;
305 int i, rc;
306 RF_AutoConfig_t *ac_list; /* autoconfig list */
307 RF_ConfigSet_t *config_sets;
308
309 #ifdef DEBUG
310 printf("raidattach: Asked for %d units\n", num);
311 #endif
312
313 if (num <= 0) {
314 #ifdef DIAGNOSTIC
315 panic("raidattach: count <= 0");
316 #endif
317 return;
318 }
319 /* This is where all the initialization stuff gets done. */
320
321 numraid = num;
322
323 /* Make some space for requested number of units... */
324
325 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
326 if (raidPtrs == NULL) {
327 panic("raidPtrs is NULL!!\n");
328 }
329
330 rc = rf_mutex_init(&rf_sparet_wait_mutex);
331 if (rc) {
332 RF_PANIC();
333 }
334
335 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
336
337 for (i = 0; i < num; i++)
338 raidPtrs[i] = NULL;
339 rc = rf_BootRaidframe();
340 if (rc == 0)
341 printf("Kernelized RAIDframe activated\n");
342 else
343 panic("Serious error booting RAID!!\n");
344
345 /* put together some datastructures like the CCD device does.. This
346 * lets us lock the device and what-not when it gets opened. */
347
348 raid_softc = (struct raid_softc *)
349 malloc(num * sizeof(struct raid_softc),
350 M_RAIDFRAME, M_NOWAIT);
351 if (raid_softc == NULL) {
352 printf("WARNING: no memory for RAIDframe driver\n");
353 return;
354 }
355
356 bzero(raid_softc, num * sizeof(struct raid_softc));
357
358 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
359 M_RAIDFRAME, M_NOWAIT);
360 if (raidrootdev == NULL) {
361 panic("No memory for RAIDframe driver!!?!?!\n");
362 }
363
364 for (raidID = 0; raidID < num; raidID++) {
365 BUFQ_INIT(&raid_softc[raidID].buf_queue);
366
367 raidrootdev[raidID].dv_class = DV_DISK;
368 raidrootdev[raidID].dv_cfdata = NULL;
369 raidrootdev[raidID].dv_unit = raidID;
370 raidrootdev[raidID].dv_parent = NULL;
371 raidrootdev[raidID].dv_flags = 0;
372 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
373
374 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
375 (RF_Raid_t *));
376 if (raidPtrs[raidID] == NULL) {
377 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
378 numraid = raidID;
379 return;
380 }
381 }
382
383 #if RAID_AUTOCONFIG
384 raidautoconfig = 1;
385 #endif
386
387 if (raidautoconfig) {
388 /* 1. locate all RAID components on the system */
389
390 #if DEBUG
391 printf("Searching for raid components...\n");
392 #endif
393 ac_list = rf_find_raid_components();
394
395 /* 2. sort them into their respective sets */
396
397 config_sets = rf_create_auto_sets(ac_list);
398
399 /* 3. evaluate each set and configure the valid ones
400 This gets done in rf_buildroothack() */
401
402 /* schedule the creation of the thread to do the
403 "/ on RAID" stuff */
404
405 kthread_create(rf_buildroothack,config_sets);
406
407 #if 0
408 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
409 #endif
410 }
411
412 }
413
414 void
415 rf_buildroothack(arg)
416 void *arg;
417 {
418 RF_ConfigSet_t *config_sets = arg;
419 RF_ConfigSet_t *cset;
420 RF_ConfigSet_t *next_cset;
421 int retcode;
422 int raidID;
423 int rootID;
424 int num_root;
425
426 num_root = 0;
427 cset = config_sets;
428 while(cset != NULL ) {
429 next_cset = cset->next;
430 if (rf_have_enough_components(cset) &&
431 cset->ac->clabel->autoconfigure==1) {
432 retcode = rf_auto_config_set(cset,&raidID);
433 if (!retcode) {
434 if (cset->rootable) {
435 rootID = raidID;
436 num_root++;
437 }
438 } else {
439 /* The autoconfig didn't work :( */
440 #if DEBUG
441 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
442 #endif
443 rf_release_all_vps(cset);
444 }
445 } else {
446 /* we're not autoconfiguring this set...
447 release the associated resources */
448 rf_release_all_vps(cset);
449 }
450 /* cleanup */
451 rf_cleanup_config_set(cset);
452 cset = next_cset;
453 }
454 if (boothowto & RB_ASKNAME) {
455 /* We don't auto-config... */
456 } else {
457 /* They didn't ask, and we found something bootable... */
458
459 if (num_root == 1) {
460 booted_device = &raidrootdev[rootID];
461 } else if (num_root > 1) {
462 /* we can't guess.. require the user to answer... */
463 boothowto |= RB_ASKNAME;
464 }
465 }
466 }
467
468
469 int
470 raidsize(dev)
471 dev_t dev;
472 {
473 struct raid_softc *rs;
474 struct disklabel *lp;
475 int part, unit, omask, size;
476
477 unit = raidunit(dev);
478 if (unit >= numraid)
479 return (-1);
480 rs = &raid_softc[unit];
481
482 if ((rs->sc_flags & RAIDF_INITED) == 0)
483 return (-1);
484
485 part = DISKPART(dev);
486 omask = rs->sc_dkdev.dk_openmask & (1 << part);
487 lp = rs->sc_dkdev.dk_label;
488
489 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
490 return (-1);
491
492 if (lp->d_partitions[part].p_fstype != FS_SWAP)
493 size = -1;
494 else
495 size = lp->d_partitions[part].p_size *
496 (lp->d_secsize / DEV_BSIZE);
497
498 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
499 return (-1);
500
501 return (size);
502
503 }
504
505 int
506 raiddump(dev, blkno, va, size)
507 dev_t dev;
508 daddr_t blkno;
509 caddr_t va;
510 size_t size;
511 {
512 /* Not implemented. */
513 return ENXIO;
514 }
515 /* ARGSUSED */
516 int
517 raidopen(dev, flags, fmt, p)
518 dev_t dev;
519 int flags, fmt;
520 struct proc *p;
521 {
522 int unit = raidunit(dev);
523 struct raid_softc *rs;
524 struct disklabel *lp;
525 int part, pmask;
526 int error = 0;
527
528 if (unit >= numraid)
529 return (ENXIO);
530 rs = &raid_softc[unit];
531
532 if ((error = raidlock(rs)) != 0)
533 return (error);
534 lp = rs->sc_dkdev.dk_label;
535
536 part = DISKPART(dev);
537 pmask = (1 << part);
538
539 db1_printf(("Opening raid device number: %d partition: %d\n",
540 unit, part));
541
542
543 if ((rs->sc_flags & RAIDF_INITED) &&
544 (rs->sc_dkdev.dk_openmask == 0))
545 raidgetdisklabel(dev);
546
547 /* make sure that this partition exists */
548
549 if (part != RAW_PART) {
550 db1_printf(("Not a raw partition..\n"));
551 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
552 ((part >= lp->d_npartitions) ||
553 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
554 error = ENXIO;
555 raidunlock(rs);
556 db1_printf(("Bailing out...\n"));
557 return (error);
558 }
559 }
560 /* Prevent this unit from being unconfigured while open. */
561 switch (fmt) {
562 case S_IFCHR:
563 rs->sc_dkdev.dk_copenmask |= pmask;
564 break;
565
566 case S_IFBLK:
567 rs->sc_dkdev.dk_bopenmask |= pmask;
568 break;
569 }
570
571 if ((rs->sc_dkdev.dk_openmask == 0) &&
572 ((rs->sc_flags & RAIDF_INITED) != 0)) {
573 /* First one... mark things as dirty... Note that we *MUST*
574 have done a configure before this. I DO NOT WANT TO BE
575 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
576 THAT THEY BELONG TOGETHER!!!!! */
577 /* XXX should check to see if we're only open for reading
578 here... If so, we needn't do this, but then need some
579 other way of keeping track of what's happened.. */
580
581 rf_markalldirty( raidPtrs[unit] );
582 }
583
584
585 rs->sc_dkdev.dk_openmask =
586 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
587
588 raidunlock(rs);
589
590 return (error);
591
592
593 }
594 /* ARGSUSED */
595 int
596 raidclose(dev, flags, fmt, p)
597 dev_t dev;
598 int flags, fmt;
599 struct proc *p;
600 {
601 int unit = raidunit(dev);
602 struct raid_softc *rs;
603 int error = 0;
604 int part;
605
606 if (unit >= numraid)
607 return (ENXIO);
608 rs = &raid_softc[unit];
609
610 if ((error = raidlock(rs)) != 0)
611 return (error);
612
613 part = DISKPART(dev);
614
615 /* ...that much closer to allowing unconfiguration... */
616 switch (fmt) {
617 case S_IFCHR:
618 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
619 break;
620
621 case S_IFBLK:
622 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
623 break;
624 }
625 rs->sc_dkdev.dk_openmask =
626 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
627
628 if ((rs->sc_dkdev.dk_openmask == 0) &&
629 ((rs->sc_flags & RAIDF_INITED) != 0)) {
630 /* Last one... device is not unconfigured yet.
631 Mark things as clean here; if RAIDF_INITED were
632 not set, device shutdown would already have
633 taken care of setting the clean bits. */
634 #if 0
635 printf("Last one on raid%d. Updating status.\n",unit);
636 #endif
637 rf_update_component_labels(raidPtrs[unit],
638 RF_FINAL_COMPONENT_UPDATE);
639 }
640
641 raidunlock(rs);
642 return (0);
643
644 }
645
646 void
647 raidstrategy(bp)
648 struct buf *bp;
649 {
650 int s;
651
652 unsigned int raidID = raidunit(bp->b_dev);
653 RF_Raid_t *raidPtr;
654 struct raid_softc *rs = &raid_softc[raidID];
655 struct disklabel *lp;
656 int wlabel;
657
658 if ((rs->sc_flags & RAIDF_INITED) == 0) {
659 bp->b_error = ENXIO;
660 bp->b_flags |= B_ERROR;
661 bp->b_resid = bp->b_bcount;
662 biodone(bp);
663 return;
664 }
665 if (raidID >= numraid || !raidPtrs[raidID]) {
666 bp->b_error = ENODEV;
667 bp->b_flags |= B_ERROR;
668 bp->b_resid = bp->b_bcount;
669 biodone(bp);
670 return;
671 }
672 raidPtr = raidPtrs[raidID];
673 if (!raidPtr->valid) {
674 bp->b_error = ENODEV;
675 bp->b_flags |= B_ERROR;
676 bp->b_resid = bp->b_bcount;
677 biodone(bp);
678 return;
679 }
680 if (bp->b_bcount == 0) {
681 db1_printf(("b_bcount is zero..\n"));
682 biodone(bp);
683 return;
684 }
685 lp = rs->sc_dkdev.dk_label;
686
687 /*
688 * Do bounds checking and adjust transfer. If there's an
689 * error, the bounds check will flag that for us.
690 */
691
692 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
693 if (DISKPART(bp->b_dev) != RAW_PART)
694 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
695 db1_printf(("Bounds check failed!!:%d %d\n",
696 (int) bp->b_blkno, (int) wlabel));
697 biodone(bp);
698 return;
699 }
700 s = splbio();
701
702 bp->b_resid = 0;
703
704 /* stuff it onto our queue */
705 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
706
707 raidstart(raidPtrs[raidID]);
708
709 splx(s);
710 }
711 /* ARGSUSED */
712 int
713 raidread(dev, uio, flags)
714 dev_t dev;
715 struct uio *uio;
716 int flags;
717 {
718 int unit = raidunit(dev);
719 struct raid_softc *rs;
720 int part;
721
722 if (unit >= numraid)
723 return (ENXIO);
724 rs = &raid_softc[unit];
725
726 if ((rs->sc_flags & RAIDF_INITED) == 0)
727 return (ENXIO);
728 part = DISKPART(dev);
729
730 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
731
732 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
733
734 }
735 /* ARGSUSED */
736 int
737 raidwrite(dev, uio, flags)
738 dev_t dev;
739 struct uio *uio;
740 int flags;
741 {
742 int unit = raidunit(dev);
743 struct raid_softc *rs;
744
745 if (unit >= numraid)
746 return (ENXIO);
747 rs = &raid_softc[unit];
748
749 if ((rs->sc_flags & RAIDF_INITED) == 0)
750 return (ENXIO);
751 db1_printf(("raidwrite\n"));
752 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
753
754 }
755
756 int
757 raidioctl(dev, cmd, data, flag, p)
758 dev_t dev;
759 u_long cmd;
760 caddr_t data;
761 int flag;
762 struct proc *p;
763 {
764 int unit = raidunit(dev);
765 int error = 0;
766 int part, pmask;
767 struct raid_softc *rs;
768 RF_Config_t *k_cfg, *u_cfg;
769 RF_Raid_t *raidPtr;
770 RF_RaidDisk_t *diskPtr;
771 RF_AccTotals_t *totals;
772 RF_DeviceConfig_t *d_cfg, **ucfgp;
773 u_char *specific_buf;
774 int retcode = 0;
775 int row;
776 int column;
777 struct rf_recon_req *rrcopy, *rr;
778 RF_ComponentLabel_t *clabel;
779 RF_ComponentLabel_t ci_label;
780 RF_ComponentLabel_t **clabel_ptr;
781 RF_SingleComponent_t *sparePtr,*componentPtr;
782 RF_SingleComponent_t hot_spare;
783 RF_SingleComponent_t component;
784 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
785 int i, j, d;
786 #ifdef __HAVE_OLD_DISKLABEL
787 struct disklabel newlabel;
788 #endif
789
790 if (unit >= numraid)
791 return (ENXIO);
792 rs = &raid_softc[unit];
793 raidPtr = raidPtrs[unit];
794
795 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
796 (int) DISKPART(dev), (int) unit, (int) cmd));
797
798 /* Must be open for writes for these commands... */
799 switch (cmd) {
800 case DIOCSDINFO:
801 case DIOCWDINFO:
802 #ifdef __HAVE_OLD_DISKLABEL
803 case ODIOCWDINFO:
804 case ODIOCSDINFO:
805 #endif
806 case DIOCWLABEL:
807 if ((flag & FWRITE) == 0)
808 return (EBADF);
809 }
810
811 /* Must be initialized for these... */
812 switch (cmd) {
813 case DIOCGDINFO:
814 case DIOCSDINFO:
815 case DIOCWDINFO:
816 #ifdef __HAVE_OLD_DISKLABEL
817 case ODIOCGDINFO:
818 case ODIOCWDINFO:
819 case ODIOCSDINFO:
820 case ODIOCGDEFLABEL:
821 #endif
822 case DIOCGPART:
823 case DIOCWLABEL:
824 case DIOCGDEFLABEL:
825 case RAIDFRAME_SHUTDOWN:
826 case RAIDFRAME_REWRITEPARITY:
827 case RAIDFRAME_GET_INFO:
828 case RAIDFRAME_RESET_ACCTOTALS:
829 case RAIDFRAME_GET_ACCTOTALS:
830 case RAIDFRAME_KEEP_ACCTOTALS:
831 case RAIDFRAME_GET_SIZE:
832 case RAIDFRAME_FAIL_DISK:
833 case RAIDFRAME_COPYBACK:
834 case RAIDFRAME_CHECK_RECON_STATUS:
835 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
836 case RAIDFRAME_GET_COMPONENT_LABEL:
837 case RAIDFRAME_SET_COMPONENT_LABEL:
838 case RAIDFRAME_ADD_HOT_SPARE:
839 case RAIDFRAME_REMOVE_HOT_SPARE:
840 case RAIDFRAME_INIT_LABELS:
841 case RAIDFRAME_REBUILD_IN_PLACE:
842 case RAIDFRAME_CHECK_PARITY:
843 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
844 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
845 case RAIDFRAME_CHECK_COPYBACK_STATUS:
846 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
847 case RAIDFRAME_SET_AUTOCONFIG:
848 case RAIDFRAME_SET_ROOT:
849 case RAIDFRAME_DELETE_COMPONENT:
850 case RAIDFRAME_INCORPORATE_HOT_SPARE:
851 if ((rs->sc_flags & RAIDF_INITED) == 0)
852 return (ENXIO);
853 }
854
855 switch (cmd) {
856
857 /* configure the system */
858 case RAIDFRAME_CONFIGURE:
859
860 if (raidPtr->valid) {
861 /* There is a valid RAID set running on this unit! */
862 printf("raid%d: Device already configured!\n",unit);
863 return(EINVAL);
864 }
865
866 /* copy-in the configuration information */
867 /* data points to a pointer to the configuration structure */
868
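		/* Userland side, sketched for illustration (raidctl(8) is
		 * the real consumer): the ioctl argument is a pointer to
		 * the config pointer, i.e. roughly
		 *
		 *	RF_Config_t cfg, *cfgp = &cfg;
		 *	... fill in cfg ...
		 *	ioctl(fd, RAIDFRAME_CONFIGURE, &cfgp);
		 */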
869 u_cfg = *((RF_Config_t **) data);
870 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
871 if (k_cfg == NULL) {
872 return (ENOMEM);
873 }
874 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
875 sizeof(RF_Config_t));
876 if (retcode) {
877 RF_Free(k_cfg, sizeof(RF_Config_t));
878 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
879 retcode));
880 return (retcode);
881 }
882 /* allocate a buffer for the layout-specific data, and copy it
883 * in */
884 if (k_cfg->layoutSpecificSize) {
885 if (k_cfg->layoutSpecificSize > 10000) {
886 /* sanity check */
887 RF_Free(k_cfg, sizeof(RF_Config_t));
888 return (EINVAL);
889 }
890 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
891 (u_char *));
892 if (specific_buf == NULL) {
893 RF_Free(k_cfg, sizeof(RF_Config_t));
894 return (ENOMEM);
895 }
896 retcode = copyin(k_cfg->layoutSpecific,
897 (caddr_t) specific_buf,
898 k_cfg->layoutSpecificSize);
899 if (retcode) {
900 RF_Free(k_cfg, sizeof(RF_Config_t));
901 RF_Free(specific_buf,
902 k_cfg->layoutSpecificSize);
903 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
904 retcode));
905 return (retcode);
906 }
907 } else
908 specific_buf = NULL;
909 k_cfg->layoutSpecific = specific_buf;
910
911 /* should do some kind of sanity check on the configuration.
912 * Store the sum of all the bytes in the last byte? */
913
914 /* configure the system */
915
916 /*
917 * Clear the entire RAID descriptor, just to make sure
918 * there is no stale data left in the case of a
919 * reconfiguration
920 */
921 bzero((char *) raidPtr, sizeof(RF_Raid_t));
922 raidPtr->raidid = unit;
923
924 retcode = rf_Configure(raidPtr, k_cfg, NULL);
925
926 if (retcode == 0) {
927
928 /* allow this many simultaneous IO's to
929 this RAID device */
930 raidPtr->openings = RAIDOUTSTANDING;
931
932 raidinit(raidPtr);
933 rf_markalldirty(raidPtr);
934 }
935 /* free the buffers. No return code here. */
936 if (k_cfg->layoutSpecificSize) {
937 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
938 }
939 RF_Free(k_cfg, sizeof(RF_Config_t));
940
941 return (retcode);
942
943 /* shutdown the system */
944 case RAIDFRAME_SHUTDOWN:
945
946 if ((error = raidlock(rs)) != 0)
947 return (error);
948
949 /*
950 * If somebody has a partition mounted, we shouldn't
951 * shutdown.
952 */
953
954 part = DISKPART(dev);
955 pmask = (1 << part);
956 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
957 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
958 (rs->sc_dkdev.dk_copenmask & pmask))) {
959 raidunlock(rs);
960 return (EBUSY);
961 }
962
963 retcode = rf_Shutdown(raidPtr);
964
965 pool_destroy(&rs->sc_cbufpool);
966
967 /* It's no longer initialized... */
968 rs->sc_flags &= ~RAIDF_INITED;
969
970 /* Detach the disk. */
971 disk_detach(&rs->sc_dkdev);
972
973 raidunlock(rs);
974
975 return (retcode);
976 case RAIDFRAME_GET_COMPONENT_LABEL:
977 clabel_ptr = (RF_ComponentLabel_t **) data;
978 /* need to read the component label for the disk indicated
979 by row,column in clabel */
980
981 /* For practice, let's get it directly from disk, rather
982 than from the in-core copy */
983 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
984 (RF_ComponentLabel_t *));
985 if (clabel == NULL)
986 return (ENOMEM);
987
988 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
989
990 retcode = copyin( *clabel_ptr, clabel,
991 sizeof(RF_ComponentLabel_t));
992
993 if (retcode) {
994 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
995 return(retcode);
996 }
997
998 row = clabel->row;
999 column = clabel->column;
1000
1001 if ((row < 0) || (row >= raidPtr->numRow) ||
1002 (column < 0) || (column >= raidPtr->numCol +
1003 raidPtr->numSpare)) {
1004 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1005 return(EINVAL);
1006 }
1007
1008 raidread_component_label(raidPtr->Disks[row][column].dev,
1009 raidPtr->raid_cinfo[row][column].ci_vp,
1010 clabel );
1011
1012 retcode = copyout((caddr_t) clabel,
1013 (caddr_t) *clabel_ptr,
1014 sizeof(RF_ComponentLabel_t));
1015 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1016 return (retcode);
1017
1018 case RAIDFRAME_SET_COMPONENT_LABEL:
1019 clabel = (RF_ComponentLabel_t *) data;
1020
1021 /* XXX check the label for valid stuff... */
1022 /* Note that some things *should not* get modified --
1023 the user should be re-initing the labels instead of
1024 trying to patch things.
1025 */
1026
1027 printf("Got component label:\n");
1028 printf("Version: %d\n",clabel->version);
1029 printf("Serial Number: %d\n",clabel->serial_number);
1030 printf("Mod counter: %d\n",clabel->mod_counter);
1031 printf("Row: %d\n", clabel->row);
1032 printf("Column: %d\n", clabel->column);
1033 printf("Num Rows: %d\n", clabel->num_rows);
1034 printf("Num Columns: %d\n", clabel->num_columns);
1035 printf("Clean: %d\n", clabel->clean);
1036 printf("Status: %d\n", clabel->status);
1037
1038 row = clabel->row;
1039 column = clabel->column;
1040
1041 if ((row < 0) || (row >= raidPtr->numRow) ||
1042 (column < 0) || (column >= raidPtr->numCol)) {
1043 return(EINVAL);
1044 }
1045
1046 /* XXX this isn't allowed to do anything for now :-) */
1047
1048 /* XXX and before it is, we need to fill in the rest
1049 of the fields!?!?!?! */
1050 #if 0
1051 raidwrite_component_label(
1052 raidPtr->Disks[row][column].dev,
1053 raidPtr->raid_cinfo[row][column].ci_vp,
1054 clabel );
1055 #endif
1056 return (0);
1057
1058 case RAIDFRAME_INIT_LABELS:
1059 clabel = (RF_ComponentLabel_t *) data;
1060 /*
1061 we only want the serial number from
1062 the above. We get all the rest of the information
1063 from the config that was used to create this RAID
1064 set.
1065 */
1066
1067 raidPtr->serial_number = clabel->serial_number;
1068
1069 raid_init_component_label(raidPtr, &ci_label);
1070 ci_label.serial_number = clabel->serial_number;
1071
1072 for(row=0;row<raidPtr->numRow;row++) {
1073 ci_label.row = row;
1074 for(column=0;column<raidPtr->numCol;column++) {
1075 diskPtr = &raidPtr->Disks[row][column];
1076 if (!RF_DEAD_DISK(diskPtr->status)) {
1077 ci_label.partitionSize = diskPtr->partitionSize;
1078 ci_label.column = column;
1079 raidwrite_component_label(
1080 raidPtr->Disks[row][column].dev,
1081 raidPtr->raid_cinfo[row][column].ci_vp,
1082 &ci_label );
1083 }
1084 }
1085 }
1086
1087 return (retcode);
1088 case RAIDFRAME_SET_AUTOCONFIG:
1089 d = rf_set_autoconfig(raidPtr, *(int *) data);
1090 printf("New autoconfig value is: %d\n", d);
1091 *(int *) data = d;
1092 return (retcode);
1093
1094 case RAIDFRAME_SET_ROOT:
1095 d = rf_set_rootpartition(raidPtr, *(int *) data);
1096 printf("New rootpartition value is: %d\n", d);
1097 *(int *) data = d;
1098 return (retcode);
1099
1100 /* initialize all parity */
1101 case RAIDFRAME_REWRITEPARITY:
1102
1103 if (raidPtr->Layout.map->faultsTolerated == 0) {
1104 /* Parity for RAID 0 is trivially correct */
1105 raidPtr->parity_good = RF_RAID_CLEAN;
1106 return(0);
1107 }
1108
1109 if (raidPtr->parity_rewrite_in_progress == 1) {
1110 /* Re-write is already in progress! */
1111 return(EINVAL);
1112 }
1113
1114 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1115 rf_RewriteParityThread,
1116 raidPtr,"raid_parity");
1117 return (retcode);
1118
1119
1120 case RAIDFRAME_ADD_HOT_SPARE:
1121 sparePtr = (RF_SingleComponent_t *) data;
1122 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1123 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1124 return(retcode);
1125
1126 case RAIDFRAME_REMOVE_HOT_SPARE:
1127 return(retcode);
1128
1129 case RAIDFRAME_DELETE_COMPONENT:
1130 componentPtr = (RF_SingleComponent_t *)data;
1131 memcpy( &component, componentPtr,
1132 sizeof(RF_SingleComponent_t));
1133 retcode = rf_delete_component(raidPtr, &component);
1134 return(retcode);
1135
1136 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1137 componentPtr = (RF_SingleComponent_t *)data;
1138 memcpy( &component, componentPtr,
1139 sizeof(RF_SingleComponent_t));
1140 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1141 return(retcode);
1142
1143 case RAIDFRAME_REBUILD_IN_PLACE:
1144
1145 if (raidPtr->Layout.map->faultsTolerated == 0) {
1146 /* Can't do this on a RAID 0!! */
1147 return(EINVAL);
1148 }
1149
1150 if (raidPtr->recon_in_progress == 1) {
1151 /* a reconstruct is already in progress! */
1152 return(EINVAL);
1153 }
1154
1155 componentPtr = (RF_SingleComponent_t *) data;
1156 memcpy( &component, componentPtr,
1157 sizeof(RF_SingleComponent_t));
1158 row = component.row;
1159 column = component.column;
1160 printf("Rebuild: %d %d\n",row, column);
1161 if ((row < 0) || (row >= raidPtr->numRow) ||
1162 (column < 0) || (column >= raidPtr->numCol)) {
1163 return(EINVAL);
1164 }
1165
1166 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1167 if (rrcopy == NULL)
1168 return(ENOMEM);
1169
1170 rrcopy->raidPtr = (void *) raidPtr;
1171 rrcopy->row = row;
1172 rrcopy->col = column;
1173
1174 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1175 rf_ReconstructInPlaceThread,
1176 rrcopy,"raid_reconip");
1177 return(retcode);
1178
1179 case RAIDFRAME_GET_INFO:
1180 if (!raidPtr->valid)
1181 return (ENODEV);
1182 ucfgp = (RF_DeviceConfig_t **) data;
1183 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1184 (RF_DeviceConfig_t *));
1185 if (d_cfg == NULL)
1186 return (ENOMEM);
1187 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1188 d_cfg->rows = raidPtr->numRow;
1189 d_cfg->cols = raidPtr->numCol;
1190 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1191 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1192 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1193 return (ENOMEM);
1194 }
1195 d_cfg->nspares = raidPtr->numSpare;
1196 if (d_cfg->nspares >= RF_MAX_DISKS) {
1197 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1198 return (ENOMEM);
1199 }
1200 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1201 d = 0;
1202 for (i = 0; i < d_cfg->rows; i++) {
1203 for (j = 0; j < d_cfg->cols; j++) {
1204 d_cfg->devs[d] = raidPtr->Disks[i][j];
1205 d++;
1206 }
1207 }
1208 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1209 d_cfg->spares[i] = raidPtr->Disks[0][j];
1210 }
1211 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1212 sizeof(RF_DeviceConfig_t));
1213 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1214
1215 return (retcode);
1216
1217 case RAIDFRAME_CHECK_PARITY:
1218 *(int *) data = raidPtr->parity_good;
1219 return (0);
1220
1221 case RAIDFRAME_RESET_ACCTOTALS:
1222 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1223 return (0);
1224
1225 case RAIDFRAME_GET_ACCTOTALS:
1226 totals = (RF_AccTotals_t *) data;
1227 *totals = raidPtr->acc_totals;
1228 return (0);
1229
1230 case RAIDFRAME_KEEP_ACCTOTALS:
1231 raidPtr->keep_acc_totals = *(int *)data;
1232 return (0);
1233
1234 case RAIDFRAME_GET_SIZE:
1235 *(int *) data = raidPtr->totalSectors;
1236 return (0);
1237
1238 /* fail a disk & optionally start reconstruction */
1239 case RAIDFRAME_FAIL_DISK:
1240
1241 if (raidPtr->Layout.map->faultsTolerated == 0) {
1242 /* Can't do this on a RAID 0!! */
1243 return(EINVAL);
1244 }
1245
1246 rr = (struct rf_recon_req *) data;
1247
1248 if (rr->row < 0 || rr->row >= raidPtr->numRow
1249 || rr->col < 0 || rr->col >= raidPtr->numCol)
1250 return (EINVAL);
1251
1252 printf("raid%d: Failing the disk: row: %d col: %d\n",
1253 unit, rr->row, rr->col);
1254
1255 /* make a copy of the recon request so that we don't rely on
1256 * the user's buffer */
1257 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1258 if (rrcopy == NULL)
1259 return(ENOMEM);
1260 bcopy(rr, rrcopy, sizeof(*rr));
1261 rrcopy->raidPtr = (void *) raidPtr;
1262
1263 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1264 rf_ReconThread,
1265 rrcopy,"raid_recon");
1266 return (0);
1267
1268 /* invoke a copyback operation after recon on whatever disk
1269 * needs it, if any */
1270 case RAIDFRAME_COPYBACK:
1271
1272 if (raidPtr->Layout.map->faultsTolerated == 0) {
1273 /* This makes no sense on a RAID 0!! */
1274 return(EINVAL);
1275 }
1276
1277 if (raidPtr->copyback_in_progress == 1) {
1278 /* Copyback is already in progress! */
1279 return(EINVAL);
1280 }
1281
1282 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1283 rf_CopybackThread,
1284 raidPtr,"raid_copyback");
1285 return (retcode);
1286
1287 /* return the percentage completion of reconstruction */
1288 case RAIDFRAME_CHECK_RECON_STATUS:
1289 if (raidPtr->Layout.map->faultsTolerated == 0) {
1290 /* This makes no sense on a RAID 0, so tell the
1291 user it's done. */
1292 *(int *) data = 100;
1293 return(0);
1294 }
1295 row = 0; /* XXX we only consider a single row... */
1296 if (raidPtr->status[row] != rf_rs_reconstructing)
1297 *(int *) data = 100;
1298 else
1299 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1300 return (0);
1301 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1302 progressInfoPtr = (RF_ProgressInfo_t **) data;
1303 row = 0; /* XXX we only consider a single row... */
1304 if (raidPtr->status[row] != rf_rs_reconstructing) {
1305 progressInfo.remaining = 0;
1306 progressInfo.completed = 100;
1307 progressInfo.total = 100;
1308 } else {
1309 progressInfo.total =
1310 raidPtr->reconControl[row]->numRUsTotal;
1311 progressInfo.completed =
1312 raidPtr->reconControl[row]->numRUsComplete;
1313 progressInfo.remaining = progressInfo.total -
1314 progressInfo.completed;
1315 }
1316 retcode = copyout((caddr_t) &progressInfo,
1317 (caddr_t) *progressInfoPtr,
1318 sizeof(RF_ProgressInfo_t));
1319 return (retcode);
1320
1321 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1322 if (raidPtr->Layout.map->faultsTolerated == 0) {
1323 /* This makes no sense on a RAID 0, so tell the
1324 user it's done. */
1325 *(int *) data = 100;
1326 return(0);
1327 }
1328 if (raidPtr->parity_rewrite_in_progress == 1) {
1329 *(int *) data = 100 *
1330 raidPtr->parity_rewrite_stripes_done /
1331 raidPtr->Layout.numStripe;
1332 } else {
1333 *(int *) data = 100;
1334 }
1335 return (0);
1336
1337 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1338 progressInfoPtr = (RF_ProgressInfo_t **) data;
1339 if (raidPtr->parity_rewrite_in_progress == 1) {
1340 progressInfo.total = raidPtr->Layout.numStripe;
1341 progressInfo.completed =
1342 raidPtr->parity_rewrite_stripes_done;
1343 progressInfo.remaining = progressInfo.total -
1344 progressInfo.completed;
1345 } else {
1346 progressInfo.remaining = 0;
1347 progressInfo.completed = 100;
1348 progressInfo.total = 100;
1349 }
1350 retcode = copyout((caddr_t) &progressInfo,
1351 (caddr_t) *progressInfoPtr,
1352 sizeof(RF_ProgressInfo_t));
1353 return (retcode);
1354
1355 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1356 if (raidPtr->Layout.map->faultsTolerated == 0) {
1357 /* This makes no sense on a RAID 0 */
1358 *(int *) data = 100;
1359 return(0);
1360 }
1361 if (raidPtr->copyback_in_progress == 1) {
1362 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1363 raidPtr->Layout.numStripe;
1364 } else {
1365 *(int *) data = 100;
1366 }
1367 return (0);
1368
1369 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1370 progressInfoPtr = (RF_ProgressInfo_t **) data;
1371 if (raidPtr->copyback_in_progress == 1) {
1372 progressInfo.total = raidPtr->Layout.numStripe;
1373 progressInfo.completed =
1374 raidPtr->copyback_stripes_done;
1375 progressInfo.remaining = progressInfo.total -
1376 progressInfo.completed;
1377 } else {
1378 progressInfo.remaining = 0;
1379 progressInfo.completed = 100;
1380 progressInfo.total = 100;
1381 }
1382 retcode = copyout((caddr_t) &progressInfo,
1383 (caddr_t) *progressInfoPtr,
1384 sizeof(RF_ProgressInfo_t));
1385 return (retcode);
1386
1387 /* the sparetable daemon calls this to wait for the kernel to
1388 * need a spare table. this ioctl does not return until a
1389 * spare table is needed. XXX -- calling mpsleep here in the
1390 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1391 * -- I should either compute the spare table in the kernel,
1392 * or have a different -- XXX XXX -- interface (a different
1393 * character device) for delivering the table -- XXX */
1394 #if 0
1395 case RAIDFRAME_SPARET_WAIT:
1396 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1397 while (!rf_sparet_wait_queue)
1398 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1399 waitreq = rf_sparet_wait_queue;
1400 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1401 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1402
1403 /* structure assignment */
1404 *((RF_SparetWait_t *) data) = *waitreq;
1405
1406 RF_Free(waitreq, sizeof(*waitreq));
1407 return (0);
1408
1409 /* wakes up a process waiting on SPARET_WAIT and puts an error
1410 * code in it that will cause the daemon to exit */
1411 case RAIDFRAME_ABORT_SPARET_WAIT:
1412 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1413 waitreq->fcol = -1;
1414 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1415 waitreq->next = rf_sparet_wait_queue;
1416 rf_sparet_wait_queue = waitreq;
1417 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1418 wakeup(&rf_sparet_wait_queue);
1419 return (0);
1420
1421 /* used by the spare table daemon to deliver a spare table
1422 * into the kernel */
1423 case RAIDFRAME_SEND_SPARET:
1424
1425 /* install the spare table */
1426 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1427
1428 /* respond to the requestor. the return status of the spare
1429 * table installation is passed in the "fcol" field */
1430 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1431 waitreq->fcol = retcode;
1432 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1433 waitreq->next = rf_sparet_resp_queue;
1434 rf_sparet_resp_queue = waitreq;
1435 wakeup(&rf_sparet_resp_queue);
1436 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1437
1438 return (retcode);
1439 #endif
1440
1441 default:
1442 break; /* fall through to the os-specific code below */
1443
1444 }
1445
1446 if (!raidPtr->valid)
1447 return (EINVAL);
1448
1449 /*
1450 * Add support for "regular" device ioctls here.
1451 */
1452
1453 switch (cmd) {
1454 case DIOCGDINFO:
1455 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1456 break;
1457 #ifdef __HAVE_OLD_DISKLABEL
1458 case ODIOCGDINFO:
1459 newlabel = *(rs->sc_dkdev.dk_label);
1460 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1461 return ENOTTY;
1462 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1463 break;
1464 #endif
1465
1466 case DIOCGPART:
1467 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1468 ((struct partinfo *) data)->part =
1469 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1470 break;
1471
1472 case DIOCWDINFO:
1473 case DIOCSDINFO:
1474 #ifdef __HAVE_OLD_DISKLABEL
1475 case ODIOCWDINFO:
1476 case ODIOCSDINFO:
1477 #endif
1478 {
1479 struct disklabel *lp;
1480 #ifdef __HAVE_OLD_DISKLABEL
1481 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1482 memset(&newlabel, 0, sizeof newlabel);
1483 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1484 lp = &newlabel;
1485 } else
1486 #endif
1487 lp = (struct disklabel *)data;
1488
1489 if ((error = raidlock(rs)) != 0)
1490 return (error);
1491
1492 rs->sc_flags |= RAIDF_LABELLING;
1493
1494 error = setdisklabel(rs->sc_dkdev.dk_label,
1495 lp, 0, rs->sc_dkdev.dk_cpulabel);
1496 if (error == 0) {
1497 if (cmd == DIOCWDINFO
1498 #ifdef __HAVE_OLD_DISKLABEL
1499 || cmd == ODIOCWDINFO
1500 #endif
1501 )
1502 error = writedisklabel(RAIDLABELDEV(dev),
1503 raidstrategy, rs->sc_dkdev.dk_label,
1504 rs->sc_dkdev.dk_cpulabel);
1505 }
1506 rs->sc_flags &= ~RAIDF_LABELLING;
1507
1508 raidunlock(rs);
1509
1510 if (error)
1511 return (error);
1512 break;
1513 }
1514
1515 case DIOCWLABEL:
1516 if (*(int *) data != 0)
1517 rs->sc_flags |= RAIDF_WLABEL;
1518 else
1519 rs->sc_flags &= ~RAIDF_WLABEL;
1520 break;
1521
1522 case DIOCGDEFLABEL:
1523 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1524 break;
1525
1526 #ifdef __HAVE_OLD_DISKLABEL
1527 case ODIOCGDEFLABEL:
1528 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1529 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1530 return ENOTTY;
1531 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1532 break;
1533 #endif
1534
1535 default:
1536 retcode = ENOTTY;
1537 }
1538 return (retcode);
1539
1540 }
1541
1542
1543 /* raidinit -- complete the rest of the initialization for the
1544 RAIDframe device. */
1545
1546
1547 static void
1548 raidinit(raidPtr)
1549 RF_Raid_t *raidPtr;
1550 {
1551 struct raid_softc *rs;
1552 int unit;
1553
1554 unit = raidPtr->raidid;
1555
1556 rs = &raid_softc[unit];
1557 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1558 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1559
1560
1561 /* XXX should check return code first... */
1562 rs->sc_flags |= RAIDF_INITED;
1563
1564 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1565
1566 rs->sc_dkdev.dk_name = rs->sc_xname;
1567
1568 /* disk_attach actually creates space for the CPU disklabel, among
1569 * other things, so it's critical to call this *BEFORE* we try putzing
1570 * with disklabels. */
1571
1572 disk_attach(&rs->sc_dkdev);
1573
1574 /* XXX There may be a weird interaction here between this, and
1575 * protectedSectors, as used in RAIDframe. */
1576
1577 rs->sc_size = raidPtr->totalSectors;
1578
1579 }
1580
1581 /* wake up the daemon & tell it to get us a spare table
1582 * XXX
1583 * the entries in the queues should be tagged with the raidPtr
1584 * so that in the extremely rare case that two recons happen at once,
1585 * we know for which device we're requesting a spare table
1586 * XXX
1587 *
1588 * XXX This code is not currently used. GO
1589 */
1590 int
1591 rf_GetSpareTableFromDaemon(req)
1592 RF_SparetWait_t *req;
1593 {
1594 int retcode;
1595
1596 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1597 req->next = rf_sparet_wait_queue;
1598 rf_sparet_wait_queue = req;
1599 wakeup(&rf_sparet_wait_queue);
1600
1601 /* mpsleep unlocks the mutex */
1602 while (!rf_sparet_resp_queue) {
1603 tsleep(&rf_sparet_resp_queue, PRIBIO,
1604 "raidframe getsparetable", 0);
1605 }
1606 req = rf_sparet_resp_queue;
1607 rf_sparet_resp_queue = req->next;
1608 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1609
1610 retcode = req->fcol;
1611 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1612 * alloc'd */
1613 return (retcode);
1614 }
1615
1616 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1617 * bp & passes it down.
1618 * bp & passes it down. any calls originating in the kernel must use non-blocking I/O;
1619 * do some extra sanity checking to return "appropriate" error values for
1620 * certain conditions (to make some standard utilities work)
1621 *
1622 * Formerly known as: rf_DoAccessKernel
1623 */
1624 void
1625 raidstart(raidPtr)
1626 RF_Raid_t *raidPtr;
1627 {
1628 RF_SectorCount_t num_blocks, pb, sum;
1629 RF_RaidAddr_t raid_addr;
1630 int retcode;
1631 struct partition *pp;
1632 daddr_t blocknum;
1633 int unit;
1634 struct raid_softc *rs;
1635 int do_async;
1636 struct buf *bp;
1637
1638 unit = raidPtr->raidid;
1639 rs = &raid_softc[unit];
1640
1641 /* quick check to see if anything has died recently */
1642 RF_LOCK_MUTEX(raidPtr->mutex);
1643 if (raidPtr->numNewFailures > 0) {
1644 rf_update_component_labels(raidPtr,
1645 RF_NORMAL_COMPONENT_UPDATE);
1646 raidPtr->numNewFailures--;
1647 }
1648 RF_UNLOCK_MUTEX(raidPtr->mutex);
1649
1650 /* Check to see if we're at the limit... */
1651 RF_LOCK_MUTEX(raidPtr->mutex);
1652 while (raidPtr->openings > 0) {
1653 RF_UNLOCK_MUTEX(raidPtr->mutex);
1654
1655 /* get the next item, if any, from the queue */
1656 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1657 /* nothing more to do */
1658 return;
1659 }
1660 BUFQ_REMOVE(&rs->buf_queue, bp);
1661
1662 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1663 * partition.. Need to make it absolute to the underlying
1664 * device.. */
1665
1666 blocknum = bp->b_blkno;
1667 if (DISKPART(bp->b_dev) != RAW_PART) {
1668 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1669 blocknum += pp->p_offset;
1670 }
1671
1672 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1673 (int) blocknum));
1674
1675 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1676 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1677
1678 /* *THIS* is where we adjust what block we're going to...
1679 * but DO NOT TOUCH bp->b_blkno!!! */
1680 raid_addr = blocknum;
1681
1682 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1683 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1684 sum = raid_addr + num_blocks + pb;
1685 if (1 || rf_debugKernelAccess) {
1686 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1687 (int) raid_addr, (int) sum, (int) num_blocks,
1688 (int) pb, (int) bp->b_resid));
1689 }
1690 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1691 || (sum < num_blocks) || (sum < pb)) {
1692 bp->b_error = ENOSPC;
1693 bp->b_flags |= B_ERROR;
1694 bp->b_resid = bp->b_bcount;
1695 biodone(bp);
1696 RF_LOCK_MUTEX(raidPtr->mutex);
1697 continue;
1698 }
1699 /*
1700 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1701 */
1702
1703 if (bp->b_bcount & raidPtr->sectorMask) {
1704 bp->b_error = EINVAL;
1705 bp->b_flags |= B_ERROR;
1706 bp->b_resid = bp->b_bcount;
1707 biodone(bp);
1708 RF_LOCK_MUTEX(raidPtr->mutex);
1709 continue;
1710
1711 }
1712 db1_printf(("Calling DoAccess..\n"));
1713
1714
1715 RF_LOCK_MUTEX(raidPtr->mutex);
1716 raidPtr->openings--;
1717 RF_UNLOCK_MUTEX(raidPtr->mutex);
1718
1719 /*
1720 * Everything is async.
1721 */
1722 do_async = 1;
1723
1724 disk_busy(&rs->sc_dkdev);
1725
1726 /* XXX we're still at splbio() here... do we *really*
1727 need to be? */
1728
1729 /* don't ever condition on bp->b_flags & B_WRITE.
1730 * always condition on B_READ instead */
1731
1732 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1733 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1734 do_async, raid_addr, num_blocks,
1735 bp->b_data, bp, NULL, NULL,
1736 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1737
1738
1739 RF_LOCK_MUTEX(raidPtr->mutex);
1740 }
1741 RF_UNLOCK_MUTEX(raidPtr->mutex);
1742 }
1743
1744
1745
1746
1747 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1748
1749 int
1750 rf_DispatchKernelIO(queue, req)
1751 RF_DiskQueue_t *queue;
1752 RF_DiskQueueData_t *req;
1753 {
1754 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1755 struct buf *bp;
1756 struct raidbuf *raidbp = NULL;
1757 struct raid_softc *rs;
1758 int unit;
1759 int s;
1760
1761 s=0;
1762 /* s = splbio();*/ /* want to test this */
1763 /* XXX along with the vnode, we also need the softc associated with
1764 * this device.. */
1765
1766 req->queue = queue;
1767
1768 unit = queue->raidPtr->raidid;
1769
1770 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1771
1772 if (unit >= numraid) {
1773 printf("Invalid unit number: %d %d\n", unit, numraid);
1774 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1775 }
1776 rs = &raid_softc[unit];
1777
1778 bp = req->bp;
1779 #if 1
1780 /* XXX when there is a physical disk failure, someone is passing us a
1781 * buffer that contains old stuff!! Attempt to deal with this problem
1782 * without taking a performance hit... (not sure where the real bug
1783 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1784
1785 if (bp->b_flags & B_ERROR) {
1786 bp->b_flags &= ~B_ERROR;
1787 }
1788 if (bp->b_error != 0) {
1789 bp->b_error = 0;
1790 }
1791 #endif
1792 raidbp = RAIDGETBUF(rs);
1793
1794 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1795
1796 /*
1797 * context for raidiodone
1798 */
1799 raidbp->rf_obp = bp;
1800 raidbp->req = req;
1801
1802 LIST_INIT(&raidbp->rf_buf.b_dep);
1803
1804 switch (req->type) {
1805 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1806 /* XXX need to do something extra here.. */
1807 /* I'm leaving this in, as I've never actually seen it used,
1808 * and I'd like folks to report it... GO */
1809 printf("WAKEUP CALLED\n");
1810 queue->numOutstanding++;
1811
1812 /* XXX need to glue the original buffer into this?? */
1813
1814 KernelWakeupFunc(&raidbp->rf_buf);
1815 break;
1816
1817 case RF_IO_TYPE_READ:
1818 case RF_IO_TYPE_WRITE:
1819
1820 if (req->tracerec) {
1821 RF_ETIMER_START(req->tracerec->timer);
1822 }
1823 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1824 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1825 req->sectorOffset, req->numSector,
1826 req->buf, KernelWakeupFunc, (void *) req,
1827 queue->raidPtr->logBytesPerSector, req->b_proc);
1828
1829 if (rf_debugKernelAccess) {
1830 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1831 (long) bp->b_blkno));
1832 }
1833 queue->numOutstanding++;
1834 queue->last_deq_sector = req->sectorOffset;
1835 /* acc wouldn't have been let in if there were any pending
1836 * reqs at any other priority */
1837 queue->curPriority = req->priority;
1838
1839 db1_printf(("Going for %c to unit %d row %d col %d\n",
1840 req->type, unit, queue->row, queue->col));
1841 db1_printf(("sector %d count %d (%d bytes) %d\n",
1842 (int) req->sectorOffset, (int) req->numSector,
1843 (int) (req->numSector <<
1844 queue->raidPtr->logBytesPerSector),
1845 (int) queue->raidPtr->logBytesPerSector));
1846 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1847 raidbp->rf_buf.b_vp->v_numoutput++;
1848 }
1849 VOP_STRATEGY(&raidbp->rf_buf);
1850
1851 break;
1852
1853 default:
1854 panic("bad req->type in rf_DispatchKernelIO");
1855 }
1856 db1_printf(("Exiting from DispatchKernelIO\n"));
1857 /* splx(s); */ /* want to test this */
1858 return (0);
1859 }
1860 /* this is the callback function associated with an I/O invoked from
1861 kernel code.
1862 */
1863 static void
1864 KernelWakeupFunc(vbp)
1865 struct buf *vbp;
1866 {
1867 RF_DiskQueueData_t *req = NULL;
1868 RF_DiskQueue_t *queue;
1869 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1870 struct buf *bp;
1871 struct raid_softc *rs;
1872 int unit;
1873 int s;
1874
1875 s = splbio();
1876 db1_printf(("recovering the request queue:\n"));
1877 req = raidbp->req;
1878
1879 bp = raidbp->rf_obp;
1880
1881 queue = (RF_DiskQueue_t *) req->queue;
1882
1883 if (raidbp->rf_buf.b_flags & B_ERROR) {
1884 bp->b_flags |= B_ERROR;
1885 bp->b_error = raidbp->rf_buf.b_error ?
1886 raidbp->rf_buf.b_error : EIO;
1887 }
1888
1889 /* XXX methinks this could be wrong... */
1890 #if 1
1891 bp->b_resid = raidbp->rf_buf.b_resid;
1892 #endif
1893
1894 if (req->tracerec) {
1895 RF_ETIMER_STOP(req->tracerec->timer);
1896 RF_ETIMER_EVAL(req->tracerec->timer);
1897 RF_LOCK_MUTEX(rf_tracing_mutex);
1898 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1899 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1900 req->tracerec->num_phys_ios++;
1901 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1902 }
1903 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1904
1905 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1906
1907
1908 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1909 * ballistic, and mark the component as hosed... */
1910
1911 if (bp->b_flags & B_ERROR) {
1912 /* Mark the disk as dead */
1913 /* but only mark it once... */
1914 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1915 rf_ds_optimal) {
1916 printf("raid%d: IO Error. Marking %s as failed.\n",
1917 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1918 queue->raidPtr->Disks[queue->row][queue->col].status =
1919 rf_ds_failed;
1920 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1921 queue->raidPtr->numFailures++;
1922 queue->raidPtr->numNewFailures++;
1923 } else { /* Disk is already dead... */
1924 /* printf("Disk already marked as dead!\n"); */
1925 }
1926
1927 }
1928
1929 rs = &raid_softc[unit];
1930 RAIDPUTBUF(rs, raidbp);
1931
1932 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1933 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1934
1935 splx(s);
1936 }
1937
1938
1939
1940 /*
1941 * initialize a buf structure for doing an I/O in the kernel.
1942 */
1943 static void
1944 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1945 logBytesPerSector, b_proc)
1946 struct buf *bp;
1947 struct vnode *b_vp;
1948 unsigned rw_flag;
1949 dev_t dev;
1950 RF_SectorNum_t startSect;
1951 RF_SectorCount_t numSect;
1952 caddr_t buf;
1953 void (*cbFunc) (struct buf *);
1954 void *cbArg;
1955 int logBytesPerSector;
1956 struct proc *b_proc;
1957 {
1958 /* bp->b_flags = B_PHYS | rw_flag; */
1959 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1960 bp->b_bcount = numSect << logBytesPerSector;
1961 bp->b_bufsize = bp->b_bcount;
1962 bp->b_error = 0;
1963 bp->b_dev = dev;
1964 bp->b_data = buf;
1965 bp->b_blkno = startSect;
1966 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1967 if (bp->b_bcount == 0) {
1968 panic("bp->b_bcount is zero in InitBP!!\n");
1969 }
1970 bp->b_proc = b_proc;
1971 bp->b_iodone = cbFunc;
1972 bp->b_vp = b_vp;
1973
1974 }
1975
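/*
 * Fabricate a default disklabel for the RAID device from the geometry
 * recorded in the RF_Raid_t.
 */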
1976 static void
1977 raidgetdefaultlabel(raidPtr, rs, lp)
1978 RF_Raid_t *raidPtr;
1979 struct raid_softc *rs;
1980 struct disklabel *lp;
1981 {
1982 db1_printf(("Building a default label...\n"));
1983 bzero(lp, sizeof(*lp));
1984
1985 /* fabricate a label... */
1986 lp->d_secperunit = raidPtr->totalSectors;
1987 lp->d_secsize = raidPtr->bytesPerSector;
1988 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1989 lp->d_ntracks = 4 * raidPtr->numCol;
1990 lp->d_ncylinders = raidPtr->totalSectors /
1991 (lp->d_nsectors * lp->d_ntracks);
1992 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1993
1994 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1995 lp->d_type = DTYPE_RAID;
1996 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1997 lp->d_rpm = 3600;
1998 lp->d_interleave = 1;
1999 lp->d_flags = 0;
2000
2001 lp->d_partitions[RAW_PART].p_offset = 0;
2002 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2003 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2004 lp->d_npartitions = RAW_PART + 1;
2005
2006 lp->d_magic = DISKMAGIC;
2007 lp->d_magic2 = DISKMAGIC;
2008 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2009
2010 }
2011 /*
2012 * Read the disklabel from the raid device. If one is not present, fake one
2013 * up.
2014 */
2015 static void
2016 raidgetdisklabel(dev)
2017 dev_t dev;
2018 {
2019 int unit = raidunit(dev);
2020 struct raid_softc *rs = &raid_softc[unit];
2021 char *errstring;
2022 struct disklabel *lp = rs->sc_dkdev.dk_label;
2023 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2024 RF_Raid_t *raidPtr;
2025
2026 db1_printf(("Getting the disklabel...\n"));
2027
2028 bzero(clp, sizeof(*clp));
2029
2030 raidPtr = raidPtrs[unit];
2031
2032 raidgetdefaultlabel(raidPtr, rs, lp);
2033
2034 /*
2035 * Call the generic disklabel extraction routine.
2036 */
2037 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2038 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2039 if (errstring)
2040 raidmakedisklabel(rs);
2041 else {
2042 int i;
2043 struct partition *pp;
2044
2045 /*
2046 * Sanity check whether the found disklabel is valid.
2047 *
		 * This is necessary since the total size of the raid device
		 * may vary when the interleave is changed, even though
		 * exactly the same components are used, and an old disklabel
		 * may be used if one is found.
2052 */
2053 if (lp->d_secperunit != rs->sc_size)
2054 printf("WARNING: %s: "
2055 "total sector size in disklabel (%d) != "
2056 "the size of raid (%ld)\n", rs->sc_xname,
2057 lp->d_secperunit, (long) rs->sc_size);
2058 for (i = 0; i < lp->d_npartitions; i++) {
2059 pp = &lp->d_partitions[i];
2060 if (pp->p_offset + pp->p_size > rs->sc_size)
2061 printf("WARNING: %s: end of partition `%c' "
2062 "exceeds the size of raid (%ld)\n",
2063 rs->sc_xname, 'a' + i, (long) rs->sc_size);
2064 }
2065 }
2066
2067 }
2068 /*
2069 * Take care of things one might want to take care of in the event
2070 * that a disklabel isn't present.
2071 */
2072 static void
2073 raidmakedisklabel(rs)
2074 struct raid_softc *rs;
2075 {
2076 struct disklabel *lp = rs->sc_dkdev.dk_label;
2077 db1_printf(("Making a label..\n"));
2078
2079 /*
2080 * For historical reasons, if there's no disklabel present
2081 * the raw partition must be marked FS_BSDFFS.
2082 */
2083
2084 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2085
2086 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2087
2088 lp->d_checksum = dkcksum(lp);
2089 }
2090 /*
2091 * Lookup the provided name in the filesystem. If the file exists,
2092 * is a valid block device, and isn't being used by anyone else,
2093 * set *vpp to the file's vnode.
2094 * You'll find the original of this in ccd.c
2095 */
2096 int
2097 raidlookup(path, p, vpp)
2098 char *path;
2099 struct proc *p;
2100 struct vnode **vpp; /* result */
2101 {
2102 struct nameidata nd;
2103 struct vnode *vp;
2104 struct vattr va;
2105 int error;
2106
2107 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2108 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2109 #ifdef DEBUG
2110 printf("RAIDframe: vn_open returned %d\n", error);
2111 #endif
2112 return (error);
2113 }
2114 vp = nd.ni_vp;
2115 if (vp->v_usecount > 1) {
2116 VOP_UNLOCK(vp, 0);
2117 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2118 return (EBUSY);
2119 }
2120 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2121 VOP_UNLOCK(vp, 0);
2122 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2123 return (error);
2124 }
2125 /* XXX: eventually we should handle VREG, too. */
2126 if (va.va_type != VBLK) {
2127 VOP_UNLOCK(vp, 0);
2128 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2129 return (ENOTBLK);
2130 }
2131 VOP_UNLOCK(vp, 0);
2132 *vpp = vp;
2133 return (0);
2134 }
2135 /*
2136 * Wait interruptibly for an exclusive lock.
2137 *
2138 * XXX
2139 * Several drivers do this; it should be abstracted and made MP-safe.
2140 * (Hmm... where have we seen this warning before :-> GO )
2141 */
2142 static int
2143 raidlock(rs)
2144 struct raid_softc *rs;
2145 {
2146 int error;
2147
2148 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2149 rs->sc_flags |= RAIDF_WANTED;
2150 if ((error =
2151 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2152 return (error);
2153 }
2154 rs->sc_flags |= RAIDF_LOCKED;
2155 return (0);
2156 }
2157 /*
2158 * Unlock and wake up any waiters.
2159 */
2160 static void
2161 raidunlock(rs)
2162 struct raid_softc *rs;
2163 {
2164
2165 rs->sc_flags &= ~RAIDF_LOCKED;
2166 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2167 rs->sc_flags &= ~RAIDF_WANTED;
2168 wakeup(rs);
2169 }
2170 }
2171
2172
2173 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2174 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2175
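/*
 * raidmarkclean() and raidmarkdirty() re-read the component label from
 * the given component, update the clean flag and the mod_counter, and
 * write the label back out.
 */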
2176 int
2177 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2178 {
2179 RF_ComponentLabel_t clabel;
2180 raidread_component_label(dev, b_vp, &clabel);
2181 clabel.mod_counter = mod_counter;
2182 clabel.clean = RF_RAID_CLEAN;
2183 raidwrite_component_label(dev, b_vp, &clabel);
2184 return(0);
2185 }
2186
2187
2188 int
2189 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2190 {
2191 RF_ComponentLabel_t clabel;
2192 raidread_component_label(dev, b_vp, &clabel);
2193 clabel.mod_counter = mod_counter;
2194 clabel.clean = RF_RAID_DIRTY;
2195 raidwrite_component_label(dev, b_vp, &clabel);
2196 return(0);
2197 }
2198
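/*
 * Read the component label from the given component.  The label lives
 * RF_COMPONENT_INFO_OFFSET bytes into the raw device.  Returns 0 on
 * success, EINVAL if the component has no vnode, or the error from
 * biowait().
 */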
2199 /* ARGSUSED */
2200 int
2201 raidread_component_label(dev, b_vp, clabel)
2202 dev_t dev;
2203 struct vnode *b_vp;
2204 RF_ComponentLabel_t *clabel;
2205 {
2206 struct buf *bp;
2207 int error;
2208
2209 /* XXX should probably ensure that we don't try to do this if
2210 someone has changed rf_protected_sectors. */
2211
2212 if (b_vp == NULL) {
2213 /* For whatever reason, this component is not valid.
2214 Don't try to read a component label from it. */
2215 return(EINVAL);
2216 }
2217
2218 /* get a block of the appropriate size... */
2219 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2220 bp->b_dev = dev;
2221
2222 /* get our ducks in a row for the read */
2223 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2224 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2225 bp->b_flags = B_BUSY | B_READ;
2226 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2227
2228 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2229
2230 error = biowait(bp);
2231
2232 if (!error) {
2233 memcpy(clabel, bp->b_data,
2234 sizeof(RF_ComponentLabel_t));
2235 #if 0
2236 rf_print_component_label( clabel );
2237 #endif
2238 } else {
2239 #if 0
2240 printf("Failed to read RAID component label!\n");
2241 #endif
2242 }
2243
2244 bp->b_flags = B_INVAL | B_AGE;
2245 brelse(bp);
2246 return(error);
2247 }
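/*
 * Write the component label out to RF_COMPONENT_INFO_OFFSET on the raw
 * component.  Returns the error from biowait(), if any.
 */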
2248 /* ARGSUSED */
2249 int
2250 raidwrite_component_label(dev, b_vp, clabel)
2251 dev_t dev;
2252 struct vnode *b_vp;
2253 RF_ComponentLabel_t *clabel;
2254 {
2255 struct buf *bp;
2256 int error;
2257
2258 /* get a block of the appropriate size... */
2259 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2260 bp->b_dev = dev;
2261
2262 /* get our ducks in a row for the write */
2263 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2264 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2265 bp->b_flags = B_BUSY | B_WRITE;
2266 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2267
2268 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2269
2270 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2271
2272 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2273 error = biowait(bp);
2274 bp->b_flags = B_INVAL | B_AGE;
2275 brelse(bp);
2276 if (error) {
2277 #if 1
2278 printf("Failed to write RAID component info!\n");
2279 #endif
2280 }
2281
2282 return(error);
2283 }
2284
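/*
 * Bump the modification counter and mark the component label of every
 * live component as dirty.  Spared components are left untouched.
 */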
2285 void
2286 rf_markalldirty(raidPtr)
2287 RF_Raid_t *raidPtr;
2288 {
2289 RF_ComponentLabel_t clabel;
2290 int r,c;
2291
2292 raidPtr->mod_counter++;
2293 for (r = 0; r < raidPtr->numRow; r++) {
2294 for (c = 0; c < raidPtr->numCol; c++) {
2295 /* we don't want to touch (at all) a disk that has
2296 failed */
2297 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2298 raidread_component_label(
2299 raidPtr->Disks[r][c].dev,
2300 raidPtr->raid_cinfo[r][c].ci_vp,
2301 &clabel);
2302 if (clabel.status == rf_ds_spared) {
2303 /* XXX do something special...
2304 but whatever you do, don't
2305 try to access it!! */
2306 } else {
2307 #if 0
2308 clabel.status =
2309 raidPtr->Disks[r][c].status;
2310 raidwrite_component_label(
2311 raidPtr->Disks[r][c].dev,
2312 raidPtr->raid_cinfo[r][c].ci_vp,
2313 &clabel);
2314 #endif
2315 raidmarkdirty(
2316 raidPtr->Disks[r][c].dev,
2317 raidPtr->raid_cinfo[r][c].ci_vp,
2318 raidPtr->mod_counter);
2319 }
2320 }
2321 }
2322 }
2323 /* printf("Component labels marked dirty.\n"); */
2324 #if 0
2325 for( c = 0; c < raidPtr->numSpare ; c++) {
2326 sparecol = raidPtr->numCol + c;
2327 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2328 /*
2329
2330 XXX this is where we get fancy and map this spare
					into its correct spot in the array.
2332
2333 */
2334 /*
2335
2336 we claim this disk is "optimal" if it's
2337 rf_ds_used_spare, as that means it should be
2338 directly substitutable for the disk it replaced.
2339 We note that too...
2340
2341 */
2342
2343 for(i=0;i<raidPtr->numRow;i++) {
2344 for(j=0;j<raidPtr->numCol;j++) {
2345 if ((raidPtr->Disks[i][j].spareRow ==
2346 r) &&
2347 (raidPtr->Disks[i][j].spareCol ==
2348 sparecol)) {
2349 srow = r;
2350 scol = sparecol;
2351 break;
2352 }
2353 }
2354 }
2355
2356 raidread_component_label(
2357 raidPtr->Disks[r][sparecol].dev,
2358 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2359 &clabel);
2360 /* make sure status is noted */
2361 clabel.version = RF_COMPONENT_LABEL_VERSION;
2362 clabel.mod_counter = raidPtr->mod_counter;
2363 clabel.serial_number = raidPtr->serial_number;
2364 clabel.row = srow;
2365 clabel.column = scol;
2366 clabel.num_rows = raidPtr->numRow;
2367 clabel.num_columns = raidPtr->numCol;
2368 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2369 clabel.status = rf_ds_optimal;
2370 raidwrite_component_label(
2371 raidPtr->Disks[r][sparecol].dev,
2372 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2373 &clabel);
2374 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2375 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2376 }
2377 }
2378
2379 #endif
2380 }
2381
2382
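/*
 * Re-write the component labels of all optimal components (and any
 * in-use spares) with the current mod_counter.  If this is the final
 * update and the parity is known to be good, mark the components clean
 * as well.
 */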
2383 void
2384 rf_update_component_labels(raidPtr, final)
2385 RF_Raid_t *raidPtr;
2386 int final;
2387 {
2388 RF_ComponentLabel_t clabel;
2389 int sparecol;
2390 int r,c;
2391 int i,j;
2392 int srow, scol;
2393
2394 srow = -1;
2395 scol = -1;
2396
2397 /* XXX should do extra checks to make sure things really are clean,
2398 rather than blindly setting the clean bit... */
2399
2400 raidPtr->mod_counter++;
2401
2402 for (r = 0; r < raidPtr->numRow; r++) {
2403 for (c = 0; c < raidPtr->numCol; c++) {
2404 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2405 raidread_component_label(
2406 raidPtr->Disks[r][c].dev,
2407 raidPtr->raid_cinfo[r][c].ci_vp,
2408 &clabel);
2409 /* make sure status is noted */
2410 clabel.status = rf_ds_optimal;
2411 /* bump the counter */
2412 clabel.mod_counter = raidPtr->mod_counter;
2413
2414 raidwrite_component_label(
2415 raidPtr->Disks[r][c].dev,
2416 raidPtr->raid_cinfo[r][c].ci_vp,
2417 &clabel);
2418 if (final == RF_FINAL_COMPONENT_UPDATE) {
2419 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2420 raidmarkclean(
2421 raidPtr->Disks[r][c].dev,
2422 raidPtr->raid_cinfo[r][c].ci_vp,
2423 raidPtr->mod_counter);
2424 }
2425 }
2426 }
2427 /* else we don't touch it.. */
2428 }
2429 }
2430
2431 for( c = 0; c < raidPtr->numSpare ; c++) {
2432 sparecol = raidPtr->numCol + c;
2433 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2434 /*
2435
2436 we claim this disk is "optimal" if it's
2437 rf_ds_used_spare, as that means it should be
2438 directly substitutable for the disk it replaced.
2439 We note that too...
2440
2441 */
2442
2443 for(i=0;i<raidPtr->numRow;i++) {
2444 for(j=0;j<raidPtr->numCol;j++) {
2445 if ((raidPtr->Disks[i][j].spareRow ==
2446 0) &&
2447 (raidPtr->Disks[i][j].spareCol ==
2448 sparecol)) {
2449 srow = i;
2450 scol = j;
2451 break;
2452 }
2453 }
2454 }
2455
2456 /* XXX shouldn't *really* need this... */
2457 raidread_component_label(
2458 raidPtr->Disks[0][sparecol].dev,
2459 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2460 &clabel);
2461 /* make sure status is noted */
2462
2463 raid_init_component_label(raidPtr, &clabel);
2464
2465 clabel.mod_counter = raidPtr->mod_counter;
2466 clabel.row = srow;
2467 clabel.column = scol;
2468 clabel.status = rf_ds_optimal;
2469
2470 raidwrite_component_label(
2471 raidPtr->Disks[0][sparecol].dev,
2472 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2473 &clabel);
2474 if (final == RF_FINAL_COMPONENT_UPDATE) {
2475 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2476 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2477 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2478 raidPtr->mod_counter);
2479 }
2480 }
2481 }
2482 }
2483 /* printf("Component labels updated\n"); */
2484 }
2485
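/*
 * Close the vnode for a single component.  Auto-configured components
 * were opened without credentials, so close them with VOP_CLOSE() and
 * vput(); everything else goes through vn_close() with the engine
 * thread's credentials.
 */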
2486 void
2487 rf_close_component(raidPtr, vp, auto_configured)
2488 RF_Raid_t *raidPtr;
2489 struct vnode *vp;
2490 int auto_configured;
2491 {
2492 struct proc *p;
2493
2494 p = raidPtr->engine_thread;
2495
2496 if (vp != NULL) {
2497 if (auto_configured == 1) {
2498 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2499 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2500 vput(vp);
2501
2502 } else {
2503 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2504 }
2505 } else {
2506 printf("vnode was NULL\n");
2507 }
2508 }
2509
2510
2511 void
2512 rf_UnconfigureVnodes(raidPtr)
2513 RF_Raid_t *raidPtr;
2514 {
2515 int r,c;
2516 struct proc *p;
2517 struct vnode *vp;
2518 int acd;
2519
2520
2521 /* We take this opportunity to close the vnodes like we should.. */
2522
2523 p = raidPtr->engine_thread;
2524
2525 for (r = 0; r < raidPtr->numRow; r++) {
2526 for (c = 0; c < raidPtr->numCol; c++) {
2527 printf("Closing vnode for row: %d col: %d\n", r, c);
2528 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2529 acd = raidPtr->Disks[r][c].auto_configured;
2530 rf_close_component(raidPtr, vp, acd);
2531 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2532 raidPtr->Disks[r][c].auto_configured = 0;
2533 }
2534 }
2535 for (r = 0; r < raidPtr->numSpare; r++) {
2536 printf("Closing vnode for spare: %d\n", r);
2537 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2538 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2539 rf_close_component(raidPtr, vp, acd);
2540 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2541 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2542 }
2543 }
2544
2545
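/*
 * Body of the kernel thread that handles a reconstruction request:
 * fail the indicated component (reconstructing to a spare if
 * requested), free the request, and exit.
 */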
2546 void
2547 rf_ReconThread(req)
2548 struct rf_recon_req *req;
2549 {
2550 int s;
2551 RF_Raid_t *raidPtr;
2552
2553 s = splbio();
2554 raidPtr = (RF_Raid_t *) req->raidPtr;
2555 raidPtr->recon_in_progress = 1;
2556
2557 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2558 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2559
2560 /* XXX get rid of this! we don't need it at all.. */
2561 RF_Free(req, sizeof(*req));
2562
2563 raidPtr->recon_in_progress = 0;
2564 splx(s);
2565
2566 /* That's all... */
2567 kthread_exit(0); /* does not return */
2568 }
2569
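/*
 * Body of the kernel thread that re-writes the parity.  On success,
 * the parity is marked good so that the clean bits can be set at
 * shutdown.
 */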
2570 void
2571 rf_RewriteParityThread(raidPtr)
2572 RF_Raid_t *raidPtr;
2573 {
2574 int retcode;
2575 int s;
2576
2577 raidPtr->parity_rewrite_in_progress = 1;
2578 s = splbio();
2579 retcode = rf_RewriteParity(raidPtr);
2580 splx(s);
2581 if (retcode) {
2582 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2583 } else {
2584 /* set the clean bit! If we shutdown correctly,
2585 the clean bit on each component label will get
2586 set */
2587 raidPtr->parity_good = RF_RAID_CLEAN;
2588 }
2589 raidPtr->parity_rewrite_in_progress = 0;
2590
2591 /* Anyone waiting for us to stop? If so, inform them... */
2592 if (raidPtr->waitShutdown) {
2593 wakeup(&raidPtr->parity_rewrite_in_progress);
2594 }
2595
2596 /* That's all... */
2597 kthread_exit(0); /* does not return */
2598 }
2599
2600
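/*
 * Body of the kernel thread that runs rf_CopybackReconstructedData(),
 * copying reconstructed data back from the spare.
 */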
2601 void
2602 rf_CopybackThread(raidPtr)
2603 RF_Raid_t *raidPtr;
2604 {
2605 int s;
2606
2607 raidPtr->copyback_in_progress = 1;
2608 s = splbio();
2609 rf_CopybackReconstructedData(raidPtr);
2610 splx(s);
2611 raidPtr->copyback_in_progress = 0;
2612
2613 /* That's all... */
2614 kthread_exit(0); /* does not return */
2615 }
2616
2617
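/*
 * Body of the kernel thread that reconstructs a failed component "in
 * place", i.e. back onto the component at the given row and column.
 */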
2618 void
2619 rf_ReconstructInPlaceThread(req)
2620 struct rf_recon_req *req;
2621 {
2622 int retcode;
2623 int s;
2624 RF_Raid_t *raidPtr;
2625
2626 s = splbio();
2627 raidPtr = req->raidPtr;
2628 raidPtr->recon_in_progress = 1;
2629 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2630 RF_Free(req, sizeof(*req));
2631 raidPtr->recon_in_progress = 0;
2632 splx(s);
2633
2634 /* That's all... */
2635 kthread_exit(0); /* does not return */
2636 }
2637
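/*
 * Mount-root hook for RAID devices; nothing to do here at present.
 */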
2638 void
2639 rf_mountroot_hook(dev)
2640 struct device *dev;
2641 {
2642
2643 }
2644
2645
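/*
 * Scan all disks in the system looking for RAID components.  For each
 * FS_RAID partition found, read and sanity-check the component label
 * and, if it looks reasonable, add it to the RF_AutoConfig_t list that
 * is returned to the caller.
 */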
2646 RF_AutoConfig_t *
2647 rf_find_raid_components()
2648 {
2649 struct devnametobdevmaj *dtobdm;
2650 struct vnode *vp;
2651 struct disklabel label;
2652 struct device *dv;
2653 char *cd_name;
2654 dev_t dev;
2655 int error;
2656 int i;
2657 int good_one;
2658 RF_ComponentLabel_t *clabel;
2659 RF_AutoConfig_t *ac_list;
2660 RF_AutoConfig_t *ac;
2661
2662
2663 /* initialize the AutoConfig list */
2664 ac_list = NULL;
2665
2666 if (raidautoconfig) {
2667
2668 /* we begin by trolling through *all* the devices on the system */
2669
2670 for (dv = alldevs.tqh_first; dv != NULL;
2671 dv = dv->dv_list.tqe_next) {
2672
2673 /* we are only interested in disks... */
2674 if (dv->dv_class != DV_DISK)
2675 continue;
2676
2677 /* we don't care about floppies... */
2678 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2679 continue;
2680 }
2681
2682 /* need to find the device_name_to_block_device_major stuff */
2683 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2684 dtobdm = dev_name2blk;
2685 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2686 dtobdm++;
2687 }
2688
2689 /* get a vnode for the raw partition of this disk */
2690
2691 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2692 if (bdevvp(dev, &vp))
2693 panic("RAID can't alloc vnode");
2694
2695 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2696
2697 if (error) {
2698 /* "Who cares." Continue looking
2699 for something that exists*/
2700 vput(vp);
2701 continue;
2702 }
2703
2704 /* Ok, the disk exists. Go get the disklabel. */
2705 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2706 FREAD, NOCRED, 0);
2707 if (error) {
2708 /*
2709 * XXX can't happen - open() would
2710 * have errored out (or faked up one)
2711 */
2712 printf("can't get label for dev %s%c (%d)!?!?\n",
2713 dv->dv_xname, 'a' + RAW_PART, error);
2714 }
2715
2716 /* don't need this any more. We'll allocate it again
2717 a little later if we really do... */
2718 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2719 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2720 vput(vp);
2721
2722 for (i=0; i < label.d_npartitions; i++) {
2723 /* We only support partitions marked as RAID */
2724 if (label.d_partitions[i].p_fstype != FS_RAID)
2725 continue;
2726
2727 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2728 if (bdevvp(dev, &vp))
2729 panic("RAID can't alloc vnode");
2730
2731 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2732 if (error) {
2733 /* Whatever... */
2734 vput(vp);
2735 continue;
2736 }
2737
2738 good_one = 0;
2739
2740 clabel = (RF_ComponentLabel_t *)
2741 malloc(sizeof(RF_ComponentLabel_t),
2742 M_RAIDFRAME, M_NOWAIT);
2743 if (clabel == NULL) {
2744 /* XXX CLEANUP HERE */
2745 printf("RAID auto config: out of memory!\n");
2746 return(NULL); /* XXX probably should panic? */
2747 }
2748
2749 if (!raidread_component_label(dev, vp, clabel)) {
2750 /* Got the label. Does it look reasonable? */
2751 if (rf_reasonable_label(clabel) &&
2752 (clabel->partitionSize <=
2753 label.d_partitions[i].p_size)) {
2754 #if DEBUG
2755 printf("Component on: %s%c: %d\n",
2756 dv->dv_xname, 'a'+i,
2757 label.d_partitions[i].p_size);
2758 rf_print_component_label(clabel);
2759 #endif
2760 /* if it's reasonable, add it,
2761 else ignore it. */
2762 ac = (RF_AutoConfig_t *)
2763 malloc(sizeof(RF_AutoConfig_t),
2764 M_RAIDFRAME,
2765 M_NOWAIT);
2766 if (ac == NULL) {
2767 /* XXX should panic?? */
2768 return(NULL);
2769 }
2770
2771 sprintf(ac->devname, "%s%c",
2772 dv->dv_xname, 'a'+i);
2773 ac->dev = dev;
2774 ac->vp = vp;
2775 ac->clabel = clabel;
2776 ac->next = ac_list;
2777 ac_list = ac;
2778 good_one = 1;
2779 }
2780 }
2781 if (!good_one) {
2782 /* cleanup */
2783 free(clabel, M_RAIDFRAME);
2784 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2785 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2786 vput(vp);
2787 }
2788 }
2789 }
2790 }
2791 return(ac_list);
2792 }
2793
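/*
 * Return 1 if a component label passes basic sanity checks: a known
 * version, a sane clean flag, and row/column/size fields that are in
 * range.  Return 0 otherwise.
 */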
2794 static int
2795 rf_reasonable_label(clabel)
2796 RF_ComponentLabel_t *clabel;
2797 {
2798
2799 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2800 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2801 ((clabel->clean == RF_RAID_CLEAN) ||
2802 (clabel->clean == RF_RAID_DIRTY)) &&
2803 clabel->row >=0 &&
2804 clabel->column >= 0 &&
2805 clabel->num_rows > 0 &&
2806 clabel->num_columns > 0 &&
2807 clabel->row < clabel->num_rows &&
2808 clabel->column < clabel->num_columns &&
2809 clabel->blockSize > 0 &&
2810 clabel->numBlocks > 0) {
2811 /* label looks reasonable enough... */
2812 return(1);
2813 }
2814 return(0);
2815 }
2816
2817
2818 void
2819 rf_print_component_label(clabel)
2820 RF_ComponentLabel_t *clabel;
2821 {
2822 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2823 clabel->row, clabel->column,
2824 clabel->num_rows, clabel->num_columns);
2825 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2826 clabel->version, clabel->serial_number,
2827 clabel->mod_counter);
2828 printf(" Clean: %s Status: %d\n",
2829 clabel->clean ? "Yes" : "No", clabel->status );
2830 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2831 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2832 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2833 (char) clabel->parityConfig, clabel->blockSize,
2834 clabel->numBlocks);
2835 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2836 printf(" Contains root partition: %s\n",
2837 clabel->root_partition ? "Yes" : "No" );
2838 printf(" Last configured as: raid%d\n", clabel->last_unit );
2839 #if 0
2840 printf(" Config order: %d\n", clabel->config_order);
2841 #endif
2842
2843 }
2844
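/*
 * Sort the components on the AutoConfig list into configuration sets.
 * Components whose labels match (as determined by rf_does_it_fit())
 * end up on the same RF_ConfigSet_t.
 */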
2845 RF_ConfigSet_t *
2846 rf_create_auto_sets(ac_list)
2847 RF_AutoConfig_t *ac_list;
2848 {
2849 RF_AutoConfig_t *ac;
2850 RF_ConfigSet_t *config_sets;
2851 RF_ConfigSet_t *cset;
2852 RF_AutoConfig_t *ac_next;
2853
2854
2855 config_sets = NULL;
2856
2857 /* Go through the AutoConfig list, and figure out which components
2858 belong to what sets. */
2859 ac = ac_list;
2860 while(ac!=NULL) {
2861 /* we're going to putz with ac->next, so save it here
2862 for use at the end of the loop */
2863 ac_next = ac->next;
2864
2865 if (config_sets == NULL) {
2866 /* will need at least this one... */
2867 config_sets = (RF_ConfigSet_t *)
2868 malloc(sizeof(RF_ConfigSet_t),
2869 M_RAIDFRAME, M_NOWAIT);
2870 if (config_sets == NULL) {
2871 panic("rf_create_auto_sets: No memory!\n");
2872 }
2873 /* this one is easy :) */
2874 config_sets->ac = ac;
2875 config_sets->next = NULL;
2876 config_sets->rootable = 0;
2877 ac->next = NULL;
2878 } else {
2879 /* which set does this component fit into? */
2880 cset = config_sets;
2881 while(cset!=NULL) {
2882 if (rf_does_it_fit(cset, ac)) {
2883 /* looks like it matches... */
2884 ac->next = cset->ac;
2885 cset->ac = ac;
2886 break;
2887 }
2888 cset = cset->next;
2889 }
2890 if (cset==NULL) {
2891 /* didn't find a match above... new set..*/
2892 cset = (RF_ConfigSet_t *)
2893 malloc(sizeof(RF_ConfigSet_t),
2894 M_RAIDFRAME, M_NOWAIT);
2895 if (cset == NULL) {
2896 panic("rf_create_auto_sets: No memory!\n");
2897 }
2898 cset->ac = ac;
2899 ac->next = NULL;
2900 cset->next = config_sets;
2901 cset->rootable = 0;
2902 config_sets = cset;
2903 }
2904 }
2905 ac = ac_next;
2906 }
2907
2908
2909 return(config_sets);
2910 }
2911
2912 static int
2913 rf_does_it_fit(cset, ac)
2914 RF_ConfigSet_t *cset;
2915 RF_AutoConfig_t *ac;
2916 {
2917 RF_ComponentLabel_t *clabel1, *clabel2;
2918
2919 /* If this one matches the *first* one in the set, that's good
2920 enough, since the other members of the set would have been
2921 through here too... */
2922 /* note that we are not checking partitionSize here..
2923
2924 Note that we are also not checking the mod_counters here.
	   If everything else matches except the mod_counter, that's
2926 good enough for this test. We will deal with the mod_counters
2927 a little later in the autoconfiguration process.
2928
2929 (clabel1->mod_counter == clabel2->mod_counter) &&
2930
2931 The reason we don't check for this is that failed disks
2932 will have lower modification counts. If those disks are
2933 not added to the set they used to belong to, then they will
2934 form their own set, which may result in 2 different sets,
2935 for example, competing to be configured at raid0, and
2936 perhaps competing to be the root filesystem set. If the
2937 wrong ones get configured, or both attempt to become /,
	   weird behaviour and/or serious lossage will occur.  Thus we
2939 need to bring them into the fold here, and kick them out at
2940 a later point.
2941
2942 */
2943
2944 clabel1 = cset->ac->clabel;
2945 clabel2 = ac->clabel;
2946 if ((clabel1->version == clabel2->version) &&
2947 (clabel1->serial_number == clabel2->serial_number) &&
2948 (clabel1->num_rows == clabel2->num_rows) &&
2949 (clabel1->num_columns == clabel2->num_columns) &&
2950 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2951 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2952 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2953 (clabel1->parityConfig == clabel2->parityConfig) &&
2954 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2955 (clabel1->blockSize == clabel2->blockSize) &&
2956 (clabel1->numBlocks == clabel2->numBlocks) &&
2957 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2958 (clabel1->root_partition == clabel2->root_partition) &&
2959 (clabel1->last_unit == clabel2->last_unit) &&
2960 (clabel1->config_order == clabel2->config_order)) {
		/* if it gets here, it almost *has* to be a match */
2962 } else {
2963 /* it's not consistent with somebody in the set..
2964 punt */
2965 return(0);
2966 }
2967 /* all was fine.. it must fit... */
2968 return(1);
2969 }
2970
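/*
 * Check whether a configuration set has enough live components to be
 * configured: returns 1 if so, 0 if too many components are missing
 * for the parity configuration in use.
 */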
2971 int
2972 rf_have_enough_components(cset)
2973 RF_ConfigSet_t *cset;
2974 {
2975 RF_AutoConfig_t *ac;
2976 RF_AutoConfig_t *auto_config;
2977 RF_ComponentLabel_t *clabel;
2978 int r,c;
2979 int num_rows;
2980 int num_cols;
2981 int num_missing;
2982 int mod_counter;
2983 int mod_counter_found;
2984 int even_pair_failed;
2985 char parity_type;
2986
2987
2988 /* check to see that we have enough 'live' components
2989 of this set. If so, we can configure it if necessary */
2990
2991 num_rows = cset->ac->clabel->num_rows;
2992 num_cols = cset->ac->clabel->num_columns;
2993 parity_type = cset->ac->clabel->parityConfig;
2994
2995 /* XXX Check for duplicate components!?!?!? */
2996
2997 /* Determine what the mod_counter is supposed to be for this set. */
2998
2999 mod_counter_found = 0;
3000 ac = cset->ac;
3001 while(ac!=NULL) {
3002 if (mod_counter_found==0) {
3003 mod_counter = ac->clabel->mod_counter;
3004 mod_counter_found = 1;
3005 } else {
3006 if (ac->clabel->mod_counter > mod_counter) {
3007 mod_counter = ac->clabel->mod_counter;
3008 }
3009 }
3010 ac = ac->next;
3011 }
3012
3013 num_missing = 0;
3014 auto_config = cset->ac;
3015
3016 for(r=0; r<num_rows; r++) {
3017 even_pair_failed = 0;
3018 for(c=0; c<num_cols; c++) {
3019 ac = auto_config;
3020 while(ac!=NULL) {
3021 if ((ac->clabel->row == r) &&
3022 (ac->clabel->column == c) &&
3023 (ac->clabel->mod_counter == mod_counter)) {
3024 /* it's this one... */
3025 #if DEBUG
3026 printf("Found: %s at %d,%d\n",
3027 ac->devname,r,c);
3028 #endif
3029 break;
3030 }
3031 ac=ac->next;
3032 }
3033 if (ac==NULL) {
3034 /* Didn't find one here! */
3035 /* special case for RAID 1, especially
3036 where there are more than 2
3037 components (where RAIDframe treats
3038 things a little differently :( ) */
3039 if (parity_type == '1') {
3040 if (c%2 == 0) { /* even component */
3041 even_pair_failed = 1;
3042 } else { /* odd component. If
3043 we're failed, and
3044 so is the even
3045 component, it's
3046 "Good Night, Charlie" */
3047 if (even_pair_failed == 1) {
3048 return(0);
3049 }
3050 }
3051 } else {
3052 /* normal accounting */
3053 num_missing++;
3054 }
3055 }
3056 if ((parity_type == '1') && (c%2 == 1)) {
				/* Just finished the odd component of a
				   pair without bailing.  Reset the
				   even_pair_failed flag, and go on to
				   the next pair.... */
3060 even_pair_failed = 0;
3061 }
3062 }
3063 }
3064
3065 clabel = cset->ac->clabel;
3066
3067 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3068 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3069 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3070 /* XXX this needs to be made *much* more general */
3071 /* Too many failures */
3072 return(0);
3073 }
3074 /* otherwise, all is well, and we've got enough to take a kick
3075 at autoconfiguring this set */
3076 return(1);
3077 }
3078
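/*
 * Fill in an RF_Config_t from the component labels on an AutoConfig
 * list, so that the set can be configured as if the configuration had
 * been supplied by the user.
 */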
3079 void
3080 rf_create_configuration(ac,config,raidPtr)
3081 RF_AutoConfig_t *ac;
3082 RF_Config_t *config;
3083 RF_Raid_t *raidPtr;
3084 {
3085 RF_ComponentLabel_t *clabel;
3086 int i;
3087
3088 clabel = ac->clabel;
3089
3090 /* 1. Fill in the common stuff */
3091 config->numRow = clabel->num_rows;
3092 config->numCol = clabel->num_columns;
3093 config->numSpare = 0; /* XXX should this be set here? */
3094 config->sectPerSU = clabel->sectPerSU;
3095 config->SUsPerPU = clabel->SUsPerPU;
3096 config->SUsPerRU = clabel->SUsPerRU;
3097 config->parityConfig = clabel->parityConfig;
3098 /* XXX... */
3099 strcpy(config->diskQueueType,"fifo");
3100 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3101 config->layoutSpecificSize = 0; /* XXX ?? */
3102
3103 while(ac!=NULL) {
3104 /* row/col values will be in range due to the checks
3105 in reasonable_label() */
3106 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3107 ac->devname);
3108 ac = ac->next;
3109 }
3110
3111 for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = '\0';
3113 }
3114 }
3115
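/*
 * Set the autoconfigure flag in the component labels of all optimal
 * components and return the new value.
 */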
3116 int
3117 rf_set_autoconfig(raidPtr, new_value)
3118 RF_Raid_t *raidPtr;
3119 int new_value;
3120 {
3121 RF_ComponentLabel_t clabel;
3122 struct vnode *vp;
3123 dev_t dev;
3124 int row, column;
3125
3126 raidPtr->autoconfigure = new_value;
3127 for(row=0; row<raidPtr->numRow; row++) {
3128 for(column=0; column<raidPtr->numCol; column++) {
3129 if (raidPtr->Disks[row][column].status ==
3130 rf_ds_optimal) {
3131 dev = raidPtr->Disks[row][column].dev;
3132 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3133 raidread_component_label(dev, vp, &clabel);
3134 clabel.autoconfigure = new_value;
3135 raidwrite_component_label(dev, vp, &clabel);
3136 }
3137 }
3138 }
3139 return(new_value);
3140 }
3141
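/*
 * Set the root_partition flag in the component labels of all optimal
 * components and return the new value.
 */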
3142 int
3143 rf_set_rootpartition(raidPtr, new_value)
3144 RF_Raid_t *raidPtr;
3145 int new_value;
3146 {
3147 RF_ComponentLabel_t clabel;
3148 struct vnode *vp;
3149 dev_t dev;
3150 int row, column;
3151
3152 raidPtr->root_partition = new_value;
3153 for(row=0; row<raidPtr->numRow; row++) {
3154 for(column=0; column<raidPtr->numCol; column++) {
3155 if (raidPtr->Disks[row][column].status ==
3156 rf_ds_optimal) {
3157 dev = raidPtr->Disks[row][column].dev;
3158 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3159 raidread_component_label(dev, vp, &clabel);
3160 clabel.root_partition = new_value;
3161 raidwrite_component_label(dev, vp, &clabel);
3162 }
3163 }
3164 }
3165 return(new_value);
3166 }
3167
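/*
 * Close and release the vnodes of all components in a configuration set.
 */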
3168 void
3169 rf_release_all_vps(cset)
3170 RF_ConfigSet_t *cset;
3171 {
3172 RF_AutoConfig_t *ac;
3173
3174 ac = cset->ac;
3175 while(ac!=NULL) {
3176 /* Close the vp, and give it back */
3177 if (ac->vp) {
3178 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3179 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3180 vput(ac->vp);
3181 ac->vp = NULL;
3182 }
3183 ac = ac->next;
3184 }
3185 }
3186
3187
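/*
 * Free the component labels, the AutoConfig entries, and the
 * configuration set itself.
 */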
3188 void
3189 rf_cleanup_config_set(cset)
3190 RF_ConfigSet_t *cset;
3191 {
3192 RF_AutoConfig_t *ac;
3193 RF_AutoConfig_t *next_ac;
3194
3195 ac = cset->ac;
3196 while(ac!=NULL) {
3197 next_ac = ac->next;
3198 /* nuke the label */
3199 free(ac->clabel, M_RAIDFRAME);
3200 /* cleanup the config structure */
3201 free(ac, M_RAIDFRAME);
3202 /* "next.." */
3203 ac = next_ac;
3204 }
3205 /* and, finally, nuke the config set */
3206 free(cset, M_RAIDFRAME);
3207 }
3208
3209
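/*
 * Initialize a component label from the current state of the RAID set.
 * The clean flag is left as RF_RAID_DIRTY; raidmarkclean() is what
 * sets it.
 */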
3210 void
3211 raid_init_component_label(raidPtr, clabel)
3212 RF_Raid_t *raidPtr;
3213 RF_ComponentLabel_t *clabel;
3214 {
3215 /* current version number */
3216 clabel->version = RF_COMPONENT_LABEL_VERSION;
3217 clabel->serial_number = raidPtr->serial_number;
3218 clabel->mod_counter = raidPtr->mod_counter;
3219 clabel->num_rows = raidPtr->numRow;
3220 clabel->num_columns = raidPtr->numCol;
3221 clabel->clean = RF_RAID_DIRTY; /* not clean */
3222 clabel->status = rf_ds_optimal; /* "It's good!" */
3223
3224 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3225 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3226 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3227
3228 clabel->blockSize = raidPtr->bytesPerSector;
3229 clabel->numBlocks = raidPtr->sectorsPerDisk;
3230
3231 /* XXX not portable */
3232 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3233 clabel->maxOutstanding = raidPtr->maxOutstanding;
3234 clabel->autoconfigure = raidPtr->autoconfigure;
3235 clabel->root_partition = raidPtr->root_partition;
3236 clabel->last_unit = raidPtr->raidid;
3237 clabel->config_order = raidPtr->config_order;
3238 }
3239
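/*
 * Configure one auto-detected configuration set: pick a free RAID unit
 * (preferring the unit the set was last configured as), build an
 * RF_Config_t from the component labels, and configure it.  The unit
 * number chosen is returned via *unit.
 */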
3240 int
3241 rf_auto_config_set(cset,unit)
3242 RF_ConfigSet_t *cset;
3243 int *unit;
3244 {
3245 RF_Raid_t *raidPtr;
3246 RF_Config_t *config;
3247 int raidID;
3248 int retcode;
3249
3250 printf("RAID autoconfigure\n");
3251
3252 retcode = 0;
3253 *unit = -1;
3254
3255 /* 1. Create a config structure */
3256
3257 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3258 M_RAIDFRAME,
3259 M_NOWAIT);
3260 if (config==NULL) {
3261 printf("Out of mem!?!?\n");
3262 /* XXX do something more intelligent here. */
3263 return(1);
3264 }
3265
3266 memset(config, 0, sizeof(RF_Config_t));
3267
3268 /* XXX raidID needs to be set correctly.. */
3269
3270 /*
3271 2. Figure out what RAID ID this one is supposed to live at
3272 See if we can get the same RAID dev that it was configured
3273 on last time..
3274 */
3275
3276 raidID = cset->ac->clabel->last_unit;
3277 if ((raidID < 0) || (raidID >= numraid)) {
3278 /* let's not wander off into lala land. */
3279 raidID = numraid - 1;
3280 }
3281 if (raidPtrs[raidID]->valid != 0) {
3282
3283 /*
3284 Nope... Go looking for an alternative...
3285 Start high so we don't immediately use raid0 if that's
3286 not taken.
3287 */
3288
		for(raidID = numraid - 1; raidID >= 0; raidID--) {
3290 if (raidPtrs[raidID]->valid == 0) {
3291 /* can use this one! */
3292 break;
3293 }
3294 }
3295 }
3296
	if (raidID < 0) {
		/* punt... */
		printf("Unable to auto configure this set!\n");
		printf("(Out of RAID devs!)\n");
		free(config, M_RAIDFRAME);
		return(1);
	}
3303 printf("Configuring raid%d:\n",raidID);
3304 raidPtr = raidPtrs[raidID];
3305
3306 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3307 raidPtr->raidid = raidID;
3308 raidPtr->openings = RAIDOUTSTANDING;
3309
3310 /* 3. Build the configuration structure */
3311 rf_create_configuration(cset->ac, config, raidPtr);
3312
3313 /* 4. Do the configuration */
3314 retcode = rf_Configure(raidPtr, config, cset->ac);
3315
3316 if (retcode == 0) {
3317
3318 raidinit(raidPtrs[raidID]);
3319
3320 rf_markalldirty(raidPtrs[raidID]);
3321 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3322 if (cset->ac->clabel->root_partition==1) {
3323 /* everything configured just fine. Make a note
3324 that this set is eligible to be root. */
3325 cset->rootable = 1;
3326 /* XXX do this here? */
3327 raidPtrs[raidID]->root_partition = 1;
3328 }
3329 }
3330
3331 /* 5. Cleanup */
3332 free(config, M_RAIDFRAME);
3333
3334 *unit = raidID;
3335 return(retcode);
3336 }
3337
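/*
 * Account for the completion of the I/O described by this access
 * descriptor in the kernel's disk statistics.
 */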
3338 void
3339 rf_disk_unbusy(desc)
3340 RF_RaidAccessDesc_t *desc;
3341 {
3342 struct buf *bp;
3343
3344 bp = (struct buf *)desc->bp;
3345 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3346 (bp->b_bcount - bp->b_resid));
3347 }
3348