1 /*	$NetBSD: rf_netbsdkintf.c,v 1.105 2001/04/05 02:48:51 oster Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include "raid.h"
139 #include "opt_raid_autoconfig.h"
140 #include "rf_raid.h"
141 #include "rf_raidframe.h"
142 #include "rf_copyback.h"
143 #include "rf_dag.h"
144 #include "rf_dagflags.h"
145 #include "rf_desc.h"
146 #include "rf_diskqueue.h"
147 #include "rf_acctrace.h"
148 #include "rf_etimer.h"
149 #include "rf_general.h"
150 #include "rf_debugMem.h"
151 #include "rf_kintf.h"
152 #include "rf_options.h"
153 #include "rf_driver.h"
154 #include "rf_parityscan.h"
155 #include "rf_debugprint.h"
156 #include "rf_threadstuff.h"
157 #include "rf_configure.h"
158
159 int rf_kdebug_level = 0;
160
161 #ifdef DEBUG
162 #define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
163 #else	/* DEBUG */
164 #define db1_printf(a) do { } while (0)
165 #endif /* DEBUG */
166
167 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
168
169 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
170
171 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
172 * spare table */
173 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
174 * installation process */
175
176 /* prototypes */
177 static void KernelWakeupFunc(struct buf * bp);
178 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
179 dev_t dev, RF_SectorNum_t startSect,
180 RF_SectorCount_t numSect, caddr_t buf,
181 void (*cbFunc) (struct buf *), void *cbArg,
182 int logBytesPerSector, struct proc * b_proc);
183 static void raidinit(RF_Raid_t *);
184
185 void raidattach(int);
186 int raidsize(dev_t);
187 int raidopen(dev_t, int, int, struct proc *);
188 int raidclose(dev_t, int, int, struct proc *);
189 int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
190 int raidwrite(dev_t, struct uio *, int);
191 int raidread(dev_t, struct uio *, int);
192 void raidstrategy(struct buf *);
193 int raiddump(dev_t, daddr_t, caddr_t, size_t);
194
195 /*
196 * Pilfered from ccd.c
197 */
198
199 struct raidbuf {
200 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
201 struct buf *rf_obp; /* ptr. to original I/O buf */
202 int rf_flags; /* misc. flags */
203 RF_DiskQueueData_t *req;/* the request that this was part of.. */
204 };
205
206
207 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
208 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
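
/*
 * Illustrative sketch only, not driver code (the helper name is
 * hypothetical): the reason rf_buf MUST BE FIRST in struct raidbuf above
 * is that the disk driver's completion callback hands back a plain
 * "struct buf *", and the raidbuf is recovered simply by casting that
 * pointer, exactly as KernelWakeupFunc() does later in this file.
 */
#if 0
static struct raidbuf *
raidbuf_from_buf(struct buf *vbp)
{
	/* valid only because rf_buf is the first member of struct raidbuf,
	   so a raidbuf and its embedded buf share the same address */
	return ((struct raidbuf *) vbp);
}
#endif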
209
210 /* XXX Not sure if the following should be replacing the raidPtrs above,
211 or if it should be used in conjunction with that...
212 */
213
214 struct raid_softc {
215 int sc_flags; /* flags */
216 int sc_cflags; /* configuration flags */
217 size_t sc_size; /* size of the raid device */
218 char sc_xname[20]; /* XXX external name */
219 struct disk sc_dkdev; /* generic disk device info */
220 struct pool sc_cbufpool; /* component buffer pool */
221 struct buf_queue buf_queue; /* used for the device queue */
222 };
223 /* sc_flags */
224 #define RAIDF_INITED 0x01 /* unit has been initialized */
225 #define RAIDF_WLABEL 0x02 /* label area is writable */
226 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
227 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
228 #define RAIDF_LOCKED 0x80 /* unit is locked */
229
230 #define raidunit(x) DISKUNIT(x)
231 int numraid = 0;
232
233 /*
234 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
235 * Be aware that large numbers can allow the driver to consume a lot of
236 * kernel memory, especially on writes, and in degraded mode reads.
237 *
238 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
239 * a single 64K write will typically require 64K for the old data,
240 * 64K for the old parity, and 64K for the new parity, for a total
241 * of 192K (if the parity buffer is not re-used immediately).
242  * Even if it is used immediately, that's still 128K, which when multiplied
243 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
244 *
245 * Now in degraded mode, for example, a 64K read on the above setup may
246 * require data reconstruction, which will require *all* of the 4 remaining
247 * disks to participate -- 4 * 32K/disk == 128K again.
248 */
249
250 #ifndef RAIDOUTSTANDING
251 #define RAIDOUTSTANDING 6
252 #endif
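
/*
 * Illustrative sketch only, not driver code (name and prototype are
 * hypothetical): a rough worst-case estimate of the buffer memory implied
 * by the comment above, charging each outstanding small write for its old
 * data, old parity and new parity buffers plus the incoming data itself.
 */
#if 0
static size_t
rf_worst_case_write_memory(size_t write_bytes, int outstanding)
{
	/* old data + old parity + new parity, plus the write itself */
	return ((size_t) outstanding * (3 * write_bytes + write_bytes));
}
/* e.g. rf_worst_case_write_memory(64 * 1024, RAIDOUTSTANDING) */
#endif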
253
254 #define RAIDLABELDEV(dev) \
255 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
256
257 /* declared here, and made public, for the benefit of KVM stuff.. */
258 struct raid_softc *raid_softc;
259
260 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
261 struct disklabel *);
262 static void raidgetdisklabel(dev_t);
263 static void raidmakedisklabel(struct raid_softc *);
264
265 static int raidlock(struct raid_softc *);
266 static void raidunlock(struct raid_softc *);
267
268 static void rf_markalldirty(RF_Raid_t *);
269 void rf_mountroot_hook(struct device *);
270
271 struct device *raidrootdev;
272
273 void rf_ReconThread(struct rf_recon_req *);
274 /* XXX what I want is: */
275 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
276 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
277 void rf_CopybackThread(RF_Raid_t *raidPtr);
278 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
279 void rf_buildroothack(void *);
280
281 RF_AutoConfig_t *rf_find_raid_components(void);
282 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
283 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
284 static int rf_reasonable_label(RF_ComponentLabel_t *);
285 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
286 int rf_set_autoconfig(RF_Raid_t *, int);
287 int rf_set_rootpartition(RF_Raid_t *, int);
288 void rf_release_all_vps(RF_ConfigSet_t *);
289 void rf_cleanup_config_set(RF_ConfigSet_t *);
290 int rf_have_enough_components(RF_ConfigSet_t *);
291 int rf_auto_config_set(RF_ConfigSet_t *, int *);
292
293 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
294 allow autoconfig to take place.
295 Note that this is overridden by having
296 RAID_AUTOCONFIG as an option in the
297 kernel config file. */
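
/*
 * For example (illustrative): autoconfiguration is normally enabled at
 * build time by adding
 *
 *	options 	RAID_AUTOCONFIG
 *
 * to the kernel configuration file; config(8) then emits the symbol into
 * "opt_raid_autoconfig.h" (included above), and the #if RAID_AUTOCONFIG
 * test in raidattach() below turns raidautoconfig on.
 */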
298
299 void
300 raidattach(num)
301 int num;
302 {
303 int raidID;
304 int i, rc;
305 RF_AutoConfig_t *ac_list; /* autoconfig list */
306 RF_ConfigSet_t *config_sets;
307
308 #ifdef DEBUG
309 printf("raidattach: Asked for %d units\n", num);
310 #endif
311
312 if (num <= 0) {
313 #ifdef DIAGNOSTIC
314 panic("raidattach: count <= 0");
315 #endif
316 return;
317 }
318 /* This is where all the initialization stuff gets done. */
319
320 numraid = num;
321
322 /* Make some space for requested number of units... */
323
324 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
325 if (raidPtrs == NULL) {
326 panic("raidPtrs is NULL!!\n");
327 }
328
329 rc = rf_mutex_init(&rf_sparet_wait_mutex);
330 if (rc) {
331 RF_PANIC();
332 }
333
334 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
335
336 for (i = 0; i < num; i++)
337 raidPtrs[i] = NULL;
338 rc = rf_BootRaidframe();
339 if (rc == 0)
340 printf("Kernelized RAIDframe activated\n");
341 else
342 panic("Serious error booting RAID!!\n");
343
344 	/* put together some data structures like the CCD device does.. This
345 	 * lets us lock the device and what-not when it gets opened. */
346
347 raid_softc = (struct raid_softc *)
348 malloc(num * sizeof(struct raid_softc),
349 M_RAIDFRAME, M_NOWAIT);
350 if (raid_softc == NULL) {
351 printf("WARNING: no memory for RAIDframe driver\n");
352 return;
353 }
354
355 bzero(raid_softc, num * sizeof(struct raid_softc));
356
357 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
358 M_RAIDFRAME, M_NOWAIT);
359 if (raidrootdev == NULL) {
360 panic("No memory for RAIDframe driver!!?!?!\n");
361 }
362
363 for (raidID = 0; raidID < num; raidID++) {
364 BUFQ_INIT(&raid_softc[raidID].buf_queue);
365
366 raidrootdev[raidID].dv_class = DV_DISK;
367 raidrootdev[raidID].dv_cfdata = NULL;
368 raidrootdev[raidID].dv_unit = raidID;
369 raidrootdev[raidID].dv_parent = NULL;
370 raidrootdev[raidID].dv_flags = 0;
371 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
372
373 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
374 (RF_Raid_t *));
375 if (raidPtrs[raidID] == NULL) {
376 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
377 numraid = raidID;
378 return;
379 }
380 }
381
382 #if RAID_AUTOCONFIG
383 raidautoconfig = 1;
384 #endif
385
386 if (raidautoconfig) {
387 /* 1. locate all RAID components on the system */
388
389 #if DEBUG
390 printf("Searching for raid components...\n");
391 #endif
392 ac_list = rf_find_raid_components();
393
394 /* 2. sort them into their respective sets */
395
396 config_sets = rf_create_auto_sets(ac_list);
397
398 /* 3. evaluate each set and configure the valid ones
399 This gets done in rf_buildroothack() */
400
401 /* schedule the creation of the thread to do the
402 "/ on RAID" stuff */
403
404 kthread_create(rf_buildroothack,config_sets);
405
406 #if 0
407 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
408 #endif
409 }
410
411 }
412
413 void
414 rf_buildroothack(arg)
415 void *arg;
416 {
417 RF_ConfigSet_t *config_sets = arg;
418 RF_ConfigSet_t *cset;
419 RF_ConfigSet_t *next_cset;
420 int retcode;
421 int raidID;
422 int rootID;
423 int num_root;
424
425 rootID = 0;
426 num_root = 0;
427 cset = config_sets;
428 while(cset != NULL ) {
429 next_cset = cset->next;
430 if (rf_have_enough_components(cset) &&
431 cset->ac->clabel->autoconfigure==1) {
432 retcode = rf_auto_config_set(cset,&raidID);
433 if (!retcode) {
434 if (cset->rootable) {
435 rootID = raidID;
436 num_root++;
437 }
438 } else {
439 /* The autoconfig didn't work :( */
440 #if DEBUG
441 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
442 #endif
443 rf_release_all_vps(cset);
444 }
445 } else {
446 /* we're not autoconfiguring this set...
447 release the associated resources */
448 rf_release_all_vps(cset);
449 }
450 /* cleanup */
451 rf_cleanup_config_set(cset);
452 cset = next_cset;
453 }
454 if (boothowto & RB_ASKNAME) {
455 /* We don't auto-config... */
456 } else {
457 /* They didn't ask, and we found something bootable... */
458
459 if (num_root == 1) {
460 booted_device = &raidrootdev[rootID];
461 } else if (num_root > 1) {
462 /* we can't guess.. require the user to answer... */
463 boothowto |= RB_ASKNAME;
464 }
465 }
466 }
467
468
469 int
470 raidsize(dev)
471 dev_t dev;
472 {
473 struct raid_softc *rs;
474 struct disklabel *lp;
475 int part, unit, omask, size;
476
477 unit = raidunit(dev);
478 if (unit >= numraid)
479 return (-1);
480 rs = &raid_softc[unit];
481
482 if ((rs->sc_flags & RAIDF_INITED) == 0)
483 return (-1);
484
485 part = DISKPART(dev);
486 omask = rs->sc_dkdev.dk_openmask & (1 << part);
487 lp = rs->sc_dkdev.dk_label;
488
489 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
490 return (-1);
491
492 if (lp->d_partitions[part].p_fstype != FS_SWAP)
493 size = -1;
494 else
495 size = lp->d_partitions[part].p_size *
496 (lp->d_secsize / DEV_BSIZE);
497
498 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
499 return (-1);
500
501 return (size);
502
503 }
504
505 int
506 raiddump(dev, blkno, va, size)
507 dev_t dev;
508 daddr_t blkno;
509 caddr_t va;
510 size_t size;
511 {
512 /* Not implemented. */
513 return ENXIO;
514 }
515 /* ARGSUSED */
516 int
517 raidopen(dev, flags, fmt, p)
518 dev_t dev;
519 int flags, fmt;
520 struct proc *p;
521 {
522 int unit = raidunit(dev);
523 struct raid_softc *rs;
524 struct disklabel *lp;
525 int part, pmask;
526 int error = 0;
527
528 if (unit >= numraid)
529 return (ENXIO);
530 rs = &raid_softc[unit];
531
532 if ((error = raidlock(rs)) != 0)
533 return (error);
534 lp = rs->sc_dkdev.dk_label;
535
536 part = DISKPART(dev);
537 pmask = (1 << part);
538
539 db1_printf(("Opening raid device number: %d partition: %d\n",
540 unit, part));
541
542
543 if ((rs->sc_flags & RAIDF_INITED) &&
544 (rs->sc_dkdev.dk_openmask == 0))
545 raidgetdisklabel(dev);
546
547 /* make sure that this partition exists */
548
549 if (part != RAW_PART) {
550 db1_printf(("Not a raw partition..\n"));
551 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
552 ((part >= lp->d_npartitions) ||
553 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
554 error = ENXIO;
555 raidunlock(rs);
556 db1_printf(("Bailing out...\n"));
557 return (error);
558 }
559 }
560 /* Prevent this unit from being unconfigured while open. */
561 switch (fmt) {
562 case S_IFCHR:
563 rs->sc_dkdev.dk_copenmask |= pmask;
564 break;
565
566 case S_IFBLK:
567 rs->sc_dkdev.dk_bopenmask |= pmask;
568 break;
569 }
570
571 if ((rs->sc_dkdev.dk_openmask == 0) &&
572 ((rs->sc_flags & RAIDF_INITED) != 0)) {
573 /* First one... mark things as dirty... Note that we *MUST*
574 have done a configure before this. I DO NOT WANT TO BE
575 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
576 THAT THEY BELONG TOGETHER!!!!! */
577 /* XXX should check to see if we're only open for reading
578 here... If so, we needn't do this, but then need some
579 other way of keeping track of what's happened.. */
580
581 rf_markalldirty( raidPtrs[unit] );
582 }
583
584
585 rs->sc_dkdev.dk_openmask =
586 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
587
588 raidunlock(rs);
589
590 return (error);
591
592
593 }
594 /* ARGSUSED */
595 int
596 raidclose(dev, flags, fmt, p)
597 dev_t dev;
598 int flags, fmt;
599 struct proc *p;
600 {
601 int unit = raidunit(dev);
602 struct raid_softc *rs;
603 int error = 0;
604 int part;
605
606 if (unit >= numraid)
607 return (ENXIO);
608 rs = &raid_softc[unit];
609
610 if ((error = raidlock(rs)) != 0)
611 return (error);
612
613 part = DISKPART(dev);
614
615 /* ...that much closer to allowing unconfiguration... */
616 switch (fmt) {
617 case S_IFCHR:
618 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
619 break;
620
621 case S_IFBLK:
622 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
623 break;
624 }
625 rs->sc_dkdev.dk_openmask =
626 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
627
628 if ((rs->sc_dkdev.dk_openmask == 0) &&
629 ((rs->sc_flags & RAIDF_INITED) != 0)) {
630 		/* Last one... the device is not unconfigured yet, so
631 		   mark things as clean.  (If RAIDF_INITED is not set,
632 		   device shutdown has already taken care of setting
633 		   the clean bits.) */
634 #if 0
635 printf("Last one on raid%d. Updating status.\n",unit);
636 #endif
637 rf_update_component_labels(raidPtrs[unit],
638 RF_FINAL_COMPONENT_UPDATE);
639 }
640
641 raidunlock(rs);
642 return (0);
643
644 }
645
646 void
647 raidstrategy(bp)
648 struct buf *bp;
649 {
650 int s;
651
652 unsigned int raidID = raidunit(bp->b_dev);
653 RF_Raid_t *raidPtr;
654 struct raid_softc *rs = &raid_softc[raidID];
655 struct disklabel *lp;
656 int wlabel;
657
658 	if (raidID >= numraid || !raidPtrs[raidID]) {
659 		bp->b_error = ENODEV;
660 		bp->b_flags |= B_ERROR;
661 		bp->b_resid = bp->b_bcount;
662 		biodone(bp);
663 		return;
664 	}
665 	if ((rs->sc_flags & RAIDF_INITED) == 0) {
666 		bp->b_error = ENXIO;
667 		bp->b_flags |= B_ERROR;
668 		bp->b_resid = bp->b_bcount;
669 		biodone(bp);
670 		return;
671 	}
672 raidPtr = raidPtrs[raidID];
673 if (!raidPtr->valid) {
674 bp->b_error = ENODEV;
675 bp->b_flags |= B_ERROR;
676 bp->b_resid = bp->b_bcount;
677 biodone(bp);
678 return;
679 }
680 if (bp->b_bcount == 0) {
681 db1_printf(("b_bcount is zero..\n"));
682 biodone(bp);
683 return;
684 }
685 lp = rs->sc_dkdev.dk_label;
686
687 /*
688 * Do bounds checking and adjust transfer. If there's an
689 * error, the bounds check will flag that for us.
690 */
691
692 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
693 if (DISKPART(bp->b_dev) != RAW_PART)
694 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
695 db1_printf(("Bounds check failed!!:%d %d\n",
696 (int) bp->b_blkno, (int) wlabel));
697 biodone(bp);
698 return;
699 }
700 s = splbio();
701
702 bp->b_resid = 0;
703
704 /* stuff it onto our queue */
705 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
706
707 raidstart(raidPtrs[raidID]);
708
709 splx(s);
710 }
711 /* ARGSUSED */
712 int
713 raidread(dev, uio, flags)
714 dev_t dev;
715 struct uio *uio;
716 int flags;
717 {
718 int unit = raidunit(dev);
719 struct raid_softc *rs;
720 int part;
721
722 if (unit >= numraid)
723 return (ENXIO);
724 rs = &raid_softc[unit];
725
726 if ((rs->sc_flags & RAIDF_INITED) == 0)
727 return (ENXIO);
728 part = DISKPART(dev);
729
730 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
731
732 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
733
734 }
735 /* ARGSUSED */
736 int
737 raidwrite(dev, uio, flags)
738 dev_t dev;
739 struct uio *uio;
740 int flags;
741 {
742 int unit = raidunit(dev);
743 struct raid_softc *rs;
744
745 if (unit >= numraid)
746 return (ENXIO);
747 rs = &raid_softc[unit];
748
749 if ((rs->sc_flags & RAIDF_INITED) == 0)
750 return (ENXIO);
751 db1_printf(("raidwrite\n"));
752 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
753
754 }
755
756 int
757 raidioctl(dev, cmd, data, flag, p)
758 dev_t dev;
759 u_long cmd;
760 caddr_t data;
761 int flag;
762 struct proc *p;
763 {
764 int unit = raidunit(dev);
765 int error = 0;
766 int part, pmask;
767 struct raid_softc *rs;
768 RF_Config_t *k_cfg, *u_cfg;
769 RF_Raid_t *raidPtr;
770 RF_RaidDisk_t *diskPtr;
771 RF_AccTotals_t *totals;
772 RF_DeviceConfig_t *d_cfg, **ucfgp;
773 u_char *specific_buf;
774 int retcode = 0;
775 int row;
776 int column;
777 struct rf_recon_req *rrcopy, *rr;
778 RF_ComponentLabel_t *clabel;
779 RF_ComponentLabel_t ci_label;
780 RF_ComponentLabel_t **clabel_ptr;
781 RF_SingleComponent_t *sparePtr,*componentPtr;
782 RF_SingleComponent_t hot_spare;
783 RF_SingleComponent_t component;
784 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
785 int i, j, d;
786 #ifdef __HAVE_OLD_DISKLABEL
787 struct disklabel newlabel;
788 #endif
789
790 if (unit >= numraid)
791 return (ENXIO);
792 rs = &raid_softc[unit];
793 raidPtr = raidPtrs[unit];
794
795 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
796 (int) DISKPART(dev), (int) unit, (int) cmd));
797
798 /* Must be open for writes for these commands... */
799 switch (cmd) {
800 case DIOCSDINFO:
801 case DIOCWDINFO:
802 #ifdef __HAVE_OLD_DISKLABEL
803 case ODIOCWDINFO:
804 case ODIOCSDINFO:
805 #endif
806 case DIOCWLABEL:
807 if ((flag & FWRITE) == 0)
808 return (EBADF);
809 }
810
811 /* Must be initialized for these... */
812 switch (cmd) {
813 case DIOCGDINFO:
814 case DIOCSDINFO:
815 case DIOCWDINFO:
816 #ifdef __HAVE_OLD_DISKLABEL
817 case ODIOCGDINFO:
818 case ODIOCWDINFO:
819 case ODIOCSDINFO:
820 case ODIOCGDEFLABEL:
821 #endif
822 case DIOCGPART:
823 case DIOCWLABEL:
824 case DIOCGDEFLABEL:
825 case RAIDFRAME_SHUTDOWN:
826 case RAIDFRAME_REWRITEPARITY:
827 case RAIDFRAME_GET_INFO:
828 case RAIDFRAME_RESET_ACCTOTALS:
829 case RAIDFRAME_GET_ACCTOTALS:
830 case RAIDFRAME_KEEP_ACCTOTALS:
831 case RAIDFRAME_GET_SIZE:
832 case RAIDFRAME_FAIL_DISK:
833 case RAIDFRAME_COPYBACK:
834 case RAIDFRAME_CHECK_RECON_STATUS:
835 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
836 case RAIDFRAME_GET_COMPONENT_LABEL:
837 case RAIDFRAME_SET_COMPONENT_LABEL:
838 case RAIDFRAME_ADD_HOT_SPARE:
839 case RAIDFRAME_REMOVE_HOT_SPARE:
840 case RAIDFRAME_INIT_LABELS:
841 case RAIDFRAME_REBUILD_IN_PLACE:
842 case RAIDFRAME_CHECK_PARITY:
843 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
844 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
845 case RAIDFRAME_CHECK_COPYBACK_STATUS:
846 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
847 case RAIDFRAME_SET_AUTOCONFIG:
848 case RAIDFRAME_SET_ROOT:
849 case RAIDFRAME_DELETE_COMPONENT:
850 case RAIDFRAME_INCORPORATE_HOT_SPARE:
851 if ((rs->sc_flags & RAIDF_INITED) == 0)
852 return (ENXIO);
853 }
854
855 switch (cmd) {
856
857 /* configure the system */
858 case RAIDFRAME_CONFIGURE:
859
860 if (raidPtr->valid) {
861 /* There is a valid RAID set running on this unit! */
862 printf("raid%d: Device already configured!\n",unit);
863 return(EINVAL);
864 }
865
866 /* copy-in the configuration information */
867 /* data points to a pointer to the configuration structure */
868
869 u_cfg = *((RF_Config_t **) data);
870 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
871 if (k_cfg == NULL) {
872 return (ENOMEM);
873 }
874 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
875 sizeof(RF_Config_t));
876 if (retcode) {
877 RF_Free(k_cfg, sizeof(RF_Config_t));
878 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
879 retcode));
880 return (retcode);
881 }
882 /* allocate a buffer for the layout-specific data, and copy it
883 * in */
884 if (k_cfg->layoutSpecificSize) {
885 if (k_cfg->layoutSpecificSize > 10000) {
886 /* sanity check */
887 RF_Free(k_cfg, sizeof(RF_Config_t));
888 return (EINVAL);
889 }
890 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
891 (u_char *));
892 if (specific_buf == NULL) {
893 RF_Free(k_cfg, sizeof(RF_Config_t));
894 return (ENOMEM);
895 }
896 retcode = copyin(k_cfg->layoutSpecific,
897 (caddr_t) specific_buf,
898 k_cfg->layoutSpecificSize);
899 if (retcode) {
900 RF_Free(k_cfg, sizeof(RF_Config_t));
901 RF_Free(specific_buf,
902 k_cfg->layoutSpecificSize);
903 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
904 retcode));
905 return (retcode);
906 }
907 } else
908 specific_buf = NULL;
909 k_cfg->layoutSpecific = specific_buf;
910
911 /* should do some kind of sanity check on the configuration.
912 * Store the sum of all the bytes in the last byte? */
913
914 /* configure the system */
915
916 /*
917 * Clear the entire RAID descriptor, just to make sure
918 * there is no stale data left in the case of a
919 * reconfiguration
920 */
921 bzero((char *) raidPtr, sizeof(RF_Raid_t));
922 raidPtr->raidid = unit;
923
924 retcode = rf_Configure(raidPtr, k_cfg, NULL);
925
926 if (retcode == 0) {
927
928 /* allow this many simultaneous IO's to
929 this RAID device */
930 raidPtr->openings = RAIDOUTSTANDING;
931
932 raidinit(raidPtr);
933 rf_markalldirty(raidPtr);
934 }
935 /* free the buffers. No return code here. */
936 if (k_cfg->layoutSpecificSize) {
937 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
938 }
939 RF_Free(k_cfg, sizeof(RF_Config_t));
940
941 return (retcode);
942
943 /* shutdown the system */
944 case RAIDFRAME_SHUTDOWN:
945
946 if ((error = raidlock(rs)) != 0)
947 return (error);
948
949 /*
950 * If somebody has a partition mounted, we shouldn't
951 * shutdown.
952 */
953
954 part = DISKPART(dev);
955 pmask = (1 << part);
956 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
957 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
958 (rs->sc_dkdev.dk_copenmask & pmask))) {
959 raidunlock(rs);
960 return (EBUSY);
961 }
962
963 retcode = rf_Shutdown(raidPtr);
964
965 pool_destroy(&rs->sc_cbufpool);
966
967 /* It's no longer initialized... */
968 rs->sc_flags &= ~RAIDF_INITED;
969
970 /* Detach the disk. */
971 disk_detach(&rs->sc_dkdev);
972
973 raidunlock(rs);
974
975 return (retcode);
976 case RAIDFRAME_GET_COMPONENT_LABEL:
977 clabel_ptr = (RF_ComponentLabel_t **) data;
978 /* need to read the component label for the disk indicated
979 by row,column in clabel */
980
981 		/* For practice, let's get it directly from disk, rather
982 		   than from the in-core copy */
983 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
984 (RF_ComponentLabel_t *));
985 if (clabel == NULL)
986 return (ENOMEM);
987
988 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
989
990 retcode = copyin( *clabel_ptr, clabel,
991 sizeof(RF_ComponentLabel_t));
992
993 if (retcode) {
994 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
995 return(retcode);
996 }
997
998 row = clabel->row;
999 column = clabel->column;
1000
1001 if ((row < 0) || (row >= raidPtr->numRow) ||
1002 (column < 0) || (column >= raidPtr->numCol +
1003 raidPtr->numSpare)) {
1004 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1005 return(EINVAL);
1006 }
1007
1008 raidread_component_label(raidPtr->Disks[row][column].dev,
1009 raidPtr->raid_cinfo[row][column].ci_vp,
1010 clabel );
1011
1012 retcode = copyout((caddr_t) clabel,
1013 (caddr_t) *clabel_ptr,
1014 sizeof(RF_ComponentLabel_t));
1015 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1016 return (retcode);
1017
1018 case RAIDFRAME_SET_COMPONENT_LABEL:
1019 clabel = (RF_ComponentLabel_t *) data;
1020
1021 /* XXX check the label for valid stuff... */
1022 /* Note that some things *should not* get modified --
1023 the user should be re-initing the labels instead of
1024 trying to patch things.
1025 */
1026
1027 printf("Got component label:\n");
1028 printf("Version: %d\n",clabel->version);
1029 printf("Serial Number: %d\n",clabel->serial_number);
1030 printf("Mod counter: %d\n",clabel->mod_counter);
1031 printf("Row: %d\n", clabel->row);
1032 printf("Column: %d\n", clabel->column);
1033 printf("Num Rows: %d\n", clabel->num_rows);
1034 printf("Num Columns: %d\n", clabel->num_columns);
1035 printf("Clean: %d\n", clabel->clean);
1036 printf("Status: %d\n", clabel->status);
1037
1038 row = clabel->row;
1039 column = clabel->column;
1040
1041 if ((row < 0) || (row >= raidPtr->numRow) ||
1042 (column < 0) || (column >= raidPtr->numCol)) {
1043 return(EINVAL);
1044 }
1045
1046 /* XXX this isn't allowed to do anything for now :-) */
1047
1048 /* XXX and before it is, we need to fill in the rest
1049 of the fields!?!?!?! */
1050 #if 0
1051 raidwrite_component_label(
1052 raidPtr->Disks[row][column].dev,
1053 raidPtr->raid_cinfo[row][column].ci_vp,
1054 clabel );
1055 #endif
1056 return (0);
1057
1058 case RAIDFRAME_INIT_LABELS:
1059 clabel = (RF_ComponentLabel_t *) data;
1060 /*
1061 we only want the serial number from
1062 the above. We get all the rest of the information
1063 from the config that was used to create this RAID
1064 set.
1065 */
1066
1067 raidPtr->serial_number = clabel->serial_number;
1068
1069 raid_init_component_label(raidPtr, &ci_label);
1070 ci_label.serial_number = clabel->serial_number;
1071
1072 for(row=0;row<raidPtr->numRow;row++) {
1073 ci_label.row = row;
1074 for(column=0;column<raidPtr->numCol;column++) {
1075 diskPtr = &raidPtr->Disks[row][column];
1076 if (!RF_DEAD_DISK(diskPtr->status)) {
1077 ci_label.partitionSize = diskPtr->partitionSize;
1078 ci_label.column = column;
1079 raidwrite_component_label(
1080 raidPtr->Disks[row][column].dev,
1081 raidPtr->raid_cinfo[row][column].ci_vp,
1082 &ci_label );
1083 }
1084 }
1085 }
1086
1087 return (retcode);
1088 case RAIDFRAME_SET_AUTOCONFIG:
1089 d = rf_set_autoconfig(raidPtr, *(int *) data);
1090 printf("New autoconfig value is: %d\n", d);
1091 *(int *) data = d;
1092 return (retcode);
1093
1094 case RAIDFRAME_SET_ROOT:
1095 d = rf_set_rootpartition(raidPtr, *(int *) data);
1096 printf("New rootpartition value is: %d\n", d);
1097 *(int *) data = d;
1098 return (retcode);
1099
1100 /* initialize all parity */
1101 case RAIDFRAME_REWRITEPARITY:
1102
1103 if (raidPtr->Layout.map->faultsTolerated == 0) {
1104 /* Parity for RAID 0 is trivially correct */
1105 raidPtr->parity_good = RF_RAID_CLEAN;
1106 return(0);
1107 }
1108
1109 if (raidPtr->parity_rewrite_in_progress == 1) {
1110 /* Re-write is already in progress! */
1111 return(EINVAL);
1112 }
1113
1114 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1115 rf_RewriteParityThread,
1116 raidPtr,"raid_parity");
1117 return (retcode);
1118
1119
1120 case RAIDFRAME_ADD_HOT_SPARE:
1121 sparePtr = (RF_SingleComponent_t *) data;
1122 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1123 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1124 return(retcode);
1125
1126 case RAIDFRAME_REMOVE_HOT_SPARE:
1127 return(retcode);
1128
1129 case RAIDFRAME_DELETE_COMPONENT:
1130 componentPtr = (RF_SingleComponent_t *)data;
1131 memcpy( &component, componentPtr,
1132 sizeof(RF_SingleComponent_t));
1133 retcode = rf_delete_component(raidPtr, &component);
1134 return(retcode);
1135
1136 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1137 componentPtr = (RF_SingleComponent_t *)data;
1138 memcpy( &component, componentPtr,
1139 sizeof(RF_SingleComponent_t));
1140 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1141 return(retcode);
1142
1143 case RAIDFRAME_REBUILD_IN_PLACE:
1144
1145 if (raidPtr->Layout.map->faultsTolerated == 0) {
1146 /* Can't do this on a RAID 0!! */
1147 return(EINVAL);
1148 }
1149
1150 if (raidPtr->recon_in_progress == 1) {
1151 /* a reconstruct is already in progress! */
1152 return(EINVAL);
1153 }
1154
1155 componentPtr = (RF_SingleComponent_t *) data;
1156 memcpy( &component, componentPtr,
1157 sizeof(RF_SingleComponent_t));
1158 row = component.row;
1159 column = component.column;
1160 printf("Rebuild: %d %d\n",row, column);
1161 if ((row < 0) || (row >= raidPtr->numRow) ||
1162 (column < 0) || (column >= raidPtr->numCol)) {
1163 return(EINVAL);
1164 }
1165
1166 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1167 if (rrcopy == NULL)
1168 return(ENOMEM);
1169
1170 rrcopy->raidPtr = (void *) raidPtr;
1171 rrcopy->row = row;
1172 rrcopy->col = column;
1173
1174 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1175 rf_ReconstructInPlaceThread,
1176 rrcopy,"raid_reconip");
1177 return(retcode);
1178
1179 case RAIDFRAME_GET_INFO:
1180 if (!raidPtr->valid)
1181 return (ENODEV);
1182 ucfgp = (RF_DeviceConfig_t **) data;
1183 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1184 (RF_DeviceConfig_t *));
1185 if (d_cfg == NULL)
1186 return (ENOMEM);
1187 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1188 d_cfg->rows = raidPtr->numRow;
1189 d_cfg->cols = raidPtr->numCol;
1190 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1191 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1192 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1193 return (ENOMEM);
1194 }
1195 d_cfg->nspares = raidPtr->numSpare;
1196 if (d_cfg->nspares >= RF_MAX_DISKS) {
1197 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1198 return (ENOMEM);
1199 }
1200 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1201 d = 0;
1202 for (i = 0; i < d_cfg->rows; i++) {
1203 for (j = 0; j < d_cfg->cols; j++) {
1204 d_cfg->devs[d] = raidPtr->Disks[i][j];
1205 d++;
1206 }
1207 }
1208 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1209 d_cfg->spares[i] = raidPtr->Disks[0][j];
1210 }
1211 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1212 sizeof(RF_DeviceConfig_t));
1213 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1214
1215 return (retcode);
1216
1217 case RAIDFRAME_CHECK_PARITY:
1218 *(int *) data = raidPtr->parity_good;
1219 return (0);
1220
1221 case RAIDFRAME_RESET_ACCTOTALS:
1222 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1223 return (0);
1224
1225 case RAIDFRAME_GET_ACCTOTALS:
1226 totals = (RF_AccTotals_t *) data;
1227 *totals = raidPtr->acc_totals;
1228 return (0);
1229
1230 case RAIDFRAME_KEEP_ACCTOTALS:
1231 raidPtr->keep_acc_totals = *(int *)data;
1232 return (0);
1233
1234 case RAIDFRAME_GET_SIZE:
1235 *(int *) data = raidPtr->totalSectors;
1236 return (0);
1237
1238 /* fail a disk & optionally start reconstruction */
1239 case RAIDFRAME_FAIL_DISK:
1240
1241 if (raidPtr->Layout.map->faultsTolerated == 0) {
1242 /* Can't do this on a RAID 0!! */
1243 return(EINVAL);
1244 }
1245
1246 rr = (struct rf_recon_req *) data;
1247
1248 if (rr->row < 0 || rr->row >= raidPtr->numRow
1249 || rr->col < 0 || rr->col >= raidPtr->numCol)
1250 return (EINVAL);
1251
1252 printf("raid%d: Failing the disk: row: %d col: %d\n",
1253 unit, rr->row, rr->col);
1254
1255 /* make a copy of the recon request so that we don't rely on
1256 * the user's buffer */
1257 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1258 if (rrcopy == NULL)
1259 return(ENOMEM);
1260 bcopy(rr, rrcopy, sizeof(*rr));
1261 rrcopy->raidPtr = (void *) raidPtr;
1262
1263 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1264 rf_ReconThread,
1265 rrcopy,"raid_recon");
1266 return (0);
1267
1268 /* invoke a copyback operation after recon on whatever disk
1269 * needs it, if any */
1270 case RAIDFRAME_COPYBACK:
1271
1272 if (raidPtr->Layout.map->faultsTolerated == 0) {
1273 /* This makes no sense on a RAID 0!! */
1274 return(EINVAL);
1275 }
1276
1277 if (raidPtr->copyback_in_progress == 1) {
1278 /* Copyback is already in progress! */
1279 return(EINVAL);
1280 }
1281
1282 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1283 rf_CopybackThread,
1284 raidPtr,"raid_copyback");
1285 return (retcode);
1286
1287 /* return the percentage completion of reconstruction */
1288 case RAIDFRAME_CHECK_RECON_STATUS:
1289 if (raidPtr->Layout.map->faultsTolerated == 0) {
1290 /* This makes no sense on a RAID 0, so tell the
1291 user it's done. */
1292 *(int *) data = 100;
1293 return(0);
1294 }
1295 row = 0; /* XXX we only consider a single row... */
1296 if (raidPtr->status[row] != rf_rs_reconstructing)
1297 *(int *) data = 100;
1298 else
1299 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1300 return (0);
1301 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1302 progressInfoPtr = (RF_ProgressInfo_t **) data;
1303 row = 0; /* XXX we only consider a single row... */
1304 if (raidPtr->status[row] != rf_rs_reconstructing) {
1305 progressInfo.remaining = 0;
1306 progressInfo.completed = 100;
1307 progressInfo.total = 100;
1308 } else {
1309 progressInfo.total =
1310 raidPtr->reconControl[row]->numRUsTotal;
1311 progressInfo.completed =
1312 raidPtr->reconControl[row]->numRUsComplete;
1313 progressInfo.remaining = progressInfo.total -
1314 progressInfo.completed;
1315 }
1316 retcode = copyout((caddr_t) &progressInfo,
1317 (caddr_t) *progressInfoPtr,
1318 sizeof(RF_ProgressInfo_t));
1319 return (retcode);
1320
1321 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1322 if (raidPtr->Layout.map->faultsTolerated == 0) {
1323 /* This makes no sense on a RAID 0, so tell the
1324 user it's done. */
1325 *(int *) data = 100;
1326 return(0);
1327 }
1328 if (raidPtr->parity_rewrite_in_progress == 1) {
1329 *(int *) data = 100 *
1330 raidPtr->parity_rewrite_stripes_done /
1331 raidPtr->Layout.numStripe;
1332 } else {
1333 *(int *) data = 100;
1334 }
1335 return (0);
1336
1337 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1338 progressInfoPtr = (RF_ProgressInfo_t **) data;
1339 if (raidPtr->parity_rewrite_in_progress == 1) {
1340 progressInfo.total = raidPtr->Layout.numStripe;
1341 progressInfo.completed =
1342 raidPtr->parity_rewrite_stripes_done;
1343 progressInfo.remaining = progressInfo.total -
1344 progressInfo.completed;
1345 } else {
1346 progressInfo.remaining = 0;
1347 progressInfo.completed = 100;
1348 progressInfo.total = 100;
1349 }
1350 retcode = copyout((caddr_t) &progressInfo,
1351 (caddr_t) *progressInfoPtr,
1352 sizeof(RF_ProgressInfo_t));
1353 return (retcode);
1354
1355 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1356 if (raidPtr->Layout.map->faultsTolerated == 0) {
1357 /* This makes no sense on a RAID 0 */
1358 *(int *) data = 100;
1359 return(0);
1360 }
1361 if (raidPtr->copyback_in_progress == 1) {
1362 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1363 raidPtr->Layout.numStripe;
1364 } else {
1365 *(int *) data = 100;
1366 }
1367 return (0);
1368
1369 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1370 progressInfoPtr = (RF_ProgressInfo_t **) data;
1371 if (raidPtr->copyback_in_progress == 1) {
1372 progressInfo.total = raidPtr->Layout.numStripe;
1373 progressInfo.completed =
1374 raidPtr->copyback_stripes_done;
1375 progressInfo.remaining = progressInfo.total -
1376 progressInfo.completed;
1377 } else {
1378 progressInfo.remaining = 0;
1379 progressInfo.completed = 100;
1380 progressInfo.total = 100;
1381 }
1382 retcode = copyout((caddr_t) &progressInfo,
1383 (caddr_t) *progressInfoPtr,
1384 sizeof(RF_ProgressInfo_t));
1385 return (retcode);
1386
1387 /* the sparetable daemon calls this to wait for the kernel to
1388 * need a spare table. this ioctl does not return until a
1389 * spare table is needed. XXX -- calling mpsleep here in the
1390 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1391 * -- I should either compute the spare table in the kernel,
1392 * or have a different -- XXX XXX -- interface (a different
1393 * character device) for delivering the table -- XXX */
1394 #if 0
1395 case RAIDFRAME_SPARET_WAIT:
1396 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1397 while (!rf_sparet_wait_queue)
1398 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1399 waitreq = rf_sparet_wait_queue;
1400 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1401 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1402
1403 /* structure assignment */
1404 *((RF_SparetWait_t *) data) = *waitreq;
1405
1406 RF_Free(waitreq, sizeof(*waitreq));
1407 return (0);
1408
1409 /* wakes up a process waiting on SPARET_WAIT and puts an error
1410 	 * code in it that will cause the daemon to exit */
1411 case RAIDFRAME_ABORT_SPARET_WAIT:
1412 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1413 waitreq->fcol = -1;
1414 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1415 waitreq->next = rf_sparet_wait_queue;
1416 rf_sparet_wait_queue = waitreq;
1417 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1418 wakeup(&rf_sparet_wait_queue);
1419 return (0);
1420
1421 /* used by the spare table daemon to deliver a spare table
1422 * into the kernel */
1423 case RAIDFRAME_SEND_SPARET:
1424
1425 /* install the spare table */
1426 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1427
1428 /* respond to the requestor. the return status of the spare
1429 * table installation is passed in the "fcol" field */
1430 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1431 waitreq->fcol = retcode;
1432 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1433 waitreq->next = rf_sparet_resp_queue;
1434 rf_sparet_resp_queue = waitreq;
1435 wakeup(&rf_sparet_resp_queue);
1436 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1437
1438 return (retcode);
1439 #endif
1440
1441 default:
1442 break; /* fall through to the os-specific code below */
1443
1444 }
1445
1446 if (!raidPtr->valid)
1447 return (EINVAL);
1448
1449 /*
1450 * Add support for "regular" device ioctls here.
1451 */
1452
1453 switch (cmd) {
1454 case DIOCGDINFO:
1455 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1456 break;
1457 #ifdef __HAVE_OLD_DISKLABEL
1458 case ODIOCGDINFO:
1459 newlabel = *(rs->sc_dkdev.dk_label);
1460 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1461 return ENOTTY;
1462 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1463 break;
1464 #endif
1465
1466 case DIOCGPART:
1467 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1468 ((struct partinfo *) data)->part =
1469 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1470 break;
1471
1472 case DIOCWDINFO:
1473 case DIOCSDINFO:
1474 #ifdef __HAVE_OLD_DISKLABEL
1475 case ODIOCWDINFO:
1476 case ODIOCSDINFO:
1477 #endif
1478 {
1479 struct disklabel *lp;
1480 #ifdef __HAVE_OLD_DISKLABEL
1481 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1482 memset(&newlabel, 0, sizeof newlabel);
1483 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1484 lp = &newlabel;
1485 } else
1486 #endif
1487 lp = (struct disklabel *)data;
1488
1489 if ((error = raidlock(rs)) != 0)
1490 return (error);
1491
1492 rs->sc_flags |= RAIDF_LABELLING;
1493
1494 error = setdisklabel(rs->sc_dkdev.dk_label,
1495 lp, 0, rs->sc_dkdev.dk_cpulabel);
1496 if (error == 0) {
1497 if (cmd == DIOCWDINFO
1498 #ifdef __HAVE_OLD_DISKLABEL
1499 || cmd == ODIOCWDINFO
1500 #endif
1501 )
1502 error = writedisklabel(RAIDLABELDEV(dev),
1503 raidstrategy, rs->sc_dkdev.dk_label,
1504 rs->sc_dkdev.dk_cpulabel);
1505 }
1506 rs->sc_flags &= ~RAIDF_LABELLING;
1507
1508 raidunlock(rs);
1509
1510 if (error)
1511 return (error);
1512 break;
1513 }
1514
1515 case DIOCWLABEL:
1516 if (*(int *) data != 0)
1517 rs->sc_flags |= RAIDF_WLABEL;
1518 else
1519 rs->sc_flags &= ~RAIDF_WLABEL;
1520 break;
1521
1522 case DIOCGDEFLABEL:
1523 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1524 break;
1525
1526 #ifdef __HAVE_OLD_DISKLABEL
1527 case ODIOCGDEFLABEL:
1528 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1529 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1530 return ENOTTY;
1531 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1532 break;
1533 #endif
1534
1535 default:
1536 retcode = ENOTTY;
1537 }
1538 return (retcode);
1539
1540 }
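
/*
 * Illustrative userland-side sketch, not part of this driver (the device
 * name and error handling are hypothetical): several of the ioctls above,
 * e.g. RAIDFRAME_CONFIGURE and RAIDFRAME_GET_INFO, take a pointer to a
 * pointer, so the caller passes the address of its own pointer and the
 * handler copyin()s or copyout()s through it.
 */
#if 0
	RF_Config_t cfg;
	RF_Config_t *cfg_ptr = &cfg;
	int fd;

	fd = open("/dev/rraid0d", O_RDWR);	/* raw partition of raid0 */
	/* ... fill in cfg, e.g. from a raidctl-style config file ... */
	if (ioctl(fd, RAIDFRAME_CONFIGURE, &cfg_ptr) == -1)
		err(1, "RAIDFRAME_CONFIGURE");
#endif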
1541
1542
1543 /* raidinit -- complete the rest of the initialization for the
1544 RAIDframe device. */
1545
1546
1547 static void
1548 raidinit(raidPtr)
1549 RF_Raid_t *raidPtr;
1550 {
1551 struct raid_softc *rs;
1552 int unit;
1553
1554 unit = raidPtr->raidid;
1555
1556 rs = &raid_softc[unit];
1557 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1558 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1559
1560
1561 /* XXX should check return code first... */
1562 rs->sc_flags |= RAIDF_INITED;
1563
1564 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1565
1566 rs->sc_dkdev.dk_name = rs->sc_xname;
1567
1568 /* disk_attach actually creates space for the CPU disklabel, among
1569 * other things, so it's critical to call this *BEFORE* we try putzing
1570 * with disklabels. */
1571
1572 disk_attach(&rs->sc_dkdev);
1573
1574 /* XXX There may be a weird interaction here between this, and
1575 * protectedSectors, as used in RAIDframe. */
1576
1577 rs->sc_size = raidPtr->totalSectors;
1578
1579 }
1580
1581 /* wake up the daemon & tell it to get us a spare table
1582 * XXX
1583 * the entries in the queues should be tagged with the raidPtr
1584 * so that in the extremely rare case that two recons happen at once,
1585  * we know for which device we're requesting a spare table
1586 * XXX
1587 *
1588 * XXX This code is not currently used. GO
1589 */
1590 int
1591 rf_GetSpareTableFromDaemon(req)
1592 RF_SparetWait_t *req;
1593 {
1594 int retcode;
1595
1596 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1597 req->next = rf_sparet_wait_queue;
1598 rf_sparet_wait_queue = req;
1599 wakeup(&rf_sparet_wait_queue);
1600
1601 /* mpsleep unlocks the mutex */
1602 while (!rf_sparet_resp_queue) {
1603 tsleep(&rf_sparet_resp_queue, PRIBIO,
1604 "raidframe getsparetable", 0);
1605 }
1606 req = rf_sparet_resp_queue;
1607 rf_sparet_resp_queue = req->next;
1608 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1609
1610 retcode = req->fcol;
1611 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1612 * alloc'd */
1613 return (retcode);
1614 }
1615
1616 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1617 * bp & passes it down.
1618  * Any calls originating in the kernel must use non-blocking I/O.  We do
1619  * some extra sanity checking to return "appropriate" error values for
1620  * certain conditions (to make some standard utilities work).
1621 *
1622 * Formerly known as: rf_DoAccessKernel
1623 */
1624 void
1625 raidstart(raidPtr)
1626 RF_Raid_t *raidPtr;
1627 {
1628 RF_SectorCount_t num_blocks, pb, sum;
1629 RF_RaidAddr_t raid_addr;
1630 int retcode;
1631 struct partition *pp;
1632 daddr_t blocknum;
1633 int unit;
1634 struct raid_softc *rs;
1635 int do_async;
1636 struct buf *bp;
1637
1638 unit = raidPtr->raidid;
1639 rs = &raid_softc[unit];
1640
1641 /* quick check to see if anything has died recently */
1642 RF_LOCK_MUTEX(raidPtr->mutex);
1643 if (raidPtr->numNewFailures > 0) {
1644 rf_update_component_labels(raidPtr,
1645 RF_NORMAL_COMPONENT_UPDATE);
1646 raidPtr->numNewFailures--;
1647 }
1648 RF_UNLOCK_MUTEX(raidPtr->mutex);
1649
1650 /* Check to see if we're at the limit... */
1651 RF_LOCK_MUTEX(raidPtr->mutex);
1652 while (raidPtr->openings > 0) {
1653 RF_UNLOCK_MUTEX(raidPtr->mutex);
1654
1655 /* get the next item, if any, from the queue */
1656 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1657 /* nothing more to do */
1658 return;
1659 }
1660 BUFQ_REMOVE(&rs->buf_queue, bp);
1661
1662 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1663 * partition.. Need to make it absolute to the underlying
1664 * device.. */
1665
1666 blocknum = bp->b_blkno;
1667 if (DISKPART(bp->b_dev) != RAW_PART) {
1668 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1669 blocknum += pp->p_offset;
1670 }
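
		/* For example (hypothetical numbers): a request for block 100
		 * of a partition that starts at offset 1024 within the RAID
		 * set is translated to absolute block 1124 here. */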
1671
1672 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1673 (int) blocknum));
1674
1675 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1676 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1677
1678 /* *THIS* is where we adjust what block we're going to...
1679 * but DO NOT TOUCH bp->b_blkno!!! */
1680 raid_addr = blocknum;
1681
1682 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1683 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1684 sum = raid_addr + num_blocks + pb;
1685 if (1 || rf_debugKernelAccess) {
1686 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1687 (int) raid_addr, (int) sum, (int) num_blocks,
1688 (int) pb, (int) bp->b_resid));
1689 }
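
		/* Note that the "sum < ..." comparisons below guard against
		 * integer wraparound in raid_addr + num_blocks + pb, not just
		 * against requests that run off the end of the RAID set. */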
1690 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1691 || (sum < num_blocks) || (sum < pb)) {
1692 bp->b_error = ENOSPC;
1693 bp->b_flags |= B_ERROR;
1694 bp->b_resid = bp->b_bcount;
1695 biodone(bp);
1696 RF_LOCK_MUTEX(raidPtr->mutex);
1697 continue;
1698 }
1699 /*
1700 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1701 */
1702
1703 if (bp->b_bcount & raidPtr->sectorMask) {
1704 bp->b_error = EINVAL;
1705 bp->b_flags |= B_ERROR;
1706 bp->b_resid = bp->b_bcount;
1707 biodone(bp);
1708 RF_LOCK_MUTEX(raidPtr->mutex);
1709 continue;
1710
1711 }
1712 db1_printf(("Calling DoAccess..\n"));
1713
1714
1715 RF_LOCK_MUTEX(raidPtr->mutex);
1716 raidPtr->openings--;
1717 RF_UNLOCK_MUTEX(raidPtr->mutex);
1718
1719 /*
1720 * Everything is async.
1721 */
1722 do_async = 1;
1723
1724 disk_busy(&rs->sc_dkdev);
1725
1726 /* XXX we're still at splbio() here... do we *really*
1727 need to be? */
1728
1729 /* don't ever condition on bp->b_flags & B_WRITE.
1730 * always condition on B_READ instead */
1731
1732 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1733 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1734 do_async, raid_addr, num_blocks,
1735 bp->b_data, bp, NULL, NULL,
1736 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1737
1738
1739 RF_LOCK_MUTEX(raidPtr->mutex);
1740 }
1741 RF_UNLOCK_MUTEX(raidPtr->mutex);
1742 }
1743
1744
1745
1746
1747 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1748
1749 int
1750 rf_DispatchKernelIO(queue, req)
1751 RF_DiskQueue_t *queue;
1752 RF_DiskQueueData_t *req;
1753 {
1754 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1755 struct buf *bp;
1756 struct raidbuf *raidbp = NULL;
1757 struct raid_softc *rs;
1758 int unit;
1759 int s;
1760
1761 s=0;
1762 /* s = splbio();*/ /* want to test this */
1763 /* XXX along with the vnode, we also need the softc associated with
1764 * this device.. */
1765
1766 req->queue = queue;
1767
1768 unit = queue->raidPtr->raidid;
1769
1770 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1771
1772 if (unit >= numraid) {
1773 printf("Invalid unit number: %d %d\n", unit, numraid);
1774 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1775 }
1776 rs = &raid_softc[unit];
1777
1778 bp = req->bp;
1779 #if 1
1780 /* XXX when there is a physical disk failure, someone is passing us a
1781 * buffer that contains old stuff!! Attempt to deal with this problem
1782 * without taking a performance hit... (not sure where the real bug
1783 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1784
1785 if (bp->b_flags & B_ERROR) {
1786 bp->b_flags &= ~B_ERROR;
1787 }
1788 if (bp->b_error != 0) {
1789 bp->b_error = 0;
1790 }
1791 #endif
1792 raidbp = RAIDGETBUF(rs);
1793
1794 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1795
1796 /*
1797 * context for raidiodone
1798 */
1799 raidbp->rf_obp = bp;
1800 raidbp->req = req;
1801
1802 LIST_INIT(&raidbp->rf_buf.b_dep);
1803
1804 switch (req->type) {
1805 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1806 /* XXX need to do something extra here.. */
1807 /* I'm leaving this in, as I've never actually seen it used,
1808 * and I'd like folks to report it... GO */
1809 		printf("WAKEUP CALLED\n");
1810 queue->numOutstanding++;
1811
1812 /* XXX need to glue the original buffer into this?? */
1813
1814 KernelWakeupFunc(&raidbp->rf_buf);
1815 break;
1816
1817 case RF_IO_TYPE_READ:
1818 case RF_IO_TYPE_WRITE:
1819
1820 if (req->tracerec) {
1821 RF_ETIMER_START(req->tracerec->timer);
1822 }
1823 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1824 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1825 req->sectorOffset, req->numSector,
1826 req->buf, KernelWakeupFunc, (void *) req,
1827 queue->raidPtr->logBytesPerSector, req->b_proc);
1828
1829 if (rf_debugKernelAccess) {
1830 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1831 (long) bp->b_blkno));
1832 }
1833 queue->numOutstanding++;
1834 queue->last_deq_sector = req->sectorOffset;
1835 /* acc wouldn't have been let in if there were any pending
1836 * reqs at any other priority */
1837 queue->curPriority = req->priority;
1838
1839 db1_printf(("Going for %c to unit %d row %d col %d\n",
1840 req->type, unit, queue->row, queue->col));
1841 db1_printf(("sector %d count %d (%d bytes) %d\n",
1842 (int) req->sectorOffset, (int) req->numSector,
1843 (int) (req->numSector <<
1844 queue->raidPtr->logBytesPerSector),
1845 (int) queue->raidPtr->logBytesPerSector));
1846 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1847 raidbp->rf_buf.b_vp->v_numoutput++;
1848 }
1849 VOP_STRATEGY(&raidbp->rf_buf);
1850
1851 break;
1852
1853 default:
1854 panic("bad req->type in rf_DispatchKernelIO");
1855 }
1856 db1_printf(("Exiting from DispatchKernelIO\n"));
1857 /* splx(s); */ /* want to test this */
1858 return (0);
1859 }
1860 /* this is the callback function associated with an I/O invoked from
1861 kernel code.
1862 */
1863 static void
1864 KernelWakeupFunc(vbp)
1865 struct buf *vbp;
1866 {
1867 RF_DiskQueueData_t *req = NULL;
1868 RF_DiskQueue_t *queue;
1869 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1870 struct buf *bp;
1871 struct raid_softc *rs;
1872 int unit;
1873 int s;
1874
1875 s = splbio();
1876 db1_printf(("recovering the request queue:\n"));
1877 req = raidbp->req;
1878
1879 bp = raidbp->rf_obp;
1880
1881 queue = (RF_DiskQueue_t *) req->queue;
1882
1883 if (raidbp->rf_buf.b_flags & B_ERROR) {
1884 bp->b_flags |= B_ERROR;
1885 bp->b_error = raidbp->rf_buf.b_error ?
1886 raidbp->rf_buf.b_error : EIO;
1887 }
1888
1889 /* XXX methinks this could be wrong... */
1890 #if 1
1891 bp->b_resid = raidbp->rf_buf.b_resid;
1892 #endif
1893
1894 if (req->tracerec) {
1895 RF_ETIMER_STOP(req->tracerec->timer);
1896 RF_ETIMER_EVAL(req->tracerec->timer);
1897 RF_LOCK_MUTEX(rf_tracing_mutex);
1898 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1899 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1900 req->tracerec->num_phys_ios++;
1901 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1902 }
1903 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1904
1905 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1906
1907
1908 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1909 * ballistic, and mark the component as hosed... */
1910
1911 if (bp->b_flags & B_ERROR) {
1912 /* Mark the disk as dead */
1913 /* but only mark it once... */
1914 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1915 rf_ds_optimal) {
1916 printf("raid%d: IO Error. Marking %s as failed.\n",
1917 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1918 queue->raidPtr->Disks[queue->row][queue->col].status =
1919 rf_ds_failed;
1920 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1921 queue->raidPtr->numFailures++;
1922 queue->raidPtr->numNewFailures++;
1923 } else { /* Disk is already dead... */
1924 /* printf("Disk already marked as dead!\n"); */
1925 }
1926
1927 }
1928
1929 rs = &raid_softc[unit];
1930 RAIDPUTBUF(rs, raidbp);
1931
1932 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1933 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1934
1935 splx(s);
1936 }
1937
1938
1939
1940 /*
1941 * initialize a buf structure for doing an I/O in the kernel.
1942 */
1943 static void
1944 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1945 logBytesPerSector, b_proc)
1946 struct buf *bp;
1947 struct vnode *b_vp;
1948 unsigned rw_flag;
1949 dev_t dev;
1950 RF_SectorNum_t startSect;
1951 RF_SectorCount_t numSect;
1952 caddr_t buf;
1953 void (*cbFunc) (struct buf *);
1954 void *cbArg;
1955 int logBytesPerSector;
1956 struct proc *b_proc;
1957 {
1958 /* bp->b_flags = B_PHYS | rw_flag; */
1959 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1960 bp->b_bcount = numSect << logBytesPerSector;
1961 bp->b_bufsize = bp->b_bcount;
1962 bp->b_error = 0;
1963 bp->b_dev = dev;
1964 bp->b_data = buf;
1965 bp->b_blkno = startSect;
1966 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1967 if (bp->b_bcount == 0) {
1968 panic("bp->b_bcount is zero in InitBP!!\n");
1969 }
1970 bp->b_proc = b_proc;
1971 bp->b_iodone = cbFunc;
1972 bp->b_vp = b_vp;
1973
1974 }
1975
1976 static void
1977 raidgetdefaultlabel(raidPtr, rs, lp)
1978 RF_Raid_t *raidPtr;
1979 struct raid_softc *rs;
1980 struct disklabel *lp;
1981 {
1982 db1_printf(("Building a default label...\n"));
1983 bzero(lp, sizeof(*lp));
1984
1985 /* fabricate a label... */
1986 lp->d_secperunit = raidPtr->totalSectors;
1987 lp->d_secsize = raidPtr->bytesPerSector;
1988 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1989 lp->d_ntracks = 4 * raidPtr->numCol;
1990 lp->d_ncylinders = raidPtr->totalSectors /
1991 (lp->d_nsectors * lp->d_ntracks);
1992 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1993
1994 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1995 lp->d_type = DTYPE_RAID;
1996 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1997 lp->d_rpm = 3600;
1998 lp->d_interleave = 1;
1999 lp->d_flags = 0;
2000
2001 lp->d_partitions[RAW_PART].p_offset = 0;
2002 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2003 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2004 lp->d_npartitions = RAW_PART + 1;
2005
2006 lp->d_magic = DISKMAGIC;
2007 lp->d_magic2 = DISKMAGIC;
2008 lp->d_checksum = dkcksum(lp);
2009
2010 }
2011 /*
2012 * Read the disklabel from the raid device. If one is not present, fake one
2013 * up.
2014 */
2015 static void
2016 raidgetdisklabel(dev)
2017 dev_t dev;
2018 {
2019 int unit = raidunit(dev);
2020 struct raid_softc *rs = &raid_softc[unit];
2021 char *errstring;
2022 struct disklabel *lp = rs->sc_dkdev.dk_label;
2023 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2024 RF_Raid_t *raidPtr;
2025
2026 db1_printf(("Getting the disklabel...\n"));
2027
2028 bzero(clp, sizeof(*clp));
2029
2030 raidPtr = raidPtrs[unit];
2031
2032 raidgetdefaultlabel(raidPtr, rs, lp);
2033
2034 /*
2035 * Call the generic disklabel extraction routine.
2036 */
2037 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2038 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2039 if (errstring)
2040 raidmakedisklabel(rs);
2041 else {
2042 int i;
2043 struct partition *pp;
2044
2045 /*
2046 * Sanity check whether the found disklabel is valid.
2047 *
2048 * This is necessary since the total size of the raid device
2049 * may vary when the interleave is changed, even though exactly
2050 * the same components are used, and an old disklabel may be
2051 * used if one is found.
2052 */
2053 if (lp->d_secperunit != rs->sc_size)
2054 printf("WARNING: %s: "
2055 "total sector size in disklabel (%d) != "
2056 "the size of raid (%ld)\n", rs->sc_xname,
2057 lp->d_secperunit, (long) rs->sc_size);
2058 for (i = 0; i < lp->d_npartitions; i++) {
2059 pp = &lp->d_partitions[i];
2060 if (pp->p_offset + pp->p_size > rs->sc_size)
2061 printf("WARNING: %s: end of partition `%c' "
2062 "exceeds the size of raid (%ld)\n",
2063 rs->sc_xname, 'a' + i, (long) rs->sc_size);
2064 }
2065 }
2066
2067 }
2068 /*
2069 * Take care of things one might want to take care of in the event
2070 * that a disklabel isn't present.
2071 */
2072 static void
2073 raidmakedisklabel(rs)
2074 struct raid_softc *rs;
2075 {
2076 struct disklabel *lp = rs->sc_dkdev.dk_label;
2077 db1_printf(("Making a label..\n"));
2078
2079 /*
2080 * For historical reasons, if there's no disklabel present
2081 * the raw partition must be marked FS_BSDFFS.
2082 */
2083
2084 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2085
2086 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2087
2088 lp->d_checksum = dkcksum(lp);
2089 }
2090 /*
2091 * Lookup the provided name in the filesystem. If the file exists,
2092 * is a valid block device, and isn't being used by anyone else,
2093 * set *vpp to the file's vnode.
2094 * You'll find the original of this in ccd.c
2095 */
2096 int
2097 raidlookup(path, p, vpp)
2098 char *path;
2099 struct proc *p;
2100 struct vnode **vpp; /* result */
2101 {
2102 struct nameidata nd;
2103 struct vnode *vp;
2104 struct vattr va;
2105 int error;
2106
2107 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2108 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2109 #ifdef DEBUG
2110 printf("RAIDframe: vn_open returned %d\n", error);
2111 #endif
2112 return (error);
2113 }
2114 vp = nd.ni_vp;
2115 if (vp->v_usecount > 1) {
2116 VOP_UNLOCK(vp, 0);
2117 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2118 return (EBUSY);
2119 }
2120 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2121 VOP_UNLOCK(vp, 0);
2122 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2123 return (error);
2124 }
2125 /* XXX: eventually we should handle VREG, too. */
2126 if (va.va_type != VBLK) {
2127 VOP_UNLOCK(vp, 0);
2128 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2129 return (ENOTBLK);
2130 }
2131 VOP_UNLOCK(vp, 0);
2132 *vpp = vp;
2133 return (0);
2134 }
2135 /*
2136 * Wait interruptibly for an exclusive lock.
2137 *
2138 * XXX
2139 * Several drivers do this; it should be abstracted and made MP-safe.
2140 * (Hmm... where have we seen this warning before :-> GO )
2141 */
2142 static int
2143 raidlock(rs)
2144 struct raid_softc *rs;
2145 {
2146 int error;
2147
2148 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2149 rs->sc_flags |= RAIDF_WANTED;
2150 if ((error =
2151 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2152 return (error);
2153 }
2154 rs->sc_flags |= RAIDF_LOCKED;
2155 return (0);
2156 }
2157 /*
2158 * Unlock and wake up any waiters.
2159 */
2160 static void
2161 raidunlock(rs)
2162 struct raid_softc *rs;
2163 {
2164
2165 rs->sc_flags &= ~RAIDF_LOCKED;
2166 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2167 rs->sc_flags &= ~RAIDF_WANTED;
2168 wakeup(rs);
2169 }
2170 }
2171
2172
2173 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2174 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
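/*
 * The component label lives RF_COMPONENT_INFO_OFFSET bytes into each
 * component (block 32 with the usual 512-byte DEV_BSIZE), and every
 * label read/write below transfers RF_COMPONENT_INFO_SIZE bytes.
 */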
2175
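/*
 * raidmarkclean()/raidmarkdirty(): re-read the component label from
 * the given component, update the modification counter and the
 * clean/dirty flag, and write the label back out.
 */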
2176 int
2177 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2178 {
2179 RF_ComponentLabel_t clabel;
2180 raidread_component_label(dev, b_vp, &clabel);
2181 clabel.mod_counter = mod_counter;
2182 clabel.clean = RF_RAID_CLEAN;
2183 raidwrite_component_label(dev, b_vp, &clabel);
2184 return(0);
2185 }
2186
2187
2188 int
2189 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2190 {
2191 RF_ComponentLabel_t clabel;
2192 raidread_component_label(dev, b_vp, &clabel);
2193 clabel.mod_counter = mod_counter;
2194 clabel.clean = RF_RAID_DIRTY;
2195 raidwrite_component_label(dev, b_vp, &clabel);
2196 return(0);
2197 }
2198
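/*
 * Read the component label of the given component into *clabel.
 * Returns EINVAL if no vnode was supplied, otherwise whatever
 * biowait() reports for the underlying read.
 */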
2199 /* ARGSUSED */
2200 int
2201 raidread_component_label(dev, b_vp, clabel)
2202 dev_t dev;
2203 struct vnode *b_vp;
2204 RF_ComponentLabel_t *clabel;
2205 {
2206 struct buf *bp;
2207 int error;
2208
2209 /* XXX should probably ensure that we don't try to do this if
2210 someone has changed rf_protected_sectors. */
2211
2212 if (b_vp == NULL) {
2213 /* For whatever reason, this component is not valid.
2214 Don't try to read a component label from it. */
2215 return(EINVAL);
2216 }
2217
2218 /* get a block of the appropriate size... */
2219 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2220 bp->b_dev = dev;
2221
2222 /* get our ducks in a row for the read */
2223 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2224 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2225 bp->b_flags |= B_READ;
2226 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2227
2228 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2229
2230 error = biowait(bp);
2231
2232 if (!error) {
2233 memcpy(clabel, bp->b_data,
2234 sizeof(RF_ComponentLabel_t));
2235 #if 0
2236 rf_print_component_label( clabel );
2237 #endif
2238 } else {
2239 #if 0
2240 printf("Failed to read RAID component label!\n");
2241 #endif
2242 }
2243
2244 brelse(bp);
2245 return(error);
2246 }
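/*
 * Write *clabel to the component label area of the given component.
 * The remainder of the RF_COMPONENT_INFO_SIZE block is zero-filled
 * before the label is copied in.
 */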
2247 /* ARGSUSED */
2248 int
2249 raidwrite_component_label(dev, b_vp, clabel)
2250 dev_t dev;
2251 struct vnode *b_vp;
2252 RF_ComponentLabel_t *clabel;
2253 {
2254 struct buf *bp;
2255 int error;
2256
2257 /* get a block of the appropriate size... */
2258 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2259 bp->b_dev = dev;
2260
2261 /* get our ducks in a row for the write */
2262 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2263 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2264 bp->b_flags |= B_WRITE;
2265 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2266
2267 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2268
2269 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2270
2271 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2272 error = biowait(bp);
2273 brelse(bp);
2274 if (error) {
2275 #if 1
2276 printf("Failed to write RAID component info!\n");
2277 #endif
2278 }
2279
2280 return(error);
2281 }
2282
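/*
 * Bump the modification counter and mark the component label of every
 * component that isn't dead (and isn't recorded as spared) as dirty,
 * so that an unclean shutdown leaves the labels marked dirty.
 */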
2283 void
2284 rf_markalldirty(raidPtr)
2285 RF_Raid_t *raidPtr;
2286 {
2287 RF_ComponentLabel_t clabel;
2288 int r,c;
2289
2290 raidPtr->mod_counter++;
2291 for (r = 0; r < raidPtr->numRow; r++) {
2292 for (c = 0; c < raidPtr->numCol; c++) {
2293 /* we don't want to touch (at all) a disk that has
2294 failed */
2295 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2296 raidread_component_label(
2297 raidPtr->Disks[r][c].dev,
2298 raidPtr->raid_cinfo[r][c].ci_vp,
2299 &clabel);
2300 if (clabel.status == rf_ds_spared) {
2301 /* XXX do something special...
2302 but whatever you do, don't
2303 try to access it!! */
2304 } else {
2305 #if 0
2306 clabel.status =
2307 raidPtr->Disks[r][c].status;
2308 raidwrite_component_label(
2309 raidPtr->Disks[r][c].dev,
2310 raidPtr->raid_cinfo[r][c].ci_vp,
2311 &clabel);
2312 #endif
2313 raidmarkdirty(
2314 raidPtr->Disks[r][c].dev,
2315 raidPtr->raid_cinfo[r][c].ci_vp,
2316 raidPtr->mod_counter);
2317 }
2318 }
2319 }
2320 }
2321 /* printf("Component labels marked dirty.\n"); */
2322 #if 0
2323 for( c = 0; c < raidPtr->numSpare ; c++) {
2324 sparecol = raidPtr->numCol + c;
2325 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2326 /*
2327
2328 XXX this is where we get fancy and map this spare
2329 into its correct spot in the array.
2330
2331 */
2332 /*
2333
2334 we claim this disk is "optimal" if it's
2335 rf_ds_used_spare, as that means it should be
2336 directly substitutable for the disk it replaced.
2337 We note that too...
2338
2339 */
2340
2341 for(i=0;i<raidPtr->numRow;i++) {
2342 for(j=0;j<raidPtr->numCol;j++) {
2343 if ((raidPtr->Disks[i][j].spareRow ==
2344 r) &&
2345 (raidPtr->Disks[i][j].spareCol ==
2346 sparecol)) {
2347 srow = r;
2348 scol = sparecol;
2349 break;
2350 }
2351 }
2352 }
2353
2354 raidread_component_label(
2355 raidPtr->Disks[r][sparecol].dev,
2356 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2357 &clabel);
2358 /* make sure status is noted */
2359 clabel.version = RF_COMPONENT_LABEL_VERSION;
2360 clabel.mod_counter = raidPtr->mod_counter;
2361 clabel.serial_number = raidPtr->serial_number;
2362 clabel.row = srow;
2363 clabel.column = scol;
2364 clabel.num_rows = raidPtr->numRow;
2365 clabel.num_columns = raidPtr->numCol;
2366 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2367 clabel.status = rf_ds_optimal;
2368 raidwrite_component_label(
2369 raidPtr->Disks[r][sparecol].dev,
2370 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2371 &clabel);
2372 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2373 raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2374 }
2375 }
2376
2377 #endif
2378 }
2379
2380
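/*
 * Refresh the component labels of all optimal components and in-use
 * spares: note the current status and the new modification counter,
 * and, on a final update with good parity, mark the labels clean too.
 */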
2381 void
2382 rf_update_component_labels(raidPtr, final)
2383 RF_Raid_t *raidPtr;
2384 int final;
2385 {
2386 RF_ComponentLabel_t clabel;
2387 int sparecol;
2388 int r,c;
2389 int i,j;
2390 int srow, scol;
2391
2392 srow = -1;
2393 scol = -1;
2394
2395 /* XXX should do extra checks to make sure things really are clean,
2396 rather than blindly setting the clean bit... */
2397
2398 raidPtr->mod_counter++;
2399
2400 for (r = 0; r < raidPtr->numRow; r++) {
2401 for (c = 0; c < raidPtr->numCol; c++) {
2402 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2403 raidread_component_label(
2404 raidPtr->Disks[r][c].dev,
2405 raidPtr->raid_cinfo[r][c].ci_vp,
2406 &clabel);
2407 /* make sure status is noted */
2408 clabel.status = rf_ds_optimal;
2409 /* bump the counter */
2410 clabel.mod_counter = raidPtr->mod_counter;
2411
2412 raidwrite_component_label(
2413 raidPtr->Disks[r][c].dev,
2414 raidPtr->raid_cinfo[r][c].ci_vp,
2415 &clabel);
2416 if (final == RF_FINAL_COMPONENT_UPDATE) {
2417 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2418 raidmarkclean(
2419 raidPtr->Disks[r][c].dev,
2420 raidPtr->raid_cinfo[r][c].ci_vp,
2421 raidPtr->mod_counter);
2422 }
2423 }
2424 }
2425 /* else we don't touch it.. */
2426 }
2427 }
2428
2429 for( c = 0; c < raidPtr->numSpare ; c++) {
2430 sparecol = raidPtr->numCol + c;
2431 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2432 /*
2433
2434 we claim this disk is "optimal" if it's
2435 rf_ds_used_spare, as that means it should be
2436 directly substitutable for the disk it replaced.
2437 We note that too...
2438
2439 */
2440
2441 for(i=0;i<raidPtr->numRow;i++) {
2442 for(j=0;j<raidPtr->numCol;j++) {
2443 if ((raidPtr->Disks[i][j].spareRow ==
2444 0) &&
2445 (raidPtr->Disks[i][j].spareCol ==
2446 sparecol)) {
2447 srow = i;
2448 scol = j;
2449 break;
2450 }
2451 }
2452 }
2453
2454 /* XXX shouldn't *really* need this... */
2455 raidread_component_label(
2456 raidPtr->Disks[0][sparecol].dev,
2457 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2458 &clabel);
2459 /* make sure status is noted */
2460
2461 raid_init_component_label(raidPtr, &clabel);
2462
2463 clabel.mod_counter = raidPtr->mod_counter;
2464 clabel.row = srow;
2465 clabel.column = scol;
2466 clabel.status = rf_ds_optimal;
2467
2468 raidwrite_component_label(
2469 raidPtr->Disks[0][sparecol].dev,
2470 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2471 &clabel);
2472 if (final == RF_FINAL_COMPONENT_UPDATE) {
2473 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2474 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2475 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2476 raidPtr->mod_counter);
2477 }
2478 }
2479 }
2480 }
2481 /* printf("Component labels updated\n"); */
2482 }
2483
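/*
 * Close the vnode of a single component.  Auto-configured components
 * were opened with NOCRED and are closed the same way; everything
 * else is closed via vn_close() with the engine thread's credentials.
 */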
2484 void
2485 rf_close_component(raidPtr, vp, auto_configured)
2486 RF_Raid_t *raidPtr;
2487 struct vnode *vp;
2488 int auto_configured;
2489 {
2490 struct proc *p;
2491
2492 p = raidPtr->engine_thread;
2493
2494 if (vp != NULL) {
2495 if (auto_configured == 1) {
2496 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2497 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2498 vput(vp);
2499
2500 } else {
2501 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2502 }
2503 } else {
2504 printf("vnode was NULL\n");
2505 }
2506 }
2507
2508
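/*
 * Close the vnodes of all components and spares of this array and
 * clear the corresponding pointers; used when tearing down a
 * configuration.
 */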
2509 void
2510 rf_UnconfigureVnodes(raidPtr)
2511 RF_Raid_t *raidPtr;
2512 {
2513 int r,c;
2514 struct proc *p;
2515 struct vnode *vp;
2516 int acd;
2517
2518
2519 /* We take this opportunity to close the vnodes like we should.. */
2520
2521 p = raidPtr->engine_thread;
2522
2523 for (r = 0; r < raidPtr->numRow; r++) {
2524 for (c = 0; c < raidPtr->numCol; c++) {
2525 printf("Closing vnode for row: %d col: %d\n", r, c);
2526 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2527 acd = raidPtr->Disks[r][c].auto_configured;
2528 rf_close_component(raidPtr, vp, acd);
2529 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2530 raidPtr->Disks[r][c].auto_configured = 0;
2531 }
2532 }
2533 for (r = 0; r < raidPtr->numSpare; r++) {
2534 printf("Closing vnode for spare: %d\n", r);
2535 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2536 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2537 rf_close_component(raidPtr, vp, acd);
2538 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2539 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2540 }
2541 }
2542
2543
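/*
 * Kernel thread body: fail the indicated component and, if requested,
 * start reconstruction onto a spare.  The request structure is freed
 * here, and the thread exits when the work is done.
 */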
2544 void
2545 rf_ReconThread(req)
2546 struct rf_recon_req *req;
2547 {
2548 int s;
2549 RF_Raid_t *raidPtr;
2550
2551 s = splbio();
2552 raidPtr = (RF_Raid_t *) req->raidPtr;
2553 raidPtr->recon_in_progress = 1;
2554
2555 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2556 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2557
2558 /* XXX get rid of this! we don't need it at all.. */
2559 RF_Free(req, sizeof(*req));
2560
2561 raidPtr->recon_in_progress = 0;
2562 splx(s);
2563
2564 /* That's all... */
2565 kthread_exit(0); /* does not return */
2566 }
2567
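/*
 * Kernel thread body: rewrite all parity for the array.  On success
 * the in-core parity status is marked clean; anyone waiting in the
 * shutdown path is woken up when the rewrite finishes.
 */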
2568 void
2569 rf_RewriteParityThread(raidPtr)
2570 RF_Raid_t *raidPtr;
2571 {
2572 int retcode;
2573 int s;
2574
2575 raidPtr->parity_rewrite_in_progress = 1;
2576 s = splbio();
2577 retcode = rf_RewriteParity(raidPtr);
2578 splx(s);
2579 if (retcode) {
2580 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2581 } else {
2582 /* set the clean bit! If we shutdown correctly,
2583 the clean bit on each component label will get
2584 set */
2585 raidPtr->parity_good = RF_RAID_CLEAN;
2586 }
2587 raidPtr->parity_rewrite_in_progress = 0;
2588
2589 /* Anyone waiting for us to stop? If so, inform them... */
2590 if (raidPtr->waitShutdown) {
2591 wakeup(&raidPtr->parity_rewrite_in_progress);
2592 }
2593
2594 /* That's all... */
2595 kthread_exit(0); /* does not return */
2596 }
2597
2598
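/*
 * Kernel thread body: copy reconstructed data from the spare back to
 * the replaced component, then exit.
 */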
2599 void
2600 rf_CopybackThread(raidPtr)
2601 RF_Raid_t *raidPtr;
2602 {
2603 int s;
2604
2605 raidPtr->copyback_in_progress = 1;
2606 s = splbio();
2607 rf_CopybackReconstructedData(raidPtr);
2608 splx(s);
2609 raidPtr->copyback_in_progress = 0;
2610
2611 /* That's all... */
2612 kthread_exit(0); /* does not return */
2613 }
2614
2615
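/*
 * Kernel thread body: reconstruct the indicated component in place
 * (back onto the same row/column rather than onto a spare), then exit.
 */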
2616 void
2617 rf_ReconstructInPlaceThread(req)
2618 struct rf_recon_req *req;
2619 {
2620 int retcode;
2621 int s;
2622 RF_Raid_t *raidPtr;
2623
2624 s = splbio();
2625 raidPtr = req->raidPtr;
2626 raidPtr->recon_in_progress = 1;
2627 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2628 RF_Free(req, sizeof(*req));
2629 raidPtr->recon_in_progress = 0;
2630 splx(s);
2631
2632 /* That's all... */
2633 kthread_exit(0); /* does not return */
2634 }
2635
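/* Mount-root hook for the raid device; currently does nothing. */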
2636 void
2637 rf_mountroot_hook(dev)
2638 struct device *dev;
2639 {
2640
2641 }
2642
2643
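/*
 * Scan every disk-class device in the system (floppies excepted), read
 * its disklabel, and for each FS_RAID partition attempt to read a RAID
 * component label.  Components with plausible labels are collected on
 * the returned RF_AutoConfig_t list (NULL if autoconfiguration is
 * disabled or nothing suitable is found).
 */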
2644 RF_AutoConfig_t *
2645 rf_find_raid_components()
2646 {
2647 struct devnametobdevmaj *dtobdm;
2648 struct vnode *vp;
2649 struct disklabel label;
2650 struct device *dv;
2651 char *cd_name;
2652 dev_t dev;
2653 int error;
2654 int i;
2655 int good_one;
2656 RF_ComponentLabel_t *clabel;
2657 RF_AutoConfig_t *ac_list;
2658 RF_AutoConfig_t *ac;
2659
2660
2661 /* initialize the AutoConfig list */
2662 ac_list = NULL;
2663
2664 if (raidautoconfig) {
2665
2666 /* we begin by trolling through *all* the devices on the system */
2667
2668 for (dv = alldevs.tqh_first; dv != NULL;
2669 dv = dv->dv_list.tqe_next) {
2670
2671 /* we are only interested in disks... */
2672 if (dv->dv_class != DV_DISK)
2673 continue;
2674
2675 /* we don't care about floppies... */
2676 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2677 continue;
2678 }
2679
2680 /* need to find the device_name_to_block_device_major stuff */
2681 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2682 dtobdm = dev_name2blk;
2683 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2684 dtobdm++;
2685 }
2686
2687 /* get a vnode for the raw partition of this disk */
2688
2689 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2690 if (bdevvp(dev, &vp))
2691 panic("RAID can't alloc vnode");
2692
2693 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2694
2695 if (error) {
2696 /* "Who cares." Continue looking
2697 for something that exists*/
2698 vput(vp);
2699 continue;
2700 }
2701
2702 /* Ok, the disk exists. Go get the disklabel. */
2703 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2704 FREAD, NOCRED, 0);
2705 if (error) {
2706 /*
2707 * XXX can't happen - open() would
2708 * have errored out (or faked up one)
2709 */
2710 printf("can't get label for dev %s%c (%d)!?!?\n",
2711 dv->dv_xname, 'a' + RAW_PART, error);
2712 }
2713
2714 /* don't need this any more. We'll allocate it again
2715 a little later if we really do... */
2716 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2717 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2718 vput(vp);
2719
2720 for (i=0; i < label.d_npartitions; i++) {
2721 /* We only support partitions marked as RAID */
2722 if (label.d_partitions[i].p_fstype != FS_RAID)
2723 continue;
2724
2725 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2726 if (bdevvp(dev, &vp))
2727 panic("RAID can't alloc vnode");
2728
2729 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2730 if (error) {
2731 /* Whatever... */
2732 vput(vp);
2733 continue;
2734 }
2735
2736 good_one = 0;
2737
2738 clabel = (RF_ComponentLabel_t *)
2739 malloc(sizeof(RF_ComponentLabel_t),
2740 M_RAIDFRAME, M_NOWAIT);
2741 if (clabel == NULL) {
2742 /* XXX CLEANUP HERE */
2743 printf("RAID auto config: out of memory!\n");
2744 return(NULL); /* XXX probably should panic? */
2745 }
2746
2747 if (!raidread_component_label(dev, vp, clabel)) {
2748 /* Got the label. Does it look reasonable? */
2749 if (rf_reasonable_label(clabel) &&
2750 (clabel->partitionSize <=
2751 label.d_partitions[i].p_size)) {
2752 #if DEBUG
2753 printf("Component on: %s%c: %d\n",
2754 dv->dv_xname, 'a'+i,
2755 label.d_partitions[i].p_size);
2756 rf_print_component_label(clabel);
2757 #endif
2758 /* if it's reasonable, add it,
2759 else ignore it. */
2760 ac = (RF_AutoConfig_t *)
2761 malloc(sizeof(RF_AutoConfig_t),
2762 M_RAIDFRAME,
2763 M_NOWAIT);
2764 if (ac == NULL) {
2765 /* XXX should panic?? */
2766 return(NULL);
2767 }
2768
2769 sprintf(ac->devname, "%s%c",
2770 dv->dv_xname, 'a'+i);
2771 ac->dev = dev;
2772 ac->vp = vp;
2773 ac->clabel = clabel;
2774 ac->next = ac_list;
2775 ac_list = ac;
2776 good_one = 1;
2777 }
2778 }
2779 if (!good_one) {
2780 /* cleanup */
2781 free(clabel, M_RAIDFRAME);
2782 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2783 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2784 vput(vp);
2785 }
2786 }
2787 }
2788 }
2789 return(ac_list);
2790 }
2791
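/*
 * Sanity-check a component label: known version, sane clean flag, and
 * row/column/geometry values that are internally consistent.  Returns
 * 1 if the label looks usable, 0 otherwise.
 */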
2792 static int
2793 rf_reasonable_label(clabel)
2794 RF_ComponentLabel_t *clabel;
2795 {
2796
2797 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2798 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2799 ((clabel->clean == RF_RAID_CLEAN) ||
2800 (clabel->clean == RF_RAID_DIRTY)) &&
2801 clabel->row >=0 &&
2802 clabel->column >= 0 &&
2803 clabel->num_rows > 0 &&
2804 clabel->num_columns > 0 &&
2805 clabel->row < clabel->num_rows &&
2806 clabel->column < clabel->num_columns &&
2807 clabel->blockSize > 0 &&
2808 clabel->numBlocks > 0) {
2809 /* label looks reasonable enough... */
2810 return(1);
2811 }
2812 return(0);
2813 }
2814
2815
2816 void
2817 rf_print_component_label(clabel)
2818 RF_ComponentLabel_t *clabel;
2819 {
2820 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2821 clabel->row, clabel->column,
2822 clabel->num_rows, clabel->num_columns);
2823 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2824 clabel->version, clabel->serial_number,
2825 clabel->mod_counter);
2826 printf(" Clean: %s Status: %d\n",
2827 clabel->clean ? "Yes" : "No", clabel->status );
2828 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2829 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2830 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2831 (char) clabel->parityConfig, clabel->blockSize,
2832 clabel->numBlocks);
2833 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2834 printf(" Contains root partition: %s\n",
2835 clabel->root_partition ? "Yes" : "No" );
2836 printf(" Last configured as: raid%d\n", clabel->last_unit );
2837 #if 0
2838 printf(" Config order: %d\n", clabel->config_order);
2839 #endif
2840
2841 }
2842
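/*
 * Sort the flat list of auto-configured components into configuration
 * sets: each component is added to the first existing set it fits
 * into (see rf_does_it_fit()), or starts a new set of its own.
 */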
2843 RF_ConfigSet_t *
2844 rf_create_auto_sets(ac_list)
2845 RF_AutoConfig_t *ac_list;
2846 {
2847 RF_AutoConfig_t *ac;
2848 RF_ConfigSet_t *config_sets;
2849 RF_ConfigSet_t *cset;
2850 RF_AutoConfig_t *ac_next;
2851
2852
2853 config_sets = NULL;
2854
2855 /* Go through the AutoConfig list, and figure out which components
2856 belong to what sets. */
2857 ac = ac_list;
2858 while(ac!=NULL) {
2859 /* we're going to putz with ac->next, so save it here
2860 for use at the end of the loop */
2861 ac_next = ac->next;
2862
2863 if (config_sets == NULL) {
2864 /* will need at least this one... */
2865 config_sets = (RF_ConfigSet_t *)
2866 malloc(sizeof(RF_ConfigSet_t),
2867 M_RAIDFRAME, M_NOWAIT);
2868 if (config_sets == NULL) {
2869 panic("rf_create_auto_sets: No memory!\n");
2870 }
2871 /* this one is easy :) */
2872 config_sets->ac = ac;
2873 config_sets->next = NULL;
2874 config_sets->rootable = 0;
2875 ac->next = NULL;
2876 } else {
2877 /* which set does this component fit into? */
2878 cset = config_sets;
2879 while(cset!=NULL) {
2880 if (rf_does_it_fit(cset, ac)) {
2881 /* looks like it matches... */
2882 ac->next = cset->ac;
2883 cset->ac = ac;
2884 break;
2885 }
2886 cset = cset->next;
2887 }
2888 if (cset==NULL) {
2889 /* didn't find a match above... new set..*/
2890 cset = (RF_ConfigSet_t *)
2891 malloc(sizeof(RF_ConfigSet_t),
2892 M_RAIDFRAME, M_NOWAIT);
2893 if (cset == NULL) {
2894 panic("rf_create_auto_sets: No memory!\n");
2895 }
2896 cset->ac = ac;
2897 ac->next = NULL;
2898 cset->next = config_sets;
2899 cset->rootable = 0;
2900 config_sets = cset;
2901 }
2902 }
2903 ac = ac_next;
2904 }
2905
2906
2907 return(config_sets);
2908 }
2909
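/*
 * Decide whether component 'ac' belongs in configuration set 'cset'
 * by comparing its label with that of the first component already in
 * the set (everything except mod_counter and partitionSize).
 */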
2910 static int
2911 rf_does_it_fit(cset, ac)
2912 RF_ConfigSet_t *cset;
2913 RF_AutoConfig_t *ac;
2914 {
2915 RF_ComponentLabel_t *clabel1, *clabel2;
2916
2917 /* If this one matches the *first* one in the set, that's good
2918 enough, since the other members of the set would have been
2919 through here too... */
2920 /* note that we are not checking partitionSize here..
2921
2922 Note that we are also not checking the mod_counters here.
2923 If everything else matches except the mod_counter, that's
2924 good enough for this test. We will deal with the mod_counters
2925 a little later in the autoconfiguration process.
2926
2927 (clabel1->mod_counter == clabel2->mod_counter) &&
2928
2929 The reason we don't check for this is that failed disks
2930 will have lower modification counts. If those disks are
2931 not added to the set they used to belong to, then they will
2932 form their own set, which may result in 2 different sets,
2933 for example, competing to be configured at raid0, and
2934 perhaps competing to be the root filesystem set. If the
2935 wrong ones get configured, or both attempt to become /,
2936 weird behaviour and/or serious lossage will occur. Thus we
2937 need to bring them into the fold here, and kick them out at
2938 a later point.
2939
2940 */
2941
2942 clabel1 = cset->ac->clabel;
2943 clabel2 = ac->clabel;
2944 if ((clabel1->version == clabel2->version) &&
2945 (clabel1->serial_number == clabel2->serial_number) &&
2946 (clabel1->num_rows == clabel2->num_rows) &&
2947 (clabel1->num_columns == clabel2->num_columns) &&
2948 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2949 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2950 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2951 (clabel1->parityConfig == clabel2->parityConfig) &&
2952 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2953 (clabel1->blockSize == clabel2->blockSize) &&
2954 (clabel1->numBlocks == clabel2->numBlocks) &&
2955 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2956 (clabel1->root_partition == clabel2->root_partition) &&
2957 (clabel1->last_unit == clabel2->last_unit) &&
2958 (clabel1->config_order == clabel2->config_order)) {
2959 /* if it gets here, it almost *has* to be a match */
2960 } else {
2961 /* it's not consistent with somebody in the set..
2962 punt */
2963 return(0);
2964 }
2965 /* all was fine.. it must fit... */
2966 return(1);
2967 }
2968
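/*
 * Determine whether a configuration set has enough components with
 * the newest modification counter to be brought up.  RAID 1 gets
 * special treatment: a mirror pair may lose one member, but not both.
 */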
2969 int
2970 rf_have_enough_components(cset)
2971 RF_ConfigSet_t *cset;
2972 {
2973 RF_AutoConfig_t *ac;
2974 RF_AutoConfig_t *auto_config;
2975 RF_ComponentLabel_t *clabel;
2976 int r,c;
2977 int num_rows;
2978 int num_cols;
2979 int num_missing;
2980 int mod_counter;
2981 int mod_counter_found;
2982 int even_pair_failed;
2983 char parity_type;
2984
2985
2986 /* check to see that we have enough 'live' components
2987 of this set. If so, we can configure it if necessary */
2988
2989 num_rows = cset->ac->clabel->num_rows;
2990 num_cols = cset->ac->clabel->num_columns;
2991 parity_type = cset->ac->clabel->parityConfig;
2992
2993 /* XXX Check for duplicate components!?!?!? */
2994
2995 /* Determine what the mod_counter is supposed to be for this set. */
2996
2997 mod_counter_found = 0;
2998 mod_counter = 0;
2999 ac = cset->ac;
3000 while(ac!=NULL) {
3001 if (mod_counter_found==0) {
3002 mod_counter = ac->clabel->mod_counter;
3003 mod_counter_found = 1;
3004 } else {
3005 if (ac->clabel->mod_counter > mod_counter) {
3006 mod_counter = ac->clabel->mod_counter;
3007 }
3008 }
3009 ac = ac->next;
3010 }
3011
3012 num_missing = 0;
3013 auto_config = cset->ac;
3014
3015 for(r=0; r<num_rows; r++) {
3016 even_pair_failed = 0;
3017 for(c=0; c<num_cols; c++) {
3018 ac = auto_config;
3019 while(ac!=NULL) {
3020 if ((ac->clabel->row == r) &&
3021 (ac->clabel->column == c) &&
3022 (ac->clabel->mod_counter == mod_counter)) {
3023 /* it's this one... */
3024 #if DEBUG
3025 printf("Found: %s at %d,%d\n",
3026 ac->devname,r,c);
3027 #endif
3028 break;
3029 }
3030 ac=ac->next;
3031 }
3032 if (ac==NULL) {
3033 /* Didn't find one here! */
3034 /* special case for RAID 1, especially
3035 where there are more than 2
3036 components (where RAIDframe treats
3037 things a little differently :( ) */
3038 if (parity_type == '1') {
3039 if (c%2 == 0) { /* even component */
3040 even_pair_failed = 1;
3041 } else { /* odd component. If
3042 we're failed, and
3043 so is the even
3044 component, it's
3045 "Good Night, Charlie" */
3046 if (even_pair_failed == 1) {
3047 return(0);
3048 }
3049 }
3050 } else {
3051 /* normal accounting */
3052 num_missing++;
3053 }
3054 }
3055 if ((parity_type == '1') && (c%2 == 1)) {
3056 /* Just did an even component, and we didn't
3057 bail.. reset the even_pair_failed flag,
3058 and go on to the next component.... */
3059 even_pair_failed = 0;
3060 }
3061 }
3062 }
3063
3064 clabel = cset->ac->clabel;
3065
3066 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3067 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3068 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3069 /* XXX this needs to be made *much* more general */
3070 /* Too many failures */
3071 return(0);
3072 }
3073 /* otherwise, all is well, and we've got enough to take a kick
3074 at autoconfiguring this set */
3075 return(1);
3076 }
3077
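/*
 * Build an RF_Config_t for an auto-configured set from its component
 * labels: geometry, parity configuration, queueing parameters, and
 * the device name of every component.
 */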
3078 void
3079 rf_create_configuration(ac,config,raidPtr)
3080 RF_AutoConfig_t *ac;
3081 RF_Config_t *config;
3082 RF_Raid_t *raidPtr;
3083 {
3084 RF_ComponentLabel_t *clabel;
3085 int i;
3086
3087 clabel = ac->clabel;
3088
3089 /* 1. Fill in the common stuff */
3090 config->numRow = clabel->num_rows;
3091 config->numCol = clabel->num_columns;
3092 config->numSpare = 0; /* XXX should this be set here? */
3093 config->sectPerSU = clabel->sectPerSU;
3094 config->SUsPerPU = clabel->SUsPerPU;
3095 config->SUsPerRU = clabel->SUsPerRU;
3096 config->parityConfig = clabel->parityConfig;
3097 /* XXX... */
3098 strcpy(config->diskQueueType,"fifo");
3099 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3100 config->layoutSpecificSize = 0; /* XXX ?? */
3101
3102 while(ac!=NULL) {
3103 /* row/col values will be in range due to the checks
3104 in rf_reasonable_label() */
3105 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3106 ac->devname);
3107 ac = ac->next;
3108 }
3109
3110 for(i=0;i<RF_MAXDBGV;i++) {
3111 config->debugVars[i][0] = '\0';
3112 }
3113 }
3114
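/*
 * Turn autoconfiguration on or off for this array and push the new
 * setting into the component label of every optimal component.
 */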
3115 int
3116 rf_set_autoconfig(raidPtr, new_value)
3117 RF_Raid_t *raidPtr;
3118 int new_value;
3119 {
3120 RF_ComponentLabel_t clabel;
3121 struct vnode *vp;
3122 dev_t dev;
3123 int row, column;
3124
3125 raidPtr->autoconfigure = new_value;
3126 for(row=0; row<raidPtr->numRow; row++) {
3127 for(column=0; column<raidPtr->numCol; column++) {
3128 if (raidPtr->Disks[row][column].status ==
3129 rf_ds_optimal) {
3130 dev = raidPtr->Disks[row][column].dev;
3131 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3132 raidread_component_label(dev, vp, &clabel);
3133 clabel.autoconfigure = new_value;
3134 raidwrite_component_label(dev, vp, &clabel);
3135 }
3136 }
3137 }
3138 return(new_value);
3139 }
3140
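/*
 * Record whether this array may hold the root filesystem and push the
 * new setting into the component label of every optimal component.
 */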
3141 int
3142 rf_set_rootpartition(raidPtr, new_value)
3143 RF_Raid_t *raidPtr;
3144 int new_value;
3145 {
3146 RF_ComponentLabel_t clabel;
3147 struct vnode *vp;
3148 dev_t dev;
3149 int row, column;
3150
3151 raidPtr->root_partition = new_value;
3152 for(row=0; row<raidPtr->numRow; row++) {
3153 for(column=0; column<raidPtr->numCol; column++) {
3154 if (raidPtr->Disks[row][column].status ==
3155 rf_ds_optimal) {
3156 dev = raidPtr->Disks[row][column].dev;
3157 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3158 raidread_component_label(dev, vp, &clabel);
3159 clabel.root_partition = new_value;
3160 raidwrite_component_label(dev, vp, &clabel);
3161 }
3162 }
3163 }
3164 return(new_value);
3165 }
3166
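/*
 * Close and release the vnodes that were opened for the components of
 * a configuration set.
 */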
3167 void
3168 rf_release_all_vps(cset)
3169 RF_ConfigSet_t *cset;
3170 {
3171 RF_AutoConfig_t *ac;
3172
3173 ac = cset->ac;
3174 while(ac!=NULL) {
3175 /* Close the vp, and give it back */
3176 if (ac->vp) {
3177 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3178 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3179 vput(ac->vp);
3180 ac->vp = NULL;
3181 }
3182 ac = ac->next;
3183 }
3184 }
3185
3186
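/*
 * Free the component labels, the RF_AutoConfig_t entries, and finally
 * the RF_ConfigSet_t itself.
 */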
3187 void
3188 rf_cleanup_config_set(cset)
3189 RF_ConfigSet_t *cset;
3190 {
3191 RF_AutoConfig_t *ac;
3192 RF_AutoConfig_t *next_ac;
3193
3194 ac = cset->ac;
3195 while(ac!=NULL) {
3196 next_ac = ac->next;
3197 /* nuke the label */
3198 free(ac->clabel, M_RAIDFRAME);
3199 /* cleanup the config structure */
3200 free(ac, M_RAIDFRAME);
3201 /* "next.." */
3202 ac = next_ac;
3203 }
3204 /* and, finally, nuke the config set */
3205 free(cset, M_RAIDFRAME);
3206 }
3207
3208
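/*
 * Fill in a component label from the current in-core state of the
 * array: version, serial number, geometry, and configuration flags.
 * The per-component row/column fields are left to the caller.
 */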
3209 void
3210 raid_init_component_label(raidPtr, clabel)
3211 RF_Raid_t *raidPtr;
3212 RF_ComponentLabel_t *clabel;
3213 {
3214 /* current version number */
3215 clabel->version = RF_COMPONENT_LABEL_VERSION;
3216 clabel->serial_number = raidPtr->serial_number;
3217 clabel->mod_counter = raidPtr->mod_counter;
3218 clabel->num_rows = raidPtr->numRow;
3219 clabel->num_columns = raidPtr->numCol;
3220 clabel->clean = RF_RAID_DIRTY; /* not clean */
3221 clabel->status = rf_ds_optimal; /* "It's good!" */
3222
3223 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3224 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3225 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3226
3227 clabel->blockSize = raidPtr->bytesPerSector;
3228 clabel->numBlocks = raidPtr->sectorsPerDisk;
3229
3230 /* XXX not portable */
3231 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3232 clabel->maxOutstanding = raidPtr->maxOutstanding;
3233 clabel->autoconfigure = raidPtr->autoconfigure;
3234 clabel->root_partition = raidPtr->root_partition;
3235 clabel->last_unit = raidPtr->raidid;
3236 clabel->config_order = raidPtr->config_order;
3237 }
3238
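/*
 * Configure one auto-detected set: pick a unit number (preferring the
 * one the set was last configured as), build an RF_Config_t from the
 * component labels, and run the normal configuration path.  Returns
 * zero on success, with *unit set to the unit number that was used.
 */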
3239 int
3240 rf_auto_config_set(cset,unit)
3241 RF_ConfigSet_t *cset;
3242 int *unit;
3243 {
3244 RF_Raid_t *raidPtr;
3245 RF_Config_t *config;
3246 int raidID;
3247 int retcode;
3248
3249 printf("RAID autoconfigure\n");
3250
3251 retcode = 0;
3252 *unit = -1;
3253
3254 /* 1. Create a config structure */
3255
3256 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3257 M_RAIDFRAME,
3258 M_NOWAIT);
3259 if (config==NULL) {
3260 printf("Out of mem!?!?\n");
3261 /* XXX do something more intelligent here. */
3262 return(1);
3263 }
3264
3265 memset(config, 0, sizeof(RF_Config_t));
3266
3267 /* XXX raidID needs to be set correctly.. */
3268
3269 /*
3270 2. Figure out what RAID ID this one is supposed to live at
3271 See if we can get the same RAID dev that it was configured
3272 on last time..
3273 */
3274
3275 raidID = cset->ac->clabel->last_unit;
3276 if ((raidID < 0) || (raidID >= numraid)) {
3277 /* let's not wander off into lala land. */
3278 raidID = numraid - 1;
3279 }
3280 if (raidPtrs[raidID]->valid != 0) {
3281
3282 /*
3283 Nope... Go looking for an alternative...
3284 Start high so we don't immediately use raid0 if that's
3285 not taken.
3286 */
3287
3288 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3289 if (raidPtrs[raidID]->valid == 0) {
3290 /* can use this one! */
3291 break;
3292 }
3293 }
3294 }
3295
3296 if (raidID < 0) {
3297 /* punt... */
3298 printf("Unable to auto configure this set!\n");
3299 printf("(Out of RAID devs!)\n");
3300 return(1);
3301 }
3302 printf("Configuring raid%d:\n",raidID);
3303 raidPtr = raidPtrs[raidID];
3304
3305 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3306 raidPtr->raidid = raidID;
3307 raidPtr->openings = RAIDOUTSTANDING;
3308
3309 /* 3. Build the configuration structure */
3310 rf_create_configuration(cset->ac, config, raidPtr);
3311
3312 /* 4. Do the configuration */
3313 retcode = rf_Configure(raidPtr, config, cset->ac);
3314
3315 if (retcode == 0) {
3316
3317 raidinit(raidPtrs[raidID]);
3318
3319 rf_markalldirty(raidPtrs[raidID]);
3320 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3321 if (cset->ac->clabel->root_partition==1) {
3322 /* everything configured just fine. Make a note
3323 that this set is eligible to be root. */
3324 cset->rootable = 1;
3325 /* XXX do this here? */
3326 raidPtrs[raidID]->root_partition = 1;
3327 }
3328 }
3329
3330 /* 5. Cleanup */
3331 free(config, M_RAIDFRAME);
3332
3333 *unit = raidID;
3334 return(retcode);
3335 }
3336
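/*
 * Bookkeeping for the disk statistics (iostat): credit the completed
 * transfer against the raid device.
 */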
3337 void
3338 rf_disk_unbusy(desc)
3339 RF_RaidAccessDesc_t *desc;
3340 {
3341 struct buf *bp;
3342
3343 bp = (struct buf *)desc->bp;
3344 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3345 (bp->b_bcount - bp->b_resid));
3346 }
3347