/*	$NetBSD: rf_netbsdkintf.c,v 1.140 2002/09/27 02:24:31 thorpej Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.140 2002/09/27 02:24:31 thorpej Exp $");
118
119 #include <sys/param.h>
120 #include <sys/errno.h>
121 #include <sys/pool.h>
122 #include <sys/queue.h>
123 #include <sys/disk.h>
124 #include <sys/device.h>
125 #include <sys/stat.h>
126 #include <sys/ioctl.h>
127 #include <sys/fcntl.h>
128 #include <sys/systm.h>
129 #include <sys/namei.h>
130 #include <sys/vnode.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include <dev/raidframe/raidframevar.h>
139 #include <dev/raidframe/raidframeio.h>
140 #include "raid.h"
141 #include "opt_raid_autoconfig.h"
142 #include "rf_raid.h"
143 #include "rf_copyback.h"
144 #include "rf_dag.h"
145 #include "rf_dagflags.h"
146 #include "rf_desc.h"
147 #include "rf_diskqueue.h"
148 #include "rf_etimer.h"
149 #include "rf_general.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_threadstuff.h"
155
156 #ifdef DEBUG
157 int rf_kdebug_level = 0;
158 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
159 #else /* DEBUG */
160 #define db1_printf(a) { }
161 #endif /* DEBUG */
162
163 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
164
165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
166
167 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
168 * spare table */
169 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
170 * installation process */
171
172 /* prototypes */
173 static void KernelWakeupFunc(struct buf * bp);
174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
175 dev_t dev, RF_SectorNum_t startSect,
176 RF_SectorCount_t numSect, caddr_t buf,
177 void (*cbFunc) (struct buf *), void *cbArg,
178 int logBytesPerSector, struct proc * b_proc);
179 static void raidinit(RF_Raid_t *);
180
181 void raidattach(int);
182
183 dev_type_open(raidopen);
184 dev_type_close(raidclose);
185 dev_type_read(raidread);
186 dev_type_write(raidwrite);
187 dev_type_ioctl(raidioctl);
188 dev_type_strategy(raidstrategy);
189 dev_type_dump(raiddump);
190 dev_type_size(raidsize);
191
192 const struct bdevsw raid_bdevsw = {
193 raidopen, raidclose, raidstrategy, raidioctl,
194 raiddump, raidsize, D_DISK
195 };
196
197 const struct cdevsw raid_cdevsw = {
198 raidopen, raidclose, raidread, raidwrite, raidioctl,
199 nostop, notty, nopoll, nommap, D_DISK
200 };
201
202 /*
203 * Pilfered from ccd.c
204 */
205
206 struct raidbuf {
207 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
208 struct buf *rf_obp; /* ptr. to original I/O buf */
209 RF_DiskQueueData_t *req;/* the request that this was part of.. */
210 };
211
212 /* component buffer pool */
213 struct pool raidframe_cbufpool;
214
215 /* XXX Not sure if the following should be replacing the raidPtrs above,
216 or if it should be used in conjunction with that...
217 */
218
219 struct raid_softc {
220 int sc_flags; /* flags */
221 int sc_cflags; /* configuration flags */
222 size_t sc_size; /* size of the raid device */
223 char sc_xname[20]; /* XXX external name */
224 struct disk sc_dkdev; /* generic disk device info */
225 struct bufq_state buf_queue; /* used for the device queue */
226 };
227 /* sc_flags */
228 #define RAIDF_INITED 0x01 /* unit has been initialized */
229 #define RAIDF_WLABEL 0x02 /* label area is writable */
230 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
231 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
232 #define RAIDF_LOCKED 0x80 /* unit is locked */
233
234 #define raidunit(x) DISKUNIT(x)
235 int numraid = 0;
236
/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */
253
254 #ifndef RAIDOUTSTANDING
255 #define RAIDOUTSTANDING 6
256 #endif
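/*
 * Rough sizing sketch based on the example above: with the default of
 * RAIDOUTSTANDING == 6 and 64K writes, that is roughly
 * 6 * 128K == 768K of old-data/parity buffers on top of
 * 6 * 64K == 384K of incoming data, i.e. on the order of 1MB of kernel
 * memory while the requests are in flight.
 */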
257
258 #define RAIDLABELDEV(dev) \
259 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
260
261 /* declared here, and made public, for the benefit of KVM stuff.. */
262 struct raid_softc *raid_softc;
263
264 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
265 struct disklabel *);
266 static void raidgetdisklabel(dev_t);
267 static void raidmakedisklabel(struct raid_softc *);
268
269 static int raidlock(struct raid_softc *);
270 static void raidunlock(struct raid_softc *);
271
272 static void rf_markalldirty(RF_Raid_t *);
273
274 struct device *raidrootdev;
275
276 void rf_ReconThread(struct rf_recon_req *);
277 /* XXX what I want is: */
278 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
279 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
280 void rf_CopybackThread(RF_Raid_t *raidPtr);
281 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
282 void rf_buildroothack(void *);
283
284 RF_AutoConfig_t *rf_find_raid_components(void);
285 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
286 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
287 static int rf_reasonable_label(RF_ComponentLabel_t *);
288 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
289 int rf_set_autoconfig(RF_Raid_t *, int);
290 int rf_set_rootpartition(RF_Raid_t *, int);
291 void rf_release_all_vps(RF_ConfigSet_t *);
292 void rf_cleanup_config_set(RF_ConfigSet_t *);
293 int rf_have_enough_components(RF_ConfigSet_t *);
294 int rf_auto_config_set(RF_ConfigSet_t *, int *);
295
296 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
297 allow autoconfig to take place.
298 Note that this is overridden by having
299 RAID_AUTOCONFIG as an option in the
300 kernel config file. */
301
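/*
 * raidattach -- pseudo-device attach routine.  Allocates and zeroes the
 * per-unit raid_softc and raidPtrs arrays, initializes the component
 * buffer pool, boots RAIDframe itself, and (when autoconfiguration is
 * enabled) scans for RAID components and schedules rf_buildroothack()
 * to configure any auto-configurable sets.
 */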
302 void
303 raidattach(num)
304 int num;
305 {
306 int raidID;
307 int i, rc;
308 RF_AutoConfig_t *ac_list; /* autoconfig list */
309 RF_ConfigSet_t *config_sets;
310
311 #ifdef DEBUG
312 printf("raidattach: Asked for %d units\n", num);
313 #endif
314
315 if (num <= 0) {
316 #ifdef DIAGNOSTIC
317 panic("raidattach: count <= 0");
318 #endif
319 return;
320 }
321 /* This is where all the initialization stuff gets done. */
322
323 numraid = num;
324
325 /* Make some space for requested number of units... */
326
327 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
328 if (raidPtrs == NULL) {
329 panic("raidPtrs is NULL!!\n");
330 }
331
332 /* Initialize the component buffer pool. */
333 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
334 0, 0, "raidpl", NULL);
335
336 rc = rf_mutex_init(&rf_sparet_wait_mutex);
337 if (rc) {
338 RF_PANIC();
339 }
340
341 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
342
343 for (i = 0; i < num; i++)
344 raidPtrs[i] = NULL;
345 rc = rf_BootRaidframe();
346 if (rc == 0)
347 printf("Kernelized RAIDframe activated\n");
348 else
349 panic("Serious error booting RAID!!\n");
350
351 /* put together some datastructures like the CCD device does.. This
352 * lets us lock the device and what-not when it gets opened. */
353
354 raid_softc = (struct raid_softc *)
355 malloc(num * sizeof(struct raid_softc),
356 M_RAIDFRAME, M_NOWAIT);
357 if (raid_softc == NULL) {
358 printf("WARNING: no memory for RAIDframe driver\n");
359 return;
360 }
361
362 memset(raid_softc, 0, num * sizeof(struct raid_softc));
363
364 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
365 M_RAIDFRAME, M_NOWAIT);
366 if (raidrootdev == NULL) {
367 panic("No memory for RAIDframe driver!!?!?!\n");
368 }
369
370 for (raidID = 0; raidID < num; raidID++) {
371 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
372
373 raidrootdev[raidID].dv_class = DV_DISK;
374 raidrootdev[raidID].dv_cfdata = NULL;
375 raidrootdev[raidID].dv_unit = raidID;
376 raidrootdev[raidID].dv_parent = NULL;
377 raidrootdev[raidID].dv_flags = 0;
378 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
379
380 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
381 (RF_Raid_t *));
382 if (raidPtrs[raidID] == NULL) {
383 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
384 numraid = raidID;
385 return;
386 }
387 }
388
389 #ifdef RAID_AUTOCONFIG
390 raidautoconfig = 1;
391 #endif
392
393 if (raidautoconfig) {
394 /* 1. locate all RAID components on the system */
395
396 #if DEBUG
397 printf("Searching for raid components...\n");
398 #endif
399 ac_list = rf_find_raid_components();
400
401 /* 2. sort them into their respective sets */
402
403 config_sets = rf_create_auto_sets(ac_list);
404
405 /* 3. evaluate each set and configure the valid ones
406 This gets done in rf_buildroothack() */
407
408 /* schedule the creation of the thread to do the
409 "/ on RAID" stuff */
410
411 kthread_create(rf_buildroothack,config_sets);
412
413 }
414
415 }
416
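/*
 * rf_buildroothack -- kernel thread body scheduled from raidattach().
 * Walks the list of configuration sets found at boot, auto-configures
 * those that are complete and marked for autoconfiguration, and, if
 * exactly one of them is flagged as rootable, points booted_device at
 * the corresponding raid unit.  With more than one candidate root the
 * user is asked via RB_ASKNAME.
 */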
417 void
418 rf_buildroothack(arg)
419 void *arg;
420 {
421 RF_ConfigSet_t *config_sets = arg;
422 RF_ConfigSet_t *cset;
423 RF_ConfigSet_t *next_cset;
424 int retcode;
425 int raidID;
426 int rootID;
427 int num_root;
428
429 rootID = 0;
430 num_root = 0;
431 cset = config_sets;
432 while(cset != NULL ) {
433 next_cset = cset->next;
434 if (rf_have_enough_components(cset) &&
435 cset->ac->clabel->autoconfigure==1) {
436 retcode = rf_auto_config_set(cset,&raidID);
437 if (!retcode) {
438 if (cset->rootable) {
439 rootID = raidID;
440 num_root++;
441 }
442 } else {
443 /* The autoconfig didn't work :( */
444 #if DEBUG
445 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
446 #endif
447 rf_release_all_vps(cset);
448 }
449 } else {
450 /* we're not autoconfiguring this set...
451 release the associated resources */
452 rf_release_all_vps(cset);
453 }
454 /* cleanup */
455 rf_cleanup_config_set(cset);
456 cset = next_cset;
457 }
458
459 /* we found something bootable... */
460
461 if (num_root == 1) {
462 booted_device = &raidrootdev[rootID];
463 } else if (num_root > 1) {
464 /* we can't guess.. require the user to answer... */
465 boothowto |= RB_ASKNAME;
466 }
467 }
468
469
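/*
 * raidsize -- return the size (in DEV_BSIZE blocks) of the indicated
 * partition, for use as a swap/dump device.  Only FS_SWAP partitions
 * report a size; anything else returns -1.
 */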
470 int
471 raidsize(dev)
472 dev_t dev;
473 {
474 struct raid_softc *rs;
475 struct disklabel *lp;
476 int part, unit, omask, size;
477
478 unit = raidunit(dev);
479 if (unit >= numraid)
480 return (-1);
481 rs = &raid_softc[unit];
482
483 if ((rs->sc_flags & RAIDF_INITED) == 0)
484 return (-1);
485
486 part = DISKPART(dev);
487 omask = rs->sc_dkdev.dk_openmask & (1 << part);
488 lp = rs->sc_dkdev.dk_label;
489
490 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
491 return (-1);
492
493 if (lp->d_partitions[part].p_fstype != FS_SWAP)
494 size = -1;
495 else
496 size = lp->d_partitions[part].p_size *
497 (lp->d_secsize / DEV_BSIZE);
498
499 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
500 return (-1);
501
502 return (size);
503
504 }
505
506 int
507 raiddump(dev, blkno, va, size)
508 dev_t dev;
509 daddr_t blkno;
510 caddr_t va;
511 size_t size;
512 {
513 /* Not implemented. */
514 return ENXIO;
515 }
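/*
 * raidopen -- open the raid device.  Re-reads the disklabel on the
 * first open of an initialized set, verifies that the requested
 * partition exists, records the open in the per-format open masks,
 * and marks the components dirty on that first open.
 */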
516 /* ARGSUSED */
517 int
518 raidopen(dev, flags, fmt, p)
519 dev_t dev;
520 int flags, fmt;
521 struct proc *p;
522 {
523 int unit = raidunit(dev);
524 struct raid_softc *rs;
525 struct disklabel *lp;
526 int part, pmask;
527 int error = 0;
528
529 if (unit >= numraid)
530 return (ENXIO);
531 rs = &raid_softc[unit];
532
533 if ((error = raidlock(rs)) != 0)
534 return (error);
535 lp = rs->sc_dkdev.dk_label;
536
537 part = DISKPART(dev);
538 pmask = (1 << part);
539
540 db1_printf(("Opening raid device number: %d partition: %d\n",
541 unit, part));
542
543
544 if ((rs->sc_flags & RAIDF_INITED) &&
545 (rs->sc_dkdev.dk_openmask == 0))
546 raidgetdisklabel(dev);
547
548 /* make sure that this partition exists */
549
550 if (part != RAW_PART) {
551 db1_printf(("Not a raw partition..\n"));
552 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
553 ((part >= lp->d_npartitions) ||
554 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
555 error = ENXIO;
556 raidunlock(rs);
557 db1_printf(("Bailing out...\n"));
558 return (error);
559 }
560 }
561 /* Prevent this unit from being unconfigured while open. */
562 switch (fmt) {
563 case S_IFCHR:
564 rs->sc_dkdev.dk_copenmask |= pmask;
565 break;
566
567 case S_IFBLK:
568 rs->sc_dkdev.dk_bopenmask |= pmask;
569 break;
570 }
571
572 if ((rs->sc_dkdev.dk_openmask == 0) &&
573 ((rs->sc_flags & RAIDF_INITED) != 0)) {
574 /* First one... mark things as dirty... Note that we *MUST*
575 have done a configure before this. I DO NOT WANT TO BE
576 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
577 THAT THEY BELONG TOGETHER!!!!! */
578 /* XXX should check to see if we're only open for reading
579 here... If so, we needn't do this, but then need some
580 other way of keeping track of what's happened.. */
581
582 rf_markalldirty( raidPtrs[unit] );
583 }
584
585
586 rs->sc_dkdev.dk_openmask =
587 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
588
589 raidunlock(rs);
590
591 return (error);
592
593
594 }
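/*
 * raidclose -- close the raid device.  Clears the open masks; on the
 * last close of an initialized set the component labels are given a
 * final update, and if the system is shutting down the set is shut
 * down and the disk detached as well.
 */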
595 /* ARGSUSED */
596 int
597 raidclose(dev, flags, fmt, p)
598 dev_t dev;
599 int flags, fmt;
600 struct proc *p;
601 {
602 int unit = raidunit(dev);
603 struct raid_softc *rs;
604 int error = 0;
605 int part;
606
607 if (unit >= numraid)
608 return (ENXIO);
609 rs = &raid_softc[unit];
610
611 if ((error = raidlock(rs)) != 0)
612 return (error);
613
614 part = DISKPART(dev);
615
616 /* ...that much closer to allowing unconfiguration... */
617 switch (fmt) {
618 case S_IFCHR:
619 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
620 break;
621
622 case S_IFBLK:
623 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
624 break;
625 }
626 rs->sc_dkdev.dk_openmask =
627 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
628
629 if ((rs->sc_dkdev.dk_openmask == 0) &&
630 ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set.
		   Mark things as clean... */
635 #if 0
636 printf("Last one on raid%d. Updating status.\n",unit);
637 #endif
638 rf_update_component_labels(raidPtrs[unit],
639 RF_FINAL_COMPONENT_UPDATE);
640 if (doing_shutdown) {
641 /* last one, and we're going down, so
642 lights out for this RAID set too. */
643 error = rf_Shutdown(raidPtrs[unit]);
644
645 /* It's no longer initialized... */
646 rs->sc_flags &= ~RAIDF_INITED;
647
648 /* Detach the disk. */
649 disk_detach(&rs->sc_dkdev);
650 }
651 }
652
653 raidunlock(rs);
654 return (0);
655
656 }
657
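/*
 * raidstrategy -- queue an I/O request.  Validates the unit and the
 * transfer, bounds-checks it against the disklabel for non-raw
 * partitions, puts the buf on the per-unit queue, and kicks
 * raidstart() at splbio to push work into RAIDframe.
 */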
658 void
659 raidstrategy(bp)
660 struct buf *bp;
661 {
662 int s;
663
664 unsigned int raidID = raidunit(bp->b_dev);
665 RF_Raid_t *raidPtr;
666 struct raid_softc *rs = &raid_softc[raidID];
667 struct disklabel *lp;
668 int wlabel;
669
670 if ((rs->sc_flags & RAIDF_INITED) ==0) {
671 bp->b_error = ENXIO;
672 bp->b_flags |= B_ERROR;
673 bp->b_resid = bp->b_bcount;
674 biodone(bp);
675 return;
676 }
677 if (raidID >= numraid || !raidPtrs[raidID]) {
678 bp->b_error = ENODEV;
679 bp->b_flags |= B_ERROR;
680 bp->b_resid = bp->b_bcount;
681 biodone(bp);
682 return;
683 }
684 raidPtr = raidPtrs[raidID];
685 if (!raidPtr->valid) {
686 bp->b_error = ENODEV;
687 bp->b_flags |= B_ERROR;
688 bp->b_resid = bp->b_bcount;
689 biodone(bp);
690 return;
691 }
692 if (bp->b_bcount == 0) {
693 db1_printf(("b_bcount is zero..\n"));
694 biodone(bp);
695 return;
696 }
697 lp = rs->sc_dkdev.dk_label;
698
699 /*
700 * Do bounds checking and adjust transfer. If there's an
701 * error, the bounds check will flag that for us.
702 */
703
704 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
705 if (DISKPART(bp->b_dev) != RAW_PART)
706 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
707 db1_printf(("Bounds check failed!!:%d %d\n",
708 (int) bp->b_blkno, (int) wlabel));
709 biodone(bp);
710 return;
711 }
712 s = splbio();
713
714 bp->b_resid = 0;
715
716 /* stuff it onto our queue */
717 BUFQ_PUT(&rs->buf_queue, bp);
718
719 raidstart(raidPtrs[raidID]);
720
721 splx(s);
722 }
723 /* ARGSUSED */
724 int
725 raidread(dev, uio, flags)
726 dev_t dev;
727 struct uio *uio;
728 int flags;
729 {
730 int unit = raidunit(dev);
731 struct raid_softc *rs;
732 int part;
733
734 if (unit >= numraid)
735 return (ENXIO);
736 rs = &raid_softc[unit];
737
738 if ((rs->sc_flags & RAIDF_INITED) == 0)
739 return (ENXIO);
740 part = DISKPART(dev);
741
742 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
743
744 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
745
746 }
747 /* ARGSUSED */
748 int
749 raidwrite(dev, uio, flags)
750 dev_t dev;
751 struct uio *uio;
752 int flags;
753 {
754 int unit = raidunit(dev);
755 struct raid_softc *rs;
756
757 if (unit >= numraid)
758 return (ENXIO);
759 rs = &raid_softc[unit];
760
761 if ((rs->sc_flags & RAIDF_INITED) == 0)
762 return (ENXIO);
763 db1_printf(("raidwrite\n"));
764 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
765
766 }
767
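/*
 * raidioctl -- handle both the RAIDframe-specific ioctls (configure,
 * shutdown, rebuild, status queries, component label manipulation) and
 * the standard disk ioctls (disklabel handling).  Most RAIDframe
 * commands require the unit to have been configured first; see the
 * RAIDF_INITED checks below.
 */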
768 int
769 raidioctl(dev, cmd, data, flag, p)
770 dev_t dev;
771 u_long cmd;
772 caddr_t data;
773 int flag;
774 struct proc *p;
775 {
776 int unit = raidunit(dev);
777 int error = 0;
778 int part, pmask;
779 struct raid_softc *rs;
780 RF_Config_t *k_cfg, *u_cfg;
781 RF_Raid_t *raidPtr;
782 RF_RaidDisk_t *diskPtr;
783 RF_AccTotals_t *totals;
784 RF_DeviceConfig_t *d_cfg, **ucfgp;
785 u_char *specific_buf;
786 int retcode = 0;
787 int row;
788 int column;
789 int raidid;
790 struct rf_recon_req *rrcopy, *rr;
791 RF_ComponentLabel_t *clabel;
792 RF_ComponentLabel_t ci_label;
793 RF_ComponentLabel_t **clabel_ptr;
794 RF_SingleComponent_t *sparePtr,*componentPtr;
795 RF_SingleComponent_t hot_spare;
796 RF_SingleComponent_t component;
797 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
798 int i, j, d;
799 #ifdef __HAVE_OLD_DISKLABEL
800 struct disklabel newlabel;
801 #endif
802
803 if (unit >= numraid)
804 return (ENXIO);
805 rs = &raid_softc[unit];
806 raidPtr = raidPtrs[unit];
807
808 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
809 (int) DISKPART(dev), (int) unit, (int) cmd));
810
811 /* Must be open for writes for these commands... */
812 switch (cmd) {
813 case DIOCSDINFO:
814 case DIOCWDINFO:
815 #ifdef __HAVE_OLD_DISKLABEL
816 case ODIOCWDINFO:
817 case ODIOCSDINFO:
818 #endif
819 case DIOCWLABEL:
820 if ((flag & FWRITE) == 0)
821 return (EBADF);
822 }
823
824 /* Must be initialized for these... */
825 switch (cmd) {
826 case DIOCGDINFO:
827 case DIOCSDINFO:
828 case DIOCWDINFO:
829 #ifdef __HAVE_OLD_DISKLABEL
830 case ODIOCGDINFO:
831 case ODIOCWDINFO:
832 case ODIOCSDINFO:
833 case ODIOCGDEFLABEL:
834 #endif
835 case DIOCGPART:
836 case DIOCWLABEL:
837 case DIOCGDEFLABEL:
838 case RAIDFRAME_SHUTDOWN:
839 case RAIDFRAME_REWRITEPARITY:
840 case RAIDFRAME_GET_INFO:
841 case RAIDFRAME_RESET_ACCTOTALS:
842 case RAIDFRAME_GET_ACCTOTALS:
843 case RAIDFRAME_KEEP_ACCTOTALS:
844 case RAIDFRAME_GET_SIZE:
845 case RAIDFRAME_FAIL_DISK:
846 case RAIDFRAME_COPYBACK:
847 case RAIDFRAME_CHECK_RECON_STATUS:
848 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
849 case RAIDFRAME_GET_COMPONENT_LABEL:
850 case RAIDFRAME_SET_COMPONENT_LABEL:
851 case RAIDFRAME_ADD_HOT_SPARE:
852 case RAIDFRAME_REMOVE_HOT_SPARE:
853 case RAIDFRAME_INIT_LABELS:
854 case RAIDFRAME_REBUILD_IN_PLACE:
855 case RAIDFRAME_CHECK_PARITY:
856 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
857 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
858 case RAIDFRAME_CHECK_COPYBACK_STATUS:
859 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
860 case RAIDFRAME_SET_AUTOCONFIG:
861 case RAIDFRAME_SET_ROOT:
862 case RAIDFRAME_DELETE_COMPONENT:
863 case RAIDFRAME_INCORPORATE_HOT_SPARE:
864 if ((rs->sc_flags & RAIDF_INITED) == 0)
865 return (ENXIO);
866 }
867
868 switch (cmd) {
869
870 /* configure the system */
871 case RAIDFRAME_CONFIGURE:
872
873 if (raidPtr->valid) {
874 /* There is a valid RAID set running on this unit! */
875 printf("raid%d: Device already configured!\n",unit);
876 return(EINVAL);
877 }
878
879 /* copy-in the configuration information */
880 /* data points to a pointer to the configuration structure */
881
882 u_cfg = *((RF_Config_t **) data);
883 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
884 if (k_cfg == NULL) {
885 return (ENOMEM);
886 }
887 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
888 sizeof(RF_Config_t));
889 if (retcode) {
890 RF_Free(k_cfg, sizeof(RF_Config_t));
891 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
892 retcode));
893 return (retcode);
894 }
895 /* allocate a buffer for the layout-specific data, and copy it
896 * in */
897 if (k_cfg->layoutSpecificSize) {
898 if (k_cfg->layoutSpecificSize > 10000) {
899 /* sanity check */
900 RF_Free(k_cfg, sizeof(RF_Config_t));
901 return (EINVAL);
902 }
903 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
904 (u_char *));
905 if (specific_buf == NULL) {
906 RF_Free(k_cfg, sizeof(RF_Config_t));
907 return (ENOMEM);
908 }
909 retcode = copyin(k_cfg->layoutSpecific,
910 (caddr_t) specific_buf,
911 k_cfg->layoutSpecificSize);
912 if (retcode) {
913 RF_Free(k_cfg, sizeof(RF_Config_t));
914 RF_Free(specific_buf,
915 k_cfg->layoutSpecificSize);
916 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
917 retcode));
918 return (retcode);
919 }
920 } else
921 specific_buf = NULL;
922 k_cfg->layoutSpecific = specific_buf;
923
924 /* should do some kind of sanity check on the configuration.
925 * Store the sum of all the bytes in the last byte? */
926
927 /* configure the system */
928
929 /*
930 * Clear the entire RAID descriptor, just to make sure
931 * there is no stale data left in the case of a
932 * reconfiguration
933 */
934 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
935 raidPtr->raidid = unit;
936
937 retcode = rf_Configure(raidPtr, k_cfg, NULL);
938
939 if (retcode == 0) {
940
941 /* allow this many simultaneous IO's to
942 this RAID device */
943 raidPtr->openings = RAIDOUTSTANDING;
944
945 raidinit(raidPtr);
946 rf_markalldirty(raidPtr);
947 }
948 /* free the buffers. No return code here. */
949 if (k_cfg->layoutSpecificSize) {
950 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
951 }
952 RF_Free(k_cfg, sizeof(RF_Config_t));
953
954 return (retcode);
955
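	/*
	 * Userland sketch (not part of this driver) of how a raidctl-like
	 * tool would drive RAIDFRAME_CONFIGURE above; the ioctl argument
	 * is a pointer to an RF_Config_t pointer, matching the
	 * *((RF_Config_t **) data) dereference above.  The fd and the
	 * config setup here are illustrative assumptions only:
	 *
	 *	RF_Config_t cfg;
	 *	void *cfgp = &cfg;
	 *
	 *	memset(&cfg, 0, sizeof(cfg));
	 *	... fill in cfg from the configuration file ...
	 *	if (ioctl(fd, RAIDFRAME_CONFIGURE, &cfgp) == -1)
	 *		err(1, "RAIDFRAME_CONFIGURE");
	 */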
956 /* shutdown the system */
957 case RAIDFRAME_SHUTDOWN:
958
959 if ((error = raidlock(rs)) != 0)
960 return (error);
961
962 /*
963 * If somebody has a partition mounted, we shouldn't
964 * shutdown.
965 */
966
967 part = DISKPART(dev);
968 pmask = (1 << part);
969 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
970 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
971 (rs->sc_dkdev.dk_copenmask & pmask))) {
972 raidunlock(rs);
973 return (EBUSY);
974 }
975
976 retcode = rf_Shutdown(raidPtr);
977
978 /* It's no longer initialized... */
979 rs->sc_flags &= ~RAIDF_INITED;
980
981 /* Detach the disk. */
982 disk_detach(&rs->sc_dkdev);
983
984 raidunlock(rs);
985
986 return (retcode);
987 case RAIDFRAME_GET_COMPONENT_LABEL:
988 clabel_ptr = (RF_ComponentLabel_t **) data;
989 /* need to read the component label for the disk indicated
990 by row,column in clabel */
991
		/* For practice, let's get it directly from disk, rather
		   than from the in-core copy */
994 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
995 (RF_ComponentLabel_t *));
996 if (clabel == NULL)
997 return (ENOMEM);
998
999 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1000
1001 retcode = copyin( *clabel_ptr, clabel,
1002 sizeof(RF_ComponentLabel_t));
1003
1004 if (retcode) {
1005 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1006 return(retcode);
1007 }
1008
1009 row = clabel->row;
1010 column = clabel->column;
1011
1012 if ((row < 0) || (row >= raidPtr->numRow) ||
1013 (column < 0) || (column >= raidPtr->numCol +
1014 raidPtr->numSpare)) {
1015 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1016 return(EINVAL);
1017 }
1018
1019 raidread_component_label(raidPtr->Disks[row][column].dev,
1020 raidPtr->raid_cinfo[row][column].ci_vp,
1021 clabel );
1022
1023 retcode = copyout((caddr_t) clabel,
1024 (caddr_t) *clabel_ptr,
1025 sizeof(RF_ComponentLabel_t));
1026 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1027 return (retcode);
1028
1029 case RAIDFRAME_SET_COMPONENT_LABEL:
1030 clabel = (RF_ComponentLabel_t *) data;
1031
1032 /* XXX check the label for valid stuff... */
1033 /* Note that some things *should not* get modified --
1034 the user should be re-initing the labels instead of
1035 trying to patch things.
1036 */
1037
1038 raidid = raidPtr->raidid;
1039 printf("raid%d: Got component label:\n", raidid);
1040 printf("raid%d: Version: %d\n", raidid, clabel->version);
1041 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1042 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1043 printf("raid%d: Row: %d\n", raidid, clabel->row);
1044 printf("raid%d: Column: %d\n", raidid, clabel->column);
1045 printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
1046 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1047 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1048 printf("raid%d: Status: %d\n", raidid, clabel->status);
1049
1050 row = clabel->row;
1051 column = clabel->column;
1052
1053 if ((row < 0) || (row >= raidPtr->numRow) ||
1054 (column < 0) || (column >= raidPtr->numCol)) {
1055 return(EINVAL);
1056 }
1057
1058 /* XXX this isn't allowed to do anything for now :-) */
1059
1060 /* XXX and before it is, we need to fill in the rest
1061 of the fields!?!?!?! */
1062 #if 0
1063 raidwrite_component_label(
1064 raidPtr->Disks[row][column].dev,
1065 raidPtr->raid_cinfo[row][column].ci_vp,
1066 clabel );
1067 #endif
1068 return (0);
1069
1070 case RAIDFRAME_INIT_LABELS:
1071 clabel = (RF_ComponentLabel_t *) data;
1072 /*
1073 we only want the serial number from
1074 the above. We get all the rest of the information
1075 from the config that was used to create this RAID
1076 set.
1077 */
1078
1079 raidPtr->serial_number = clabel->serial_number;
1080
1081 raid_init_component_label(raidPtr, &ci_label);
1082 ci_label.serial_number = clabel->serial_number;
1083
1084 for(row=0;row<raidPtr->numRow;row++) {
1085 ci_label.row = row;
1086 for(column=0;column<raidPtr->numCol;column++) {
1087 diskPtr = &raidPtr->Disks[row][column];
1088 if (!RF_DEAD_DISK(diskPtr->status)) {
1089 ci_label.partitionSize = diskPtr->partitionSize;
1090 ci_label.column = column;
1091 raidwrite_component_label(
1092 raidPtr->Disks[row][column].dev,
1093 raidPtr->raid_cinfo[row][column].ci_vp,
1094 &ci_label );
1095 }
1096 }
1097 }
1098
1099 return (retcode);
1100 case RAIDFRAME_SET_AUTOCONFIG:
1101 d = rf_set_autoconfig(raidPtr, *(int *) data);
1102 printf("raid%d: New autoconfig value is: %d\n",
1103 raidPtr->raidid, d);
1104 *(int *) data = d;
1105 return (retcode);
1106
1107 case RAIDFRAME_SET_ROOT:
1108 d = rf_set_rootpartition(raidPtr, *(int *) data);
1109 printf("raid%d: New rootpartition value is: %d\n",
1110 raidPtr->raidid, d);
1111 *(int *) data = d;
1112 return (retcode);
1113
1114 /* initialize all parity */
1115 case RAIDFRAME_REWRITEPARITY:
1116
1117 if (raidPtr->Layout.map->faultsTolerated == 0) {
1118 /* Parity for RAID 0 is trivially correct */
1119 raidPtr->parity_good = RF_RAID_CLEAN;
1120 return(0);
1121 }
1122
1123 if (raidPtr->parity_rewrite_in_progress == 1) {
1124 /* Re-write is already in progress! */
1125 return(EINVAL);
1126 }
1127
1128 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1129 rf_RewriteParityThread,
1130 raidPtr,"raid_parity");
1131 return (retcode);
1132
1133
1134 case RAIDFRAME_ADD_HOT_SPARE:
1135 sparePtr = (RF_SingleComponent_t *) data;
1136 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1137 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1138 return(retcode);
1139
1140 case RAIDFRAME_REMOVE_HOT_SPARE:
1141 return(retcode);
1142
1143 case RAIDFRAME_DELETE_COMPONENT:
1144 componentPtr = (RF_SingleComponent_t *)data;
1145 memcpy( &component, componentPtr,
1146 sizeof(RF_SingleComponent_t));
1147 retcode = rf_delete_component(raidPtr, &component);
1148 return(retcode);
1149
1150 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1151 componentPtr = (RF_SingleComponent_t *)data;
1152 memcpy( &component, componentPtr,
1153 sizeof(RF_SingleComponent_t));
1154 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1155 return(retcode);
1156
1157 case RAIDFRAME_REBUILD_IN_PLACE:
1158
1159 if (raidPtr->Layout.map->faultsTolerated == 0) {
1160 /* Can't do this on a RAID 0!! */
1161 return(EINVAL);
1162 }
1163
1164 if (raidPtr->recon_in_progress == 1) {
1165 /* a reconstruct is already in progress! */
1166 return(EINVAL);
1167 }
1168
1169 componentPtr = (RF_SingleComponent_t *) data;
1170 memcpy( &component, componentPtr,
1171 sizeof(RF_SingleComponent_t));
1172 row = component.row;
1173 column = component.column;
1174 printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
1175 row, column);
1176 if ((row < 0) || (row >= raidPtr->numRow) ||
1177 (column < 0) || (column >= raidPtr->numCol)) {
1178 return(EINVAL);
1179 }
1180
1181 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1182 if (rrcopy == NULL)
1183 return(ENOMEM);
1184
1185 rrcopy->raidPtr = (void *) raidPtr;
1186 rrcopy->row = row;
1187 rrcopy->col = column;
1188
1189 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1190 rf_ReconstructInPlaceThread,
1191 rrcopy,"raid_reconip");
1192 return(retcode);
1193
1194 case RAIDFRAME_GET_INFO:
1195 if (!raidPtr->valid)
1196 return (ENODEV);
1197 ucfgp = (RF_DeviceConfig_t **) data;
1198 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1199 (RF_DeviceConfig_t *));
1200 if (d_cfg == NULL)
1201 return (ENOMEM);
1202 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1203 d_cfg->rows = raidPtr->numRow;
1204 d_cfg->cols = raidPtr->numCol;
1205 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1206 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1207 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1208 return (ENOMEM);
1209 }
1210 d_cfg->nspares = raidPtr->numSpare;
1211 if (d_cfg->nspares >= RF_MAX_DISKS) {
1212 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1213 return (ENOMEM);
1214 }
1215 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1216 d = 0;
1217 for (i = 0; i < d_cfg->rows; i++) {
1218 for (j = 0; j < d_cfg->cols; j++) {
1219 d_cfg->devs[d] = raidPtr->Disks[i][j];
1220 d++;
1221 }
1222 }
1223 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1224 d_cfg->spares[i] = raidPtr->Disks[0][j];
1225 }
1226 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1227 sizeof(RF_DeviceConfig_t));
1228 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1229
1230 return (retcode);
1231
1232 case RAIDFRAME_CHECK_PARITY:
1233 *(int *) data = raidPtr->parity_good;
1234 return (0);
1235
1236 case RAIDFRAME_RESET_ACCTOTALS:
1237 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1238 return (0);
1239
1240 case RAIDFRAME_GET_ACCTOTALS:
1241 totals = (RF_AccTotals_t *) data;
1242 *totals = raidPtr->acc_totals;
1243 return (0);
1244
1245 case RAIDFRAME_KEEP_ACCTOTALS:
1246 raidPtr->keep_acc_totals = *(int *)data;
1247 return (0);
1248
1249 case RAIDFRAME_GET_SIZE:
1250 *(int *) data = raidPtr->totalSectors;
1251 return (0);
1252
1253 /* fail a disk & optionally start reconstruction */
1254 case RAIDFRAME_FAIL_DISK:
1255
1256 if (raidPtr->Layout.map->faultsTolerated == 0) {
1257 /* Can't do this on a RAID 0!! */
1258 return(EINVAL);
1259 }
1260
1261 rr = (struct rf_recon_req *) data;
1262
1263 if (rr->row < 0 || rr->row >= raidPtr->numRow
1264 || rr->col < 0 || rr->col >= raidPtr->numCol)
1265 return (EINVAL);
1266
1267 printf("raid%d: Failing the disk: row: %d col: %d\n",
1268 unit, rr->row, rr->col);
1269
1270 /* make a copy of the recon request so that we don't rely on
1271 * the user's buffer */
1272 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1273 if (rrcopy == NULL)
1274 return(ENOMEM);
1275 memcpy(rrcopy, rr, sizeof(*rr));
1276 rrcopy->raidPtr = (void *) raidPtr;
1277
1278 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1279 rf_ReconThread,
1280 rrcopy,"raid_recon");
1281 return (0);
1282
1283 /* invoke a copyback operation after recon on whatever disk
1284 * needs it, if any */
1285 case RAIDFRAME_COPYBACK:
1286
1287 if (raidPtr->Layout.map->faultsTolerated == 0) {
1288 /* This makes no sense on a RAID 0!! */
1289 return(EINVAL);
1290 }
1291
1292 if (raidPtr->copyback_in_progress == 1) {
1293 /* Copyback is already in progress! */
1294 return(EINVAL);
1295 }
1296
1297 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1298 rf_CopybackThread,
1299 raidPtr,"raid_copyback");
1300 return (retcode);
1301
1302 /* return the percentage completion of reconstruction */
1303 case RAIDFRAME_CHECK_RECON_STATUS:
1304 if (raidPtr->Layout.map->faultsTolerated == 0) {
1305 /* This makes no sense on a RAID 0, so tell the
1306 user it's done. */
1307 *(int *) data = 100;
1308 return(0);
1309 }
1310 row = 0; /* XXX we only consider a single row... */
1311 if (raidPtr->status[row] != rf_rs_reconstructing)
1312 *(int *) data = 100;
1313 else
1314 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1315 return (0);
1316 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1317 progressInfoPtr = (RF_ProgressInfo_t **) data;
1318 row = 0; /* XXX we only consider a single row... */
1319 if (raidPtr->status[row] != rf_rs_reconstructing) {
1320 progressInfo.remaining = 0;
1321 progressInfo.completed = 100;
1322 progressInfo.total = 100;
1323 } else {
1324 progressInfo.total =
1325 raidPtr->reconControl[row]->numRUsTotal;
1326 progressInfo.completed =
1327 raidPtr->reconControl[row]->numRUsComplete;
1328 progressInfo.remaining = progressInfo.total -
1329 progressInfo.completed;
1330 }
1331 retcode = copyout((caddr_t) &progressInfo,
1332 (caddr_t) *progressInfoPtr,
1333 sizeof(RF_ProgressInfo_t));
1334 return (retcode);
1335
1336 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1337 if (raidPtr->Layout.map->faultsTolerated == 0) {
1338 /* This makes no sense on a RAID 0, so tell the
1339 user it's done. */
1340 *(int *) data = 100;
1341 return(0);
1342 }
1343 if (raidPtr->parity_rewrite_in_progress == 1) {
1344 *(int *) data = 100 *
1345 raidPtr->parity_rewrite_stripes_done /
1346 raidPtr->Layout.numStripe;
1347 } else {
1348 *(int *) data = 100;
1349 }
1350 return (0);
1351
1352 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1353 progressInfoPtr = (RF_ProgressInfo_t **) data;
1354 if (raidPtr->parity_rewrite_in_progress == 1) {
1355 progressInfo.total = raidPtr->Layout.numStripe;
1356 progressInfo.completed =
1357 raidPtr->parity_rewrite_stripes_done;
1358 progressInfo.remaining = progressInfo.total -
1359 progressInfo.completed;
1360 } else {
1361 progressInfo.remaining = 0;
1362 progressInfo.completed = 100;
1363 progressInfo.total = 100;
1364 }
1365 retcode = copyout((caddr_t) &progressInfo,
1366 (caddr_t) *progressInfoPtr,
1367 sizeof(RF_ProgressInfo_t));
1368 return (retcode);
1369
1370 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1371 if (raidPtr->Layout.map->faultsTolerated == 0) {
1372 /* This makes no sense on a RAID 0 */
1373 *(int *) data = 100;
1374 return(0);
1375 }
1376 if (raidPtr->copyback_in_progress == 1) {
1377 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1378 raidPtr->Layout.numStripe;
1379 } else {
1380 *(int *) data = 100;
1381 }
1382 return (0);
1383
1384 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1385 progressInfoPtr = (RF_ProgressInfo_t **) data;
1386 if (raidPtr->copyback_in_progress == 1) {
1387 progressInfo.total = raidPtr->Layout.numStripe;
1388 progressInfo.completed =
1389 raidPtr->copyback_stripes_done;
1390 progressInfo.remaining = progressInfo.total -
1391 progressInfo.completed;
1392 } else {
1393 progressInfo.remaining = 0;
1394 progressInfo.completed = 100;
1395 progressInfo.total = 100;
1396 }
1397 retcode = copyout((caddr_t) &progressInfo,
1398 (caddr_t) *progressInfoPtr,
1399 sizeof(RF_ProgressInfo_t));
1400 return (retcode);
1401
1402 /* the sparetable daemon calls this to wait for the kernel to
1403 * need a spare table. this ioctl does not return until a
1404 * spare table is needed. XXX -- calling mpsleep here in the
1405 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1406 * -- I should either compute the spare table in the kernel,
1407 * or have a different -- XXX XXX -- interface (a different
1408 * character device) for delivering the table -- XXX */
1409 #if 0
1410 case RAIDFRAME_SPARET_WAIT:
1411 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1412 while (!rf_sparet_wait_queue)
1413 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1414 waitreq = rf_sparet_wait_queue;
1415 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1416 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1417
1418 /* structure assignment */
1419 *((RF_SparetWait_t *) data) = *waitreq;
1420
1421 RF_Free(waitreq, sizeof(*waitreq));
1422 return (0);
1423
		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
1426 case RAIDFRAME_ABORT_SPARET_WAIT:
1427 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1428 waitreq->fcol = -1;
1429 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1430 waitreq->next = rf_sparet_wait_queue;
1431 rf_sparet_wait_queue = waitreq;
1432 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1433 wakeup(&rf_sparet_wait_queue);
1434 return (0);
1435
1436 /* used by the spare table daemon to deliver a spare table
1437 * into the kernel */
1438 case RAIDFRAME_SEND_SPARET:
1439
1440 /* install the spare table */
1441 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1442
1443 /* respond to the requestor. the return status of the spare
1444 * table installation is passed in the "fcol" field */
1445 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1446 waitreq->fcol = retcode;
1447 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1448 waitreq->next = rf_sparet_resp_queue;
1449 rf_sparet_resp_queue = waitreq;
1450 wakeup(&rf_sparet_resp_queue);
1451 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1452
1453 return (retcode);
1454 #endif
1455
1456 default:
1457 break; /* fall through to the os-specific code below */
1458
1459 }
1460
1461 if (!raidPtr->valid)
1462 return (EINVAL);
1463
1464 /*
1465 * Add support for "regular" device ioctls here.
1466 */
1467
1468 switch (cmd) {
1469 case DIOCGDINFO:
1470 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1471 break;
1472 #ifdef __HAVE_OLD_DISKLABEL
1473 case ODIOCGDINFO:
1474 newlabel = *(rs->sc_dkdev.dk_label);
1475 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1476 return ENOTTY;
1477 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1478 break;
1479 #endif
1480
1481 case DIOCGPART:
1482 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1483 ((struct partinfo *) data)->part =
1484 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1485 break;
1486
1487 case DIOCWDINFO:
1488 case DIOCSDINFO:
1489 #ifdef __HAVE_OLD_DISKLABEL
1490 case ODIOCWDINFO:
1491 case ODIOCSDINFO:
1492 #endif
1493 {
1494 struct disklabel *lp;
1495 #ifdef __HAVE_OLD_DISKLABEL
1496 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1497 memset(&newlabel, 0, sizeof newlabel);
1498 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1499 lp = &newlabel;
1500 } else
1501 #endif
1502 lp = (struct disklabel *)data;
1503
1504 if ((error = raidlock(rs)) != 0)
1505 return (error);
1506
1507 rs->sc_flags |= RAIDF_LABELLING;
1508
1509 error = setdisklabel(rs->sc_dkdev.dk_label,
1510 lp, 0, rs->sc_dkdev.dk_cpulabel);
1511 if (error == 0) {
1512 if (cmd == DIOCWDINFO
1513 #ifdef __HAVE_OLD_DISKLABEL
1514 || cmd == ODIOCWDINFO
1515 #endif
1516 )
1517 error = writedisklabel(RAIDLABELDEV(dev),
1518 raidstrategy, rs->sc_dkdev.dk_label,
1519 rs->sc_dkdev.dk_cpulabel);
1520 }
1521 rs->sc_flags &= ~RAIDF_LABELLING;
1522
1523 raidunlock(rs);
1524
1525 if (error)
1526 return (error);
1527 break;
1528 }
1529
1530 case DIOCWLABEL:
1531 if (*(int *) data != 0)
1532 rs->sc_flags |= RAIDF_WLABEL;
1533 else
1534 rs->sc_flags &= ~RAIDF_WLABEL;
1535 break;
1536
1537 case DIOCGDEFLABEL:
1538 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1539 break;
1540
1541 #ifdef __HAVE_OLD_DISKLABEL
1542 case ODIOCGDEFLABEL:
1543 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1544 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1545 return ENOTTY;
1546 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1547 break;
1548 #endif
1549
1550 default:
1551 retcode = ENOTTY;
1552 }
1553 return (retcode);
1554
1555 }
1556
1557
1558 /* raidinit -- complete the rest of the initialization for the
1559 RAIDframe device. */
1560
1561
1562 static void
1563 raidinit(raidPtr)
1564 RF_Raid_t *raidPtr;
1565 {
1566 struct raid_softc *rs;
1567 int unit;
1568
1569 unit = raidPtr->raidid;
1570
1571 rs = &raid_softc[unit];
1572
1573 /* XXX should check return code first... */
1574 rs->sc_flags |= RAIDF_INITED;
1575
1576 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1577
1578 rs->sc_dkdev.dk_name = rs->sc_xname;
1579
1580 /* disk_attach actually creates space for the CPU disklabel, among
1581 * other things, so it's critical to call this *BEFORE* we try putzing
1582 * with disklabels. */
1583
1584 disk_attach(&rs->sc_dkdev);
1585
1586 /* XXX There may be a weird interaction here between this, and
1587 * protectedSectors, as used in RAIDframe. */
1588
1589 rs->sc_size = raidPtr->totalSectors;
1590
1591 }
1592
1593 /* wake up the daemon & tell it to get us a spare table
1594 * XXX
1595 * the entries in the queues should be tagged with the raidPtr
1596 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
1598 * XXX
1599 *
1600 * XXX This code is not currently used. GO
1601 */
1602 int
1603 rf_GetSpareTableFromDaemon(req)
1604 RF_SparetWait_t *req;
1605 {
1606 int retcode;
1607
1608 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1609 req->next = rf_sparet_wait_queue;
1610 rf_sparet_wait_queue = req;
1611 wakeup(&rf_sparet_wait_queue);
1612
1613 /* mpsleep unlocks the mutex */
1614 while (!rf_sparet_resp_queue) {
1615 tsleep(&rf_sparet_resp_queue, PRIBIO,
1616 "raidframe getsparetable", 0);
1617 }
1618 req = rf_sparet_resp_queue;
1619 rf_sparet_resp_queue = req->next;
1620 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1621
1622 retcode = req->fcol;
1623 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1624 * alloc'd */
1625 return (retcode);
1626 }
1627
1628 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1629 * bp & passes it down.
1630 * any calls originating in the kernel must use non-blocking I/O
1631 * do some extra sanity checking to return "appropriate" error values for
1632 * certain conditions (to make some standard utilities work)
1633 *
1634 * Formerly known as: rf_DoAccessKernel
1635 */
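/*
 * The loop below drains the per-unit buf queue for as long as the set
 * has "openings" available: each buf's partition-relative b_blkno is
 * translated to an absolute RAID address, sanity-checked against the
 * total size and sector mask, and then handed to rf_DoAccess() as a
 * non-blocking request.
 */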
1636 void
1637 raidstart(raidPtr)
1638 RF_Raid_t *raidPtr;
1639 {
1640 RF_SectorCount_t num_blocks, pb, sum;
1641 RF_RaidAddr_t raid_addr;
1642 int retcode;
1643 struct partition *pp;
1644 daddr_t blocknum;
1645 int unit;
1646 struct raid_softc *rs;
1647 int do_async;
1648 struct buf *bp;
1649
1650 unit = raidPtr->raidid;
1651 rs = &raid_softc[unit];
1652
1653 /* quick check to see if anything has died recently */
1654 RF_LOCK_MUTEX(raidPtr->mutex);
1655 if (raidPtr->numNewFailures > 0) {
1656 rf_update_component_labels(raidPtr,
1657 RF_NORMAL_COMPONENT_UPDATE);
1658 raidPtr->numNewFailures--;
1659 }
1660
1661 /* Check to see if we're at the limit... */
1662 while (raidPtr->openings > 0) {
1663 RF_UNLOCK_MUTEX(raidPtr->mutex);
1664
1665 /* get the next item, if any, from the queue */
1666 if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
1667 /* nothing more to do */
1668 return;
1669 }
1670
1671 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1672 * partition.. Need to make it absolute to the underlying
1673 * device.. */
1674
1675 blocknum = bp->b_blkno;
1676 if (DISKPART(bp->b_dev) != RAW_PART) {
1677 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1678 blocknum += pp->p_offset;
1679 }
1680
1681 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1682 (int) blocknum));
1683
1684 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1685 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1686
1687 /* *THIS* is where we adjust what block we're going to...
1688 * but DO NOT TOUCH bp->b_blkno!!! */
1689 raid_addr = blocknum;
1690
1691 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1692 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1693 sum = raid_addr + num_blocks + pb;
1694 if (1 || rf_debugKernelAccess) {
1695 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1696 (int) raid_addr, (int) sum, (int) num_blocks,
1697 (int) pb, (int) bp->b_resid));
1698 }
1699 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1700 || (sum < num_blocks) || (sum < pb)) {
1701 bp->b_error = ENOSPC;
1702 bp->b_flags |= B_ERROR;
1703 bp->b_resid = bp->b_bcount;
1704 biodone(bp);
1705 RF_LOCK_MUTEX(raidPtr->mutex);
1706 continue;
1707 }
1708 /*
1709 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1710 */
1711
1712 if (bp->b_bcount & raidPtr->sectorMask) {
1713 bp->b_error = EINVAL;
1714 bp->b_flags |= B_ERROR;
1715 bp->b_resid = bp->b_bcount;
1716 biodone(bp);
1717 RF_LOCK_MUTEX(raidPtr->mutex);
1718 continue;
1719
1720 }
1721 db1_printf(("Calling DoAccess..\n"));
1722
1723
1724 RF_LOCK_MUTEX(raidPtr->mutex);
1725 raidPtr->openings--;
1726 RF_UNLOCK_MUTEX(raidPtr->mutex);
1727
1728 /*
1729 * Everything is async.
1730 */
1731 do_async = 1;
1732
1733 disk_busy(&rs->sc_dkdev);
1734
1735 /* XXX we're still at splbio() here... do we *really*
1736 need to be? */
1737
1738 /* don't ever condition on bp->b_flags & B_WRITE.
1739 * always condition on B_READ instead */
1740
1741 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1742 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1743 do_async, raid_addr, num_blocks,
1744 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1745
1746 RF_LOCK_MUTEX(raidPtr->mutex);
1747 }
1748 RF_UNLOCK_MUTEX(raidPtr->mutex);
1749 }
1750
1751
1752
1753
1754 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1755
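/*
 * Each request gets a struct raidbuf from raidframe_cbufpool wrapping a
 * fresh buf for the component I/O; InitBP() fills it in with
 * KernelWakeupFunc() as the completion callback, and the buf is then
 * pushed down with VOP_STRATEGY().  RF_IO_TYPE_NOP requests just bump
 * the outstanding count and complete immediately.
 */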
1756 int
1757 rf_DispatchKernelIO(queue, req)
1758 RF_DiskQueue_t *queue;
1759 RF_DiskQueueData_t *req;
1760 {
1761 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1762 struct buf *bp;
1763 struct raidbuf *raidbp = NULL;
1764
1765 req->queue = queue;
1766
1767 #if DIAGNOSTIC
1768 if (queue->raidPtr->raidid >= numraid) {
1769 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
1770 numraid);
1771 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1772 }
1773 #endif
1774
1775 bp = req->bp;
1776 #if 1
1777 /* XXX when there is a physical disk failure, someone is passing us a
1778 * buffer that contains old stuff!! Attempt to deal with this problem
1779 * without taking a performance hit... (not sure where the real bug
1780 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1781
1782 if (bp->b_flags & B_ERROR) {
1783 bp->b_flags &= ~B_ERROR;
1784 }
1785 if (bp->b_error != 0) {
1786 bp->b_error = 0;
1787 }
1788 #endif
1789 raidbp = pool_get(&raidframe_cbufpool, PR_NOWAIT);
1790
1791 /*
1792 * context for raidiodone
1793 */
1794 raidbp->rf_obp = bp;
1795 raidbp->req = req;
1796
1797 LIST_INIT(&raidbp->rf_buf.b_dep);
1798
1799 switch (req->type) {
1800 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1801 /* XXX need to do something extra here.. */
1802 /* I'm leaving this in, as I've never actually seen it used,
1803 * and I'd like folks to report it... GO */
		printf("WAKEUP CALLED\n");
1805 queue->numOutstanding++;
1806
1807 /* XXX need to glue the original buffer into this?? */
1808
1809 KernelWakeupFunc(&raidbp->rf_buf);
1810 break;
1811
1812 case RF_IO_TYPE_READ:
1813 case RF_IO_TYPE_WRITE:
1814
1815 if (req->tracerec) {
1816 RF_ETIMER_START(req->tracerec->timer);
1817 }
1818 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1819 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1820 req->sectorOffset, req->numSector,
1821 req->buf, KernelWakeupFunc, (void *) req,
1822 queue->raidPtr->logBytesPerSector, req->b_proc);
1823
1824 if (rf_debugKernelAccess) {
1825 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1826 (long) bp->b_blkno));
1827 }
1828 queue->numOutstanding++;
1829 queue->last_deq_sector = req->sectorOffset;
1830 /* acc wouldn't have been let in if there were any pending
1831 * reqs at any other priority */
1832 queue->curPriority = req->priority;
1833
1834 db1_printf(("Going for %c to unit %d row %d col %d\n",
1835 req->type, queue->raidPtr->raidid,
1836 queue->row, queue->col));
1837 db1_printf(("sector %d count %d (%d bytes) %d\n",
1838 (int) req->sectorOffset, (int) req->numSector,
1839 (int) (req->numSector <<
1840 queue->raidPtr->logBytesPerSector),
1841 (int) queue->raidPtr->logBytesPerSector));
1842 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1843 raidbp->rf_buf.b_vp->v_numoutput++;
1844 }
1845 VOP_STRATEGY(&raidbp->rf_buf);
1846
1847 break;
1848
1849 default:
1850 panic("bad req->type in rf_DispatchKernelIO");
1851 }
1852 db1_printf(("Exiting from DispatchKernelIO\n"));
1853
1854 return (0);
1855 }
/* this is the callback function associated with an I/O invoked from
1857 kernel code.
1858 */
1859 static void
1860 KernelWakeupFunc(vbp)
1861 struct buf *vbp;
1862 {
1863 RF_DiskQueueData_t *req = NULL;
1864 RF_DiskQueue_t *queue;
1865 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1866 struct buf *bp;
1867 int s;
1868
1869 s = splbio();
1870 db1_printf(("recovering the request queue:\n"));
1871 req = raidbp->req;
1872
1873 bp = raidbp->rf_obp;
1874
1875 queue = (RF_DiskQueue_t *) req->queue;
1876
1877 if (raidbp->rf_buf.b_flags & B_ERROR) {
1878 bp->b_flags |= B_ERROR;
1879 bp->b_error = raidbp->rf_buf.b_error ?
1880 raidbp->rf_buf.b_error : EIO;
1881 }
1882
1883 /* XXX methinks this could be wrong... */
1884 #if 1
1885 bp->b_resid = raidbp->rf_buf.b_resid;
1886 #endif
1887
1888 if (req->tracerec) {
1889 RF_ETIMER_STOP(req->tracerec->timer);
1890 RF_ETIMER_EVAL(req->tracerec->timer);
1891 RF_LOCK_MUTEX(rf_tracing_mutex);
1892 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1893 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1894 req->tracerec->num_phys_ios++;
1895 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1896 }
1897 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1898
1899 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1900 * ballistic, and mark the component as hosed... */
1901
1902 if (bp->b_flags & B_ERROR) {
1903 /* Mark the disk as dead */
1904 /* but only mark it once... */
1905 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1906 rf_ds_optimal) {
1907 printf("raid%d: IO Error. Marking %s as failed.\n",
1908 queue->raidPtr->raidid,
1909 queue->raidPtr->Disks[queue->row][queue->col].devname);
1910 queue->raidPtr->Disks[queue->row][queue->col].status =
1911 rf_ds_failed;
1912 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1913 queue->raidPtr->numFailures++;
1914 queue->raidPtr->numNewFailures++;
1915 } else { /* Disk is already dead... */
1916 /* printf("Disk already marked as dead!\n"); */
1917 }
1918
1919 }
1920
1921 pool_put(&raidframe_cbufpool, raidbp);
1922
1923 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1924 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1925
1926 splx(s);
1927 }
1928
1929
1930
1931 /*
1932 * initialize a buf structure for doing an I/O in the kernel.
1933 */
1934 static void
1935 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1936 logBytesPerSector, b_proc)
1937 struct buf *bp;
1938 struct vnode *b_vp;
1939 unsigned rw_flag;
1940 dev_t dev;
1941 RF_SectorNum_t startSect;
1942 RF_SectorCount_t numSect;
1943 caddr_t buf;
1944 void (*cbFunc) (struct buf *);
1945 void *cbArg;
1946 int logBytesPerSector;
1947 struct proc *b_proc;
1948 {
1949 /* bp->b_flags = B_PHYS | rw_flag; */
1950 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1951 bp->b_bcount = numSect << logBytesPerSector;
1952 bp->b_bufsize = bp->b_bcount;
1953 bp->b_error = 0;
1954 bp->b_dev = dev;
1955 bp->b_data = buf;
1956 bp->b_blkno = startSect;
1957 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1958 if (bp->b_bcount == 0) {
1959 panic("bp->b_bcount is zero in InitBP!!\n");
1960 }
1961 bp->b_proc = b_proc;
1962 bp->b_iodone = cbFunc;
1963 bp->b_vp = b_vp;
1964
1965 }
1966
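/*
 * Fabricate a default disklabel for the RAID device from the geometry
 * recorded in the RAIDframe structures.
 */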
1967 static void
1968 raidgetdefaultlabel(raidPtr, rs, lp)
1969 RF_Raid_t *raidPtr;
1970 struct raid_softc *rs;
1971 struct disklabel *lp;
1972 {
1973 db1_printf(("Building a default label...\n"));
1974 memset(lp, 0, sizeof(*lp));
1975
1976 /* fabricate a label... */
1977 lp->d_secperunit = raidPtr->totalSectors;
1978 lp->d_secsize = raidPtr->bytesPerSector;
1979 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1980 lp->d_ntracks = 4 * raidPtr->numCol;
1981 lp->d_ncylinders = raidPtr->totalSectors /
1982 (lp->d_nsectors * lp->d_ntracks);
1983 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1984
1985 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1986 lp->d_type = DTYPE_RAID;
1987 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1988 lp->d_rpm = 3600;
1989 lp->d_interleave = 1;
1990 lp->d_flags = 0;
1991
1992 lp->d_partitions[RAW_PART].p_offset = 0;
1993 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1994 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1995 lp->d_npartitions = RAW_PART + 1;
1996
1997 lp->d_magic = DISKMAGIC;
1998 lp->d_magic2 = DISKMAGIC;
1999 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2000
2001 }
2002 /*
2003 * Read the disklabel from the raid device. If one is not present, fake one
2004 * up.
2005 */
2006 static void
2007 raidgetdisklabel(dev)
2008 dev_t dev;
2009 {
2010 int unit = raidunit(dev);
2011 struct raid_softc *rs = &raid_softc[unit];
2012 char *errstring;
2013 struct disklabel *lp = rs->sc_dkdev.dk_label;
2014 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2015 RF_Raid_t *raidPtr;
2016
2017 db1_printf(("Getting the disklabel...\n"));
2018
2019 memset(clp, 0, sizeof(*clp));
2020
2021 raidPtr = raidPtrs[unit];
2022
2023 raidgetdefaultlabel(raidPtr, rs, lp);
2024
2025 /*
2026 * Call the generic disklabel extraction routine.
2027 */
2028 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2029 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2030 if (errstring)
2031 raidmakedisklabel(rs);
2032 else {
2033 int i;
2034 struct partition *pp;
2035
2036 /*
2037 * Sanity check whether the found disklabel is valid.
2038 *
2039 * This is necessary since the total size of the raid device
2040 * may vary when the interleave is changed, even though exactly
2041 * the same components are used, and an old disklabel may be
2042 * used if one is found.
2043 */
2044 if (lp->d_secperunit != rs->sc_size)
2045 printf("raid%d: WARNING: %s: "
2046 "total sector size in disklabel (%d) != "
2047 "the size of raid (%ld)\n", unit, rs->sc_xname,
2048 lp->d_secperunit, (long) rs->sc_size);
2049 for (i = 0; i < lp->d_npartitions; i++) {
2050 pp = &lp->d_partitions[i];
2051 if (pp->p_offset + pp->p_size > rs->sc_size)
2052 printf("raid%d: WARNING: %s: end of partition `%c' "
2053 "exceeds the size of raid (%ld)\n",
2054 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2055 }
2056 }
2057
2058 }
2059 /*
2060 * Take care of things one might want to take care of in the event
2061 * that a disklabel isn't present.
2062 */
2063 static void
2064 raidmakedisklabel(rs)
2065 struct raid_softc *rs;
2066 {
2067 struct disklabel *lp = rs->sc_dkdev.dk_label;
2068 db1_printf(("Making a label..\n"));
2069
2070 /*
2071 * For historical reasons, if there's no disklabel present
2072 * the raw partition must be marked FS_BSDFFS.
2073 */
2074
2075 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2076
2077 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2078
2079 lp->d_checksum = dkcksum(lp);
2080 }
2081 /*
2082 * Lookup the provided name in the filesystem. If the file exists,
2083 * is a valid block device, and isn't being used by anyone else,
2084 * set *vpp to the file's vnode.
2085 * You'll find the original of this in ccd.c
2086 */
2087 int
2088 raidlookup(path, p, vpp)
2089 char *path;
2090 struct proc *p;
2091 struct vnode **vpp; /* result */
2092 {
2093 struct nameidata nd;
2094 struct vnode *vp;
2095 struct vattr va;
2096 int error;
2097
2098 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2099 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2100 #if 0
2101 printf("RAIDframe: vn_open returned %d\n", error);
2102 #endif
2103 return (error);
2104 }
2105 vp = nd.ni_vp;
2106 if (vp->v_usecount > 1) {
2107 VOP_UNLOCK(vp, 0);
2108 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2109 return (EBUSY);
2110 }
2111 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2112 VOP_UNLOCK(vp, 0);
2113 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2114 return (error);
2115 }
2116 /* XXX: eventually we should handle VREG, too. */
2117 if (va.va_type != VBLK) {
2118 VOP_UNLOCK(vp, 0);
2119 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2120 return (ENOTBLK);
2121 }
2122 VOP_UNLOCK(vp, 0);
2123 *vpp = vp;
2124 return (0);
2125 }
2126 /*
2127 * Wait interruptibly for an exclusive lock.
2128 *
2129 * XXX
2130 * Several drivers do this; it should be abstracted and made MP-safe.
2131 * (Hmm... where have we seen this warning before :-> GO )
2132 */
2133 static int
2134 raidlock(rs)
2135 struct raid_softc *rs;
2136 {
2137 int error;
2138
2139 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2140 rs->sc_flags |= RAIDF_WANTED;
2141 if ((error =
2142 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2143 return (error);
2144 }
2145 rs->sc_flags |= RAIDF_LOCKED;
2146 return (0);
2147 }
2148 /*
2149 * Unlock and wake up any waiters.
2150 */
2151 static void
2152 raidunlock(rs)
2153 struct raid_softc *rs;
2154 {
2155
2156 rs->sc_flags &= ~RAIDF_LOCKED;
2157 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2158 rs->sc_flags &= ~RAIDF_WANTED;
2159 wakeup(rs);
2160 }
2161 }
2162
2163
2164 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2165 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2166
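/*
 * Mark the component label on the given component as clean, recording
 * the supplied modification counter.
 */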
2167 int
2168 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2169 {
2170 RF_ComponentLabel_t clabel;
2171 raidread_component_label(dev, b_vp, &clabel);
2172 clabel.mod_counter = mod_counter;
2173 clabel.clean = RF_RAID_CLEAN;
2174 raidwrite_component_label(dev, b_vp, &clabel);
2175 return(0);
2176 }
2177
2178
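/*
 * Mark the component label on the given component as dirty, recording
 * the supplied modification counter.
 */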
2179 int
2180 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2181 {
2182 RF_ComponentLabel_t clabel;
2183 raidread_component_label(dev, b_vp, &clabel);
2184 clabel.mod_counter = mod_counter;
2185 clabel.clean = RF_RAID_DIRTY;
2186 raidwrite_component_label(dev, b_vp, &clabel);
2187 return(0);
2188 }
2189
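/*
 * Read the component label from the given component. The label lives at
 * a fixed offset (RF_COMPONENT_INFO_OFFSET) near the start of the
 * component.
 */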
2190 /* ARGSUSED */
2191 int
2192 raidread_component_label(dev, b_vp, clabel)
2193 dev_t dev;
2194 struct vnode *b_vp;
2195 RF_ComponentLabel_t *clabel;
2196 {
2197 struct buf *bp;
2198 const struct bdevsw *bdev;
2199 int error;
2200
2201 /* XXX should probably ensure that we don't try to do this if
2202 someone has changed rf_protected_sectors. */
2203
2204 if (b_vp == NULL) {
2205 /* For whatever reason, this component is not valid.
2206 Don't try to read a component label from it. */
2207 return(EINVAL);
2208 }
2209
2210 /* get a block of the appropriate size... */
2211 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2212 bp->b_dev = dev;
2213
2214 /* get our ducks in a row for the read */
2215 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2216 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2217 bp->b_flags |= B_READ;
2218 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2219
2220 bdev = bdevsw_lookup(bp->b_dev);
2221 if (bdev == NULL) {
2222 brelse(bp);
      return (ENXIO);
      }
2223 (*bdev->d_strategy)(bp);
2224
2225 error = biowait(bp);
2226
2227 if (!error) {
2228 memcpy(clabel, bp->b_data,
2229 sizeof(RF_ComponentLabel_t));
2230 #if 0
2231 rf_print_component_label( clabel );
2232 #endif
2233 } else {
2234 #if 0
2235 printf("Failed to read RAID component label!\n");
2236 #endif
2237 }
2238
2239 brelse(bp);
2240 return(error);
2241 }
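/*
 * Write the given component label out to the component, at the same
 * fixed offset used by raidread_component_label().
 */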
2242 /* ARGSUSED */
2243 int
2244 raidwrite_component_label(dev, b_vp, clabel)
2245 dev_t dev;
2246 struct vnode *b_vp;
2247 RF_ComponentLabel_t *clabel;
2248 {
2249 struct buf *bp;
2250 const struct bdevsw *bdev;
2251 int error;
2252
2253 /* get a block of the appropriate size... */
2254 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2255 bp->b_dev = dev;
2256
2257 /* get our ducks in a row for the write */
2258 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2259 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2260 bp->b_flags |= B_WRITE;
2261 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2262
2263 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2264
2265 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2266
2267 bdev = bdevsw_lookup(bp->b_dev);
2268 if (bdev == NULL) {
2269 brelse(bp);
      return (ENXIO);
      }
2270 (*bdev->d_strategy)(bp);
2271 error = biowait(bp);
2272 brelse(bp);
2273 if (error) {
2274 #if 1
2275 printf("Failed to write RAID component info!\n");
2276 #endif
2277 }
2278
2279 return(error);
2280 }
2281
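/*
 * Bump the modification counter and mark the component labels of all
 * non-failed components as dirty.
 */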
2282 void
2283 rf_markalldirty(raidPtr)
2284 RF_Raid_t *raidPtr;
2285 {
2286 RF_ComponentLabel_t clabel;
2287 int r,c;
2288
2289 raidPtr->mod_counter++;
2290 for (r = 0; r < raidPtr->numRow; r++) {
2291 for (c = 0; c < raidPtr->numCol; c++) {
2292 /* we don't want to touch (at all) a disk that has
2293 failed */
2294 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2295 raidread_component_label(
2296 raidPtr->Disks[r][c].dev,
2297 raidPtr->raid_cinfo[r][c].ci_vp,
2298 &clabel);
2299 if (clabel.status == rf_ds_spared) {
2300 /* XXX do something special...
2301 but whatever you do, don't
2302 try to access it!! */
2303 } else {
2304 #if 0
2305 clabel.status =
2306 raidPtr->Disks[r][c].status;
2307 raidwrite_component_label(
2308 raidPtr->Disks[r][c].dev,
2309 raidPtr->raid_cinfo[r][c].ci_vp,
2310 &clabel);
2311 #endif
2312 raidmarkdirty(
2313 raidPtr->Disks[r][c].dev,
2314 raidPtr->raid_cinfo[r][c].ci_vp,
2315 raidPtr->mod_counter);
2316 }
2317 }
2318 }
2319 }
2320 /* printf("Component labels marked dirty.\n"); */
2321 #if 0
2322 for( c = 0; c < raidPtr->numSpare ; c++) {
2323 sparecol = raidPtr->numCol + c;
2324 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2325 /*
2326
2327 XXX this is where we get fancy and map this spare
2328 into its correct spot in the array.
2329
2330 */
2331 /*
2332
2333 we claim this disk is "optimal" if it's
2334 rf_ds_used_spare, as that means it should be
2335 directly substitutable for the disk it replaced.
2336 We note that too...
2337
2338 */
2339
2340 for(i=0;i<raidPtr->numRow;i++) {
2341 for(j=0;j<raidPtr->numCol;j++) {
2342 if ((raidPtr->Disks[i][j].spareRow ==
2343 r) &&
2344 (raidPtr->Disks[i][j].spareCol ==
2345 sparecol)) {
2346 srow = r;
2347 scol = sparecol;
2348 break;
2349 }
2350 }
2351 }
2352
2353 raidread_component_label(
2354 raidPtr->Disks[r][sparecol].dev,
2355 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2356 &clabel);
2357 /* make sure status is noted */
2358 clabel.version = RF_COMPONENT_LABEL_VERSION;
2359 clabel.mod_counter = raidPtr->mod_counter;
2360 clabel.serial_number = raidPtr->serial_number;
2361 clabel.row = srow;
2362 clabel.column = scol;
2363 clabel.num_rows = raidPtr->numRow;
2364 clabel.num_columns = raidPtr->numCol;
2365 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2366 clabel.status = rf_ds_optimal;
2367 raidwrite_component_label(
2368 raidPtr->Disks[r][sparecol].dev,
2369 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2370 &clabel);
2371 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2372 raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2373 }
2374 }
2375
2376 #endif
2377 }
2378
2379
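/*
 * Rewrite the component labels on all optimal components and in-use
 * spares, bumping the modification counter. On a final update with good
 * parity, the labels are also marked clean.
 */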
2380 void
2381 rf_update_component_labels(raidPtr, final)
2382 RF_Raid_t *raidPtr;
2383 int final;
2384 {
2385 RF_ComponentLabel_t clabel;
2386 int sparecol;
2387 int r,c;
2388 int i,j;
2389 int srow, scol;
2390
2391 srow = -1;
2392 scol = -1;
2393
2394 /* XXX should do extra checks to make sure things really are clean,
2395 rather than blindly setting the clean bit... */
2396
2397 raidPtr->mod_counter++;
2398
2399 for (r = 0; r < raidPtr->numRow; r++) {
2400 for (c = 0; c < raidPtr->numCol; c++) {
2401 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2402 raidread_component_label(
2403 raidPtr->Disks[r][c].dev,
2404 raidPtr->raid_cinfo[r][c].ci_vp,
2405 &clabel);
2406 /* make sure status is noted */
2407 clabel.status = rf_ds_optimal;
2408 /* bump the counter */
2409 clabel.mod_counter = raidPtr->mod_counter;
2410
2411 raidwrite_component_label(
2412 raidPtr->Disks[r][c].dev,
2413 raidPtr->raid_cinfo[r][c].ci_vp,
2414 &clabel);
2415 if (final == RF_FINAL_COMPONENT_UPDATE) {
2416 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2417 raidmarkclean(
2418 raidPtr->Disks[r][c].dev,
2419 raidPtr->raid_cinfo[r][c].ci_vp,
2420 raidPtr->mod_counter);
2421 }
2422 }
2423 }
2424 /* else we don't touch it.. */
2425 }
2426 }
2427
2428 for( c = 0; c < raidPtr->numSpare ; c++) {
2429 sparecol = raidPtr->numCol + c;
2430 /* Need to ensure that the reconstruct actually completed! */
2431 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2432 /*
2433
2434 we claim this disk is "optimal" if it's
2435 rf_ds_used_spare, as that means it should be
2436 directly substitutable for the disk it replaced.
2437 We note that too...
2438
2439 */
2440
2441 for(i=0;i<raidPtr->numRow;i++) {
2442 for(j=0;j<raidPtr->numCol;j++) {
2443 if ((raidPtr->Disks[i][j].spareRow ==
2444 0) &&
2445 (raidPtr->Disks[i][j].spareCol ==
2446 sparecol)) {
2447 srow = i;
2448 scol = j;
2449 break;
2450 }
2451 }
2452 }
2453
2454 /* XXX shouldn't *really* need this... */
2455 raidread_component_label(
2456 raidPtr->Disks[0][sparecol].dev,
2457 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2458 &clabel);
2459 /* make sure status is noted */
2460
2461 raid_init_component_label(raidPtr, &clabel);
2462
2463 clabel.mod_counter = raidPtr->mod_counter;
2464 clabel.row = srow;
2465 clabel.column = scol;
2466 clabel.status = rf_ds_optimal;
2467
2468 raidwrite_component_label(
2469 raidPtr->Disks[0][sparecol].dev,
2470 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2471 &clabel);
2472 if (final == RF_FINAL_COMPONENT_UPDATE) {
2473 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2474 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2475 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2476 raidPtr->mod_counter);
2477 }
2478 }
2479 }
2480 }
2481 /* printf("Component labels updated\n"); */
2482 }
2483
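/*
 * Close the vnode for a single component, using the method appropriate
 * to whether or not the component was auto-configured.
 */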
2484 void
2485 rf_close_component(raidPtr, vp, auto_configured)
2486 RF_Raid_t *raidPtr;
2487 struct vnode *vp;
2488 int auto_configured;
2489 {
2490 struct proc *p;
2491
2492 p = raidPtr->engine_thread;
2493
2494 if (vp != NULL) {
2495 if (auto_configured == 1) {
2496 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2497 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2498 vput(vp);
2499
2500 } else {
2501 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2502 }
2503 } else {
2504 #if 0
2505 printf("vnode was NULL\n");
2506 #endif
2507 }
2508 }
2509
2510
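/*
 * Close and release the vnodes of all components and spares of the
 * given array.
 */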
2511 void
2512 rf_UnconfigureVnodes(raidPtr)
2513 RF_Raid_t *raidPtr;
2514 {
2515 int r,c;
2516 struct vnode *vp;
2517 int acd;
2518
2519
2520 /* We take this opportunity to close the vnodes like we should.. */
2521
2522 for (r = 0; r < raidPtr->numRow; r++) {
2523 for (c = 0; c < raidPtr->numCol; c++) {
2524 #if 0
2525 printf("raid%d: Closing vnode for row: %d col: %d\n",
2526 raidPtr->raidid, r, c);
2527 #endif
2528 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2529 acd = raidPtr->Disks[r][c].auto_configured;
2530 rf_close_component(raidPtr, vp, acd);
2531 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2532 raidPtr->Disks[r][c].auto_configured = 0;
2533 }
2534 }
2535 for (r = 0; r < raidPtr->numSpare; r++) {
2536 #if 0
2537 printf("raid%d: Closing vnode for spare: %d\n",
2538 raidPtr->raidid, r);
2539 #endif
2540 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2541 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2542 rf_close_component(raidPtr, vp, acd);
2543 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2544 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2545 }
2546 }
2547
2548
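/*
 * Kernel thread body: fail the indicated component, optionally
 * reconstructing its contents onto a spare, then exit.
 */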
2549 void
2550 rf_ReconThread(req)
2551 struct rf_recon_req *req;
2552 {
2553 int s;
2554 RF_Raid_t *raidPtr;
2555
2556 s = splbio();
2557 raidPtr = (RF_Raid_t *) req->raidPtr;
2558 raidPtr->recon_in_progress = 1;
2559
2560 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2561 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2562
2563 /* XXX get rid of this! we don't need it at all.. */
2564 RF_Free(req, sizeof(*req));
2565
2566 raidPtr->recon_in_progress = 0;
2567 splx(s);
2568
2569 /* That's all... */
2570 kthread_exit(0); /* does not return */
2571 }
2572
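/*
 * Kernel thread body: rewrite the parity for the entire array, marking
 * the parity good if the rewrite succeeds, then exit.
 */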
2573 void
2574 rf_RewriteParityThread(raidPtr)
2575 RF_Raid_t *raidPtr;
2576 {
2577 int retcode;
2578 int s;
2579
2580 raidPtr->parity_rewrite_in_progress = 1;
2581 s = splbio();
2582 retcode = rf_RewriteParity(raidPtr);
2583 splx(s);
2584 if (retcode) {
2585 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2586 } else {
2587 /* set the clean bit! If we shutdown correctly,
2588 the clean bit on each component label will get
2589 set */
2590 raidPtr->parity_good = RF_RAID_CLEAN;
2591 }
2592 raidPtr->parity_rewrite_in_progress = 0;
2593
2594 /* Anyone waiting for us to stop? If so, inform them... */
2595 if (raidPtr->waitShutdown) {
2596 wakeup(&raidPtr->parity_rewrite_in_progress);
2597 }
2598
2599 /* That's all... */
2600 kthread_exit(0); /* does not return */
2601 }
2602
2603
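/*
 * Kernel thread body: copy reconstructed data from a spare back to its
 * original location, then exit.
 */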
2604 void
2605 rf_CopybackThread(raidPtr)
2606 RF_Raid_t *raidPtr;
2607 {
2608 int s;
2609
2610 raidPtr->copyback_in_progress = 1;
2611 s = splbio();
2612 rf_CopybackReconstructedData(raidPtr);
2613 splx(s);
2614 raidPtr->copyback_in_progress = 0;
2615
2616 /* That's all... */
2617 kthread_exit(0); /* does not return */
2618 }
2619
2620
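/*
 * Kernel thread body: reconstruct the data for the given component in
 * place, then exit.
 */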
2621 void
2622 rf_ReconstructInPlaceThread(req)
2623 struct rf_recon_req *req;
2624 {
2625 int retcode;
2626 int s;
2627 RF_Raid_t *raidPtr;
2628
2629 s = splbio();
2630 raidPtr = req->raidPtr;
2631 raidPtr->recon_in_progress = 1;
2632 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2633 RF_Free(req, sizeof(*req));
2634 raidPtr->recon_in_progress = 0;
2635 splx(s);
2636
2637 /* That's all... */
2638 kthread_exit(0); /* does not return */
2639 }
2640
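/*
 * Scan all disks in the system for partitions of type FS_RAID, read the
 * RAIDframe component label from each one that has a plausible label,
 * and return them as a list of RF_AutoConfig_t structures.
 */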
2641 RF_AutoConfig_t *
2642 rf_find_raid_components()
2643 {
2644 struct vnode *vp;
2645 struct disklabel label;
2646 struct device *dv;
2647 dev_t dev;
2648 int bmajor;
2649 int error;
2650 int i;
2651 int good_one;
2652 RF_ComponentLabel_t *clabel;
2653 RF_AutoConfig_t *ac_list;
2654 RF_AutoConfig_t *ac;
2655
2656
2657 /* initialize the AutoConfig list */
2658 ac_list = NULL;
2659
2660 /* we begin by trolling through *all* the devices on the system */
2661
2662 for (dv = alldevs.tqh_first; dv != NULL;
2663 dv = dv->dv_list.tqe_next) {
2664
2665 /* we are only interested in disks... */
2666 if (dv->dv_class != DV_DISK)
2667 continue;
2668
2669 /* we don't care about floppies... */
2670 if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
2671 continue;
2672 }
2673
2674 /* we don't care about CD's... */
2675 if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
2676 continue;
2677 }
2678
2679 /* hdfd is the Atari/Hades floppy driver */
2680 if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
2681 continue;
2682 }
2683 /* fdisa is the Atari/Milan floppy driver */
2684 if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
2685 continue;
2686 }
2687
2688 /* need to find the device_name_to_block_device_major stuff */
2689 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2690
2691 /* get a vnode for the raw partition of this disk */
2692
2693 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2694 if (bdevvp(dev, &vp))
2695 panic("RAID can't alloc vnode");
2696
2697 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2698
2699 if (error) {
2700 /* "Who cares." Continue looking
2701 for something that exists */
2702 vput(vp);
2703 continue;
2704 }
2705
2706 /* Ok, the disk exists. Go get the disklabel. */
2707 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2708 FREAD, NOCRED, 0);
2709 if (error) {
2710 /*
2711 * XXX can't happen - open() would
2712 * have errored out (or faked up one)
2713 */
2714 printf("can't get label for dev %s%c (%d)!?!?\n",
2715 dv->dv_xname, 'a' + RAW_PART, error);
2716 }
2717
2718 /* don't need this any more. We'll allocate it again
2719 a little later if we really do... */
2720 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2721 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2722 vput(vp);
2723
2724 for (i=0; i < label.d_npartitions; i++) {
2725 /* We only support partitions marked as RAID */
2726 if (label.d_partitions[i].p_fstype != FS_RAID)
2727 continue;
2728
2729 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2730 if (bdevvp(dev, &vp))
2731 panic("RAID can't alloc vnode");
2732
2733 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2734 if (error) {
2735 /* Whatever... */
2736 vput(vp);
2737 continue;
2738 }
2739
2740 good_one = 0;
2741
2742 clabel = (RF_ComponentLabel_t *)
2743 malloc(sizeof(RF_ComponentLabel_t),
2744 M_RAIDFRAME, M_NOWAIT);
2745 if (clabel == NULL) {
2746 /* XXX CLEANUP HERE */
2747 printf("RAID auto config: out of memory!\n");
2748 return(NULL); /* XXX probably should panic? */
2749 }
2750
2751 if (!raidread_component_label(dev, vp, clabel)) {
2752 /* Got the label. Does it look reasonable? */
2753 if (rf_reasonable_label(clabel) &&
2754 (clabel->partitionSize <=
2755 label.d_partitions[i].p_size)) {
2756 #if DEBUG
2757 printf("Component on: %s%c: %d\n",
2758 dv->dv_xname, 'a'+i,
2759 label.d_partitions[i].p_size);
2760 rf_print_component_label(clabel);
2761 #endif
2762 /* if it's reasonable, add it,
2763 else ignore it. */
2764 ac = (RF_AutoConfig_t *)
2765 malloc(sizeof(RF_AutoConfig_t),
2766 M_RAIDFRAME,
2767 M_NOWAIT);
2768 if (ac == NULL) {
2769 /* XXX should panic?? */
2770 return(NULL);
2771 }
2772
2773 sprintf(ac->devname, "%s%c",
2774 dv->dv_xname, 'a'+i);
2775 ac->dev = dev;
2776 ac->vp = vp;
2777 ac->clabel = clabel;
2778 ac->next = ac_list;
2779 ac_list = ac;
2780 good_one = 1;
2781 }
2782 }
2783 if (!good_one) {
2784 /* cleanup */
2785 free(clabel, M_RAIDFRAME);
2786 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2787 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2788 vput(vp);
2789 }
2790 }
2791 }
2792 return(ac_list);
2793 }
2794
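/*
 * Perform basic sanity checks on a component label to decide whether it
 * plausibly describes a RAIDframe component.
 */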
2795 static int
2796 rf_reasonable_label(clabel)
2797 RF_ComponentLabel_t *clabel;
2798 {
2799
2800 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2801 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2802 ((clabel->clean == RF_RAID_CLEAN) ||
2803 (clabel->clean == RF_RAID_DIRTY)) &&
2804 clabel->row >=0 &&
2805 clabel->column >= 0 &&
2806 clabel->num_rows > 0 &&
2807 clabel->num_columns > 0 &&
2808 clabel->row < clabel->num_rows &&
2809 clabel->column < clabel->num_columns &&
2810 clabel->blockSize > 0 &&
2811 clabel->numBlocks > 0) {
2812 /* label looks reasonable enough... */
2813 return(1);
2814 }
2815 return(0);
2816 }
2817
2818
2819 #if DEBUG
2820 void
2821 rf_print_component_label(clabel)
2822 RF_ComponentLabel_t *clabel;
2823 {
2824 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2825 clabel->row, clabel->column,
2826 clabel->num_rows, clabel->num_columns);
2827 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2828 clabel->version, clabel->serial_number,
2829 clabel->mod_counter);
2830 printf(" Clean: %s Status: %d\n",
2831 clabel->clean ? "Yes" : "No", clabel->status );
2832 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2833 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2834 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2835 (char) clabel->parityConfig, clabel->blockSize,
2836 clabel->numBlocks);
2837 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2838 printf(" Contains root partition: %s\n",
2839 clabel->root_partition ? "Yes" : "No" );
2840 printf(" Last configured as: raid%d\n", clabel->last_unit );
2841 #if 0
2842 printf(" Config order: %d\n", clabel->config_order);
2843 #endif
2844
2845 }
2846 #endif
2847
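/*
 * Sort the components found by rf_find_raid_components() into
 * configuration sets, grouping components whose labels indicate they
 * belong to the same array.
 */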
2848 RF_ConfigSet_t *
2849 rf_create_auto_sets(ac_list)
2850 RF_AutoConfig_t *ac_list;
2851 {
2852 RF_AutoConfig_t *ac;
2853 RF_ConfigSet_t *config_sets;
2854 RF_ConfigSet_t *cset;
2855 RF_AutoConfig_t *ac_next;
2856
2857
2858 config_sets = NULL;
2859
2860 /* Go through the AutoConfig list, and figure out which components
2861 belong to what sets. */
2862 ac = ac_list;
2863 while(ac!=NULL) {
2864 /* we're going to putz with ac->next, so save it here
2865 for use at the end of the loop */
2866 ac_next = ac->next;
2867
2868 if (config_sets == NULL) {
2869 /* will need at least this one... */
2870 config_sets = (RF_ConfigSet_t *)
2871 malloc(sizeof(RF_ConfigSet_t),
2872 M_RAIDFRAME, M_NOWAIT);
2873 if (config_sets == NULL) {
2874 panic("rf_create_auto_sets: No memory!\n");
2875 }
2876 /* this one is easy :) */
2877 config_sets->ac = ac;
2878 config_sets->next = NULL;
2879 config_sets->rootable = 0;
2880 ac->next = NULL;
2881 } else {
2882 /* which set does this component fit into? */
2883 cset = config_sets;
2884 while(cset!=NULL) {
2885 if (rf_does_it_fit(cset, ac)) {
2886 /* looks like it matches... */
2887 ac->next = cset->ac;
2888 cset->ac = ac;
2889 break;
2890 }
2891 cset = cset->next;
2892 }
2893 if (cset==NULL) {
2894 /* didn't find a match above... new set..*/
2895 cset = (RF_ConfigSet_t *)
2896 malloc(sizeof(RF_ConfigSet_t),
2897 M_RAIDFRAME, M_NOWAIT);
2898 if (cset == NULL) {
2899 panic("rf_create_auto_sets: No memory!\n");
2900 }
2901 cset->ac = ac;
2902 ac->next = NULL;
2903 cset->next = config_sets;
2904 cset->rootable = 0;
2905 config_sets = cset;
2906 }
2907 }
2908 ac = ac_next;
2909 }
2910
2911
2912 return(config_sets);
2913 }
2914
2915 static int
2916 rf_does_it_fit(cset, ac)
2917 RF_ConfigSet_t *cset;
2918 RF_AutoConfig_t *ac;
2919 {
2920 RF_ComponentLabel_t *clabel1, *clabel2;
2921
2922 /* If this one matches the *first* one in the set, that's good
2923 enough, since the other members of the set would have been
2924 through here too... */
2925 /* note that we are not checking partitionSize here..
2926
2927 Note that we are also not checking the mod_counters here.
2928 If everything else matches except the mod_counter, that's
2929 good enough for this test. We will deal with the mod_counters
2930 a little later in the autoconfiguration process.
2931
2932 (clabel1->mod_counter == clabel2->mod_counter) &&
2933
2934 The reason we don't check for this is that failed disks
2935 will have lower modification counts. If those disks are
2936 not added to the set they used to belong to, then they will
2937 form their own set, which may result in 2 different sets,
2938 for example, competing to be configured at raid0, and
2939 perhaps competing to be the root filesystem set. If the
2940 wrong ones get configured, or both attempt to become /,
2941 weird behaviour and/or serious lossage will occur. Thus we
2942 need to bring them into the fold here, and kick them out at
2943 a later point.
2944
2945 */
2946
2947 clabel1 = cset->ac->clabel;
2948 clabel2 = ac->clabel;
2949 if ((clabel1->version == clabel2->version) &&
2950 (clabel1->serial_number == clabel2->serial_number) &&
2951 (clabel1->num_rows == clabel2->num_rows) &&
2952 (clabel1->num_columns == clabel2->num_columns) &&
2953 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2954 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2955 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2956 (clabel1->parityConfig == clabel2->parityConfig) &&
2957 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2958 (clabel1->blockSize == clabel2->blockSize) &&
2959 (clabel1->numBlocks == clabel2->numBlocks) &&
2960 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2961 (clabel1->root_partition == clabel2->root_partition) &&
2962 (clabel1->last_unit == clabel2->last_unit) &&
2963 (clabel1->config_order == clabel2->config_order)) {
2964 /* if it gets here, it almost *has* to be a match */
2965 } else {
2966 /* it's not consistent with somebody in the set..
2967 punt */
2968 return(0);
2969 }
2970 /* all was fine.. it must fit... */
2971 return(1);
2972 }
2973
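/*
 * Determine whether a configuration set has enough live components to
 * be configured, given the RAID level recorded in its labels.
 */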
2974 int
2975 rf_have_enough_components(cset)
2976 RF_ConfigSet_t *cset;
2977 {
2978 RF_AutoConfig_t *ac;
2979 RF_AutoConfig_t *auto_config;
2980 RF_ComponentLabel_t *clabel;
2981 int r,c;
2982 int num_rows;
2983 int num_cols;
2984 int num_missing;
2985 int mod_counter;
2986 int mod_counter_found;
2987 int even_pair_failed;
2988 char parity_type;
2989
2990
2991 /* check to see that we have enough 'live' components
2992 of this set. If so, we can configure it if necessary */
2993
2994 num_rows = cset->ac->clabel->num_rows;
2995 num_cols = cset->ac->clabel->num_columns;
2996 parity_type = cset->ac->clabel->parityConfig;
2997
2998 /* XXX Check for duplicate components!?!?!? */
2999
3000 /* Determine what the mod_counter is supposed to be for this set. */
3001
3002 mod_counter_found = 0;
3003 mod_counter = 0;
3004 ac = cset->ac;
3005 while(ac!=NULL) {
3006 if (mod_counter_found==0) {
3007 mod_counter = ac->clabel->mod_counter;
3008 mod_counter_found = 1;
3009 } else {
3010 if (ac->clabel->mod_counter > mod_counter) {
3011 mod_counter = ac->clabel->mod_counter;
3012 }
3013 }
3014 ac = ac->next;
3015 }
3016
3017 num_missing = 0;
3018 auto_config = cset->ac;
3019
3020 for(r=0; r<num_rows; r++) {
3021 even_pair_failed = 0;
3022 for(c=0; c<num_cols; c++) {
3023 ac = auto_config;
3024 while(ac!=NULL) {
3025 if ((ac->clabel->row == r) &&
3026 (ac->clabel->column == c) &&
3027 (ac->clabel->mod_counter == mod_counter)) {
3028 /* it's this one... */
3029 #if DEBUG
3030 printf("Found: %s at %d,%d\n",
3031 ac->devname,r,c);
3032 #endif
3033 break;
3034 }
3035 ac=ac->next;
3036 }
3037 if (ac==NULL) {
3038 /* Didn't find one here! */
3039 /* special case for RAID 1, especially
3040 where there are more than 2
3041 components (where RAIDframe treats
3042 things a little differently :( ) */
3043 if (parity_type == '1') {
3044 if (c%2 == 0) { /* even component */
3045 even_pair_failed = 1;
3046 } else { /* odd component. If
3047 we're failed, and
3048 so is the even
3049 component, it's
3050 "Good Night, Charlie" */
3051 if (even_pair_failed == 1) {
3052 return(0);
3053 }
3054 }
3055 } else {
3056 /* normal accounting */
3057 num_missing++;
3058 }
3059 }
3060 if ((parity_type == '1') && (c%2 == 1)) {
3061 /* Just did an even component, and we didn't
3062 bail.. reset the even_pair_failed flag,
3063 and go on to the next component.... */
3064 even_pair_failed = 0;
3065 }
3066 }
3067 }
3068
3069 clabel = cset->ac->clabel;
3070
3071 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3072 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3073 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3074 /* XXX this needs to be made *much* more general */
3075 /* Too many failures */
3076 return(0);
3077 }
3078 /* otherwise, all is well, and we've got enough to take a kick
3079 at autoconfiguring this set */
3080 return(1);
3081 }
3082
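/*
 * Build an RF_Config_t from the component labels of an auto-configuration
 * set.
 */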
3083 void
3084 rf_create_configuration(ac,config,raidPtr)
3085 RF_AutoConfig_t *ac;
3086 RF_Config_t *config;
3087 RF_Raid_t *raidPtr;
3088 {
3089 RF_ComponentLabel_t *clabel;
3090 int i;
3091
3092 clabel = ac->clabel;
3093
3094 /* 1. Fill in the common stuff */
3095 config->numRow = clabel->num_rows;
3096 config->numCol = clabel->num_columns;
3097 config->numSpare = 0; /* XXX should this be set here? */
3098 config->sectPerSU = clabel->sectPerSU;
3099 config->SUsPerPU = clabel->SUsPerPU;
3100 config->SUsPerRU = clabel->SUsPerRU;
3101 config->parityConfig = clabel->parityConfig;
3102 /* XXX... */
3103 strcpy(config->diskQueueType,"fifo");
3104 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3105 config->layoutSpecificSize = 0; /* XXX ?? */
3106
3107 while(ac!=NULL) {
3108 /* row/col values will be in range due to the checks
3109 in rf_reasonable_label() */
3110 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3111 ac->devname);
3112 ac = ac->next;
3113 }
3114
3115 for(i=0;i<RF_MAXDBGV;i++) {
3116 config->debugVars[i][0] = '\0';
3117 }
3118 }
3119
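/*
 * Set the autoconfigure flag for the array, and record the new value in
 * the component label of every optimal component.
 */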
3120 int
3121 rf_set_autoconfig(raidPtr, new_value)
3122 RF_Raid_t *raidPtr;
3123 int new_value;
3124 {
3125 RF_ComponentLabel_t clabel;
3126 struct vnode *vp;
3127 dev_t dev;
3128 int row, column;
3129
3130 raidPtr->autoconfigure = new_value;
3131 for(row=0; row<raidPtr->numRow; row++) {
3132 for(column=0; column<raidPtr->numCol; column++) {
3133 if (raidPtr->Disks[row][column].status ==
3134 rf_ds_optimal) {
3135 dev = raidPtr->Disks[row][column].dev;
3136 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3137 raidread_component_label(dev, vp, &clabel);
3138 clabel.autoconfigure = new_value;
3139 raidwrite_component_label(dev, vp, &clabel);
3140 }
3141 }
3142 }
3143 return(new_value);
3144 }
3145
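/*
 * Set the root_partition flag for the array, and record the new value
 * in the component label of every optimal component.
 */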
3146 int
3147 rf_set_rootpartition(raidPtr, new_value)
3148 RF_Raid_t *raidPtr;
3149 int new_value;
3150 {
3151 RF_ComponentLabel_t clabel;
3152 struct vnode *vp;
3153 dev_t dev;
3154 int row, column;
3155
3156 raidPtr->root_partition = new_value;
3157 for(row=0; row<raidPtr->numRow; row++) {
3158 for(column=0; column<raidPtr->numCol; column++) {
3159 if (raidPtr->Disks[row][column].status ==
3160 rf_ds_optimal) {
3161 dev = raidPtr->Disks[row][column].dev;
3162 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3163 raidread_component_label(dev, vp, &clabel);
3164 clabel.root_partition = new_value;
3165 raidwrite_component_label(dev, vp, &clabel);
3166 }
3167 }
3168 }
3169 return(new_value);
3170 }
3171
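/*
 * Close and release the vnodes held by the components of a
 * configuration set.
 */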
3172 void
3173 rf_release_all_vps(cset)
3174 RF_ConfigSet_t *cset;
3175 {
3176 RF_AutoConfig_t *ac;
3177
3178 ac = cset->ac;
3179 while(ac!=NULL) {
3180 /* Close the vp, and give it back */
3181 if (ac->vp) {
3182 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3183 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3184 vput(ac->vp);
3185 ac->vp = NULL;
3186 }
3187 ac = ac->next;
3188 }
3189 }
3190
3191
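/*
 * Free the component labels, the auto-config structures, and the
 * configuration set itself.
 */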
3192 void
3193 rf_cleanup_config_set(cset)
3194 RF_ConfigSet_t *cset;
3195 {
3196 RF_AutoConfig_t *ac;
3197 RF_AutoConfig_t *next_ac;
3198
3199 ac = cset->ac;
3200 while(ac!=NULL) {
3201 next_ac = ac->next;
3202 /* nuke the label */
3203 free(ac->clabel, M_RAIDFRAME);
3204 /* cleanup the config structure */
3205 free(ac, M_RAIDFRAME);
3206 /* "next.." */
3207 ac = next_ac;
3208 }
3209 /* and, finally, nuke the config set */
3210 free(cset, M_RAIDFRAME);
3211 }
3212
3213
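/*
 * Initialize a component label from the current state of the array.
 */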
3214 void
3215 raid_init_component_label(raidPtr, clabel)
3216 RF_Raid_t *raidPtr;
3217 RF_ComponentLabel_t *clabel;
3218 {
3219 /* current version number */
3220 clabel->version = RF_COMPONENT_LABEL_VERSION;
3221 clabel->serial_number = raidPtr->serial_number;
3222 clabel->mod_counter = raidPtr->mod_counter;
3223 clabel->num_rows = raidPtr->numRow;
3224 clabel->num_columns = raidPtr->numCol;
3225 clabel->clean = RF_RAID_DIRTY; /* not clean */
3226 clabel->status = rf_ds_optimal; /* "It's good!" */
3227
3228 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3229 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3230 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3231
3232 clabel->blockSize = raidPtr->bytesPerSector;
3233 clabel->numBlocks = raidPtr->sectorsPerDisk;
3234
3235 /* XXX not portable */
3236 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3237 clabel->maxOutstanding = raidPtr->maxOutstanding;
3238 clabel->autoconfigure = raidPtr->autoconfigure;
3239 clabel->root_partition = raidPtr->root_partition;
3240 clabel->last_unit = raidPtr->raidid;
3241 clabel->config_order = raidPtr->config_order;
3242 }
3243
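/*
 * Configure an array from an auto-configuration set: pick a unit number,
 * build the configuration, and bring the array up. On success, *unit is
 * set to the unit number used.
 */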
3244 int
3245 rf_auto_config_set(cset,unit)
3246 RF_ConfigSet_t *cset;
3247 int *unit;
3248 {
3249 RF_Raid_t *raidPtr;
3250 RF_Config_t *config;
3251 int raidID;
3252 int retcode;
3253
3254 #if DEBUG
3255 printf("RAID autoconfigure\n");
3256 #endif
3257
3258 retcode = 0;
3259 *unit = -1;
3260
3261 /* 1. Create a config structure */
3262
3263 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3264 M_RAIDFRAME,
3265 M_NOWAIT);
3266 if (config==NULL) {
3267 printf("Out of mem!?!?\n");
3268 /* XXX do something more intelligent here. */
3269 return(1);
3270 }
3271
3272 memset(config, 0, sizeof(RF_Config_t));
3273
3274 /*
3275 2. Figure out what RAID ID this one is supposed to live at
3276 See if we can get the same RAID dev that it was configured
3277 on last time..
3278 */
3279
3280 raidID = cset->ac->clabel->last_unit;
3281 if ((raidID < 0) || (raidID >= numraid)) {
3282 /* let's not wander off into lala land. */
3283 raidID = numraid - 1;
3284 }
3285 if (raidPtrs[raidID]->valid != 0) {
3286
3287 /*
3288 Nope... Go looking for an alternative...
3289 Start high so we don't immediately use raid0 if that's
3290 not taken.
3291 */
3292
3293 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3294 if (raidPtrs[raidID]->valid == 0) {
3295 /* can use this one! */
3296 break;
3297 }
3298 }
3299 }
3300
3301 if (raidID < 0) {
3302 /* punt... */
3303 printf("Unable to auto configure this set!\n");
3304 printf("(Out of RAID devs!)\n");
3305 return(1);
3306 }
3307
3308 #if DEBUG
3309 printf("Configuring raid%d:\n",raidID);
3310 #endif
3311
3312 raidPtr = raidPtrs[raidID];
3313
3314 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3315 raidPtr->raidid = raidID;
3316 raidPtr->openings = RAIDOUTSTANDING;
3317
3318 /* 3. Build the configuration structure */
3319 rf_create_configuration(cset->ac, config, raidPtr);
3320
3321 /* 4. Do the configuration */
3322 retcode = rf_Configure(raidPtr, config, cset->ac);
3323
3324 if (retcode == 0) {
3325
3326 raidinit(raidPtrs[raidID]);
3327
3328 rf_markalldirty(raidPtrs[raidID]);
3329 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3330 if (cset->ac->clabel->root_partition==1) {
3331 /* everything configured just fine. Make a note
3332 that this set is eligible to be root. */
3333 cset->rootable = 1;
3334 /* XXX do this here? */
3335 raidPtrs[raidID]->root_partition = 1;
3336 }
3337 }
3338
3339 /* 5. Cleanup */
3340 free(config, M_RAIDFRAME);
3341
3342 *unit = raidID;
3343 return(retcode);
3344 }
3345
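/*
 * Record the completion of an I/O in the disk statistics for this unit.
 */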
3346 void
3347 rf_disk_unbusy(desc)
3348 RF_RaidAccessDesc_t *desc;
3349 {
3350 struct buf *bp;
3351
3352 bp = (struct buf *)desc->bp;
3353 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3354 (bp->b_bcount - bp->b_resid));
3355 }
3356