      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.224.2.3 2008/09/25 19:45:22 bouyer Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1990, 1993
     40  *      The Regents of the University of California.  All rights reserved.
     41  *
     42  * This code is derived from software contributed to Berkeley by
     43  * the Systems Programming Group of the University of Utah Computer
     44  * Science Department.
     45  *
     46  * Redistribution and use in source and binary forms, with or without
     47  * modification, are permitted provided that the following conditions
     48  * are met:
     49  * 1. Redistributions of source code must retain the above copyright
     50  *    notice, this list of conditions and the following disclaimer.
     51  * 2. Redistributions in binary form must reproduce the above copyright
     52  *    notice, this list of conditions and the following disclaimer in the
     53  *    documentation and/or other materials provided with the distribution.
     54  * 3. Neither the name of the University nor the names of its contributors
     55  *    may be used to endorse or promote products derived from this software
     56  *    without specific prior written permission.
     57  *
     58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     68  * SUCH DAMAGE.
     69  *
     70  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     71  *
     72  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     73  */
     74 
     75 /*
     76  * Copyright (c) 1988 University of Utah.
     77  *
     78  * This code is derived from software contributed to Berkeley by
     79  * the Systems Programming Group of the University of Utah Computer
     80  * Science Department.
     81  *
     82  * Redistribution and use in source and binary forms, with or without
     83  * modification, are permitted provided that the following conditions
     84  * are met:
     85  * 1. Redistributions of source code must retain the above copyright
     86  *    notice, this list of conditions and the following disclaimer.
     87  * 2. Redistributions in binary form must reproduce the above copyright
     88  *    notice, this list of conditions and the following disclaimer in the
     89  *    documentation and/or other materials provided with the distribution.
     90  * 3. All advertising materials mentioning features or use of this software
     91  *    must display the following acknowledgement:
     92  *      This product includes software developed by the University of
     93  *      California, Berkeley and its contributors.
     94  * 4. Neither the name of the University nor the names of its contributors
     95  *    may be used to endorse or promote products derived from this software
     96  *    without specific prior written permission.
     97  *
     98  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     99  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    100  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    101  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
    102  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    103  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    104  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    105  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    106  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    107  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    108  * SUCH DAMAGE.
    109  *
    110  * from: Utah $Hdr: cd.c 1.6 90/11/28$
    111  *
    112  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
    113  */
    114 
    115 /*
    116  * Copyright (c) 1995 Carnegie-Mellon University.
    117  * All rights reserved.
    118  *
    119  * Authors: Mark Holland, Jim Zelenka
    120  *
    121  * Permission to use, copy, modify and distribute this software and
    122  * its documentation is hereby granted, provided that both the copyright
    123  * notice and this permission notice appear in all copies of the
    124  * software, derivative works or modified versions, and any portions
    125  * thereof, and that both notices appear in supporting documentation.
    126  *
    127  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
    128  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
    129  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
    130  *
    131  * Carnegie Mellon requests users of this software to return to
    132  *
    133  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
    134  *  School of Computer Science
    135  *  Carnegie Mellon University
    136  *  Pittsburgh PA 15213-3890
    137  *
    138  * any improvements or extensions that they make and grant Carnegie the
    139  * rights to redistribute these changes.
    140  */
    141 
    142 /***********************************************************
    143  *
    144  * rf_kintf.c -- the kernel interface routines for RAIDframe
    145  *
    146  ***********************************************************/
    147 
    148 #include <sys/cdefs.h>
    149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.224.2.3 2008/09/25 19:45:22 bouyer Exp $");
    150 
    151 #include <sys/param.h>
    152 #include <sys/errno.h>
    153 #include <sys/pool.h>
    154 #include <sys/proc.h>
    155 #include <sys/queue.h>
    156 #include <sys/disk.h>
    157 #include <sys/device.h>
    158 #include <sys/stat.h>
    159 #include <sys/ioctl.h>
    160 #include <sys/fcntl.h>
    161 #include <sys/systm.h>
    162 #include <sys/namei.h>
    163 #include <sys/vnode.h>
    164 #include <sys/disklabel.h>
    165 #include <sys/conf.h>
    166 #include <sys/lock.h>
    167 #include <sys/buf.h>
    168 #include <sys/bufq.h>
    169 #include <sys/user.h>
    170 #include <sys/reboot.h>
    171 #include <sys/kauth.h>
    172 
    173 #include <prop/proplib.h>
    174 
    175 #include <dev/raidframe/raidframevar.h>
    176 #include <dev/raidframe/raidframeio.h>
    177 #include "raid.h"
    178 #include "opt_raid_autoconfig.h"
    179 #include "rf_raid.h"
    180 #include "rf_copyback.h"
    181 #include "rf_dag.h"
    182 #include "rf_dagflags.h"
    183 #include "rf_desc.h"
    184 #include "rf_diskqueue.h"
    185 #include "rf_etimer.h"
    186 #include "rf_general.h"
    187 #include "rf_kintf.h"
    188 #include "rf_options.h"
    189 #include "rf_driver.h"
    190 #include "rf_parityscan.h"
    191 #include "rf_threadstuff.h"
    192 
    193 #ifdef DEBUG
    194 int     rf_kdebug_level = 0;
    195 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    196 #else				/* DEBUG */
    197 #define db1_printf(a) { }
    198 #endif				/* DEBUG */
    199 
    200 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    201 
    202 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    203 
    204 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    205 						 * spare table */
    206 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    207 						 * installation process */
    208 
    209 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
    210 
    211 /* prototypes */
    212 static void KernelWakeupFunc(struct buf *);
    213 static void InitBP(struct buf *, struct vnode *, unsigned,
    214     dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *),
    215     void *, int, struct proc *);
    216 static void raidinit(RF_Raid_t *);
    217 
    218 void raidattach(int);
    219 static int raid_match(struct device *, struct cfdata *, void *);
    220 static void raid_attach(struct device *, struct device *, void *);
    221 static int raid_detach(struct device *, int);
    222 
    223 dev_type_open(raidopen);
    224 dev_type_close(raidclose);
    225 dev_type_read(raidread);
    226 dev_type_write(raidwrite);
    227 dev_type_ioctl(raidioctl);
    228 dev_type_strategy(raidstrategy);
    229 dev_type_dump(raiddump);
    230 dev_type_size(raidsize);
    231 
    232 const struct bdevsw raid_bdevsw = {
    233 	raidopen, raidclose, raidstrategy, raidioctl,
    234 	raiddump, raidsize, D_DISK
    235 };
    236 
    237 const struct cdevsw raid_cdevsw = {
    238 	raidopen, raidclose, raidread, raidwrite, raidioctl,
    239 	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
    240 };
    241 
    242 /* XXX Not sure if the following should be replacing the raidPtrs above,
    243    or if it should be used in conjunction with that...
    244 */
    245 
    246 struct raid_softc {
    247 	struct device *sc_dev;
    248 	int     sc_flags;	/* flags */
    249 	int     sc_cflags;	/* configuration flags */
    250 	uint64_t sc_size;	/* size of the raid device */
    251 	char    sc_xname[20];	/* XXX external name */
    252 	struct disk sc_dkdev;	/* generic disk device info */
    253 	struct bufq_state *buf_queue;	/* used for the device queue */
    254 };
    255 /* sc_flags */
    256 #define RAIDF_INITED	0x01	/* unit has been initialized */
    257 #define RAIDF_WLABEL	0x02	/* label area is writable */
    258 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    259 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    260 #define RAIDF_LOCKED	0x80	/* unit is locked */
    261 
    262 #define	raidunit(x)	DISKUNIT(x)
    263 int numraid = 0;
    264 
    265 extern struct cfdriver raid_cd;
    266 CFATTACH_DECL(raid, sizeof(struct raid_softc),
    267     raid_match, raid_attach, raid_detach, NULL);
    268 
    269 /*
    270  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    271  * Be aware that large numbers can allow the driver to consume a lot of
    272  * kernel memory, especially on writes, and in degraded mode reads.
    273  *
    274  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    275  * a single 64K write will typically require 64K for the old data,
    276  * 64K for the old parity, and 64K for the new parity, for a total
    277  * of 192K (if the parity buffer is not re-used immediately).
    278  * Even if it is used immediately, that's still 128K, which when multiplied
    279  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    280  *
    281  * Now in degraded mode, for example, a 64K read on the above setup may
    282  * require data reconstruction, which will require *all* of the 4 remaining
    283  * disks to participate -- 4 * 32K/disk == 128K again.
    284  */
    285 
    286 #ifndef RAIDOUTSTANDING
    287 #define RAIDOUTSTANDING   6
    288 #endif
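/*
 * A rough worst case, assuming the default RAIDOUTSTANDING of 6 and the
 * 64K-write example above: 6 * 192K of old-data/parity buffers is about
 * 1152K of kernel memory, on top of 6 * 64K = 384K of incoming data.
 */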
    289 
    290 #define RAIDLABELDEV(dev)	\
    291 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    292 
    293 /* declared here, and made public, for the benefit of KVM stuff.. */
    294 struct raid_softc *raid_softc;
    295 
    296 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    297 				     struct disklabel *);
    298 static void raidgetdisklabel(dev_t);
    299 static void raidmakedisklabel(struct raid_softc *);
    300 
    301 static int raidlock(struct raid_softc *);
    302 static void raidunlock(struct raid_softc *);
    303 
    304 static void rf_markalldirty(RF_Raid_t *);
    305 static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
    306 
    307 void rf_ReconThread(struct rf_recon_req *);
    308 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    309 void rf_CopybackThread(RF_Raid_t *raidPtr);
    310 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    311 int rf_autoconfig(struct device *self);
    312 void rf_buildroothack(RF_ConfigSet_t *);
    313 
    314 RF_AutoConfig_t *rf_find_raid_components(void);
    315 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    316 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    317 static int rf_reasonable_label(RF_ComponentLabel_t *);
    318 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    319 int rf_set_autoconfig(RF_Raid_t *, int);
    320 int rf_set_rootpartition(RF_Raid_t *, int);
    321 void rf_release_all_vps(RF_ConfigSet_t *);
    322 void rf_cleanup_config_set(RF_ConfigSet_t *);
    323 int rf_have_enough_components(RF_ConfigSet_t *);
    324 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    325 
    326 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    327 				  allow autoconfig to take place.
    328 				  Note that this is overridden by having
    329 				  RAID_AUTOCONFIG as an option in the
    330 				  kernel config file.  */
    331 
    332 struct RF_Pools_s rf_pools;
    333 
    334 void
    335 raidattach(int num)
    336 {
    337 	int raidID;
    338 	int i, rc;
    339 
    340 #ifdef DEBUG
    341 	printf("raidattach: Asked for %d units\n", num);
    342 #endif
    343 
    344 	if (num <= 0) {
    345 #ifdef DIAGNOSTIC
    346 		panic("raidattach: count <= 0");
    347 #endif
    348 		return;
    349 	}
    350 	/* This is where all the initialization stuff gets done. */
    351 
    352 	numraid = num;
    353 
    354 	/* Make some space for requested number of units... */
    355 
    356 	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
    357 	if (raidPtrs == NULL) {
    358 		panic("raidPtrs is NULL!!");
    359 	}
    360 
    361 	rf_mutex_init(&rf_sparet_wait_mutex);
    362 
    363 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    364 
    365 	for (i = 0; i < num; i++)
    366 		raidPtrs[i] = NULL;
    367 	rc = rf_BootRaidframe();
    368 	if (rc == 0)
    369 		printf("Kernelized RAIDframe activated\n");
    370 	else
    371 		panic("Serious error booting RAID!!");
    372 
    373 	/* put together some datastructures like the CCD device does.. This
    374 	 * lets us lock the device and what-not when it gets opened. */
    375 
    376 	raid_softc = (struct raid_softc *)
    377 		malloc(num * sizeof(struct raid_softc),
    378 		       M_RAIDFRAME, M_NOWAIT);
    379 	if (raid_softc == NULL) {
    380 		printf("WARNING: no memory for RAIDframe driver\n");
    381 		return;
    382 	}
    383 
    384 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    385 
    386 	for (raidID = 0; raidID < num; raidID++) {
    387 		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
    388 
    389 		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
    390 			  (RF_Raid_t *));
    391 		if (raidPtrs[raidID] == NULL) {
    392 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    393 			numraid = raidID;
    394 			return;
    395 		}
    396 	}
    397 
    398 	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
    399 		printf("config_cfattach_attach failed?\n");
    400 	}
    401 
    402 #ifdef RAID_AUTOCONFIG
    403 	raidautoconfig = 1;
    404 #endif
    405 
    406 	/*
    407 	 * Register a finalizer which will be used to auto-config RAID
    408 	 * sets once all real hardware devices have been found.
    409 	 */
    410 	if (config_finalize_register(NULL, rf_autoconfig) != 0)
    411 		printf("WARNING: unable to register RAIDframe finalizer\n");
    412 }
    413 
    414 int
    415 rf_autoconfig(struct device *self)
    416 {
    417 	RF_AutoConfig_t *ac_list;
    418 	RF_ConfigSet_t *config_sets;
    419 
    420 	if (raidautoconfig == 0)
    421 		return (0);
    422 
    423 	/* XXX This code can only be run once. */
    424 	raidautoconfig = 0;
    425 
    426 	/* 1. locate all RAID components on the system */
    427 #ifdef DEBUG
    428 	printf("Searching for RAID components...\n");
    429 #endif
    430 	ac_list = rf_find_raid_components();
    431 
    432 	/* 2. Sort them into their respective sets. */
    433 	config_sets = rf_create_auto_sets(ac_list);
    434 
    435 	/*
    436 	 * 3. Evaluate each set and configure the valid ones.
    437 	 * This gets done in rf_buildroothack().
    438 	 */
    439 	rf_buildroothack(config_sets);
    440 
    441 	return 1;
    442 }
    443 
    444 void
    445 rf_buildroothack(RF_ConfigSet_t *config_sets)
    446 {
    447 	RF_ConfigSet_t *cset;
    448 	RF_ConfigSet_t *next_cset;
    449 	int retcode;
    450 	int raidID;
    451 	int rootID;
    452 	int num_root;
    453 
    454 	rootID = 0;
    455 	num_root = 0;
    456 	cset = config_sets;
    457 	while(cset != NULL ) {
    458 		next_cset = cset->next;
    459 		if (rf_have_enough_components(cset) &&
    460 		    cset->ac->clabel->autoconfigure==1) {
    461 			retcode = rf_auto_config_set(cset,&raidID);
    462 			if (!retcode) {
    463 #ifdef DEBUG
    464 				printf("raid%d: configured ok\n", raidID);
    465 #endif
    466 				if (cset->rootable) {
    467 					rootID = raidID;
    468 					num_root++;
    469 				}
    470 			} else {
    471 				/* The autoconfig didn't work :( */
    472 #ifdef DEBUG
    473 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    474 #endif
    475 				rf_release_all_vps(cset);
    476 			}
    477 		} else {
    478 #ifdef DEBUG
    479 			printf("Not enough components to configure this set\n");
    480 #endif
    481 			/* we're not autoconfiguring this set...
    482 			   release the associated resources */
    483 			rf_release_all_vps(cset);
    484 		}
    485 		/* cleanup */
    486 		rf_cleanup_config_set(cset);
    487 		cset = next_cset;
    488 	}
    489 
    490 	/* if the user has specified what the root device should be
    491 	   then we don't touch booted_device or boothowto... */
    492 
    493 	if (rootspec != NULL)
    494 		return;
    495 
    496 	/* we found something bootable... */
    497 
    498 	if (num_root == 1) {
    499 		booted_device = raid_softc[rootID].sc_dev;
    500 	} else if (num_root > 1) {
    501 		/* we can't guess.. require the user to answer... */
    502 		boothowto |= RB_ASKNAME;
    503 	}
    504 }
    505 
    506 
    507 int
    508 raidsize(dev_t dev)
    509 {
    510 	struct raid_softc *rs;
    511 	struct disklabel *lp;
    512 	int     part, unit, omask, size;
    513 
    514 	unit = raidunit(dev);
    515 	if (unit >= numraid)
    516 		return (-1);
    517 	rs = &raid_softc[unit];
    518 
    519 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    520 		return (-1);
    521 
    522 	part = DISKPART(dev);
    523 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    524 	lp = rs->sc_dkdev.dk_label;
    525 
    526 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
    527 		return (-1);
    528 
    529 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    530 		size = -1;
    531 	else
    532 		size = lp->d_partitions[part].p_size *
    533 		    (lp->d_secsize / DEV_BSIZE);
    534 
    535 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
    536 		return (-1);
    537 
    538 	return (size);
    539 
    540 }
    541 
    542 int
    543 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
    544 {
    545 	int     unit = raidunit(dev);
    546 	struct raid_softc *rs;
    547 	const struct bdevsw *bdev;
    548 	struct disklabel *lp;
    549 	RF_Raid_t *raidPtr;
    550 	daddr_t offset;
    551 	int     part, c, sparecol, j, scol, dumpto;
    552 	int     error = 0;
    553 
    554 	if (unit >= numraid)
    555 		return (ENXIO);
    556 
    557 	rs = &raid_softc[unit];
    558 	raidPtr = raidPtrs[unit];
    559 
    560 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    561 		return ENXIO;
    562 
    563 	/* we only support dumping to RAID 1 sets */
    564 	if (raidPtr->Layout.numDataCol != 1 ||
    565 	    raidPtr->Layout.numParityCol != 1)
    566 		return EINVAL;
    567 
    568 
    569 	if ((error = raidlock(rs)) != 0)
    570 		return error;
    571 
    572 	if (size % DEV_BSIZE != 0) {
    573 		error = EINVAL;
    574 		goto out;
    575 	}
    576 
    577 	if (blkno + size / DEV_BSIZE > rs->sc_size) {
    578 		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
    579 		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
    580 		    size / DEV_BSIZE, rs->sc_size);
    581 		error = EINVAL;
    582 		goto out;
    583 	}
    584 
    585 	part = DISKPART(dev);
    586 	lp = rs->sc_dkdev.dk_label;
    587 	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
    588 
    589 	/* figure out what device is alive.. */
    590 
    591 	/*
    592 	   Look for a component to dump to.  The preference for the
    593 	   component to dump to is as follows:
    594 	   1) the master
    595 	   2) a used_spare of the master
    596 	   3) the slave
    597 	   4) a used_spare of the slave
    598 	*/
    599 
    600 	dumpto = -1;
    601 	for (c = 0; c < raidPtr->numCol; c++) {
    602 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
    603 			/* this might be the one */
    604 			dumpto = c;
    605 			break;
    606 		}
    607 	}
    608 
    609 	/*
    610 	   At this point we have possibly selected a live master or a
    611 	   live slave.  We now check to see if there is a spared
    612 	   master (or a spared slave), if we didn't find a live master
    613 	   or a live slave.
    614 	*/
    615 
    616 	for (c = 0; c < raidPtr->numSpare; c++) {
    617 		sparecol = raidPtr->numCol + c;
    618 		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
    619 			/* How about this one? */
    620 			scol = -1;
    621 			for(j=0;j<raidPtr->numCol;j++) {
    622 				if (raidPtr->Disks[j].spareCol == sparecol) {
    623 					scol = j;
    624 					break;
    625 				}
    626 			}
    627 			if (scol == 0) {
    628 				/*
    629 				   We must have found a spared master!
    630 				   We'll take that over anything else
    631 				   found so far.  (We couldn't have
    632 				   found a real master before, since
    633 				   this is a used spare, and it's
    634 				   saying that it's replacing the
    635 				   master.)  On reboot (with
    636 				   autoconfiguration turned on)
    637 				   sparecol will become the 1st
    638 				   component (component0) of this set.
    639 				*/
    640 				dumpto = sparecol;
    641 				break;
    642 			} else if (scol != -1) {
    643 				/*
    644 				   Must be a spared slave.  We'll dump
    645 				   to that if we haven't found anything
    646 				   else so far.
    647 				*/
    648 				if (dumpto == -1)
    649 					dumpto = sparecol;
    650 			}
    651 		}
    652 	}
    653 
    654 	if (dumpto == -1) {
    655 		/* we couldn't find any live components to dump to!?!?
    656 		 */
    657 		error = EINVAL;
    658 		goto out;
    659 	}
    660 
    661 	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
    662 
    663 	/*
    664 	   Note that blkno is relative to this particular partition.
    665 	   By adding the offset of this partition in the RAID
    666 	   set, and also adding RF_PROTECTED_SECTORS, we get a
    667 	   value that is relative to the partition used for the
    668 	   underlying component.
    669 	*/
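	/*
	   A worked example (assuming RF_PROTECTED_SECTORS is 64): dumping
	   blkno 100 of a partition that begins at sector 1000 of the RAID
	   set writes to sector 100 + 1000 + 64 = 1164 of the underlying
	   component's partition.
	*/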
    670 
    671 	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
    672 				blkno + offset, va, size);
    673 
    674 out:
    675 	raidunlock(rs);
    676 
    677 	return error;
    678 }
    679 /* ARGSUSED */
    680 int
    681 raidopen(dev_t dev, int flags, int fmt,
    682     struct lwp *l)
    683 {
    684 	int     unit = raidunit(dev);
    685 	struct raid_softc *rs;
    686 	struct disklabel *lp;
    687 	int     part, pmask;
    688 	int     error = 0;
    689 
    690 	if (unit >= numraid)
    691 		return (ENXIO);
    692 	rs = &raid_softc[unit];
    693 
    694 	if ((error = raidlock(rs)) != 0)
    695 		return (error);
    696 	lp = rs->sc_dkdev.dk_label;
    697 
    698 	part = DISKPART(dev);
    699 
    700 	/*
    701 	 * If there are wedges, and this is not RAW_PART, then we
    702 	 * need to fail.
    703 	 */
    704 	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
    705 		error = EBUSY;
    706 		goto bad;
    707 	}
    708 	pmask = (1 << part);
    709 
    710 	if ((rs->sc_flags & RAIDF_INITED) &&
    711 	    (rs->sc_dkdev.dk_openmask == 0))
    712 		raidgetdisklabel(dev);
    713 
    714 	/* make sure that this partition exists */
    715 
    716 	if (part != RAW_PART) {
    717 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    718 		    ((part >= lp->d_npartitions) ||
    719 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    720 			error = ENXIO;
    721 			goto bad;
    722 		}
    723 	}
    724 	/* Prevent this unit from being unconfigured while open. */
    725 	switch (fmt) {
    726 	case S_IFCHR:
    727 		rs->sc_dkdev.dk_copenmask |= pmask;
    728 		break;
    729 
    730 	case S_IFBLK:
    731 		rs->sc_dkdev.dk_bopenmask |= pmask;
    732 		break;
    733 	}
    734 
    735 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    736 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    737 		/* First one... mark things as dirty... Note that we *MUST*
    738 		 have done a configure before this.  I DO NOT WANT TO BE
    739 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    740 		 THAT THEY BELONG TOGETHER!!!!! */
    741 		/* XXX should check to see if we're only open for reading
    742 		   here... If so, we needn't do this, but then need some
    743 		   other way of keeping track of what's happened.. */
    744 
    745 		rf_markalldirty( raidPtrs[unit] );
    746 	}
    747 
    748 
    749 	rs->sc_dkdev.dk_openmask =
    750 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    751 
    752 bad:
    753 	raidunlock(rs);
    754 
    755 	return (error);
    756 
    757 
    758 }
    759 /* ARGSUSED */
    760 int
    761 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
    762 {
    763 	int     unit = raidunit(dev);
    764 	struct cfdata *cf;
    765 	struct raid_softc *rs;
    766 	int     error = 0;
    767 	int     part;
    768 
    769 	if (unit >= numraid)
    770 		return (ENXIO);
    771 	rs = &raid_softc[unit];
    772 
    773 	if ((error = raidlock(rs)) != 0)
    774 		return (error);
    775 
    776 	part = DISKPART(dev);
    777 
    778 	/* ...that much closer to allowing unconfiguration... */
    779 	switch (fmt) {
    780 	case S_IFCHR:
    781 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    782 		break;
    783 
    784 	case S_IFBLK:
    785 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    786 		break;
    787 	}
    788 	rs->sc_dkdev.dk_openmask =
    789 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    790 
    791 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    792 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    793 		/* Last one... device is not unconfigured yet.
    794 		   Device shutdown has taken care of setting the
    795 		   clean bits if RAIDF_INITED is not set.
    796 		   Mark things as clean... */
    797 
    798 		rf_update_component_labels(raidPtrs[unit],
    799 						 RF_FINAL_COMPONENT_UPDATE);
    800 		if (doing_shutdown) {
    801 			/* last one, and we're going down, so
    802 			   lights out for this RAID set too. */
    803 			error = rf_Shutdown(raidPtrs[unit]);
    804 
    805 			/* It's no longer initialized... */
    806 			rs->sc_flags &= ~RAIDF_INITED;
    807 
    808 			/* detach the device */
    809 
    810 			cf = device_cfdata(rs->sc_dev);
    811 			error = config_detach(rs->sc_dev, DETACH_QUIET);
    812 			free(cf, M_RAIDFRAME);
    813 
    814 			/* Detach the disk. */
    815 			pseudo_disk_detach(&rs->sc_dkdev);
    816 		}
    817 	}
    818 
    819 	raidunlock(rs);
    820 	return (0);
    821 
    822 }
    823 
    824 void
    825 raidstrategy(struct buf *bp)
    826 {
    827 	int s;
    828 
    829 	unsigned int raidID = raidunit(bp->b_dev);
    830 	RF_Raid_t *raidPtr;
    831 	struct raid_softc *rs = &raid_softc[raidID];
    832 	int     wlabel;
    833 
    834 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    835 		bp->b_error = ENXIO;
    836 		bp->b_flags |= B_ERROR;
    837 		goto done;
    838 	}
    839 	if (raidID >= numraid || !raidPtrs[raidID]) {
    840 		bp->b_error = ENODEV;
    841 		bp->b_flags |= B_ERROR;
    842 		goto done;
    843 	}
    844 	raidPtr = raidPtrs[raidID];
    845 	if (!raidPtr->valid) {
    846 		bp->b_error = ENODEV;
    847 		bp->b_flags |= B_ERROR;
    848 		goto done;
    849 	}
    850 	if (bp->b_bcount == 0) {
    851 		db1_printf(("b_bcount is zero..\n"));
    852 		goto done;
    853 	}
    854 
    855 	/*
    856 	 * Do bounds checking and adjust transfer.  If there's an
    857 	 * error, the bounds check will flag that for us.
    858 	 */
    859 
    860 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    861 	if (DISKPART(bp->b_dev) == RAW_PART) {
    862 		uint64_t size; /* device size in DEV_BSIZE unit */
    863 
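		/*
		 * Convert the array size from native sectors to DEV_BSIZE
		 * (512-byte) units.  For example, with 2048-byte sectors
		 * (logBytesPerSector == 11, DEV_BSHIFT == 9) totalSectors
		 * is shifted left by 2.
		 */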
    864 		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
    865 			size = raidPtr->totalSectors <<
    866 			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
    867 		} else {
    868 			size = raidPtr->totalSectors >>
    869 			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
    870 		}
    871 		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
    872 			goto done;
    873 		}
    874 	} else {
    875 		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
    876 			db1_printf(("Bounds check failed!!:%d %d\n",
    877 				(int) bp->b_blkno, (int) wlabel));
    878 			goto done;
    879 		}
    880 	}
    881 	s = splbio();
    882 
    883 	bp->b_resid = 0;
    884 
    885 	/* stuff it onto our queue */
    886 	BUFQ_PUT(rs->buf_queue, bp);
    887 
    888 	/* schedule the IO to happen at the next convenient time */
    889 	wakeup(&(raidPtrs[raidID]->iodone));
    890 
    891 	splx(s);
    892 	return;
    893 
    894 done:
    895 	bp->b_resid = bp->b_bcount;
    896 	biodone(bp);
    897 }
    898 /* ARGSUSED */
    899 int
    900 raidread(dev_t dev, struct uio *uio, int flags)
    901 {
    902 	int     unit = raidunit(dev);
    903 	struct raid_softc *rs;
    904 
    905 	if (unit >= numraid)
    906 		return (ENXIO);
    907 	rs = &raid_softc[unit];
    908 
    909 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    910 		return (ENXIO);
    911 
    912 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    913 
    914 }
    915 /* ARGSUSED */
    916 int
    917 raidwrite(dev_t dev, struct uio *uio, int flags)
    918 {
    919 	int     unit = raidunit(dev);
    920 	struct raid_softc *rs;
    921 
    922 	if (unit >= numraid)
    923 		return (ENXIO);
    924 	rs = &raid_softc[unit];
    925 
    926 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    927 		return (ENXIO);
    928 
    929 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    930 
    931 }
    932 
    933 int
    934 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
    935 {
    936 	int     unit = raidunit(dev);
    937 	int     error = 0;
    938 	int     part, pmask;
    939 	struct cfdata *cf;
    940 	struct raid_softc *rs;
    941 	RF_Config_t *k_cfg, *u_cfg;
    942 	RF_Raid_t *raidPtr;
    943 	RF_RaidDisk_t *diskPtr;
    944 	RF_AccTotals_t *totals;
    945 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    946 	u_char *specific_buf;
    947 	int retcode = 0;
    948 	int column;
    949 	int raidid;
    950 	struct rf_recon_req *rrcopy, *rr;
    951 	RF_ComponentLabel_t *clabel;
    952 	RF_ComponentLabel_t *ci_label;
    953 	RF_ComponentLabel_t **clabel_ptr;
    954 	RF_SingleComponent_t *sparePtr,*componentPtr;
    955 	RF_SingleComponent_t component;
    956 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    957 	int i, j, d;
    958 #ifdef __HAVE_OLD_DISKLABEL
    959 	struct disklabel newlabel;
    960 #endif
    961 	struct dkwedge_info *dkw;
    962 
    963 	if (unit >= numraid)
    964 		return (ENXIO);
    965 	rs = &raid_softc[unit];
    966 	raidPtr = raidPtrs[unit];
    967 
    968 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    969 		(int) DISKPART(dev), (int) unit, (int) cmd));
    970 
    971 	/* Must be open for writes for these commands... */
    972 	switch (cmd) {
    973 #ifdef DIOCGSECTORSIZE
    974 	case DIOCGSECTORSIZE:
    975 		*(u_int *)data = raidPtr->bytesPerSector;
    976 		return 0;
    977 	case DIOCGMEDIASIZE:
    978 		*(off_t *)data =
    979 		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
    980 		return 0;
    981 #endif
    982 	case DIOCSDINFO:
    983 	case DIOCWDINFO:
    984 #ifdef __HAVE_OLD_DISKLABEL
    985 	case ODIOCWDINFO:
    986 	case ODIOCSDINFO:
    987 #endif
    988 	case DIOCWLABEL:
    989 	case DIOCAWEDGE:
    990 	case DIOCDWEDGE:
    991 		if ((flag & FWRITE) == 0)
    992 			return (EBADF);
    993 	}
    994 
    995 	/* Must be initialized for these... */
    996 	switch (cmd) {
    997 	case DIOCGDINFO:
    998 	case DIOCSDINFO:
    999 	case DIOCWDINFO:
   1000 #ifdef __HAVE_OLD_DISKLABEL
   1001 	case ODIOCGDINFO:
   1002 	case ODIOCWDINFO:
   1003 	case ODIOCSDINFO:
   1004 	case ODIOCGDEFLABEL:
   1005 #endif
   1006 	case DIOCGPART:
   1007 	case DIOCWLABEL:
   1008 	case DIOCGDEFLABEL:
   1009 	case DIOCAWEDGE:
   1010 	case DIOCDWEDGE:
   1011 	case DIOCLWEDGES:
   1012 	case RAIDFRAME_SHUTDOWN:
   1013 	case RAIDFRAME_REWRITEPARITY:
   1014 	case RAIDFRAME_GET_INFO:
   1015 	case RAIDFRAME_RESET_ACCTOTALS:
   1016 	case RAIDFRAME_GET_ACCTOTALS:
   1017 	case RAIDFRAME_KEEP_ACCTOTALS:
   1018 	case RAIDFRAME_GET_SIZE:
   1019 	case RAIDFRAME_FAIL_DISK:
   1020 	case RAIDFRAME_COPYBACK:
   1021 	case RAIDFRAME_CHECK_RECON_STATUS:
   1022 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1023 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1024 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1025 	case RAIDFRAME_ADD_HOT_SPARE:
   1026 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1027 	case RAIDFRAME_INIT_LABELS:
   1028 	case RAIDFRAME_REBUILD_IN_PLACE:
   1029 	case RAIDFRAME_CHECK_PARITY:
   1030 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1031 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1032 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1033 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1034 	case RAIDFRAME_SET_AUTOCONFIG:
   1035 	case RAIDFRAME_SET_ROOT:
   1036 	case RAIDFRAME_DELETE_COMPONENT:
   1037 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1038 		if ((rs->sc_flags & RAIDF_INITED) == 0)
   1039 			return (ENXIO);
   1040 	}
   1041 
   1042 	switch (cmd) {
   1043 
   1044 		/* configure the system */
   1045 	case RAIDFRAME_CONFIGURE:
   1046 
   1047 		if (raidPtr->valid) {
   1048 			/* There is a valid RAID set running on this unit! */
   1049 			printf("raid%d: Device already configured!\n",unit);
   1050 			return(EINVAL);
   1051 		}
   1052 
   1053 		/* copy-in the configuration information */
   1054 		/* data points to a pointer to the configuration structure */
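		/*
		 * A sketch of the expected userland call (raidctl does
		 * roughly this):
		 *
		 *	RF_Config_t cfg, *cfgp = &cfg;
		 *	... fill in cfg ...
		 *	ioctl(fd, RAIDFRAME_CONFIGURE, &cfgp);
		 *
		 * so "data" here holds a pointer to the user's RF_Config_t *.
		 */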
   1055 
   1056 		u_cfg = *((RF_Config_t **) data);
   1057 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
   1058 		if (k_cfg == NULL) {
   1059 			return (ENOMEM);
   1060 		}
   1061 		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
   1062 		if (retcode) {
   1063 			RF_Free(k_cfg, sizeof(RF_Config_t));
   1064 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
   1065 				retcode));
   1066 			return (retcode);
   1067 		}
   1068 		/* allocate a buffer for the layout-specific data, and copy it
   1069 		 * in */
   1070 		if (k_cfg->layoutSpecificSize) {
   1071 			if (k_cfg->layoutSpecificSize > 10000) {
   1072 				/* sanity check */
   1073 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1074 				return (EINVAL);
   1075 			}
   1076 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
   1077 			    (u_char *));
   1078 			if (specific_buf == NULL) {
   1079 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1080 				return (ENOMEM);
   1081 			}
   1082 			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
   1083 			    k_cfg->layoutSpecificSize);
   1084 			if (retcode) {
   1085 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1086 				RF_Free(specific_buf,
   1087 					k_cfg->layoutSpecificSize);
   1088 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
   1089 					retcode));
   1090 				return (retcode);
   1091 			}
   1092 		} else
   1093 			specific_buf = NULL;
   1094 		k_cfg->layoutSpecific = specific_buf;
   1095 
   1096 		/* should do some kind of sanity check on the configuration.
   1097 		 * Store the sum of all the bytes in the last byte? */
   1098 
   1099 		/* configure the system */
   1100 
   1101 		/*
   1102 		 * Clear the entire RAID descriptor, just to make sure
   1103 		 *  there is no stale data left in the case of a
   1104 		 *  reconfiguration
   1105 		 */
   1106 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
   1107 		raidPtr->raidid = unit;
   1108 
   1109 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
   1110 
   1111 		if (retcode == 0) {
   1112 
   1113 			/* allow this many simultaneous IO's to
   1114 			   this RAID device */
   1115 			raidPtr->openings = RAIDOUTSTANDING;
   1116 
   1117 			raidinit(raidPtr);
   1118 			rf_markalldirty(raidPtr);
   1119 		}
   1120 		/* free the buffers.  No return code here. */
   1121 		if (k_cfg->layoutSpecificSize) {
   1122 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
   1123 		}
   1124 		RF_Free(k_cfg, sizeof(RF_Config_t));
   1125 
   1126 		return (retcode);
   1127 
   1128 		/* shutdown the system */
   1129 	case RAIDFRAME_SHUTDOWN:
   1130 
   1131 		if ((error = raidlock(rs)) != 0)
   1132 			return (error);
   1133 
   1134 		/*
   1135 		 * If somebody has a partition mounted, we shouldn't
   1136 		 * shutdown.
   1137 		 */
   1138 
   1139 		part = DISKPART(dev);
   1140 		pmask = (1 << part);
   1141 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
   1142 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
   1143 			(rs->sc_dkdev.dk_copenmask & pmask))) {
   1144 			raidunlock(rs);
   1145 			return (EBUSY);
   1146 		}
   1147 
   1148 		retcode = rf_Shutdown(raidPtr);
   1149 
   1150 		/* It's no longer initialized... */
   1151 		rs->sc_flags &= ~RAIDF_INITED;
   1152 
   1153 		/* free the pseudo device attach bits */
   1154 
   1155 		cf = device_cfdata(rs->sc_dev);
   1156 		/* XXX this causes us to not return any errors
   1157 		   from the above call to rf_Shutdown() */
   1158 		retcode = config_detach(rs->sc_dev, DETACH_QUIET);
   1159 		free(cf, M_RAIDFRAME);
   1160 
   1161 		/* Detach the disk. */
   1162 		pseudo_disk_detach(&rs->sc_dkdev);
   1163 
   1164 		raidunlock(rs);
   1165 
   1166 		return (retcode);
   1167 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1168 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1169 		/* need to read the component label for the disk indicated
   1170 		   by row, column in clabel */
   1171 
   1172 		/* For practice, let's get it directly from disk, rather
   1173 		   than from the in-core copy */
   1174 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
   1175 			   (RF_ComponentLabel_t *));
   1176 		if (clabel == NULL)
   1177 			return (ENOMEM);
   1178 
   1179 		retcode = copyin( *clabel_ptr, clabel,
   1180 				  sizeof(RF_ComponentLabel_t));
   1181 
   1182 		if (retcode) {
   1183 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1184 			return(retcode);
   1185 		}
   1186 
   1187 		clabel->row = 0; /* Don't allow looking at anything else.*/
   1188 
   1189 		column = clabel->column;
   1190 
   1191 		if ((column < 0) || (column >= raidPtr->numCol +
   1192 				     raidPtr->numSpare)) {
   1193 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1194 			return(EINVAL);
   1195 		}
   1196 
   1197 		retcode = raidread_component_label(raidPtr->Disks[column].dev,
   1198 				raidPtr->raid_cinfo[column].ci_vp,
   1199 				clabel );
   1200 
   1201 		if (retcode == 0) {
   1202 			retcode = copyout(clabel, *clabel_ptr,
   1203 					  sizeof(RF_ComponentLabel_t));
   1204 		}
   1205 		RF_Free(clabel, sizeof(RF_ComponentLabel_t));
   1206 		return (retcode);
   1207 
   1208 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1209 		clabel = (RF_ComponentLabel_t *) data;
   1210 
   1211 		/* XXX check the label for valid stuff... */
   1212 		/* Note that some things *should not* get modified --
   1213 		   the user should be re-initing the labels instead of
   1214 		   trying to patch things.
   1215 		   */
   1216 
   1217 		raidid = raidPtr->raidid;
   1218 #ifdef DEBUG
   1219 		printf("raid%d: Got component label:\n", raidid);
   1220 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1221 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1222 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1223 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1224 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1225 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1226 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1227 #endif
   1228 		clabel->row = 0;
   1229 		column = clabel->column;
   1230 
   1231 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1232 			return(EINVAL);
   1233 		}
   1234 
   1235 		/* XXX this isn't allowed to do anything for now :-) */
   1236 
   1237 		/* XXX and before it is, we need to fill in the rest
   1238 		   of the fields!?!?!?! */
   1239 #if 0
   1240 		raidwrite_component_label(
   1241 		     raidPtr->Disks[column].dev,
   1242 			    raidPtr->raid_cinfo[column].ci_vp,
   1243 			    clabel );
   1244 #endif
   1245 		return (0);
   1246 
   1247 	case RAIDFRAME_INIT_LABELS:
   1248 		clabel = (RF_ComponentLabel_t *) data;
   1249 		/*
   1250 		   we only want the serial number from
   1251 		   the above.  We get all the rest of the information
   1252 		   from the config that was used to create this RAID
   1253 		   set.
   1254 		   */
   1255 
   1256 		raidPtr->serial_number = clabel->serial_number;
   1257 
   1258 		RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
   1259 			  (RF_ComponentLabel_t *));
   1260 		if (ci_label == NULL)
   1261 			return (ENOMEM);
   1262 
   1263 		raid_init_component_label(raidPtr, ci_label);
   1264 		ci_label->serial_number = clabel->serial_number;
   1265 		ci_label->row = 0; /* we don't pretend to support more */
   1266 
   1267 		for(column=0;column<raidPtr->numCol;column++) {
   1268 			diskPtr = &raidPtr->Disks[column];
   1269 			if (!RF_DEAD_DISK(diskPtr->status)) {
   1270 				ci_label->partitionSize = diskPtr->partitionSize;
   1271 				ci_label->column = column;
   1272 				raidwrite_component_label(
   1273 							  raidPtr->Disks[column].dev,
   1274 							  raidPtr->raid_cinfo[column].ci_vp,
   1275 							  ci_label );
   1276 			}
   1277 		}
   1278 		RF_Free(ci_label, sizeof(RF_ComponentLabel_t));
   1279 
   1280 		return (retcode);
   1281 	case RAIDFRAME_SET_AUTOCONFIG:
   1282 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1283 		printf("raid%d: New autoconfig value is: %d\n",
   1284 		       raidPtr->raidid, d);
   1285 		*(int *) data = d;
   1286 		return (retcode);
   1287 
   1288 	case RAIDFRAME_SET_ROOT:
   1289 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1290 		printf("raid%d: New rootpartition value is: %d\n",
   1291 		       raidPtr->raidid, d);
   1292 		*(int *) data = d;
   1293 		return (retcode);
   1294 
   1295 		/* initialize all parity */
   1296 	case RAIDFRAME_REWRITEPARITY:
   1297 
   1298 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1299 			/* Parity for RAID 0 is trivially correct */
   1300 			raidPtr->parity_good = RF_RAID_CLEAN;
   1301 			return(0);
   1302 		}
   1303 
   1304 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1305 			/* Re-write is already in progress! */
   1306 			return(EINVAL);
   1307 		}
   1308 
   1309 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1310 					   rf_RewriteParityThread,
   1311 					   raidPtr,"raid_parity");
   1312 		return (retcode);
   1313 
   1314 
   1315 	case RAIDFRAME_ADD_HOT_SPARE:
   1316 		sparePtr = (RF_SingleComponent_t *) data;
   1317 		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
   1318 		retcode = rf_add_hot_spare(raidPtr, &component);
   1319 		return(retcode);
   1320 
   1321 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1322 		return(retcode);
   1323 
   1324 	case RAIDFRAME_DELETE_COMPONENT:
   1325 		componentPtr = (RF_SingleComponent_t *)data;
   1326 		memcpy( &component, componentPtr,
   1327 			sizeof(RF_SingleComponent_t));
   1328 		retcode = rf_delete_component(raidPtr, &component);
   1329 		return(retcode);
   1330 
   1331 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1332 		componentPtr = (RF_SingleComponent_t *)data;
   1333 		memcpy( &component, componentPtr,
   1334 			sizeof(RF_SingleComponent_t));
   1335 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1336 		return(retcode);
   1337 
   1338 	case RAIDFRAME_REBUILD_IN_PLACE:
   1339 
   1340 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1341 			/* Can't do this on a RAID 0!! */
   1342 			return(EINVAL);
   1343 		}
   1344 
   1345 		if (raidPtr->recon_in_progress == 1) {
   1346 			/* a reconstruct is already in progress! */
   1347 			return(EINVAL);
   1348 		}
   1349 
   1350 		componentPtr = (RF_SingleComponent_t *) data;
   1351 		memcpy( &component, componentPtr,
   1352 			sizeof(RF_SingleComponent_t));
   1353 		component.row = 0; /* we don't support any more */
   1354 		column = component.column;
   1355 
   1356 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1357 			return(EINVAL);
   1358 		}
   1359 
   1360 		RF_LOCK_MUTEX(raidPtr->mutex);
   1361 		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
   1362 		    (raidPtr->numFailures > 0)) {
   1363 			/* XXX 0 above shouldn't be constant!!! */
   1364 			/* some component other than this has failed.
   1365 			   Let's not make things worse than they already
   1366 			   are... */
   1367 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1368 			       raidPtr->raidid);
   1369 			printf("raid%d:     Col: %d   Too many failures.\n",
   1370 			       raidPtr->raidid, column);
   1371 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1372 			return (EINVAL);
   1373 		}
   1374 		if (raidPtr->Disks[column].status ==
   1375 		    rf_ds_reconstructing) {
   1376 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1377 			       raidPtr->raidid);
   1378 			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);
   1379 
   1380 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1381 			return (EINVAL);
   1382 		}
   1383 		if (raidPtr->Disks[column].status == rf_ds_spared) {
   1384 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1385 			return (EINVAL);
   1386 		}
   1387 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1388 
   1389 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1390 		if (rrcopy == NULL)
   1391 			return(ENOMEM);
   1392 
   1393 		rrcopy->raidPtr = (void *) raidPtr;
   1394 		rrcopy->col = column;
   1395 
   1396 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1397 					   rf_ReconstructInPlaceThread,
   1398 					   rrcopy,"raid_reconip");
   1399 		return(retcode);
   1400 
   1401 	case RAIDFRAME_GET_INFO:
   1402 		if (!raidPtr->valid)
   1403 			return (ENODEV);
   1404 		ucfgp = (RF_DeviceConfig_t **) data;
   1405 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1406 			  (RF_DeviceConfig_t *));
   1407 		if (d_cfg == NULL)
   1408 			return (ENOMEM);
   1409 		d_cfg->rows = 1; /* there is only 1 row now */
   1410 		d_cfg->cols = raidPtr->numCol;
   1411 		d_cfg->ndevs = raidPtr->numCol;
   1412 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1413 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1414 			return (ENOMEM);
   1415 		}
   1416 		d_cfg->nspares = raidPtr->numSpare;
   1417 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1418 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1419 			return (ENOMEM);
   1420 		}
   1421 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1422 		d = 0;
   1423 		for (j = 0; j < d_cfg->cols; j++) {
   1424 			d_cfg->devs[d] = raidPtr->Disks[j];
   1425 			d++;
   1426 		}
   1427 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1428 			d_cfg->spares[i] = raidPtr->Disks[j];
   1429 		}
   1430 		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
   1431 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1432 
   1433 		return (retcode);
   1434 
   1435 	case RAIDFRAME_CHECK_PARITY:
   1436 		*(int *) data = raidPtr->parity_good;
   1437 		return (0);
   1438 
   1439 	case RAIDFRAME_RESET_ACCTOTALS:
   1440 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1441 		return (0);
   1442 
   1443 	case RAIDFRAME_GET_ACCTOTALS:
   1444 		totals = (RF_AccTotals_t *) data;
   1445 		*totals = raidPtr->acc_totals;
   1446 		return (0);
   1447 
   1448 	case RAIDFRAME_KEEP_ACCTOTALS:
   1449 		raidPtr->keep_acc_totals = *(int *)data;
   1450 		return (0);
   1451 
   1452 	case RAIDFRAME_GET_SIZE:
   1453 		*(int *) data = raidPtr->totalSectors;
   1454 		return (0);
   1455 
   1456 		/* fail a disk & optionally start reconstruction */
   1457 	case RAIDFRAME_FAIL_DISK:
   1458 
   1459 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1460 			/* Can't do this on a RAID 0!! */
   1461 			return(EINVAL);
   1462 		}
   1463 
   1464 		rr = (struct rf_recon_req *) data;
   1465 		rr->row = 0;
   1466 		if (rr->col < 0 || rr->col >= raidPtr->numCol)
   1467 			return (EINVAL);
   1468 
   1469 
   1470 		RF_LOCK_MUTEX(raidPtr->mutex);
   1471 		if (raidPtr->status == rf_rs_reconstructing) {
   1472 			/* you can't fail a disk while we're reconstructing! */
   1473 			/* XXX wrong for RAID6 */
   1474 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1475 			return (EINVAL);
   1476 		}
   1477 		if ((raidPtr->Disks[rr->col].status ==
   1478 		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
   1479 			/* some other component has failed.  Let's not make
   1480 			   things worse. XXX wrong for RAID6 */
   1481 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1482 			return (EINVAL);
   1483 		}
   1484 		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
   1485 			/* Can't fail a spared disk! */
   1486 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1487 			return (EINVAL);
   1488 		}
   1489 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1490 
   1491 		/* make a copy of the recon request so that we don't rely on
   1492 		 * the user's buffer */
   1493 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1494 		if (rrcopy == NULL)
   1495 			return(ENOMEM);
   1496 		memcpy(rrcopy, rr, sizeof(*rr));
   1497 		rrcopy->raidPtr = (void *) raidPtr;
   1498 
   1499 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1500 					   rf_ReconThread,
   1501 					   rrcopy,"raid_recon");
   1502 		return (0);
   1503 
   1504 		/* invoke a copyback operation after recon on whatever disk
   1505 		 * needs it, if any */
   1506 	case RAIDFRAME_COPYBACK:
   1507 
   1508 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1509 			/* This makes no sense on a RAID 0!! */
   1510 			return(EINVAL);
   1511 		}
   1512 
   1513 		if (raidPtr->copyback_in_progress == 1) {
   1514 			/* Copyback is already in progress! */
   1515 			return(EINVAL);
   1516 		}
   1517 
   1518 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1519 					   rf_CopybackThread,
   1520 					   raidPtr,"raid_copyback");
   1521 		return (retcode);
   1522 
   1523 		/* return the percentage completion of reconstruction */
   1524 	case RAIDFRAME_CHECK_RECON_STATUS:
   1525 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1526 			/* This makes no sense on a RAID 0, so tell the
   1527 			   user it's done. */
   1528 			*(int *) data = 100;
   1529 			return(0);
   1530 		}
   1531 		if (raidPtr->status != rf_rs_reconstructing)
   1532 			*(int *) data = 100;
   1533 		else {
   1534 			if (raidPtr->reconControl->numRUsTotal > 0) {
   1535 				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
   1536 			} else {
   1537 				*(int *) data = 0;
   1538 			}
   1539 		}
   1540 		return (0);
   1541 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1542 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1543 		if (raidPtr->status != rf_rs_reconstructing) {
   1544 			progressInfo.remaining = 0;
   1545 			progressInfo.completed = 100;
   1546 			progressInfo.total = 100;
   1547 		} else {
   1548 			progressInfo.total =
   1549 				raidPtr->reconControl->numRUsTotal;
   1550 			progressInfo.completed =
   1551 				raidPtr->reconControl->numRUsComplete;
   1552 			progressInfo.remaining = progressInfo.total -
   1553 				progressInfo.completed;
   1554 		}
   1555 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1556 				  sizeof(RF_ProgressInfo_t));
   1557 		return (retcode);
   1558 
   1559 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1560 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1561 			/* This makes no sense on a RAID 0, so tell the
   1562 			   user it's done. */
   1563 			*(int *) data = 100;
   1564 			return(0);
   1565 		}
   1566 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1567 			*(int *) data = 100 *
   1568 				raidPtr->parity_rewrite_stripes_done /
   1569 				raidPtr->Layout.numStripe;
   1570 		} else {
   1571 			*(int *) data = 100;
   1572 		}
   1573 		return (0);
   1574 
   1575 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1576 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1577 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1578 			progressInfo.total = raidPtr->Layout.numStripe;
   1579 			progressInfo.completed =
   1580 				raidPtr->parity_rewrite_stripes_done;
   1581 			progressInfo.remaining = progressInfo.total -
   1582 				progressInfo.completed;
   1583 		} else {
   1584 			progressInfo.remaining = 0;
   1585 			progressInfo.completed = 100;
   1586 			progressInfo.total = 100;
   1587 		}
   1588 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1589 				  sizeof(RF_ProgressInfo_t));
   1590 		return (retcode);
   1591 
   1592 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1593 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1594 			/* This makes no sense on a RAID 0 */
   1595 			*(int *) data = 100;
   1596 			return(0);
   1597 		}
   1598 		if (raidPtr->copyback_in_progress == 1) {
   1599 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1600 				raidPtr->Layout.numStripe;
   1601 		} else {
   1602 			*(int *) data = 100;
   1603 		}
   1604 		return (0);
   1605 
   1606 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1607 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1608 		if (raidPtr->copyback_in_progress == 1) {
   1609 			progressInfo.total = raidPtr->Layout.numStripe;
   1610 			progressInfo.completed =
   1611 				raidPtr->copyback_stripes_done;
   1612 			progressInfo.remaining = progressInfo.total -
   1613 				progressInfo.completed;
   1614 		} else {
   1615 			progressInfo.remaining = 0;
   1616 			progressInfo.completed = 100;
   1617 			progressInfo.total = 100;
   1618 		}
   1619 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1620 				  sizeof(RF_ProgressInfo_t));
   1621 		return (retcode);
   1622 
   1623 		/* the sparetable daemon calls this to wait for the kernel to
   1624 		 * need a spare table. this ioctl does not return until a
   1625 		 * spare table is needed. XXX -- calling mpsleep here in the
   1626 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1627 		 * -- I should either compute the spare table in the kernel,
   1628 		 * or have a different -- XXX XXX -- interface (a different
   1629 		 * character device) for delivering the table     -- XXX */
   1630 #if 0
   1631 	case RAIDFRAME_SPARET_WAIT:
   1632 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1633 		while (!rf_sparet_wait_queue)
   1634 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1635 		waitreq = rf_sparet_wait_queue;
   1636 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1637 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1638 
   1639 		/* structure assignment */
   1640 		*((RF_SparetWait_t *) data) = *waitreq;
   1641 
   1642 		RF_Free(waitreq, sizeof(*waitreq));
   1643 		return (0);
   1644 
   1645 		/* wakes up a process waiting on SPARET_WAIT and puts an error
    1646 		 * code in it that will cause the daemon to exit */
   1647 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1648 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1649 		waitreq->fcol = -1;
   1650 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1651 		waitreq->next = rf_sparet_wait_queue;
   1652 		rf_sparet_wait_queue = waitreq;
   1653 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1654 		wakeup(&rf_sparet_wait_queue);
   1655 		return (0);
   1656 
   1657 		/* used by the spare table daemon to deliver a spare table
   1658 		 * into the kernel */
   1659 	case RAIDFRAME_SEND_SPARET:
   1660 
   1661 		/* install the spare table */
   1662 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1663 
   1664 		/* respond to the requestor.  the return status of the spare
   1665 		 * table installation is passed in the "fcol" field */
   1666 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1667 		waitreq->fcol = retcode;
   1668 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1669 		waitreq->next = rf_sparet_resp_queue;
   1670 		rf_sparet_resp_queue = waitreq;
   1671 		wakeup(&rf_sparet_resp_queue);
   1672 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1673 
   1674 		return (retcode);
   1675 #endif
   1676 
   1677 	default:
   1678 		break; /* fall through to the os-specific code below */
   1679 
   1680 	}
   1681 
   1682 	if (!raidPtr->valid)
   1683 		return (EINVAL);
   1684 
   1685 	/*
   1686 	 * Add support for "regular" device ioctls here.
   1687 	 */
   1688 
   1689 	switch (cmd) {
   1690 	case DIOCGDINFO:
   1691 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1692 		break;
   1693 #ifdef __HAVE_OLD_DISKLABEL
   1694 	case ODIOCGDINFO:
   1695 		newlabel = *(rs->sc_dkdev.dk_label);
   1696 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1697 			return ENOTTY;
   1698 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1699 		break;
   1700 #endif
   1701 
   1702 	case DIOCGPART:
   1703 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1704 		((struct partinfo *) data)->part =
   1705 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1706 		break;
   1707 
   1708 	case DIOCWDINFO:
   1709 	case DIOCSDINFO:
   1710 #ifdef __HAVE_OLD_DISKLABEL
   1711 	case ODIOCWDINFO:
   1712 	case ODIOCSDINFO:
   1713 #endif
   1714 	{
   1715 		struct disklabel *lp;
   1716 #ifdef __HAVE_OLD_DISKLABEL
   1717 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1718 			memset(&newlabel, 0, sizeof newlabel);
   1719 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1720 			lp = &newlabel;
   1721 		} else
   1722 #endif
   1723 		lp = (struct disklabel *)data;
   1724 
   1725 		if ((error = raidlock(rs)) != 0)
   1726 			return (error);
   1727 
   1728 		rs->sc_flags |= RAIDF_LABELLING;
   1729 
   1730 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1731 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1732 		if (error == 0) {
   1733 			if (cmd == DIOCWDINFO
   1734 #ifdef __HAVE_OLD_DISKLABEL
   1735 			    || cmd == ODIOCWDINFO
   1736 #endif
   1737 			   )
   1738 				error = writedisklabel(RAIDLABELDEV(dev),
   1739 				    raidstrategy, rs->sc_dkdev.dk_label,
   1740 				    rs->sc_dkdev.dk_cpulabel);
   1741 		}
   1742 		rs->sc_flags &= ~RAIDF_LABELLING;
   1743 
   1744 		raidunlock(rs);
   1745 
   1746 		if (error)
   1747 			return (error);
   1748 		break;
   1749 	}
   1750 
   1751 	case DIOCWLABEL:
   1752 		if (*(int *) data != 0)
   1753 			rs->sc_flags |= RAIDF_WLABEL;
   1754 		else
   1755 			rs->sc_flags &= ~RAIDF_WLABEL;
   1756 		break;
   1757 
   1758 	case DIOCGDEFLABEL:
   1759 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1760 		break;
   1761 
   1762 #ifdef __HAVE_OLD_DISKLABEL
   1763 	case ODIOCGDEFLABEL:
   1764 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1765 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1766 			return ENOTTY;
   1767 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1768 		break;
   1769 #endif
   1770 
   1771 	case DIOCAWEDGE:
   1772 	case DIOCDWEDGE:
   1773 	    	dkw = (void *)data;
   1774 
   1775 		/* If the ioctl happens here, the parent is us. */
   1776 		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
   1777 		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
   1778 
   1779 	case DIOCLWEDGES:
   1780 		return dkwedge_list(&rs->sc_dkdev,
   1781 		    (struct dkwedge_list *)data, l);
   1782 
   1783 	default:
   1784 		retcode = ENOTTY;
   1785 	}
   1786 	return (retcode);
   1787 
   1788 }
   1789 
   1790 
   1791 /* raidinit -- complete the rest of the initialization for the
   1792    RAIDframe device.  */
   1793 
   1794 
   1795 static void
   1796 raidinit(RF_Raid_t *raidPtr)
   1797 {
   1798 	struct cfdata *cf;
   1799 	struct raid_softc *rs;
   1800 	int     unit;
   1801 
   1802 	unit = raidPtr->raidid;
   1803 
   1804 	rs = &raid_softc[unit];
   1805 
   1806 	/* XXX should check return code first... */
   1807 	rs->sc_flags |= RAIDF_INITED;
   1808 
   1809 	/* XXX doesn't check bounds. */
   1810 	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
   1811 
   1812 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1813 
   1814 	/* attach the pseudo device */
   1815 	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
   1816 	cf->cf_name = raid_cd.cd_name;
   1817 	cf->cf_atname = raid_cd.cd_name;
   1818 	cf->cf_unit = unit;
   1819 	cf->cf_fstate = FSTATE_STAR;
   1820 
   1821 	rs->sc_dev = config_attach_pseudo(cf);
   1822 
   1823 	if (rs->sc_dev==NULL) {
   1824 		printf("raid%d: config_attach_pseudo failed\n",
   1825 		       raidPtr->raidid);
   1826 	}
   1827 
   1828 	/* disk_attach actually creates space for the CPU disklabel, among
   1829 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1830 	 * with disklabels. */
   1831 
   1832 	disk_attach(&rs->sc_dkdev);
   1833 
   1834 	/* XXX There may be a weird interaction here between this, and
   1835 	 * protectedSectors, as used in RAIDframe.  */
   1836 
   1837 	rs->sc_size = raidPtr->totalSectors;
   1838 
   1839 	dkwedge_discover(&rs->sc_dkdev);
   1840 
   1841 	rf_set_properties(rs, raidPtr);
   1842 
   1843 }
   1844 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
   1845 /* wake up the daemon & tell it to get us a spare table
   1846  * XXX
   1847  * the entries in the queues should be tagged with the raidPtr
   1848  * so that in the extremely rare case that two recons happen at once,
    1849  * we know for which device we're requesting a spare table
   1850  * XXX
   1851  *
   1852  * XXX This code is not currently used. GO
   1853  */
   1854 int
   1855 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
   1856 {
   1857 	int     retcode;
   1858 
   1859 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1860 	req->next = rf_sparet_wait_queue;
   1861 	rf_sparet_wait_queue = req;
   1862 	wakeup(&rf_sparet_wait_queue);
   1863 
   1864 	/* mpsleep unlocks the mutex */
   1865 	while (!rf_sparet_resp_queue) {
   1866 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1867 		    "raidframe getsparetable", 0);
   1868 	}
   1869 	req = rf_sparet_resp_queue;
   1870 	rf_sparet_resp_queue = req->next;
   1871 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1872 
   1873 	retcode = req->fcol;
   1874 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1875 					 * alloc'd */
   1876 	return (retcode);
   1877 }
   1878 #endif
   1879 
   1880 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1881  * bp & passes it down.
    1882  * Any calls originating in the kernel must use non-blocking I/O.
    1883  * Do some extra sanity checking to return "appropriate" error values for
    1884  * certain conditions (to make some standard utilities work).
   1885  *
   1886  * Formerly known as: rf_DoAccessKernel
   1887  */
   1888 void
   1889 raidstart(RF_Raid_t *raidPtr)
   1890 {
   1891 	RF_SectorCount_t num_blocks, pb, sum;
   1892 	RF_RaidAddr_t raid_addr;
   1893 	struct partition *pp;
   1894 	daddr_t blocknum;
   1895 	int     unit;
   1896 	struct raid_softc *rs;
   1897 	int     do_async;
   1898 	struct buf *bp;
   1899 	int rc;
   1900 
   1901 	unit = raidPtr->raidid;
   1902 	rs = &raid_softc[unit];
   1903 
   1904 	/* quick check to see if anything has died recently */
   1905 	RF_LOCK_MUTEX(raidPtr->mutex);
   1906 	if (raidPtr->numNewFailures > 0) {
   1907 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1908 		rf_update_component_labels(raidPtr,
   1909 					   RF_NORMAL_COMPONENT_UPDATE);
   1910 		RF_LOCK_MUTEX(raidPtr->mutex);
   1911 		raidPtr->numNewFailures--;
   1912 	}
   1913 
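	/* raidPtr->openings throttles how many accesses this set will take
	 * on at once: each buf dispatched below consumes one opening, and
	 * openings are given back (in the I/O completion path, elsewhere)
	 * as accesses finish.  When openings reaches zero we simply leave
	 * any remaining bufs on the queue for a later call. */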
   1914 	/* Check to see if we're at the limit... */
   1915 	while (raidPtr->openings > 0) {
   1916 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1917 
   1918 		/* get the next item, if any, from the queue */
   1919 		if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
   1920 			/* nothing more to do */
   1921 			return;
   1922 		}
   1923 
   1924 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1925 		 * partition.. Need to make it absolute to the underlying
   1926 		 * device.. */
   1927 
   1928 		blocknum = bp->b_blkno;
   1929 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1930 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1931 			blocknum += pp->p_offset;
   1932 		}
   1933 
   1934 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1935 			    (int) blocknum));
   1936 
   1937 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1938 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1939 
   1940 		/* *THIS* is where we adjust what block we're going to...
   1941 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1942 		raid_addr = blocknum;
   1943 
   1944 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1945 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1946 		sum = raid_addr + num_blocks + pb;
   1947 		if (1 || rf_debugKernelAccess) {
   1948 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1949 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1950 				    (int) pb, (int) bp->b_resid));
   1951 		}
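		/* The checks below reject requests that run past the end of
		 * the array, as well as ones whose unsigned sector arithmetic
		 * wrapped around (sum smaller than one of its addends). */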
   1952 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1953 		    || (sum < num_blocks) || (sum < pb)) {
   1954 			bp->b_error = ENOSPC;
   1955 			bp->b_flags |= B_ERROR;
   1956 			bp->b_resid = bp->b_bcount;
   1957 			biodone(bp);
   1958 			RF_LOCK_MUTEX(raidPtr->mutex);
   1959 			continue;
   1960 		}
   1961 		/*
   1962 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1963 		 */
   1964 
   1965 		if (bp->b_bcount & raidPtr->sectorMask) {
   1966 			bp->b_error = EINVAL;
   1967 			bp->b_flags |= B_ERROR;
   1968 			bp->b_resid = bp->b_bcount;
   1969 			biodone(bp);
   1970 			RF_LOCK_MUTEX(raidPtr->mutex);
   1971 			continue;
   1972 
   1973 		}
   1974 		db1_printf(("Calling DoAccess..\n"));
   1975 
   1976 
   1977 		RF_LOCK_MUTEX(raidPtr->mutex);
   1978 		raidPtr->openings--;
   1979 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1980 
   1981 		/*
   1982 		 * Everything is async.
   1983 		 */
   1984 		do_async = 1;
   1985 
   1986 		disk_busy(&rs->sc_dkdev);
   1987 
   1988 		/* XXX we're still at splbio() here... do we *really*
   1989 		   need to be? */
   1990 
   1991 		/* don't ever condition on bp->b_flags & B_WRITE.
   1992 		 * always condition on B_READ instead */
   1993 
   1994 		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1995 				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1996 				 do_async, raid_addr, num_blocks,
   1997 				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   1998 
   1999 		if (rc) {
   2000 			bp->b_error = rc;
   2001 			bp->b_flags |= B_ERROR;
   2002 			bp->b_resid = bp->b_bcount;
   2003 			biodone(bp);
   2004 			/* continue loop */
   2005 		}
   2006 
   2007 		RF_LOCK_MUTEX(raidPtr->mutex);
   2008 	}
   2009 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   2010 }
   2011 
   2012 
   2013 
   2014 
   2015 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   2016 
   2017 int
   2018 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
   2019 {
   2020 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   2021 	struct buf *bp;
   2022 
   2023 	req->queue = queue;
   2024 
   2025 #if DIAGNOSTIC
   2026 	if (queue->raidPtr->raidid >= numraid) {
   2027 		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
   2028 		    numraid);
   2029 		panic("Invalid Unit number in rf_DispatchKernelIO");
   2030 	}
   2031 #endif
   2032 
   2033 	bp = req->bp;
   2034 
   2035 	switch (req->type) {
   2036 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   2037 		/* XXX need to do something extra here.. */
   2038 		/* I'm leaving this in, as I've never actually seen it used,
   2039 		 * and I'd like folks to report it... GO */
    2040 		printf("WAKEUP CALLED\n");
   2041 		queue->numOutstanding++;
   2042 
   2043 		bp->b_flags = 0;
   2044 		bp->b_private = req;
   2045 
   2046 		KernelWakeupFunc(bp);
   2047 		break;
   2048 
   2049 	case RF_IO_TYPE_READ:
   2050 	case RF_IO_TYPE_WRITE:
   2051 #if RF_ACC_TRACE > 0
   2052 		if (req->tracerec) {
   2053 			RF_ETIMER_START(req->tracerec->timer);
   2054 		}
   2055 #endif
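		/* Wrap this request in the caller-supplied struct buf and
		 * hand it to the component's strategy routine below; when the
		 * transfer finishes, biodone() runs KernelWakeupFunc(), which
		 * queues the request on raidPtr->iodone for the raidio
		 * thread to pick up. */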
   2056 		InitBP(bp, queue->rf_cinfo->ci_vp,
   2057 		    op, queue->rf_cinfo->ci_dev,
   2058 		    req->sectorOffset, req->numSector,
   2059 		    req->buf, KernelWakeupFunc, (void *) req,
   2060 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   2061 
   2062 		if (rf_debugKernelAccess) {
   2063 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   2064 				(long) bp->b_blkno));
   2065 		}
   2066 		queue->numOutstanding++;
   2067 		queue->last_deq_sector = req->sectorOffset;
   2068 		/* acc wouldn't have been let in if there were any pending
   2069 		 * reqs at any other priority */
   2070 		queue->curPriority = req->priority;
   2071 
   2072 		db1_printf(("Going for %c to unit %d col %d\n",
   2073 			    req->type, queue->raidPtr->raidid,
   2074 			    queue->col));
   2075 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   2076 			(int) req->sectorOffset, (int) req->numSector,
   2077 			(int) (req->numSector <<
   2078 			    queue->raidPtr->logBytesPerSector),
   2079 			(int) queue->raidPtr->logBytesPerSector));
   2080 		VOP_STRATEGY(bp->b_vp, bp);
   2081 
   2082 		break;
   2083 
   2084 	default:
   2085 		panic("bad req->type in rf_DispatchKernelIO");
   2086 	}
   2087 	db1_printf(("Exiting from DispatchKernelIO\n"));
   2088 
   2089 	return (0);
   2090 }
    2091 /* this is the callback function associated with an I/O invoked from
   2092    kernel code.
   2093  */
   2094 static void
   2095 KernelWakeupFunc(struct buf *bp)
   2096 {
   2097 	RF_DiskQueueData_t *req = NULL;
   2098 	RF_DiskQueue_t *queue;
   2099 	int s;
   2100 
   2101 	s = splbio();
   2102 	db1_printf(("recovering the request queue:\n"));
   2103 	req = bp->b_private;
   2104 
   2105 	queue = (RF_DiskQueue_t *) req->queue;
   2106 
   2107 #if RF_ACC_TRACE > 0
   2108 	if (req->tracerec) {
   2109 		RF_ETIMER_STOP(req->tracerec->timer);
   2110 		RF_ETIMER_EVAL(req->tracerec->timer);
   2111 		RF_LOCK_MUTEX(rf_tracing_mutex);
   2112 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2113 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2114 		req->tracerec->num_phys_ios++;
   2115 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   2116 	}
   2117 #endif
   2118 
   2119 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   2120 	 * ballistic, and mark the component as hosed... */
   2121 
   2122 	if (bp->b_flags & B_ERROR) {
   2123 		/* Mark the disk as dead */
   2124 		/* but only mark it once... */
   2125 		/* and only if it wouldn't leave this RAID set
   2126 		   completely broken */
   2127 		if (((queue->raidPtr->Disks[queue->col].status ==
   2128 		      rf_ds_optimal) ||
   2129 		     (queue->raidPtr->Disks[queue->col].status ==
   2130 		      rf_ds_used_spare)) &&
   2131 		     (queue->raidPtr->numFailures <
   2132 		      queue->raidPtr->Layout.map->faultsTolerated)) {
   2133 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   2134 			       queue->raidPtr->raidid,
   2135 			       queue->raidPtr->Disks[queue->col].devname);
   2136 			queue->raidPtr->Disks[queue->col].status =
   2137 			    rf_ds_failed;
   2138 			queue->raidPtr->status = rf_rs_degraded;
   2139 			queue->raidPtr->numFailures++;
   2140 			queue->raidPtr->numNewFailures++;
   2141 		} else {	/* Disk is already dead... */
   2142 			/* printf("Disk already marked as dead!\n"); */
   2143 		}
   2144 
   2145 	}
   2146 
   2147 	/* Fill in the error value */
   2148 
   2149 	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
   2150 
   2151 	simple_lock(&queue->raidPtr->iodone_lock);
   2152 
   2153 	/* Drop this one on the "finished" queue... */
   2154 	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
   2155 
   2156 	/* Let the raidio thread know there is work to be done. */
   2157 	wakeup(&(queue->raidPtr->iodone));
   2158 
   2159 	simple_unlock(&queue->raidPtr->iodone_lock);
   2160 
   2161 	splx(s);
   2162 }
   2163 
   2164 
   2165 
   2166 /*
   2167  * initialize a buf structure for doing an I/O in the kernel.
   2168  */
   2169 static void
   2170 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
   2171        RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf,
   2172        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
   2173        struct proc *b_proc)
   2174 {
   2175 	/* bp->b_flags       = B_PHYS | rw_flag; */
   2176 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   2177 	bp->b_bcount = numSect << logBytesPerSector;
   2178 	bp->b_bufsize = bp->b_bcount;
   2179 	bp->b_error = 0;
   2180 	bp->b_dev = dev;
   2181 	bp->b_data = bf;
   2182 	bp->b_blkno = startSect;
   2183 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   2184 	if (bp->b_bcount == 0) {
   2185 		panic("bp->b_bcount is zero in InitBP!!");
   2186 	}
   2187 	bp->b_proc = b_proc;
   2188 	bp->b_iodone = cbFunc;
   2189 	bp->b_private = cbArg;
   2190 	bp->b_vp = b_vp;
   2191 	if ((bp->b_flags & B_READ) == 0) {
   2192 		bp->b_vp->v_numoutput++;
   2193 	}
   2194 
   2195 }
   2196 
   2197 static void
   2198 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
   2199 		    struct disklabel *lp)
   2200 {
   2201 	memset(lp, 0, sizeof(*lp));
   2202 
   2203 	/* fabricate a label... */
   2204 	lp->d_secperunit = raidPtr->totalSectors;
   2205 	lp->d_secsize = raidPtr->bytesPerSector;
   2206 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2207 	lp->d_ntracks = 4 * raidPtr->numCol;
   2208 	lp->d_ncylinders = raidPtr->totalSectors /
   2209 		(lp->d_nsectors * lp->d_ntracks);
   2210 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
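	/* The geometry above is entirely synthetic.  For example (values
	 * assumed purely for illustration): a 5-column RAID 5 set with 32
	 * sectors per stripe unit would advertise d_nsectors = 128 (four
	 * data columns * 32) and d_ntracks = 20. */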
   2211 
   2212 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2213 	lp->d_type = DTYPE_RAID;
   2214 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2215 	lp->d_rpm = 3600;
   2216 	lp->d_interleave = 1;
   2217 	lp->d_flags = 0;
   2218 
   2219 	lp->d_partitions[RAW_PART].p_offset = 0;
   2220 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2221 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2222 	lp->d_npartitions = RAW_PART + 1;
   2223 
   2224 	lp->d_magic = DISKMAGIC;
   2225 	lp->d_magic2 = DISKMAGIC;
   2226 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2227 
   2228 }
   2229 /*
   2230  * Read the disklabel from the raid device.  If one is not present, fake one
   2231  * up.
   2232  */
   2233 static void
   2234 raidgetdisklabel(dev_t dev)
   2235 {
   2236 	int     unit = raidunit(dev);
   2237 	struct raid_softc *rs = &raid_softc[unit];
   2238 	const char   *errstring;
   2239 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2240 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2241 	RF_Raid_t *raidPtr;
   2242 
   2243 	db1_printf(("Getting the disklabel...\n"));
   2244 
   2245 	memset(clp, 0, sizeof(*clp));
   2246 
   2247 	raidPtr = raidPtrs[unit];
   2248 
   2249 	raidgetdefaultlabel(raidPtr, rs, lp);
   2250 
   2251 	/*
   2252 	 * Call the generic disklabel extraction routine.
   2253 	 */
   2254 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2255 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2256 	if (errstring)
   2257 		raidmakedisklabel(rs);
   2258 	else {
   2259 		int     i;
   2260 		struct partition *pp;
   2261 
   2262 		/*
   2263 		 * Sanity check whether the found disklabel is valid.
   2264 		 *
    2265 		 * This is necessary since the total size of the raid device
    2266 		 * may vary when the interleave is changed even though exactly
    2267 		 * the same components are used, and an old disklabel may be
    2268 		 * used if one is found.
   2269 		 */
   2270 		if (lp->d_secperunit != rs->sc_size)
   2271 			printf("raid%d: WARNING: %s: "
   2272 			    "total sector size in disklabel (%d) != "
   2273 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
   2274 			    lp->d_secperunit, (long) rs->sc_size);
   2275 		for (i = 0; i < lp->d_npartitions; i++) {
   2276 			pp = &lp->d_partitions[i];
   2277 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2278 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2279 				       "exceeds the size of raid (%ld)\n",
   2280 				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2281 		}
   2282 	}
   2283 
   2284 }
   2285 /*
   2286  * Take care of things one might want to take care of in the event
   2287  * that a disklabel isn't present.
   2288  */
   2289 static void
   2290 raidmakedisklabel(struct raid_softc *rs)
   2291 {
   2292 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2293 	db1_printf(("Making a label..\n"));
   2294 
   2295 	/*
   2296 	 * For historical reasons, if there's no disklabel present
   2297 	 * the raw partition must be marked FS_BSDFFS.
   2298 	 */
   2299 
   2300 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2301 
   2302 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2303 
   2304 	lp->d_checksum = dkcksum(lp);
   2305 }
   2306 /*
   2307  * Wait interruptibly for an exclusive lock.
   2308  *
   2309  * XXX
   2310  * Several drivers do this; it should be abstracted and made MP-safe.
   2311  * (Hmm... where have we seen this warning before :->  GO )
   2312  */
   2313 static int
   2314 raidlock(struct raid_softc *rs)
   2315 {
   2316 	int     error;
   2317 
   2318 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2319 		rs->sc_flags |= RAIDF_WANTED;
   2320 		if ((error =
   2321 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2322 			return (error);
   2323 	}
   2324 	rs->sc_flags |= RAIDF_LOCKED;
   2325 	return (0);
   2326 }
   2327 /*
   2328  * Unlock and wake up any waiters.
   2329  */
   2330 static void
   2331 raidunlock(struct raid_softc *rs)
   2332 {
   2333 
   2334 	rs->sc_flags &= ~RAIDF_LOCKED;
   2335 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2336 		rs->sc_flags &= ~RAIDF_WANTED;
   2337 		wakeup(rs);
   2338 	}
   2339 }
   2340 
   2341 
   2342 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2343 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
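/* The component label lives at a fixed byte offset near the front of each
 * component, within the region RAIDframe reserves (see rf_protectedSectors)
 * so that it is never overwritten by ordinary array data.  The reads and
 * writes below go straight to the component's block device, bypassing the
 * RAIDframe access machinery. */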
   2344 
   2345 int
   2346 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2347 {
   2348 	RF_ComponentLabel_t clabel;
   2349 	raidread_component_label(dev, b_vp, &clabel);
   2350 	clabel.mod_counter = mod_counter;
   2351 	clabel.clean = RF_RAID_CLEAN;
   2352 	raidwrite_component_label(dev, b_vp, &clabel);
   2353 	return(0);
   2354 }
   2355 
   2356 
   2357 int
   2358 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2359 {
   2360 	RF_ComponentLabel_t clabel;
   2361 	raidread_component_label(dev, b_vp, &clabel);
   2362 	clabel.mod_counter = mod_counter;
   2363 	clabel.clean = RF_RAID_DIRTY;
   2364 	raidwrite_component_label(dev, b_vp, &clabel);
   2365 	return(0);
   2366 }
   2367 
   2368 /* ARGSUSED */
   2369 int
   2370 raidread_component_label(dev_t dev, struct vnode *b_vp,
   2371 			 RF_ComponentLabel_t *clabel)
   2372 {
   2373 	struct buf *bp;
   2374 	const struct bdevsw *bdev;
   2375 	int error;
   2376 
   2377 	/* XXX should probably ensure that we don't try to do this if
   2378 	   someone has changed rf_protected_sectors. */
   2379 
   2380 	if (b_vp == NULL) {
   2381 		/* For whatever reason, this component is not valid.
   2382 		   Don't try to read a component label from it. */
   2383 		return(EINVAL);
   2384 	}
   2385 
   2386 	/* get a block of the appropriate size... */
   2387 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2388 	bp->b_dev = dev;
   2389 
   2390 	/* get our ducks in a row for the read */
   2391 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2392 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2393 	bp->b_flags |= B_READ;
   2394  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2395 
   2396 	bdev = bdevsw_lookup(bp->b_dev);
   2397 	if (bdev == NULL)
   2398 		return (ENXIO);
   2399 	(*bdev->d_strategy)(bp);
   2400 
   2401 	error = biowait(bp);
   2402 
   2403 	if (!error) {
   2404 		memcpy(clabel, bp->b_data,
   2405 		       sizeof(RF_ComponentLabel_t));
   2406 	}
   2407 
   2408 	brelse(bp);
   2409 	return(error);
   2410 }
   2411 /* ARGSUSED */
   2412 int
   2413 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
   2414 			  RF_ComponentLabel_t *clabel)
   2415 {
   2416 	struct buf *bp;
   2417 	const struct bdevsw *bdev;
   2418 	int error;
   2419 
   2420 	/* get a block of the appropriate size... */
   2421 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2422 	bp->b_dev = dev;
   2423 
   2424 	/* get our ducks in a row for the write */
   2425 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2426 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2427 	bp->b_flags |= B_WRITE;
   2428  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2429 
   2430 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2431 
   2432 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2433 
   2434 	bdev = bdevsw_lookup(bp->b_dev);
   2435 	if (bdev == NULL)
   2436 		return (ENXIO);
   2437 	(*bdev->d_strategy)(bp);
   2438 	error = biowait(bp);
   2439 	brelse(bp);
   2440 	if (error) {
   2441 #if 1
   2442 		printf("Failed to write RAID component info!\n");
   2443 #endif
   2444 	}
   2445 
   2446 	return(error);
   2447 }
   2448 
   2449 void
   2450 rf_markalldirty(RF_Raid_t *raidPtr)
   2451 {
   2452 	RF_ComponentLabel_t clabel;
   2453 	int sparecol;
   2454 	int c;
   2455 	int j;
   2456 	int scol = -1;
   2457 
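	/* Bump the modification counter first: the labels written below all
	 * carry the new value, which is what autoconfiguration later uses to
	 * tell current components from stale ones. */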
   2458 	raidPtr->mod_counter++;
   2459 	for (c = 0; c < raidPtr->numCol; c++) {
   2460 		/* we don't want to touch (at all) a disk that has
   2461 		   failed */
   2462 		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
   2463 			raidread_component_label(
   2464 						 raidPtr->Disks[c].dev,
   2465 						 raidPtr->raid_cinfo[c].ci_vp,
   2466 						 &clabel);
   2467 			if (clabel.status == rf_ds_spared) {
   2468 				/* XXX do something special...
   2469 				   but whatever you do, don't
   2470 				   try to access it!! */
   2471 			} else {
   2472 				raidmarkdirty(
   2473 					      raidPtr->Disks[c].dev,
   2474 					      raidPtr->raid_cinfo[c].ci_vp,
   2475 					      raidPtr->mod_counter);
   2476 			}
   2477 		}
   2478 	}
   2479 
   2480 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2481 		sparecol = raidPtr->numCol + c;
   2482 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2483 			/*
   2484 
   2485 			   we claim this disk is "optimal" if it's
   2486 			   rf_ds_used_spare, as that means it should be
   2487 			   directly substitutable for the disk it replaced.
   2488 			   We note that too...
   2489 
   2490 			 */
   2491 
   2492 			for(j=0;j<raidPtr->numCol;j++) {
   2493 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2494 					scol = j;
   2495 					break;
   2496 				}
   2497 			}
   2498 
   2499 			raidread_component_label(
   2500 				 raidPtr->Disks[sparecol].dev,
   2501 				 raidPtr->raid_cinfo[sparecol].ci_vp,
   2502 				 &clabel);
   2503 			/* make sure status is noted */
   2504 
   2505 			raid_init_component_label(raidPtr, &clabel);
   2506 
   2507 			clabel.row = 0;
   2508 			clabel.column = scol;
   2509 			/* Note: we *don't* change status from rf_ds_used_spare
   2510 			   to rf_ds_optimal */
   2511 			/* clabel.status = rf_ds_optimal; */
   2512 
   2513 			raidmarkdirty(raidPtr->Disks[sparecol].dev,
   2514 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2515 				      raidPtr->mod_counter);
   2516 		}
   2517 	}
   2518 }
   2519 
   2520 
   2521 void
   2522 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
   2523 {
   2524 	RF_ComponentLabel_t clabel;
   2525 	int sparecol;
   2526 	int c;
   2527 	int j;
   2528 	int scol;
   2529 
   2530 	scol = -1;
   2531 
   2532 	/* XXX should do extra checks to make sure things really are clean,
   2533 	   rather than blindly setting the clean bit... */
   2534 
   2535 	raidPtr->mod_counter++;
   2536 
   2537 	for (c = 0; c < raidPtr->numCol; c++) {
   2538 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   2539 			raidread_component_label(
   2540 						 raidPtr->Disks[c].dev,
   2541 						 raidPtr->raid_cinfo[c].ci_vp,
   2542 						 &clabel);
   2543 			/* make sure status is noted */
   2544 			clabel.status = rf_ds_optimal;
   2545 
   2546 			/* bump the counter */
   2547 			clabel.mod_counter = raidPtr->mod_counter;
   2548 
   2549 			/* note what unit we are configured as */
   2550 			clabel.last_unit = raidPtr->raidid;
   2551 
   2552 			raidwrite_component_label(
   2553 						  raidPtr->Disks[c].dev,
   2554 						  raidPtr->raid_cinfo[c].ci_vp,
   2555 						  &clabel);
   2556 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2557 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2558 					raidmarkclean(
   2559 						      raidPtr->Disks[c].dev,
   2560 						      raidPtr->raid_cinfo[c].ci_vp,
   2561 						      raidPtr->mod_counter);
   2562 				}
   2563 			}
   2564 		}
   2565 		/* else we don't touch it.. */
   2566 	}
   2567 
   2568 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2569 		sparecol = raidPtr->numCol + c;
   2570 		/* Need to ensure that the reconstruct actually completed! */
   2571 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2572 			/*
   2573 
   2574 			   we claim this disk is "optimal" if it's
   2575 			   rf_ds_used_spare, as that means it should be
   2576 			   directly substitutable for the disk it replaced.
   2577 			   We note that too...
   2578 
   2579 			 */
   2580 
   2581 			for(j=0;j<raidPtr->numCol;j++) {
   2582 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2583 					scol = j;
   2584 					break;
   2585 				}
   2586 			}
   2587 
   2588 			/* XXX shouldn't *really* need this... */
   2589 			raidread_component_label(
   2590 				      raidPtr->Disks[sparecol].dev,
   2591 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2592 				      &clabel);
   2593 			/* make sure status is noted */
   2594 
   2595 			raid_init_component_label(raidPtr, &clabel);
   2596 
   2597 			clabel.mod_counter = raidPtr->mod_counter;
   2598 			clabel.column = scol;
   2599 			clabel.status = rf_ds_optimal;
   2600 			clabel.last_unit = raidPtr->raidid;
   2601 
   2602 			raidwrite_component_label(
   2603 				      raidPtr->Disks[sparecol].dev,
   2604 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2605 				      &clabel);
   2606 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2607 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2608 					raidmarkclean( raidPtr->Disks[sparecol].dev,
   2609 						       raidPtr->raid_cinfo[sparecol].ci_vp,
   2610 						       raidPtr->mod_counter);
   2611 				}
   2612 			}
   2613 		}
   2614 	}
   2615 }
   2616 
   2617 void
   2618 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
   2619 {
   2620 	struct proc *p;
   2621 	struct lwp *l;
   2622 
   2623 	p = raidPtr->engine_thread;
   2624 	l = LIST_FIRST(&p->p_lwps);
   2625 
   2626 	if (vp != NULL) {
   2627 		if (auto_configured == 1) {
   2628 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2629 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2630 			vput(vp);
   2631 
   2632 		} else {
   2633 			(void) vn_close(vp, FREAD | FWRITE, p->p_cred, l);
   2634 		}
   2635 	}
   2636 }
   2637 
   2638 
   2639 void
   2640 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
   2641 {
   2642 	int r,c;
   2643 	struct vnode *vp;
   2644 	int acd;
   2645 
   2646 
   2647 	/* We take this opportunity to close the vnodes like we should.. */
   2648 
   2649 	for (c = 0; c < raidPtr->numCol; c++) {
   2650 		vp = raidPtr->raid_cinfo[c].ci_vp;
   2651 		acd = raidPtr->Disks[c].auto_configured;
   2652 		rf_close_component(raidPtr, vp, acd);
   2653 		raidPtr->raid_cinfo[c].ci_vp = NULL;
   2654 		raidPtr->Disks[c].auto_configured = 0;
   2655 	}
   2656 
   2657 	for (r = 0; r < raidPtr->numSpare; r++) {
   2658 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
   2659 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
   2660 		rf_close_component(raidPtr, vp, acd);
   2661 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
   2662 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
   2663 	}
   2664 }
   2665 
   2666 
   2667 void
   2668 rf_ReconThread(struct rf_recon_req *req)
   2669 {
   2670 	int     s;
   2671 	RF_Raid_t *raidPtr;
   2672 
   2673 	s = splbio();
   2674 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2675 	raidPtr->recon_in_progress = 1;
   2676 
   2677 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
   2678 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2679 
   2680 	RF_Free(req, sizeof(*req));
   2681 
   2682 	raidPtr->recon_in_progress = 0;
   2683 	splx(s);
   2684 
   2685 	/* That's all... */
   2686 	kthread_exit(0);	/* does not return */
   2687 }
   2688 
   2689 void
   2690 rf_RewriteParityThread(RF_Raid_t *raidPtr)
   2691 {
   2692 	int retcode;
   2693 	int s;
   2694 
   2695 	raidPtr->parity_rewrite_stripes_done = 0;
   2696 	raidPtr->parity_rewrite_in_progress = 1;
   2697 	s = splbio();
   2698 	retcode = rf_RewriteParity(raidPtr);
   2699 	splx(s);
   2700 	if (retcode) {
   2701 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2702 	} else {
   2703 		/* set the clean bit!  If we shutdown correctly,
   2704 		   the clean bit on each component label will get
   2705 		   set */
   2706 		raidPtr->parity_good = RF_RAID_CLEAN;
   2707 	}
   2708 	raidPtr->parity_rewrite_in_progress = 0;
   2709 
   2710 	/* Anyone waiting for us to stop?  If so, inform them... */
   2711 	if (raidPtr->waitShutdown) {
   2712 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2713 	}
   2714 
   2715 	/* That's all... */
   2716 	kthread_exit(0);	/* does not return */
   2717 }
   2718 
   2719 
   2720 void
   2721 rf_CopybackThread(RF_Raid_t *raidPtr)
   2722 {
   2723 	int s;
   2724 
   2725 	raidPtr->copyback_in_progress = 1;
   2726 	s = splbio();
   2727 	rf_CopybackReconstructedData(raidPtr);
   2728 	splx(s);
   2729 	raidPtr->copyback_in_progress = 0;
   2730 
   2731 	/* That's all... */
   2732 	kthread_exit(0);	/* does not return */
   2733 }
   2734 
   2735 
   2736 void
   2737 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
   2738 {
   2739 	int s;
   2740 	RF_Raid_t *raidPtr;
   2741 
   2742 	s = splbio();
   2743 	raidPtr = req->raidPtr;
   2744 	raidPtr->recon_in_progress = 1;
   2745 	rf_ReconstructInPlace(raidPtr, req->col);
   2746 	RF_Free(req, sizeof(*req));
   2747 	raidPtr->recon_in_progress = 0;
   2748 	splx(s);
   2749 
   2750 	/* That's all... */
   2751 	kthread_exit(0);	/* does not return */
   2752 }
   2753 
   2754 static RF_AutoConfig_t *
   2755 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
   2756     const char *cname, RF_SectorCount_t size)
   2757 {
   2758 	int good_one = 0;
   2759 	RF_ComponentLabel_t *clabel;
   2760 	RF_AutoConfig_t *ac;
   2761 
   2762 	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
   2763 	if (clabel == NULL) {
   2764 oomem:
   2765 		    while(ac_list) {
   2766 			    ac = ac_list;
   2767 			    if (ac->clabel)
   2768 				    free(ac->clabel, M_RAIDFRAME);
   2769 			    ac_list = ac_list->next;
   2770 			    free(ac, M_RAIDFRAME);
   2771 		    }
   2772 		    printf("RAID auto config: out of memory!\n");
   2773 		    return NULL; /* XXX probably should panic? */
   2774 	}
   2775 
   2776 	if (!raidread_component_label(dev, vp, clabel)) {
   2777 		    /* Got the label.  Does it look reasonable? */
   2778 		    if (rf_reasonable_label(clabel) &&
   2779 			(clabel->partitionSize <= size)) {
   2780 #ifdef DEBUG
   2781 			    printf("Component on: %s: %llu\n",
   2782 				cname, (unsigned long long)size);
   2783 			    rf_print_component_label(clabel);
   2784 #endif
   2785 			    /* if it's reasonable, add it, else ignore it. */
   2786 			    ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
   2787 				M_NOWAIT);
   2788 			    if (ac == NULL) {
   2789 				    free(clabel, M_RAIDFRAME);
   2790 				    goto oomem;
   2791 			    }
   2792 			    strlcpy(ac->devname, cname, sizeof(ac->devname));
   2793 			    ac->dev = dev;
   2794 			    ac->vp = vp;
   2795 			    ac->clabel = clabel;
   2796 			    ac->next = ac_list;
   2797 			    ac_list = ac;
   2798 			    good_one = 1;
   2799 		    }
   2800 	}
   2801 	if (!good_one) {
   2802 		/* cleanup */
   2803 		free(clabel, M_RAIDFRAME);
   2804 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2805 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2806 		vput(vp);
   2807 	}
   2808 	return ac_list;
   2809 }
   2810 
   2811 RF_AutoConfig_t *
    2812 rf_find_raid_components(void)
   2813 {
   2814 	struct vnode *vp;
   2815 	struct disklabel label;
   2816 	struct device *dv;
   2817 	dev_t dev;
   2818 	int bmajor, bminor, wedge;
   2819 	int error;
   2820 	int i;
   2821 	RF_AutoConfig_t *ac_list;
   2822 
   2823 
   2824 	/* initialize the AutoConfig list */
   2825 	ac_list = NULL;
   2826 
   2827 	/* we begin by trolling through *all* the devices on the system */
   2828 
   2829 	for (dv = alldevs.tqh_first; dv != NULL;
   2830 	     dv = dv->dv_list.tqe_next) {
   2831 
   2832 		/* we are only interested in disks... */
   2833 		if (device_class(dv) != DV_DISK)
   2834 			continue;
   2835 
   2836 		/* we don't care about floppies... */
   2837 		if (device_is_a(dv, "fd")) {
   2838 			continue;
   2839 		}
   2840 
   2841 		/* we don't care about CD's... */
   2842 		if (device_is_a(dv, "cd")) {
   2843 			continue;
   2844 		}
   2845 
   2846 		/* hdfd is the Atari/Hades floppy driver */
   2847 		if (device_is_a(dv, "hdfd")) {
   2848 			continue;
   2849 		}
   2850 
   2851 		/* fdisa is the Atari/Milan floppy driver */
   2852 		if (device_is_a(dv, "fdisa")) {
   2853 			continue;
   2854 		}
   2855 
   2856 		/* need to find the device_name_to_block_device_major stuff */
   2857 		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
   2858 
   2859 		/* get a vnode for the raw partition of this disk */
   2860 
   2861 		wedge = device_is_a(dv, "dk");
   2862 		bminor = minor(device_unit(dv));
   2863 		dev = wedge ? makedev(bmajor, bminor) :
   2864 		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
   2865 		if (bdevvp(dev, &vp))
   2866 			panic("RAID can't alloc vnode");
   2867 
   2868 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2869 
   2870 		if (error) {
   2871 			/* "Who cares."  Continue looking
    2872 			   for something that exists */
   2873 			vput(vp);
   2874 			continue;
   2875 		}
   2876 
   2877 		if (wedge) {
   2878 			struct dkwedge_info dkw;
   2879 			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
   2880 			    NOCRED, 0);
   2881 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2882 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2883 			vput(vp);
   2884 			if (error) {
   2885 				printf("RAIDframe: can't get wedge info for "
   2886 				    "dev %s (%d)\n", dv->dv_xname, error);
   2887 				continue;
   2888 			}
   2889 
   2890 			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0)
   2891 				continue;
   2892 
   2893 			ac_list = rf_get_component(ac_list, dev, vp,
   2894 			    dv->dv_xname, dkw.dkw_size);
   2895 			continue;
   2896 		}
   2897 
   2898 		/* Ok, the disk exists.  Go get the disklabel. */
   2899 		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
   2900 		if (error) {
   2901 			/*
   2902 			 * XXX can't happen - open() would
   2903 			 * have errored out (or faked up one)
   2904 			 */
   2905 			if (error != ENOTTY)
   2906 				printf("RAIDframe: can't get label for dev "
   2907 				    "%s (%d)\n", dv->dv_xname, error);
   2908 		}
   2909 
   2910 		/* don't need this any more.  We'll allocate it again
   2911 		   a little later if we really do... */
   2912 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2913 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2914 		vput(vp);
   2915 
   2916 		if (error)
   2917 			continue;
   2918 
   2919 		for (i = 0; i < label.d_npartitions; i++) {
   2920 			char cname[sizeof(ac_list->devname)];
   2921 
   2922 			/* We only support partitions marked as RAID */
   2923 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2924 				continue;
   2925 
   2926 			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
   2927 			if (bdevvp(dev, &vp))
   2928 				panic("RAID can't alloc vnode");
   2929 
   2930 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2931 			if (error) {
   2932 				/* Whatever... */
   2933 				vput(vp);
   2934 				continue;
   2935 			}
   2936 			snprintf(cname, sizeof(cname), "%s%c",
   2937 			    dv->dv_xname, 'a' + i);
   2938 			ac_list = rf_get_component(ac_list, dev, vp, cname,
   2939 				label.d_partitions[i].p_size);
   2940 		}
   2941 	}
   2942 	return ac_list;
   2943 }
   2944 
   2945 
   2946 static int
   2947 rf_reasonable_label(RF_ComponentLabel_t *clabel)
   2948 {
   2949 
   2950 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2951 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2952 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2953 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2954 	    clabel->row >=0 &&
   2955 	    clabel->column >= 0 &&
   2956 	    clabel->num_rows > 0 &&
   2957 	    clabel->num_columns > 0 &&
   2958 	    clabel->row < clabel->num_rows &&
   2959 	    clabel->column < clabel->num_columns &&
   2960 	    clabel->blockSize > 0 &&
   2961 	    clabel->numBlocks > 0) {
   2962 		/* label looks reasonable enough... */
   2963 		return(1);
   2964 	}
   2965 	return(0);
   2966 }
   2967 
   2968 
   2969 #ifdef DEBUG
   2970 void
   2971 rf_print_component_label(RF_ComponentLabel_t *clabel)
   2972 {
   2973 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2974 	       clabel->row, clabel->column,
   2975 	       clabel->num_rows, clabel->num_columns);
   2976 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2977 	       clabel->version, clabel->serial_number,
   2978 	       clabel->mod_counter);
   2979 	printf("   Clean: %s Status: %d\n",
   2980 	       clabel->clean ? "Yes" : "No", clabel->status );
   2981 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2982 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2983 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2984 	       (char) clabel->parityConfig, clabel->blockSize,
   2985 	       clabel->numBlocks);
   2986 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2987 	printf("   Contains root partition: %s\n",
   2988 	       clabel->root_partition ? "Yes" : "No" );
   2989 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2990 #if 0
   2991 	   printf("   Config order: %d\n", clabel->config_order);
   2992 #endif
   2993 
   2994 }
   2995 #endif
   2996 
   2997 RF_ConfigSet_t *
   2998 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
   2999 {
   3000 	RF_AutoConfig_t *ac;
   3001 	RF_ConfigSet_t *config_sets;
   3002 	RF_ConfigSet_t *cset;
   3003 	RF_AutoConfig_t *ac_next;
   3004 
   3005 
   3006 	config_sets = NULL;
   3007 
   3008 	/* Go through the AutoConfig list, and figure out which components
   3009 	   belong to what sets.  */
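	/* Grouping strategy: walk the component list once; each component is
	 * compared (via rf_does_it_fit()) against the first member of every
	 * existing set and joins the first set that matches.  If none
	 * matches, it starts a new set of its own. */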
   3010 	ac = ac_list;
   3011 	while(ac!=NULL) {
   3012 		/* we're going to putz with ac->next, so save it here
   3013 		   for use at the end of the loop */
   3014 		ac_next = ac->next;
   3015 
   3016 		if (config_sets == NULL) {
   3017 			/* will need at least this one... */
   3018 			config_sets = (RF_ConfigSet_t *)
   3019 				malloc(sizeof(RF_ConfigSet_t),
   3020 				       M_RAIDFRAME, M_NOWAIT);
   3021 			if (config_sets == NULL) {
   3022 				panic("rf_create_auto_sets: No memory!");
   3023 			}
   3024 			/* this one is easy :) */
   3025 			config_sets->ac = ac;
   3026 			config_sets->next = NULL;
   3027 			config_sets->rootable = 0;
   3028 			ac->next = NULL;
   3029 		} else {
   3030 			/* which set does this component fit into? */
   3031 			cset = config_sets;
   3032 			while(cset!=NULL) {
   3033 				if (rf_does_it_fit(cset, ac)) {
   3034 					/* looks like it matches... */
   3035 					ac->next = cset->ac;
   3036 					cset->ac = ac;
   3037 					break;
   3038 				}
   3039 				cset = cset->next;
   3040 			}
   3041 			if (cset==NULL) {
   3042 				/* didn't find a match above... new set..*/
   3043 				cset = (RF_ConfigSet_t *)
   3044 					malloc(sizeof(RF_ConfigSet_t),
   3045 					       M_RAIDFRAME, M_NOWAIT);
   3046 				if (cset == NULL) {
   3047 					panic("rf_create_auto_sets: No memory!");
   3048 				}
   3049 				cset->ac = ac;
   3050 				ac->next = NULL;
   3051 				cset->next = config_sets;
   3052 				cset->rootable = 0;
   3053 				config_sets = cset;
   3054 			}
   3055 		}
   3056 		ac = ac_next;
   3057 	}
   3058 
   3059 
   3060 	return(config_sets);
   3061 }
   3062 
   3063 static int
   3064 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
   3065 {
   3066 	RF_ComponentLabel_t *clabel1, *clabel2;
   3067 
   3068 	/* If this one matches the *first* one in the set, that's good
   3069 	   enough, since the other members of the set would have been
   3070 	   through here too... */
   3071 	/* note that we are not checking partitionSize here..
   3072 
   3073 	   Note that we are also not checking the mod_counters here.
    3074 	   If everything else matches except the mod_counter, that's
   3075 	   good enough for this test.  We will deal with the mod_counters
   3076 	   a little later in the autoconfiguration process.
   3077 
   3078 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   3079 
   3080 	   The reason we don't check for this is that failed disks
   3081 	   will have lower modification counts.  If those disks are
   3082 	   not added to the set they used to belong to, then they will
   3083 	   form their own set, which may result in 2 different sets,
   3084 	   for example, competing to be configured at raid0, and
   3085 	   perhaps competing to be the root filesystem set.  If the
   3086 	   wrong ones get configured, or both attempt to become /,
   3087 	   weird behaviour and or serious lossage will occur.  Thus we
    3088 	   weird behaviour and/or serious lossage will occur.  Thus we
   3089 	   a later point.
   3090 
   3091 	*/
   3092 
   3093 	clabel1 = cset->ac->clabel;
   3094 	clabel2 = ac->clabel;
   3095 	if ((clabel1->version == clabel2->version) &&
   3096 	    (clabel1->serial_number == clabel2->serial_number) &&
   3097 	    (clabel1->num_rows == clabel2->num_rows) &&
   3098 	    (clabel1->num_columns == clabel2->num_columns) &&
   3099 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   3100 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   3101 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   3102 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   3103 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   3104 	    (clabel1->blockSize == clabel2->blockSize) &&
   3105 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   3106 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   3107 	    (clabel1->root_partition == clabel2->root_partition) &&
   3108 	    (clabel1->last_unit == clabel2->last_unit) &&
   3109 	    (clabel1->config_order == clabel2->config_order)) {
    3110 		/* if it gets here, it almost *has* to be a match */
   3111 	} else {
   3112 		/* it's not consistent with somebody in the set..
   3113 		   punt */
   3114 		return(0);
   3115 	}
   3116 	/* all was fine.. it must fit... */
   3117 	return(1);
   3118 }
   3119 
   3120 int
   3121 rf_have_enough_components(RF_ConfigSet_t *cset)
   3122 {
   3123 	RF_AutoConfig_t *ac;
   3124 	RF_AutoConfig_t *auto_config;
   3125 	RF_ComponentLabel_t *clabel;
   3126 	int c;
   3127 	int num_cols;
   3128 	int num_missing;
   3129 	int mod_counter;
   3130 	int mod_counter_found;
   3131 	int even_pair_failed;
   3132 	char parity_type;
   3133 
   3134 
   3135 	/* check to see that we have enough 'live' components
   3136 	   of this set.  If so, we can configure it if necessary */
   3137 
   3138 	num_cols = cset->ac->clabel->num_columns;
   3139 	parity_type = cset->ac->clabel->parityConfig;
   3140 
   3141 	/* XXX Check for duplicate components!?!?!? */
   3142 
   3143 	/* Determine what the mod_counter is supposed to be for this set. */
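	/* We take the largest mod_counter seen across the set: components
	 * that failed earlier carry older (smaller) counters, and the column
	 * scan below only counts a component as present when its counter
	 * matches this newest value. */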
   3144 
   3145 	mod_counter_found = 0;
   3146 	mod_counter = 0;
   3147 	ac = cset->ac;
   3148 	while(ac!=NULL) {
   3149 		if (mod_counter_found==0) {
   3150 			mod_counter = ac->clabel->mod_counter;
   3151 			mod_counter_found = 1;
   3152 		} else {
   3153 			if (ac->clabel->mod_counter > mod_counter) {
   3154 				mod_counter = ac->clabel->mod_counter;
   3155 			}
   3156 		}
   3157 		ac = ac->next;
   3158 	}
   3159 
   3160 	num_missing = 0;
   3161 	auto_config = cset->ac;
   3162 
   3163 	even_pair_failed = 0;
   3164 	for(c=0; c<num_cols; c++) {
   3165 		ac = auto_config;
   3166 		while(ac!=NULL) {
   3167 			if ((ac->clabel->column == c) &&
   3168 			    (ac->clabel->mod_counter == mod_counter)) {
   3169 				/* it's this one... */
   3170 #ifdef DEBUG
   3171 				printf("Found: %s at %d\n",
   3172 				       ac->devname,c);
   3173 #endif
   3174 				break;
   3175 			}
   3176 			ac=ac->next;
   3177 		}
   3178 		if (ac==NULL) {
   3179 				/* Didn't find one here! */
   3180 				/* special case for RAID 1, especially
   3181 				   where there are more than 2
   3182 				   components (where RAIDframe treats
   3183 				   things a little differently :( ) */
   3184 			if (parity_type == '1') {
   3185 				if (c%2 == 0) { /* even component */
   3186 					even_pair_failed = 1;
   3187 				} else { /* odd component.  If
   3188 					    we're failed, and
   3189 					    so is the even
   3190 					    component, it's
   3191 					    "Good Night, Charlie" */
   3192 					if (even_pair_failed == 1) {
   3193 						return(0);
   3194 					}
   3195 				}
   3196 			} else {
   3197 				/* normal accounting */
   3198 				num_missing++;
   3199 			}
   3200 		}
   3201 		if ((parity_type == '1') && (c%2 == 1)) {
   3202 				/* Just did an even component, and we didn't
   3203 				   bail.. reset the even_pair_failed flag,
   3204 				   and go on to the next component.... */
   3205 			even_pair_failed = 0;
   3206 		}
   3207 	}
   3208 
   3209 	clabel = cset->ac->clabel;
   3210 
   3211 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3212 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3213 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3214 		/* XXX this needs to be made *much* more general */
   3215 		/* Too many failures */
   3216 		return(0);
   3217 	}
   3218 	/* otherwise, all is well, and we've got enough to take a kick
   3219 	   at autoconfiguring this set */
   3220 	return(1);
   3221 }
   3222 
   3223 void
   3224 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
   3225 			RF_Raid_t *raidPtr)
   3226 {
   3227 	RF_ComponentLabel_t *clabel;
   3228 	int i;
   3229 
   3230 	clabel = ac->clabel;
   3231 
   3232 	/* 1. Fill in the common stuff */
   3233 	config->numRow = clabel->num_rows = 1;
   3234 	config->numCol = clabel->num_columns;
   3235 	config->numSpare = 0; /* XXX should this be set here? */
   3236 	config->sectPerSU = clabel->sectPerSU;
   3237 	config->SUsPerPU = clabel->SUsPerPU;
   3238 	config->SUsPerRU = clabel->SUsPerRU;
   3239 	config->parityConfig = clabel->parityConfig;
   3240 	/* XXX... */
   3241 	strcpy(config->diskQueueType,"fifo");
   3242 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3243 	config->layoutSpecificSize = 0; /* XXX ?? */
   3244 
   3245 	while(ac!=NULL) {
   3246 		/* row/col values will be in range due to the checks
    3247 		   in rf_reasonable_label() */
   3248 		strcpy(config->devnames[0][ac->clabel->column],
   3249 		       ac->devname);
   3250 		ac = ac->next;
   3251 	}
   3252 
   3253 	for(i=0;i<RF_MAXDBGV;i++) {
   3254 		config->debugVars[i][0] = 0;
   3255 	}
   3256 }
   3257 
   3258 int
   3259 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
   3260 {
   3261 	RF_ComponentLabel_t clabel;
   3262 	struct vnode *vp;
   3263 	dev_t dev;
   3264 	int column;
   3265 	int sparecol;
   3266 
   3267 	raidPtr->autoconfigure = new_value;
   3268 
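	/* Propagate the flag to stable storage: for every live column and
	 * every in-use spare, read the on-disk component label, update the
	 * autoconfigure field, and write it back.  rf_set_rootpartition()
	 * below follows the same read-modify-write pattern. */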
   3269 	for(column=0; column<raidPtr->numCol; column++) {
   3270 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3271 			dev = raidPtr->Disks[column].dev;
   3272 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3273 			raidread_component_label(dev, vp, &clabel);
   3274 			clabel.autoconfigure = new_value;
   3275 			raidwrite_component_label(dev, vp, &clabel);
   3276 		}
   3277 	}
   3278 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3279 		sparecol = raidPtr->numCol + column;
   3280 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3281 			dev = raidPtr->Disks[sparecol].dev;
   3282 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3283 			raidread_component_label(dev, vp, &clabel);
   3284 			clabel.autoconfigure = new_value;
   3285 			raidwrite_component_label(dev, vp, &clabel);
   3286 		}
   3287 	}
   3288 	return(new_value);
   3289 }
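
/*
 * Usage note (added commentary): rf_set_autoconfig() is normally
 * reached via the RAIDFRAME_SET_AUTOCONFIG ioctl, e.g. from
 * "raidctl -A yes raid0".  A minimal userland sketch, assuming the
 * usual <dev/raidframe/raidframeio.h> definitions:
 *
 *	int val = 1;	(1 = autoconfigure at boot, 0 = don't)
 *	if (ioctl(fd, RAIDFRAME_SET_AUTOCONFIG, &val) == -1)
 *		err(1, "RAIDFRAME_SET_AUTOCONFIG");
 *
 * The new value is pushed into every component label so the setting
 * survives a reboot.
 */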
   3290 
   3291 int
   3292 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
   3293 {
   3294 	RF_ComponentLabel_t clabel;
   3295 	struct vnode *vp;
   3296 	dev_t dev;
   3297 	int column;
   3298 	int sparecol;
   3299 
   3300 	raidPtr->root_partition = new_value;
   3301 	for(column=0; column<raidPtr->numCol; column++) {
   3302 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3303 			dev = raidPtr->Disks[column].dev;
   3304 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3305 			raidread_component_label(dev, vp, &clabel);
   3306 			clabel.root_partition = new_value;
   3307 			raidwrite_component_label(dev, vp, &clabel);
   3308 		}
   3309 	}
   3310 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3311 		sparecol = raidPtr->numCol + column;
   3312 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3313 			dev = raidPtr->Disks[sparecol].dev;
   3314 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3315 			raidread_component_label(dev, vp, &clabel);
   3316 			clabel.root_partition = new_value;
   3317 			raidwrite_component_label(dev, vp, &clabel);
   3318 		}
   3319 	}
   3320 	return(new_value);
   3321 }
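
/*
 * Similarly (added commentary), rf_set_rootpartition() is the backend
 * for the RAIDFRAME_SET_ROOT ioctl ("raidctl -A root raid0" style
 * requests); it mirrors rf_set_autoconfig() above but toggles the
 * root_partition flag in each component label instead.
 */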
   3322 
   3323 void
   3324 rf_release_all_vps(RF_ConfigSet_t *cset)
   3325 {
   3326 	RF_AutoConfig_t *ac;
   3327 
   3328 	ac = cset->ac;
   3329 	while(ac!=NULL) {
   3330 		/* Close the vp, and give it back */
   3331 		if (ac->vp) {
   3332 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3333 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3334 			vput(ac->vp);
   3335 			ac->vp = NULL;
   3336 		}
   3337 		ac = ac->next;
   3338 	}
   3339 }
   3340 
   3341 
   3342 void
   3343 rf_cleanup_config_set(RF_ConfigSet_t *cset)
   3344 {
   3345 	RF_AutoConfig_t *ac;
   3346 	RF_AutoConfig_t *next_ac;
   3347 
   3348 	ac = cset->ac;
   3349 	while(ac!=NULL) {
   3350 		next_ac = ac->next;
   3351 		/* nuke the label */
   3352 		free(ac->clabel, M_RAIDFRAME);
   3353 		/* cleanup the config structure */
   3354 		free(ac, M_RAIDFRAME);
   3355 		/* "next.." */
   3356 		ac = next_ac;
   3357 	}
   3358 	/* and, finally, nuke the config set */
   3359 	free(cset, M_RAIDFRAME);
   3360 }
   3361 
   3362 
   3363 void
   3364 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
   3365 {
   3366 	/* current version number */
   3367 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3368 	clabel->serial_number = raidPtr->serial_number;
   3369 	clabel->mod_counter = raidPtr->mod_counter;
   3370 	clabel->num_rows = 1;
   3371 	clabel->num_columns = raidPtr->numCol;
   3372 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3373 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3374 
   3375 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3376 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3377 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3378 
   3379 	clabel->blockSize = raidPtr->bytesPerSector;
   3380 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3381 
   3382 	/* XXX not portable */
   3383 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3384 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3385 	clabel->autoconfigure = raidPtr->autoconfigure;
   3386 	clabel->root_partition = raidPtr->root_partition;
   3387 	clabel->last_unit = raidPtr->raidid;
   3388 	clabel->config_order = raidPtr->config_order;
   3389 }
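
/*
 * Note (added commentary): raid_init_component_label() only fills in
 * the fields common to every component of the set.  Callers typically
 * follow it by setting the per-component fields (such as column and
 * partitionSize) before handing the label to
 * raidwrite_component_label().
 */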
   3390 
   3391 int
   3392 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
   3393 {
   3394 	RF_Raid_t *raidPtr;
   3395 	RF_Config_t *config;
   3396 	int raidID;
   3397 	int retcode;
   3398 
   3399 #ifdef DEBUG
   3400 	printf("RAID autoconfigure\n");
   3401 #endif
   3402 
   3403 	retcode = 0;
   3404 	*unit = -1;
   3405 
   3406 	/* 1. Create a config structure */
   3407 
   3408 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3409 				       M_RAIDFRAME,
   3410 				       M_NOWAIT);
   3411 	if (config==NULL) {
   3412 		printf("Out of mem!?!?\n");
   3413 				/* XXX do something more intelligent here. */
   3414 		return(1);
   3415 	}
   3416 
   3417 	memset(config, 0, sizeof(RF_Config_t));
   3418 
   3419 	/*
   3420 	   2. Figure out what RAID ID this one is supposed to live at
   3421 	   See if we can get the same RAID dev that it was configured
   3422 	   on last time..
   3423 	*/
   3424 
   3425 	raidID = cset->ac->clabel->last_unit;
   3426 	if ((raidID < 0) || (raidID >= numraid)) {
   3427 		/* let's not wander off into lala land. */
   3428 		raidID = numraid - 1;
   3429 	}
   3430 	if (raidPtrs[raidID]->valid != 0) {
   3431 
   3432 		/*
   3433 		   Nope... Go looking for an alternative...
    3434 		   Start high so that raid0, even if it is free, is
    3435 		   only used as a last resort.
   3436 		*/
   3437 
   3438 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3439 			if (raidPtrs[raidID]->valid == 0) {
   3440 				/* can use this one! */
   3441 				break;
   3442 			}
   3443 		}
   3444 	}
   3445 
   3446 	if (raidID < 0) {
   3447 		/* punt... */
   3448 		printf("Unable to auto configure this set!\n");
   3449 		printf("(Out of RAID devs!)\n");
   3450 		free(config, M_RAIDFRAME);
   3451 		return(1);
   3452 	}
   3453 
   3454 #ifdef DEBUG
   3455 	printf("Configuring raid%d:\n",raidID);
   3456 #endif
   3457 
   3458 	raidPtr = raidPtrs[raidID];
   3459 
   3460 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3461 	raidPtr->raidid = raidID;
   3462 	raidPtr->openings = RAIDOUTSTANDING;
   3463 
   3464 	/* 3. Build the configuration structure */
   3465 	rf_create_configuration(cset->ac, config, raidPtr);
   3466 
   3467 	/* 4. Do the configuration */
   3468 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3469 
   3470 	if (retcode == 0) {
   3471 
   3472 		raidinit(raidPtrs[raidID]);
   3473 
   3474 		rf_markalldirty(raidPtrs[raidID]);
   3475 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3476 		if (cset->ac->clabel->root_partition==1) {
   3477 			/* everything configured just fine.  Make a note
   3478 			   that this set is eligible to be root. */
   3479 			cset->rootable = 1;
   3480 			/* XXX do this here? */
   3481 			raidPtrs[raidID]->root_partition = 1;
   3482 		}
   3483 	}
   3484 
   3485 	/* 5. Cleanup */
   3486 	free(config, M_RAIDFRAME);
   3487 
   3488 	*unit = raidID;
   3489 	return(retcode);
   3490 }
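
/*
 * Note (added commentary): rf_auto_config_set() is called once per
 * matching configuration set during autoconfiguration (see
 * rf_buildroothack()), which is why it both hunts for a free raid unit
 * and reports the unit it settled on back through *unit.
 */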
   3491 
   3492 void
   3493 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
   3494 {
   3495 	struct buf *bp;
   3496 
   3497 	bp = (struct buf *)desc->bp;
   3498 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3499 	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
   3500 }
   3501 
   3502 void
   3503 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
   3504 	     size_t xmin, size_t xmax)
   3505 {
   3506 	pool_init(p, size, 0, 0, 0, w_chan, NULL);
   3507 	pool_sethiwat(p, xmax);
   3508 	pool_prime(p, xmin);
   3509 	pool_setlowat(p, xmin);
   3510 }
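
/*
 * Illustrative call (hypothetical names, added commentary): RAIDframe
 * modules use this helper to set up their private pools, roughly:
 *
 *	rf_pool_init(&rf_example_pool, sizeof(struct rf_example_item),
 *	    "rf_examplepl", RF_MIN_FREE_EXAMPLE, RF_MAX_FREE_EXAMPLE);
 *
 * xmin items are primed into the pool and kept as the low-water mark,
 * while the high-water mark keeps no more than xmax idle items cached.
 */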
   3511 
   3512 /*
   3513  * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
   3514  * if there is IO pending and if that IO could possibly be done for a
   3515  * given RAID set.  Returns 0 if IO is waiting and can be done, 1
   3516  * otherwise.
   3517  *
   3518  */
   3519 
   3520 int
   3521 rf_buf_queue_check(int raidid)
   3522 {
   3523 	if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
   3524 	    raidPtrs[raidid]->openings > 0) {
   3525 		/* there is work to do */
   3526 		return 0;
   3527 	}
   3528 	/* default is nothing to do */
   3529 	return 1;
   3530 }
   3531 
   3532 int
   3533 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
   3534 {
   3535 	struct partinfo dpart;
   3536 	struct dkwedge_info dkw;
   3537 	int error;
   3538 
   3539 	error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred, l);
   3540 	if (error == 0) {
   3541 		diskPtr->blockSize = dpart.disklab->d_secsize;
   3542 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
   3543 		diskPtr->partitionSize = dpart.part->p_size;
   3544 		return 0;
   3545 	}
   3546 
   3547 	error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred, l);
   3548 	if (error == 0) {
   3549 		diskPtr->blockSize = 512;	/* XXX */
   3550 		diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
   3551 		diskPtr->partitionSize = dkw.dkw_size;
   3552 		return 0;
   3553 	}
   3554 	return error;
   3555 }
   3556 
   3557 static int
   3558 raid_match(struct device *self, struct cfdata *cfdata,
   3559     void *aux)
   3560 {
   3561 	return 1;
   3562 }
   3563 
   3564 static void
   3565 raid_attach(struct device *parent, struct device *self,
   3566     void *aux)
   3567 {
   3568 
   3569 }
   3570 
   3571 
   3572 static int
   3573 raid_detach(struct device *self, int flags)
   3574 {
   3575 	struct raid_softc *rs = (struct raid_softc *)self;
   3576 
   3577 	if (rs->sc_flags & RAIDF_INITED)
   3578 		return EBUSY;
   3579 
   3580 	return 0;
   3581 }
   3582 
   3583 static void
   3584 rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
   3585 {
   3586 	prop_dictionary_t disk_info, odisk_info, geom;
   3587 	disk_info = prop_dictionary_create();
   3588 	geom = prop_dictionary_create();
   3589 	prop_dictionary_set_uint64(geom, "sectors-per-unit",
   3590 				   raidPtr->totalSectors);
   3591 	prop_dictionary_set_uint32(geom, "sector-size",
   3592 				   raidPtr->bytesPerSector);
   3593 
   3594 	prop_dictionary_set_uint16(geom, "sectors-per-track",
   3595 				   raidPtr->Layout.dataSectorsPerStripe);
   3596 	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
   3597 				   4 * raidPtr->numCol);
   3598 
   3599 	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
   3600 	   raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
   3601 	   (4 * raidPtr->numCol)));
   3602 
   3603 	prop_dictionary_set(disk_info, "geometry", geom);
   3604 	prop_object_release(geom);
   3605 	prop_dictionary_set(device_properties(rs->sc_dev),
   3606 			    "disk-info", disk_info);
   3607 	odisk_info = rs->sc_dkdev.dk_info;
   3608 	rs->sc_dkdev.dk_info = disk_info;
   3609 	if (odisk_info)
   3610 		prop_object_release(odisk_info);
   3611 }
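
/*
 * Worked example of the synthesized geometry above (hypothetical
 * numbers, added commentary): for a 5-column RAID 5 set with 32
 * sectors per stripe unit, dataSectorsPerStripe is 4 * 32 = 128, so
 * the fake geometry becomes 128 sectors per track, 4 * 5 = 20 tracks
 * per cylinder, and totalSectors / (128 * 20) cylinders per unit.
 * The geometry is invented only so that disklabel and friends have
 * something plausible to report for the raid device.
 */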
   3612