Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.104.2.17
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.104.2.17 2002/11/11 22:11:57 nathanw Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/cdefs.h>
    117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.104.2.17 2002/11/11 22:11:57 nathanw Exp $");
    118 
    119 #include <sys/param.h>
    120 #include <sys/errno.h>
    121 #include <sys/pool.h>
    122 #include <sys/proc.h>
    123 #include <sys/queue.h>
    124 #include <sys/disk.h>
    125 #include <sys/device.h>
    126 #include <sys/stat.h>
    127 #include <sys/ioctl.h>
    128 #include <sys/fcntl.h>
    129 #include <sys/systm.h>
    130 #include <sys/namei.h>
    131 #include <sys/vnode.h>
    132 #include <sys/disklabel.h>
    133 #include <sys/conf.h>
    134 #include <sys/lock.h>
    135 #include <sys/buf.h>
    136 #include <sys/user.h>
    137 #include <sys/reboot.h>
    138 
    139 #include <dev/raidframe/raidframevar.h>
    140 #include <dev/raidframe/raidframeio.h>
    141 #include "raid.h"
    142 #include "opt_raid_autoconfig.h"
    143 #include "rf_raid.h"
    144 #include "rf_copyback.h"
    145 #include "rf_dag.h"
    146 #include "rf_dagflags.h"
    147 #include "rf_desc.h"
    148 #include "rf_diskqueue.h"
    149 #include "rf_etimer.h"
    150 #include "rf_general.h"
    151 #include "rf_kintf.h"
    152 #include "rf_options.h"
    153 #include "rf_driver.h"
    154 #include "rf_parityscan.h"
    155 #include "rf_threadstuff.h"
    156 
/*
 * db1_printf((fmt, args...)) -- level-1 debug printf.
 *
 * The argument is a complete, parenthesized printf() argument list (note
 * the doubled parentheses at every call site).  With DEBUG defined the
 * message is printed only when rf_kdebug_level is greater than zero;
 * without DEBUG the macro expands to an empty statement.
 *
 * Both forms are wrapped in do { } while (0) so that the macro is a
 * single statement and is safe inside an unbraced if/else.
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    163 
    164 static RF_Raid_t **raidPtrs;	/* global raid device descriptors, indexed by unit; table allocated in raidattach() */
    165 
        /* Mutex initialized in raidattach(); presumably guards the two
         * spare-table queues below -- TODO confirm against their users. */
    166 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    167 
        /* Both queue heads are reset to NULL in raidattach(). */
    168 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    169 						 * spare table */
    170 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    171 						 * installation process */
    172 
    173 /* prototypes: forward declarations of file-local (static) helpers */
    174 static void KernelWakeupFunc(struct buf * bp);
    175 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    176 		   dev_t dev, RF_SectorNum_t startSect,
    177 		   RF_SectorCount_t numSect, caddr_t buf,
    178 		   void (*cbFunc) (struct buf *), void *cbArg,
    179 		   int logBytesPerSector, struct proc * b_proc);
    180 static void raidinit(RF_Raid_t *);
    181 
        /* attach routine; takes the number of units to set up */
    182 void raidattach(int);
    183 
        /* device entry points, referenced from raid_bdevsw / raid_cdevsw below */
    184 dev_type_open(raidopen);
    185 dev_type_close(raidclose);
    186 dev_type_read(raidread);
    187 dev_type_write(raidwrite);
    188 dev_type_ioctl(raidioctl);
    189 dev_type_strategy(raidstrategy);
    190 dev_type_dump(raiddump);
    191 dev_type_size(raidsize);
    192 
    193 const struct bdevsw raid_bdevsw = {	/* block device switch: positional list of this driver's entry points */
    194 	raidopen, raidclose, raidstrategy, raidioctl,
    195 	raiddump, raidsize, D_DISK
    196 };
    197 
    198 const struct cdevsw raid_cdevsw = {	/* character device switch; the no* entries are the stock stubs for unsupported operations */
    199 	raidopen, raidclose, raidread, raidwrite, raidioctl,
    200 	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
    201 };
    202 
    203 /*
    204  * Pilfered from ccd.c
    205  */
    206 
    207 struct raidbuf {	/* per-component I/O buffer wrapper; this is the object size of raidframe_cbufpool */
    208 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
    209 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    210 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
    211 };
    212 
    213 /* component buffer pool: pool of struct raidbuf, set up by pool_init() in raidattach() */
    214 struct pool raidframe_cbufpool;
    215 
    216 /* XXX Not sure if the following should be replacing the raidPtrs above,
    217    or if it should be used in conjunction with that...
    218 */
    219 
    220 struct raid_softc {	/* per-unit software state; one entry per unit in the raid_softc[] array */
    221 	int     sc_flags;	/* flags (RAIDF_* bits below) */
    222 	int     sc_cflags;	/* configuration flags */
    223 	size_t  sc_size;        /* size of the raid device */
    224 	char    sc_xname[20];	/* XXX external name */
    225 	struct disk sc_dkdev;	/* generic disk device info (label, open masks) */
    226 	struct bufq_state buf_queue;	/* used for the device queue; raidstrategy() enqueues here */
    227 };
    228 /* sc_flags */
    229 #define RAIDF_INITED	0x01	/* unit has been initialized */
    230 #define RAIDF_WLABEL	0x02	/* label area is writable */
    231 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    232 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    233 #define RAIDF_LOCKED	0x80	/* unit is locked */
    234 
    235 #define	raidunit(x)	DISKUNIT(x)	/* unit number encoded in a dev_t */
    236 int numraid = 0;	/* number of usable units; set in raidattach() and used as the upper bound for unit checks */
    237 
    238 /*
    239  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    240  * Be aware that large numbers can allow the driver to consume a lot of
    241  * kernel memory, especially on writes, and in degraded mode reads.
    242  *
    243  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    244  * a single 64K write will typically require 64K for the old data,
    245  * 64K for the old parity, and 64K for the new parity, for a total
    246  * of 192K (if the parity buffer is not re-used immediately).
     247  * Even if it is used immediately, that's still 128K, which when multiplied
    248  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    249  *
    250  * Now in degraded mode, for example, a 64K read on the above setup may
    251  * require data reconstruction, which will require *all* of the 4 remaining
    252  * disks to participate -- 4 * 32K/disk == 128K again.
    253  */
    254 
    255 #ifndef RAIDOUTSTANDING	/* may be overridden at build time */
    256 #define RAIDOUTSTANDING   6	/* default number of simultaneous I/Os per RAID device */
    257 #endif
    258 
        /* Device number of the raw partition on the same major/unit as `dev'. */
    259 #define RAIDLABELDEV(dev)	\
    260 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    261 
    262 /* declared here, and made public, for the benefit of KVM stuff..
        * Array of per-unit softc structures, allocated in raidattach(). */
    263 struct raid_softc *raid_softc;
    264 
        /* disklabel helpers */
    265 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    266 				     struct disklabel *);
    267 static void raidgetdisklabel(dev_t);
    268 static void raidmakedisklabel(struct raid_softc *);
    269 
        /* per-unit lock used around open/close */
    270 static int raidlock(struct raid_softc *);
    271 static void raidunlock(struct raid_softc *);
    272 
    273 static void rf_markalldirty(RF_Raid_t *);
    274 
        /* Array of per-unit device structures, filled in by raidattach();
         * rf_buildroothack() may point booted_device at one of them. */
    275 struct device *raidrootdev;
    276 
    277 void rf_ReconThread(struct rf_recon_req *);
    278 /* XXX what I want is: */
    279 /*void rf_ReconThread(RF_Raid_t *raidPtr);  */
    280 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    281 void rf_CopybackThread(RF_Raid_t *raidPtr);
    282 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    283 int rf_autoconfig(struct device *self);
    284 void rf_buildroothack(RF_ConfigSet_t *);
    285 
        /* autoconfiguration support */
    286 RF_AutoConfig_t *rf_find_raid_components(void);
    287 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    288 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    289 static int rf_reasonable_label(RF_ComponentLabel_t *);
    290 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    291 int rf_set_autoconfig(RF_Raid_t *, int);
    292 int rf_set_rootpartition(RF_Raid_t *, int);
    293 void rf_release_all_vps(RF_ConfigSet_t *);
    294 void rf_cleanup_config_set(RF_ConfigSet_t *);
    295 int rf_have_enough_components(RF_ConfigSet_t *);
    296 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    297 
    298 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    299 				  allow autoconfig to take place.
    300 			          Note that this is overridden by having
    301 			          RAID_AUTOCONFIG as an option in the
    302 			          kernel config file.  Also cleared by
        			          rf_autoconfig() so that it runs
        			          only once.  */
    303 
    304 void
    305 raidattach(num)
    306 	int     num;
    307 {
    308 	int raidID;
    309 	int i, rc;
    310 
    311 #ifdef DEBUG
    312 	printf("raidattach: Asked for %d units\n", num);
    313 #endif
    314 
    315 	if (num <= 0) {
    316 #ifdef DIAGNOSTIC
    317 		panic("raidattach: count <= 0");
    318 #endif
    319 		return;
    320 	}
    321 	/* This is where all the initialization stuff gets done. */
    322 
    323 	numraid = num;
    324 
    325 	/* Make some space for requested number of units... */
    326 
    327 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    328 	if (raidPtrs == NULL) {
    329 		panic("raidPtrs is NULL!!");
    330 	}
    331 
    332 	/* Initialize the component buffer pool. */
    333 	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
    334 	    0, 0, "raidpl", NULL);
    335 
    336 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    337 	if (rc) {
    338 		RF_PANIC();
    339 	}
    340 
    341 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    342 
    343 	for (i = 0; i < num; i++)
    344 		raidPtrs[i] = NULL;
    345 	rc = rf_BootRaidframe();
    346 	if (rc == 0)
    347 		printf("Kernelized RAIDframe activated\n");
    348 	else
    349 		panic("Serious error booting RAID!!");
    350 
    351 	/* put together some datastructures like the CCD device does.. This
    352 	 * lets us lock the device and what-not when it gets opened. */
    353 
    354 	raid_softc = (struct raid_softc *)
    355 		malloc(num * sizeof(struct raid_softc),
    356 		       M_RAIDFRAME, M_NOWAIT);
    357 	if (raid_softc == NULL) {
    358 		printf("WARNING: no memory for RAIDframe driver\n");
    359 		return;
    360 	}
    361 
    362 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    363 
    364 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    365 					      M_RAIDFRAME, M_NOWAIT);
    366 	if (raidrootdev == NULL) {
    367 		panic("No memory for RAIDframe driver!!?!?!");
    368 	}
    369 
    370 	for (raidID = 0; raidID < num; raidID++) {
    371 		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
    372 
    373 		raidrootdev[raidID].dv_class  = DV_DISK;
    374 		raidrootdev[raidID].dv_cfdata = NULL;
    375 		raidrootdev[raidID].dv_unit   = raidID;
    376 		raidrootdev[raidID].dv_parent = NULL;
    377 		raidrootdev[raidID].dv_flags  = 0;
    378 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    379 
    380 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    381 			  (RF_Raid_t *));
    382 		if (raidPtrs[raidID] == NULL) {
    383 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    384 			numraid = raidID;
    385 			return;
    386 		}
    387 	}
    388 
    389 #ifdef RAID_AUTOCONFIG
    390 	raidautoconfig = 1;
    391 #endif
    392 
    393 	/*
    394 	 * Register a finalizer which will be used to auto-config RAID
    395 	 * sets once all real hardware devices have been found.
    396 	 */
    397 	if (config_finalize_register(NULL, rf_autoconfig) != 0)
    398 		printf("WARNING: unable to register RAIDframe finalizer\n");
    399 }
    400 
    401 int
    402 rf_autoconfig(struct device *self)
    403 {
    404 	RF_AutoConfig_t *ac_list;
    405 	RF_ConfigSet_t *config_sets;
    406 
    407 	if (raidautoconfig == 0)
    408 		return (0);
    409 
    410 	/* XXX This code can only be run once. */
    411 	raidautoconfig = 0;
    412 
    413 	/* 1. locate all RAID components on the system */
    414 #ifdef DEBUG
    415 	printf("Searching for RAID components...\n");
    416 #endif
    417 	ac_list = rf_find_raid_components();
    418 
    419 	/* 2. Sort them into their respective sets. */
    420 	config_sets = rf_create_auto_sets(ac_list);
    421 
    422 	/*
    423 	 * 3. Evaluate each set andconfigure the valid ones.
    424 	 * This gets done in rf_buildroothack().
    425 	 */
    426 	rf_buildroothack(config_sets);
    427 
    428 	return (1);
    429 }
    430 
    431 void
    432 rf_buildroothack(RF_ConfigSet_t *config_sets)
    433 {
    434 	RF_ConfigSet_t *cset;
    435 	RF_ConfigSet_t *next_cset;
    436 	int retcode;
    437 	int raidID;
    438 	int rootID;
    439 	int num_root;
    440 
    441 	rootID = 0;
    442 	num_root = 0;
    443 	cset = config_sets;
    444 	while(cset != NULL ) {
    445 		next_cset = cset->next;
    446 		if (rf_have_enough_components(cset) &&
    447 		    cset->ac->clabel->autoconfigure==1) {
    448 			retcode = rf_auto_config_set(cset,&raidID);
    449 			if (!retcode) {
    450 				if (cset->rootable) {
    451 					rootID = raidID;
    452 					num_root++;
    453 				}
    454 			} else {
    455 				/* The autoconfig didn't work :( */
    456 #if DEBUG
    457 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    458 #endif
    459 				rf_release_all_vps(cset);
    460 			}
    461 		} else {
    462 			/* we're not autoconfiguring this set...
    463 			   release the associated resources */
    464 			rf_release_all_vps(cset);
    465 		}
    466 		/* cleanup */
    467 		rf_cleanup_config_set(cset);
    468 		cset = next_cset;
    469 	}
    470 
    471 	/* we found something bootable... */
    472 
    473 	if (num_root == 1) {
    474 		booted_device = &raidrootdev[rootID];
    475 	} else if (num_root > 1) {
    476 		/* we can't guess.. require the user to answer... */
    477 		boothowto |= RB_ASKNAME;
    478 	}
    479 }
    480 
    481 
    482 int
    483 raidsize(dev)
    484 	dev_t   dev;
    485 {
    486 	struct raid_softc *rs;
    487 	struct disklabel *lp;
    488 	int     part, unit, omask, size;
    489 
    490 	unit = raidunit(dev);
    491 	if (unit >= numraid)
    492 		return (-1);
    493 	rs = &raid_softc[unit];
    494 
    495 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    496 		return (-1);
    497 
    498 	part = DISKPART(dev);
    499 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    500 	lp = rs->sc_dkdev.dk_label;
    501 
    502 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    503 		return (-1);
    504 
    505 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    506 		size = -1;
    507 	else
    508 		size = lp->d_partitions[part].p_size *
    509 		    (lp->d_secsize / DEV_BSIZE);
    510 
    511 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    512 		return (-1);
    513 
    514 	return (size);
    515 
    516 }
    517 
    518 int
    519 raiddump(dev, blkno, va, size)
    520 	dev_t   dev;
    521 	daddr_t blkno;
    522 	caddr_t va;
    523 	size_t  size;
    524 {
    525 	/* Not implemented. */
    526 	return ENXIO;
    527 }
    528 /* ARGSUSED */
    529 int
    530 raidopen(dev, flags, fmt, p)
    531 	dev_t   dev;
    532 	int     flags, fmt;
    533 	struct proc *p;
    534 {
    535 	int     unit = raidunit(dev);
    536 	struct raid_softc *rs;
    537 	struct disklabel *lp;
    538 	int     part, pmask;
    539 	int     error = 0;
    540 
    541 	if (unit >= numraid)
    542 		return (ENXIO);
    543 	rs = &raid_softc[unit];
    544 
    545 	if ((error = raidlock(rs)) != 0)
    546 		return (error);
    547 	lp = rs->sc_dkdev.dk_label;
    548 
    549 	part = DISKPART(dev);
    550 	pmask = (1 << part);
    551 
    552 	db1_printf(("Opening raid device number: %d partition: %d\n",
    553 		unit, part));
    554 
    555 
    556 	if ((rs->sc_flags & RAIDF_INITED) &&
    557 	    (rs->sc_dkdev.dk_openmask == 0))
    558 		raidgetdisklabel(dev);
    559 
    560 	/* make sure that this partition exists */
    561 
    562 	if (part != RAW_PART) {
    563 		db1_printf(("Not a raw partition..\n"));
    564 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    565 		    ((part >= lp->d_npartitions) ||
    566 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    567 			error = ENXIO;
    568 			raidunlock(rs);
    569 			db1_printf(("Bailing out...\n"));
    570 			return (error);
    571 		}
    572 	}
    573 	/* Prevent this unit from being unconfigured while open. */
    574 	switch (fmt) {
    575 	case S_IFCHR:
    576 		rs->sc_dkdev.dk_copenmask |= pmask;
    577 		break;
    578 
    579 	case S_IFBLK:
    580 		rs->sc_dkdev.dk_bopenmask |= pmask;
    581 		break;
    582 	}
    583 
    584 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    585 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    586 		/* First one... mark things as dirty... Note that we *MUST*
    587 		 have done a configure before this.  I DO NOT WANT TO BE
    588 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    589 		 THAT THEY BELONG TOGETHER!!!!! */
    590 		/* XXX should check to see if we're only open for reading
    591 		   here... If so, we needn't do this, but then need some
    592 		   other way of keeping track of what's happened.. */
    593 
    594 		rf_markalldirty( raidPtrs[unit] );
    595 	}
    596 
    597 
    598 	rs->sc_dkdev.dk_openmask =
    599 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    600 
    601 	raidunlock(rs);
    602 
    603 	return (error);
    604 
    605 
    606 }
    607 /* ARGSUSED */
    608 int
    609 raidclose(dev, flags, fmt, p)
    610 	dev_t   dev;
    611 	int     flags, fmt;
    612 	struct proc *p;
    613 {
    614 	int     unit = raidunit(dev);
    615 	struct raid_softc *rs;
    616 	int     error = 0;
    617 	int     part;
    618 
    619 	if (unit >= numraid)
    620 		return (ENXIO);
    621 	rs = &raid_softc[unit];
    622 
    623 	if ((error = raidlock(rs)) != 0)
    624 		return (error);
    625 
    626 	part = DISKPART(dev);
    627 
    628 	/* ...that much closer to allowing unconfiguration... */
    629 	switch (fmt) {
    630 	case S_IFCHR:
    631 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    632 		break;
    633 
    634 	case S_IFBLK:
    635 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    636 		break;
    637 	}
    638 	rs->sc_dkdev.dk_openmask =
    639 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    640 
    641 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    642 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    643 		/* Last one... device is not unconfigured yet.
    644 		   Device shutdown has taken care of setting the
    645 		   clean bits if RAIDF_INITED is not set
    646 		   mark things as clean... */
    647 #if 0
    648 		printf("Last one on raid%d.  Updating status.\n",unit);
    649 #endif
    650 		rf_update_component_labels(raidPtrs[unit],
    651 						 RF_FINAL_COMPONENT_UPDATE);
    652 		if (doing_shutdown) {
    653 			/* last one, and we're going down, so
    654 			   lights out for this RAID set too. */
    655 			error = rf_Shutdown(raidPtrs[unit]);
    656 
    657 			/* It's no longer initialized... */
    658 			rs->sc_flags &= ~RAIDF_INITED;
    659 
    660 			/* Detach the disk. */
    661 			disk_detach(&rs->sc_dkdev);
    662 		}
    663 	}
    664 
    665 	raidunlock(rs);
    666 	return (0);
    667 
    668 }
    669 
    670 void
    671 raidstrategy(bp)
    672 	struct buf *bp;
    673 {
    674 	int s;
    675 
    676 	unsigned int raidID = raidunit(bp->b_dev);
    677 	RF_Raid_t *raidPtr;
    678 	struct raid_softc *rs = &raid_softc[raidID];
    679 	struct disklabel *lp;
    680 	int     wlabel;
    681 
    682 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    683 		bp->b_error = ENXIO;
    684 		bp->b_flags |= B_ERROR;
    685 		bp->b_resid = bp->b_bcount;
    686 		biodone(bp);
    687 		return;
    688 	}
    689 	if (raidID >= numraid || !raidPtrs[raidID]) {
    690 		bp->b_error = ENODEV;
    691 		bp->b_flags |= B_ERROR;
    692 		bp->b_resid = bp->b_bcount;
    693 		biodone(bp);
    694 		return;
    695 	}
    696 	raidPtr = raidPtrs[raidID];
    697 	if (!raidPtr->valid) {
    698 		bp->b_error = ENODEV;
    699 		bp->b_flags |= B_ERROR;
    700 		bp->b_resid = bp->b_bcount;
    701 		biodone(bp);
    702 		return;
    703 	}
    704 	if (bp->b_bcount == 0) {
    705 		db1_printf(("b_bcount is zero..\n"));
    706 		biodone(bp);
    707 		return;
    708 	}
    709 	lp = rs->sc_dkdev.dk_label;
    710 
    711 	/*
    712 	 * Do bounds checking and adjust transfer.  If there's an
    713 	 * error, the bounds check will flag that for us.
    714 	 */
    715 
    716 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    717 	if (DISKPART(bp->b_dev) != RAW_PART)
    718 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    719 			db1_printf(("Bounds check failed!!:%d %d\n",
    720 				(int) bp->b_blkno, (int) wlabel));
    721 			biodone(bp);
    722 			return;
    723 		}
    724 	s = splbio();
    725 
    726 	bp->b_resid = 0;
    727 
    728 	/* stuff it onto our queue */
    729 	BUFQ_PUT(&rs->buf_queue, bp);
    730 
    731 	raidstart(raidPtrs[raidID]);
    732 
    733 	splx(s);
    734 }
    735 /* ARGSUSED */
    736 int
    737 raidread(dev, uio, flags)
    738 	dev_t   dev;
    739 	struct uio *uio;
    740 	int     flags;
    741 {
    742 	int     unit = raidunit(dev);
    743 	struct raid_softc *rs;
    744 	int     part;
    745 
    746 	if (unit >= numraid)
    747 		return (ENXIO);
    748 	rs = &raid_softc[unit];
    749 
    750 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    751 		return (ENXIO);
    752 	part = DISKPART(dev);
    753 
    754 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    755 
    756 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    757 
    758 }
    759 /* ARGSUSED */
    760 int
    761 raidwrite(dev, uio, flags)
    762 	dev_t   dev;
    763 	struct uio *uio;
    764 	int     flags;
    765 {
    766 	int     unit = raidunit(dev);
    767 	struct raid_softc *rs;
    768 
    769 	if (unit >= numraid)
    770 		return (ENXIO);
    771 	rs = &raid_softc[unit];
    772 
    773 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    774 		return (ENXIO);
    775 	db1_printf(("raidwrite\n"));
    776 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    777 
    778 }
    779 
    780 int
    781 raidioctl(dev, cmd, data, flag, p)
    782 	dev_t   dev;
    783 	u_long  cmd;
    784 	caddr_t data;
    785 	int     flag;
    786 	struct proc *p;
    787 {
    788 	int     unit = raidunit(dev);
    789 	int     error = 0;
    790 	int     part, pmask;
    791 	struct raid_softc *rs;
    792 	RF_Config_t *k_cfg, *u_cfg;
    793 	RF_Raid_t *raidPtr;
    794 	RF_RaidDisk_t *diskPtr;
    795 	RF_AccTotals_t *totals;
    796 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    797 	u_char *specific_buf;
    798 	int retcode = 0;
    799 	int row;
    800 	int column;
    801 	int raidid;
    802 	struct rf_recon_req *rrcopy, *rr;
    803 	RF_ComponentLabel_t *clabel;
    804 	RF_ComponentLabel_t ci_label;
    805 	RF_ComponentLabel_t **clabel_ptr;
    806 	RF_SingleComponent_t *sparePtr,*componentPtr;
    807 	RF_SingleComponent_t hot_spare;
    808 	RF_SingleComponent_t component;
    809 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    810 	int i, j, d;
    811 #ifdef __HAVE_OLD_DISKLABEL
    812 	struct disklabel newlabel;
    813 #endif
    814 
    815 	if (unit >= numraid)
    816 		return (ENXIO);
    817 	rs = &raid_softc[unit];
    818 	raidPtr = raidPtrs[unit];
    819 
    820 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    821 		(int) DISKPART(dev), (int) unit, (int) cmd));
    822 
    823 	/* Must be open for writes for these commands... */
    824 	switch (cmd) {
    825 	case DIOCSDINFO:
    826 	case DIOCWDINFO:
    827 #ifdef __HAVE_OLD_DISKLABEL
    828 	case ODIOCWDINFO:
    829 	case ODIOCSDINFO:
    830 #endif
    831 	case DIOCWLABEL:
    832 		if ((flag & FWRITE) == 0)
    833 			return (EBADF);
    834 	}
    835 
    836 	/* Must be initialized for these... */
    837 	switch (cmd) {
    838 	case DIOCGDINFO:
    839 	case DIOCSDINFO:
    840 	case DIOCWDINFO:
    841 #ifdef __HAVE_OLD_DISKLABEL
    842 	case ODIOCGDINFO:
    843 	case ODIOCWDINFO:
    844 	case ODIOCSDINFO:
    845 	case ODIOCGDEFLABEL:
    846 #endif
    847 	case DIOCGPART:
    848 	case DIOCWLABEL:
    849 	case DIOCGDEFLABEL:
    850 	case RAIDFRAME_SHUTDOWN:
    851 	case RAIDFRAME_REWRITEPARITY:
    852 	case RAIDFRAME_GET_INFO:
    853 	case RAIDFRAME_RESET_ACCTOTALS:
    854 	case RAIDFRAME_GET_ACCTOTALS:
    855 	case RAIDFRAME_KEEP_ACCTOTALS:
    856 	case RAIDFRAME_GET_SIZE:
    857 	case RAIDFRAME_FAIL_DISK:
    858 	case RAIDFRAME_COPYBACK:
    859 	case RAIDFRAME_CHECK_RECON_STATUS:
    860 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
    861 	case RAIDFRAME_GET_COMPONENT_LABEL:
    862 	case RAIDFRAME_SET_COMPONENT_LABEL:
    863 	case RAIDFRAME_ADD_HOT_SPARE:
    864 	case RAIDFRAME_REMOVE_HOT_SPARE:
    865 	case RAIDFRAME_INIT_LABELS:
    866 	case RAIDFRAME_REBUILD_IN_PLACE:
    867 	case RAIDFRAME_CHECK_PARITY:
    868 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    869 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
    870 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    871 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
    872 	case RAIDFRAME_SET_AUTOCONFIG:
    873 	case RAIDFRAME_SET_ROOT:
    874 	case RAIDFRAME_DELETE_COMPONENT:
    875 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
    876 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    877 			return (ENXIO);
    878 	}
    879 
    880 	switch (cmd) {
    881 
    882 		/* configure the system */
    883 	case RAIDFRAME_CONFIGURE:
    884 
    885 		if (raidPtr->valid) {
    886 			/* There is a valid RAID set running on this unit! */
    887 			printf("raid%d: Device already configured!\n",unit);
    888 			return(EINVAL);
    889 		}
    890 
    891 		/* copy-in the configuration information */
    892 		/* data points to a pointer to the configuration structure */
    893 
    894 		u_cfg = *((RF_Config_t **) data);
    895 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    896 		if (k_cfg == NULL) {
    897 			return (ENOMEM);
    898 		}
    899 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    900 		    sizeof(RF_Config_t));
    901 		if (retcode) {
    902 			RF_Free(k_cfg, sizeof(RF_Config_t));
    903 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    904 				retcode));
    905 			return (retcode);
    906 		}
    907 		/* allocate a buffer for the layout-specific data, and copy it
    908 		 * in */
    909 		if (k_cfg->layoutSpecificSize) {
    910 			if (k_cfg->layoutSpecificSize > 10000) {
    911 				/* sanity check */
    912 				RF_Free(k_cfg, sizeof(RF_Config_t));
    913 				return (EINVAL);
    914 			}
    915 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    916 			    (u_char *));
    917 			if (specific_buf == NULL) {
    918 				RF_Free(k_cfg, sizeof(RF_Config_t));
    919 				return (ENOMEM);
    920 			}
    921 			retcode = copyin(k_cfg->layoutSpecific,
    922 			    (caddr_t) specific_buf,
    923 			    k_cfg->layoutSpecificSize);
    924 			if (retcode) {
    925 				RF_Free(k_cfg, sizeof(RF_Config_t));
    926 				RF_Free(specific_buf,
    927 					k_cfg->layoutSpecificSize);
    928 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    929 					retcode));
    930 				return (retcode);
    931 			}
    932 		} else
    933 			specific_buf = NULL;
    934 		k_cfg->layoutSpecific = specific_buf;
    935 
    936 		/* should do some kind of sanity check on the configuration.
    937 		 * Store the sum of all the bytes in the last byte? */
    938 
    939 		/* configure the system */
    940 
    941 		/*
    942 		 * Clear the entire RAID descriptor, just to make sure
    943 		 *  there is no stale data left in the case of a
    944 		 *  reconfiguration
    945 		 */
    946 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
    947 		raidPtr->raidid = unit;
    948 
    949 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    950 
    951 		if (retcode == 0) {
    952 
    953 			/* allow this many simultaneous IO's to
    954 			   this RAID device */
    955 			raidPtr->openings = RAIDOUTSTANDING;
    956 
    957 			raidinit(raidPtr);
    958 			rf_markalldirty(raidPtr);
    959 		}
    960 		/* free the buffers.  No return code here. */
    961 		if (k_cfg->layoutSpecificSize) {
    962 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    963 		}
    964 		RF_Free(k_cfg, sizeof(RF_Config_t));
    965 
    966 		return (retcode);
    967 
    968 		/* shutdown the system */
    969 	case RAIDFRAME_SHUTDOWN:
    970 
    971 		if ((error = raidlock(rs)) != 0)
    972 			return (error);
    973 
    974 		/*
    975 		 * If somebody has a partition mounted, we shouldn't
    976 		 * shutdown.
    977 		 */
    978 
    979 		part = DISKPART(dev);
    980 		pmask = (1 << part);
    981 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    982 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    983 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    984 			raidunlock(rs);
    985 			return (EBUSY);
    986 		}
    987 
    988 		retcode = rf_Shutdown(raidPtr);
    989 
    990 		/* It's no longer initialized... */
    991 		rs->sc_flags &= ~RAIDF_INITED;
    992 
    993 		/* Detach the disk. */
    994 		disk_detach(&rs->sc_dkdev);
    995 
    996 		raidunlock(rs);
    997 
    998 		return (retcode);
    999 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1000 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1001 		/* need to read the component label for the disk indicated
   1002 		   by row,column in clabel */
   1003 
   1004 		/* For practice, let's get it directly fromdisk, rather
   1005 		   than from the in-core copy */
   1006 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
   1007 			   (RF_ComponentLabel_t *));
   1008 		if (clabel == NULL)
   1009 			return (ENOMEM);
   1010 
   1011 		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
   1012 
   1013 		retcode = copyin( *clabel_ptr, clabel,
   1014 				  sizeof(RF_ComponentLabel_t));
   1015 
   1016 		if (retcode) {
   1017 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1018 			return(retcode);
   1019 		}
   1020 
   1021 		row = clabel->row;
   1022 		column = clabel->column;
   1023 
   1024 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1025 		    (column < 0) || (column >= raidPtr->numCol +
   1026 				     raidPtr->numSpare)) {
   1027 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1028 			return(EINVAL);
   1029 		}
   1030 
   1031 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1032 				raidPtr->raid_cinfo[row][column].ci_vp,
   1033 				clabel );
   1034 
   1035 		retcode = copyout((caddr_t) clabel,
   1036 				  (caddr_t) *clabel_ptr,
   1037 				  sizeof(RF_ComponentLabel_t));
   1038 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1039 		return (retcode);
   1040 
   1041 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1042 		clabel = (RF_ComponentLabel_t *) data;
   1043 
   1044 		/* XXX check the label for valid stuff... */
   1045 		/* Note that some things *should not* get modified --
   1046 		   the user should be re-initing the labels instead of
   1047 		   trying to patch things.
   1048 		   */
   1049 
   1050 		raidid = raidPtr->raidid;
   1051 		printf("raid%d: Got component label:\n", raidid);
   1052 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1053 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1054 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1055 		printf("raid%d: Row: %d\n", raidid, clabel->row);
   1056 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1057 		printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
   1058 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1059 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1060 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1061 
   1062 		row = clabel->row;
   1063 		column = clabel->column;
   1064 
   1065 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1066 		    (column < 0) || (column >= raidPtr->numCol)) {
   1067 			return(EINVAL);
   1068 		}
   1069 
   1070 		/* XXX this isn't allowed to do anything for now :-) */
   1071 
   1072 		/* XXX and before it is, we need to fill in the rest
   1073 		   of the fields!?!?!?! */
   1074 #if 0
   1075 		raidwrite_component_label(
   1076                             raidPtr->Disks[row][column].dev,
   1077 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1078 			    clabel );
   1079 #endif
   1080 		return (0);
   1081 
   1082 	case RAIDFRAME_INIT_LABELS:
   1083 		clabel = (RF_ComponentLabel_t *) data;
   1084 		/*
   1085 		   we only want the serial number from
   1086 		   the above.  We get all the rest of the information
   1087 		   from the config that was used to create this RAID
   1088 		   set.
   1089 		   */
   1090 
   1091 		raidPtr->serial_number = clabel->serial_number;
   1092 
   1093 		raid_init_component_label(raidPtr, &ci_label);
   1094 		ci_label.serial_number = clabel->serial_number;
   1095 
   1096 		for(row=0;row<raidPtr->numRow;row++) {
   1097 			ci_label.row = row;
   1098 			for(column=0;column<raidPtr->numCol;column++) {
   1099 				diskPtr = &raidPtr->Disks[row][column];
   1100 				if (!RF_DEAD_DISK(diskPtr->status)) {
   1101 					ci_label.partitionSize = diskPtr->partitionSize;
   1102 					ci_label.column = column;
   1103 					raidwrite_component_label(
   1104 					  raidPtr->Disks[row][column].dev,
   1105 					  raidPtr->raid_cinfo[row][column].ci_vp,
   1106 					  &ci_label );
   1107 				}
   1108 			}
   1109 		}
   1110 
   1111 		return (retcode);
   1112 	case RAIDFRAME_SET_AUTOCONFIG:
   1113 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1114 		printf("raid%d: New autoconfig value is: %d\n",
   1115 		       raidPtr->raidid, d);
   1116 		*(int *) data = d;
   1117 		return (retcode);
   1118 
   1119 	case RAIDFRAME_SET_ROOT:
   1120 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1121 		printf("raid%d: New rootpartition value is: %d\n",
   1122 		       raidPtr->raidid, d);
   1123 		*(int *) data = d;
   1124 		return (retcode);
   1125 
   1126 		/* initialize all parity */
   1127 	case RAIDFRAME_REWRITEPARITY:
   1128 
   1129 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1130 			/* Parity for RAID 0 is trivially correct */
   1131 			raidPtr->parity_good = RF_RAID_CLEAN;
   1132 			return(0);
   1133 		}
   1134 
   1135 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1136 			/* Re-write is already in progress! */
   1137 			return(EINVAL);
   1138 		}
   1139 
   1140 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1141 					   rf_RewriteParityThread,
   1142 					   raidPtr,"raid_parity");
   1143 		return (retcode);
   1144 
   1145 
   1146 	case RAIDFRAME_ADD_HOT_SPARE:
   1147 		sparePtr = (RF_SingleComponent_t *) data;
   1148 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1149 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1150 		return(retcode);
   1151 
   1152 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1153 		return(retcode);
   1154 
   1155 	case RAIDFRAME_DELETE_COMPONENT:
   1156 		componentPtr = (RF_SingleComponent_t *)data;
   1157 		memcpy( &component, componentPtr,
   1158 			sizeof(RF_SingleComponent_t));
   1159 		retcode = rf_delete_component(raidPtr, &component);
   1160 		return(retcode);
   1161 
   1162 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1163 		componentPtr = (RF_SingleComponent_t *)data;
   1164 		memcpy( &component, componentPtr,
   1165 			sizeof(RF_SingleComponent_t));
   1166 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1167 		return(retcode);
   1168 
   1169 	case RAIDFRAME_REBUILD_IN_PLACE:
   1170 
   1171 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1172 			/* Can't do this on a RAID 0!! */
   1173 			return(EINVAL);
   1174 		}
   1175 
   1176 		if (raidPtr->recon_in_progress == 1) {
   1177 			/* a reconstruct is already in progress! */
   1178 			return(EINVAL);
   1179 		}
   1180 
   1181 		componentPtr = (RF_SingleComponent_t *) data;
   1182 		memcpy( &component, componentPtr,
   1183 			sizeof(RF_SingleComponent_t));
   1184 		row = component.row;
   1185 		column = component.column;
   1186 		printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
   1187 		       row, column);
   1188 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1189 		    (column < 0) || (column >= raidPtr->numCol)) {
   1190 			return(EINVAL);
   1191 		}
   1192 
   1193 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1194 		if (rrcopy == NULL)
   1195 			return(ENOMEM);
   1196 
   1197 		rrcopy->raidPtr = (void *) raidPtr;
   1198 		rrcopy->row = row;
   1199 		rrcopy->col = column;
   1200 
   1201 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1202 					   rf_ReconstructInPlaceThread,
   1203 					   rrcopy,"raid_reconip");
   1204 		return(retcode);
   1205 
   1206 	case RAIDFRAME_GET_INFO:
   1207 		if (!raidPtr->valid)
   1208 			return (ENODEV);
   1209 		ucfgp = (RF_DeviceConfig_t **) data;
   1210 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1211 			  (RF_DeviceConfig_t *));
   1212 		if (d_cfg == NULL)
   1213 			return (ENOMEM);
   1214 		memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
   1215 		d_cfg->rows = raidPtr->numRow;
   1216 		d_cfg->cols = raidPtr->numCol;
   1217 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1218 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1219 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1220 			return (ENOMEM);
   1221 		}
   1222 		d_cfg->nspares = raidPtr->numSpare;
   1223 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1224 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1225 			return (ENOMEM);
   1226 		}
   1227 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1228 		d = 0;
   1229 		for (i = 0; i < d_cfg->rows; i++) {
   1230 			for (j = 0; j < d_cfg->cols; j++) {
   1231 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1232 				d++;
   1233 			}
   1234 		}
   1235 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1236 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1237 		}
   1238 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1239 				  sizeof(RF_DeviceConfig_t));
   1240 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1241 
   1242 		return (retcode);
   1243 
   1244 	case RAIDFRAME_CHECK_PARITY:
   1245 		*(int *) data = raidPtr->parity_good;
   1246 		return (0);
   1247 
   1248 	case RAIDFRAME_RESET_ACCTOTALS:
   1249 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1250 		return (0);
   1251 
   1252 	case RAIDFRAME_GET_ACCTOTALS:
   1253 		totals = (RF_AccTotals_t *) data;
   1254 		*totals = raidPtr->acc_totals;
   1255 		return (0);
   1256 
   1257 	case RAIDFRAME_KEEP_ACCTOTALS:
   1258 		raidPtr->keep_acc_totals = *(int *)data;
   1259 		return (0);
   1260 
   1261 	case RAIDFRAME_GET_SIZE:
   1262 		*(int *) data = raidPtr->totalSectors;
   1263 		return (0);
   1264 
   1265 		/* fail a disk & optionally start reconstruction */
   1266 	case RAIDFRAME_FAIL_DISK:
   1267 
   1268 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1269 			/* Can't do this on a RAID 0!! */
   1270 			return(EINVAL);
   1271 		}
   1272 
   1273 		rr = (struct rf_recon_req *) data;
   1274 
   1275 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1276 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1277 			return (EINVAL);
   1278 
   1279 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1280 		       unit, rr->row, rr->col);
   1281 
   1282 		/* make a copy of the recon request so that we don't rely on
   1283 		 * the user's buffer */
   1284 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1285 		if (rrcopy == NULL)
   1286 			return(ENOMEM);
   1287 		memcpy(rrcopy, rr, sizeof(*rr));
   1288 		rrcopy->raidPtr = (void *) raidPtr;
   1289 
   1290 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1291 					   rf_ReconThread,
   1292 					   rrcopy,"raid_recon");
   1293 		return (0);
   1294 
   1295 		/* invoke a copyback operation after recon on whatever disk
   1296 		 * needs it, if any */
   1297 	case RAIDFRAME_COPYBACK:
   1298 
   1299 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1300 			/* This makes no sense on a RAID 0!! */
   1301 			return(EINVAL);
   1302 		}
   1303 
   1304 		if (raidPtr->copyback_in_progress == 1) {
   1305 			/* Copyback is already in progress! */
   1306 			return(EINVAL);
   1307 		}
   1308 
   1309 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1310 					   rf_CopybackThread,
   1311 					   raidPtr,"raid_copyback");
   1312 		return (retcode);
   1313 
   1314 		/* return the percentage completion of reconstruction */
   1315 	case RAIDFRAME_CHECK_RECON_STATUS:
   1316 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1317 			/* This makes no sense on a RAID 0, so tell the
   1318 			   user it's done. */
   1319 			*(int *) data = 100;
   1320 			return(0);
   1321 		}
   1322 		row = 0; /* XXX we only consider a single row... */
   1323 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1324 			*(int *) data = 100;
   1325 		else
   1326 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1327 		return (0);
   1328 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1329 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1330 		row = 0; /* XXX we only consider a single row... */
   1331 		if (raidPtr->status[row] != rf_rs_reconstructing) {
   1332 			progressInfo.remaining = 0;
   1333 			progressInfo.completed = 100;
   1334 			progressInfo.total = 100;
   1335 		} else {
   1336 			progressInfo.total =
   1337 				raidPtr->reconControl[row]->numRUsTotal;
   1338 			progressInfo.completed =
   1339 				raidPtr->reconControl[row]->numRUsComplete;
   1340 			progressInfo.remaining = progressInfo.total -
   1341 				progressInfo.completed;
   1342 		}
   1343 		retcode = copyout((caddr_t) &progressInfo,
   1344 				  (caddr_t) *progressInfoPtr,
   1345 				  sizeof(RF_ProgressInfo_t));
   1346 		return (retcode);
   1347 
   1348 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1349 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1350 			/* This makes no sense on a RAID 0, so tell the
   1351 			   user it's done. */
   1352 			*(int *) data = 100;
   1353 			return(0);
   1354 		}
   1355 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1356 			*(int *) data = 100 *
   1357 				raidPtr->parity_rewrite_stripes_done /
   1358 				raidPtr->Layout.numStripe;
   1359 		} else {
   1360 			*(int *) data = 100;
   1361 		}
   1362 		return (0);
   1363 
   1364 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1365 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1366 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1367 			progressInfo.total = raidPtr->Layout.numStripe;
   1368 			progressInfo.completed =
   1369 				raidPtr->parity_rewrite_stripes_done;
   1370 			progressInfo.remaining = progressInfo.total -
   1371 				progressInfo.completed;
   1372 		} else {
   1373 			progressInfo.remaining = 0;
   1374 			progressInfo.completed = 100;
   1375 			progressInfo.total = 100;
   1376 		}
   1377 		retcode = copyout((caddr_t) &progressInfo,
   1378 				  (caddr_t) *progressInfoPtr,
   1379 				  sizeof(RF_ProgressInfo_t));
   1380 		return (retcode);
   1381 
   1382 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1383 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1384 			/* This makes no sense on a RAID 0 */
   1385 			*(int *) data = 100;
   1386 			return(0);
   1387 		}
   1388 		if (raidPtr->copyback_in_progress == 1) {
   1389 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1390 				raidPtr->Layout.numStripe;
   1391 		} else {
   1392 			*(int *) data = 100;
   1393 		}
   1394 		return (0);
   1395 
   1396 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1397 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1398 		if (raidPtr->copyback_in_progress == 1) {
   1399 			progressInfo.total = raidPtr->Layout.numStripe;
   1400 			progressInfo.completed =
   1401 				raidPtr->copyback_stripes_done;
   1402 			progressInfo.remaining = progressInfo.total -
   1403 				progressInfo.completed;
   1404 		} else {
   1405 			progressInfo.remaining = 0;
   1406 			progressInfo.completed = 100;
   1407 			progressInfo.total = 100;
   1408 		}
   1409 		retcode = copyout((caddr_t) &progressInfo,
   1410 				  (caddr_t) *progressInfoPtr,
   1411 				  sizeof(RF_ProgressInfo_t));
   1412 		return (retcode);
   1413 
   1414 		/* the sparetable daemon calls this to wait for the kernel to
   1415 		 * need a spare table. this ioctl does not return until a
   1416 		 * spare table is needed. XXX -- calling mpsleep here in the
   1417 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1418 		 * -- I should either compute the spare table in the kernel,
   1419 		 * or have a different -- XXX XXX -- interface (a different
   1420 		 * character device) for delivering the table     -- XXX */
   1421 #if 0
   1422 	case RAIDFRAME_SPARET_WAIT:
   1423 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1424 		while (!rf_sparet_wait_queue)
   1425 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1426 		waitreq = rf_sparet_wait_queue;
   1427 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1428 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1429 
   1430 		/* structure assignment */
   1431 		*((RF_SparetWait_t *) data) = *waitreq;
   1432 
   1433 		RF_Free(waitreq, sizeof(*waitreq));
   1434 		return (0);
   1435 
   1436 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1437 		 * code in it that will cause the dameon to exit */
   1438 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1439 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1440 		waitreq->fcol = -1;
   1441 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1442 		waitreq->next = rf_sparet_wait_queue;
   1443 		rf_sparet_wait_queue = waitreq;
   1444 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1445 		wakeup(&rf_sparet_wait_queue);
   1446 		return (0);
   1447 
   1448 		/* used by the spare table daemon to deliver a spare table
   1449 		 * into the kernel */
   1450 	case RAIDFRAME_SEND_SPARET:
   1451 
   1452 		/* install the spare table */
   1453 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1454 
   1455 		/* respond to the requestor.  the return status of the spare
   1456 		 * table installation is passed in the "fcol" field */
   1457 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1458 		waitreq->fcol = retcode;
   1459 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1460 		waitreq->next = rf_sparet_resp_queue;
   1461 		rf_sparet_resp_queue = waitreq;
   1462 		wakeup(&rf_sparet_resp_queue);
   1463 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1464 
   1465 		return (retcode);
   1466 #endif
   1467 
   1468 	default:
   1469 		break; /* fall through to the os-specific code below */
   1470 
   1471 	}
   1472 
   1473 	if (!raidPtr->valid)
   1474 		return (EINVAL);
   1475 
   1476 	/*
   1477 	 * Add support for "regular" device ioctls here.
   1478 	 */
   1479 
   1480 	switch (cmd) {
   1481 	case DIOCGDINFO:
   1482 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1483 		break;
   1484 #ifdef __HAVE_OLD_DISKLABEL
   1485 	case ODIOCGDINFO:
   1486 		newlabel = *(rs->sc_dkdev.dk_label);
   1487 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1488 			return ENOTTY;
   1489 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1490 		break;
   1491 #endif
   1492 
   1493 	case DIOCGPART:
   1494 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1495 		((struct partinfo *) data)->part =
   1496 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1497 		break;
   1498 
   1499 	case DIOCWDINFO:
   1500 	case DIOCSDINFO:
   1501 #ifdef __HAVE_OLD_DISKLABEL
   1502 	case ODIOCWDINFO:
   1503 	case ODIOCSDINFO:
   1504 #endif
   1505 	{
   1506 		struct disklabel *lp;
   1507 #ifdef __HAVE_OLD_DISKLABEL
   1508 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1509 			memset(&newlabel, 0, sizeof newlabel);
   1510 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1511 			lp = &newlabel;
   1512 		} else
   1513 #endif
   1514 		lp = (struct disklabel *)data;
   1515 
   1516 		if ((error = raidlock(rs)) != 0)
   1517 			return (error);
   1518 
   1519 		rs->sc_flags |= RAIDF_LABELLING;
   1520 
   1521 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1522 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1523 		if (error == 0) {
   1524 			if (cmd == DIOCWDINFO
   1525 #ifdef __HAVE_OLD_DISKLABEL
   1526 			    || cmd == ODIOCWDINFO
   1527 #endif
   1528 			   )
   1529 				error = writedisklabel(RAIDLABELDEV(dev),
   1530 				    raidstrategy, rs->sc_dkdev.dk_label,
   1531 				    rs->sc_dkdev.dk_cpulabel);
   1532 		}
   1533 		rs->sc_flags &= ~RAIDF_LABELLING;
   1534 
   1535 		raidunlock(rs);
   1536 
   1537 		if (error)
   1538 			return (error);
   1539 		break;
   1540 	}
   1541 
   1542 	case DIOCWLABEL:
   1543 		if (*(int *) data != 0)
   1544 			rs->sc_flags |= RAIDF_WLABEL;
   1545 		else
   1546 			rs->sc_flags &= ~RAIDF_WLABEL;
   1547 		break;
   1548 
   1549 	case DIOCGDEFLABEL:
   1550 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1551 		break;
   1552 
   1553 #ifdef __HAVE_OLD_DISKLABEL
   1554 	case ODIOCGDEFLABEL:
   1555 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1556 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1557 			return ENOTTY;
   1558 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1559 		break;
   1560 #endif
   1561 
   1562 	default:
   1563 		retcode = ENOTTY;
   1564 	}
   1565 	return (retcode);
   1566 
   1567 }
   1568 
   1569 
   1570 /* raidinit -- complete the rest of the initialization for the
   1571    RAIDframe device.  */
   1572 
   1573 
   1574 static void
   1575 raidinit(raidPtr)
   1576 	RF_Raid_t *raidPtr;
   1577 {
   1578 	struct raid_softc *rs;
   1579 	int     unit;
   1580 
   1581 	unit = raidPtr->raidid;
   1582 
   1583 	rs = &raid_softc[unit];
   1584 
   1585 	/* XXX should check return code first... */
   1586 	rs->sc_flags |= RAIDF_INITED;
   1587 
   1588 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1589 
   1590 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1591 
   1592 	/* disk_attach actually creates space for the CPU disklabel, among
   1593 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1594 	 * with disklabels. */
   1595 
   1596 	disk_attach(&rs->sc_dkdev);
   1597 
   1598 	/* XXX There may be a weird interaction here between this, and
   1599 	 * protectedSectors, as used in RAIDframe.  */
   1600 
   1601 	rs->sc_size = raidPtr->totalSectors;
   1602 
   1603 }
   1604 
   1605 /* wake up the daemon & tell it to get us a spare table
   1606  * XXX
   1607  * the entries in the queues should be tagged with the raidPtr
   1608  * so that in the extremely rare case that two recons happen at once,
   1609  * we know for which device were requesting a spare table
   1610  * XXX
   1611  *
   1612  * XXX This code is not currently used. GO
   1613  */
   1614 int
   1615 rf_GetSpareTableFromDaemon(req)
   1616 	RF_SparetWait_t *req;
   1617 {
   1618 	int     retcode;
   1619 
   1620 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1621 	req->next = rf_sparet_wait_queue;
   1622 	rf_sparet_wait_queue = req;
   1623 	wakeup(&rf_sparet_wait_queue);
   1624 
   1625 	/* mpsleep unlocks the mutex */
   1626 	while (!rf_sparet_resp_queue) {
   1627 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1628 		    "raidframe getsparetable", 0);
   1629 	}
   1630 	req = rf_sparet_resp_queue;
   1631 	rf_sparet_resp_queue = req->next;
   1632 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1633 
   1634 	retcode = req->fcol;
   1635 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1636 					 * alloc'd */
   1637 	return (retcode);
   1638 }
   1639 
   1640 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1641  * bp & passes it down.
   1642  * any calls originating in the kernel must use non-blocking I/O
   1643  * do some extra sanity checking to return "appropriate" error values for
   1644  * certain conditions (to make some standard utilities work)
   1645  *
   1646  * Formerly known as: rf_DoAccessKernel
   1647  */
   1648 void
   1649 raidstart(raidPtr)
   1650 	RF_Raid_t *raidPtr;
   1651 {
   1652 	RF_SectorCount_t num_blocks, pb, sum;
   1653 	RF_RaidAddr_t raid_addr;
   1654 	int     retcode;
   1655 	struct partition *pp;
   1656 	daddr_t blocknum;
   1657 	int     unit;
   1658 	struct raid_softc *rs;
   1659 	int     do_async;
   1660 	struct buf *bp;
   1661 
   1662 	unit = raidPtr->raidid;
   1663 	rs = &raid_softc[unit];
   1664 
   1665 	/* quick check to see if anything has died recently */
   1666 	RF_LOCK_MUTEX(raidPtr->mutex);
   1667 	if (raidPtr->numNewFailures > 0) {
   1668 		rf_update_component_labels(raidPtr,
   1669 					   RF_NORMAL_COMPONENT_UPDATE);
   1670 		raidPtr->numNewFailures--;
   1671 	}
   1672 
   1673 	/* Check to see if we're at the limit... */
   1674 	while (raidPtr->openings > 0) {
   1675 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1676 
   1677 		/* get the next item, if any, from the queue */
   1678 		if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
   1679 			/* nothing more to do */
   1680 			return;
   1681 		}
   1682 
   1683 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1684 		 * partition.. Need to make it absolute to the underlying
   1685 		 * device.. */
   1686 
   1687 		blocknum = bp->b_blkno;
   1688 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1689 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1690 			blocknum += pp->p_offset;
   1691 		}
   1692 
   1693 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1694 			    (int) blocknum));
   1695 
   1696 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1697 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1698 
   1699 		/* *THIS* is where we adjust what block we're going to...
   1700 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1701 		raid_addr = blocknum;
   1702 
   1703 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1704 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1705 		sum = raid_addr + num_blocks + pb;
   1706 		if (1 || rf_debugKernelAccess) {
   1707 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1708 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1709 				    (int) pb, (int) bp->b_resid));
   1710 		}
   1711 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1712 		    || (sum < num_blocks) || (sum < pb)) {
   1713 			bp->b_error = ENOSPC;
   1714 			bp->b_flags |= B_ERROR;
   1715 			bp->b_resid = bp->b_bcount;
   1716 			biodone(bp);
   1717 			RF_LOCK_MUTEX(raidPtr->mutex);
   1718 			continue;
   1719 		}
   1720 		/*
   1721 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1722 		 */
   1723 
   1724 		if (bp->b_bcount & raidPtr->sectorMask) {
   1725 			bp->b_error = EINVAL;
   1726 			bp->b_flags |= B_ERROR;
   1727 			bp->b_resid = bp->b_bcount;
   1728 			biodone(bp);
   1729 			RF_LOCK_MUTEX(raidPtr->mutex);
   1730 			continue;
   1731 
   1732 		}
   1733 		db1_printf(("Calling DoAccess..\n"));
   1734 
   1735 
   1736 		RF_LOCK_MUTEX(raidPtr->mutex);
   1737 		raidPtr->openings--;
   1738 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1739 
   1740 		/*
   1741 		 * Everything is async.
   1742 		 */
   1743 		do_async = 1;
   1744 
   1745 		disk_busy(&rs->sc_dkdev);
   1746 
   1747 		/* XXX we're still at splbio() here... do we *really*
   1748 		   need to be? */
   1749 
   1750 		/* don't ever condition on bp->b_flags & B_WRITE.
   1751 		 * always condition on B_READ instead */
   1752 
   1753 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1754 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1755 				      do_async, raid_addr, num_blocks,
   1756 				      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   1757 
   1758 		RF_LOCK_MUTEX(raidPtr->mutex);
   1759 	}
   1760 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1761 }
   1762 
   1763 
   1764 
   1765 
   1766 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1767 
   1768 int
   1769 rf_DispatchKernelIO(queue, req)
   1770 	RF_DiskQueue_t *queue;
   1771 	RF_DiskQueueData_t *req;
   1772 {
   1773 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1774 	struct buf *bp;
   1775 	struct raidbuf *raidbp = NULL;
   1776 
   1777 	req->queue = queue;
   1778 
   1779 #if DIAGNOSTIC
   1780 	if (queue->raidPtr->raidid >= numraid) {
   1781 		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
   1782 		    numraid);
   1783 		panic("Invalid Unit number in rf_DispatchKernelIO");
   1784 	}
   1785 #endif
   1786 
   1787 	bp = req->bp;
   1788 #if 1
   1789 	/* XXX when there is a physical disk failure, someone is passing us a
   1790 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1791 	 * without taking a performance hit... (not sure where the real bug
   1792 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1793 
   1794 	if (bp->b_flags & B_ERROR) {
   1795 		bp->b_flags &= ~B_ERROR;
   1796 	}
   1797 	if (bp->b_error != 0) {
   1798 		bp->b_error = 0;
   1799 	}
   1800 #endif
   1801 	raidbp = pool_get(&raidframe_cbufpool, PR_NOWAIT);
   1802 
   1803 	/*
   1804 	 * context for raidiodone
   1805 	 */
   1806 	raidbp->rf_obp = bp;
   1807 	raidbp->req = req;
   1808 
   1809 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1810 
   1811 	switch (req->type) {
   1812 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1813 		/* XXX need to do something extra here.. */
   1814 		/* I'm leaving this in, as I've never actually seen it used,
   1815 		 * and I'd like folks to report it... GO */
   1816 		printf(("WAKEUP CALLED\n"));
   1817 		queue->numOutstanding++;
   1818 
   1819 		/* XXX need to glue the original buffer into this??  */
   1820 
   1821 		KernelWakeupFunc(&raidbp->rf_buf);
   1822 		break;
   1823 
   1824 	case RF_IO_TYPE_READ:
   1825 	case RF_IO_TYPE_WRITE:
   1826 
   1827 		if (req->tracerec) {
   1828 			RF_ETIMER_START(req->tracerec->timer);
   1829 		}
   1830 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1831 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1832 		    req->sectorOffset, req->numSector,
   1833 		    req->buf, KernelWakeupFunc, (void *) req,
   1834 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1835 
   1836 		if (rf_debugKernelAccess) {
   1837 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1838 				(long) bp->b_blkno));
   1839 		}
   1840 		queue->numOutstanding++;
   1841 		queue->last_deq_sector = req->sectorOffset;
   1842 		/* acc wouldn't have been let in if there were any pending
   1843 		 * reqs at any other priority */
   1844 		queue->curPriority = req->priority;
   1845 
   1846 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1847 			    req->type, queue->raidPtr->raidid,
   1848 			    queue->row, queue->col));
   1849 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1850 			(int) req->sectorOffset, (int) req->numSector,
   1851 			(int) (req->numSector <<
   1852 			    queue->raidPtr->logBytesPerSector),
   1853 			(int) queue->raidPtr->logBytesPerSector));
   1854 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1855 			raidbp->rf_buf.b_vp->v_numoutput++;
   1856 		}
   1857 		VOP_STRATEGY(&raidbp->rf_buf);
   1858 
   1859 		break;
   1860 
   1861 	default:
   1862 		panic("bad req->type in rf_DispatchKernelIO");
   1863 	}
   1864 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1865 
   1866 	return (0);
   1867 }
   1868 /* this is the callback function associated with a I/O invoked from
   1869    kernel code.
   1870  */
   1871 static void
   1872 KernelWakeupFunc(vbp)
   1873 	struct buf *vbp;
   1874 {
   1875 	RF_DiskQueueData_t *req = NULL;
   1876 	RF_DiskQueue_t *queue;
   1877 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1878 	struct buf *bp;
   1879 	int s;
   1880 
   1881 	s = splbio();
   1882 	db1_printf(("recovering the request queue:\n"));
   1883 	req = raidbp->req;
   1884 
   1885 	bp = raidbp->rf_obp;
   1886 
   1887 	queue = (RF_DiskQueue_t *) req->queue;
   1888 
   1889 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1890 		bp->b_flags |= B_ERROR;
   1891 		bp->b_error = raidbp->rf_buf.b_error ?
   1892 		    raidbp->rf_buf.b_error : EIO;
   1893 	}
   1894 
   1895 	/* XXX methinks this could be wrong... */
   1896 #if 1
   1897 	bp->b_resid = raidbp->rf_buf.b_resid;
   1898 #endif
   1899 
   1900 	if (req->tracerec) {
   1901 		RF_ETIMER_STOP(req->tracerec->timer);
   1902 		RF_ETIMER_EVAL(req->tracerec->timer);
   1903 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1904 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1905 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1906 		req->tracerec->num_phys_ios++;
   1907 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1908 	}
   1909 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1910 
   1911 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1912 	 * ballistic, and mark the component as hosed... */
   1913 
   1914 	if (bp->b_flags & B_ERROR) {
   1915 		/* Mark the disk as dead */
   1916 		/* but only mark it once... */
   1917 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1918 		    rf_ds_optimal) {
   1919 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1920 			       queue->raidPtr->raidid,
   1921 			       queue->raidPtr->Disks[queue->row][queue->col].devname);
   1922 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1923 			    rf_ds_failed;
   1924 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1925 			queue->raidPtr->numFailures++;
   1926 			queue->raidPtr->numNewFailures++;
   1927 		} else {	/* Disk is already dead... */
   1928 			/* printf("Disk already marked as dead!\n"); */
   1929 		}
   1930 
   1931 	}
   1932 
   1933 	pool_put(&raidframe_cbufpool, raidbp);
   1934 
   1935 	/* Fill in the error value */
   1936 
   1937 	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
   1938 
   1939 	simple_lock(&queue->raidPtr->iodone_lock);
   1940 
   1941 	/* Drop this one on the "finished" queue... */
   1942 	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
   1943 
   1944 	/* Let the raidio thread know there is work to be done. */
   1945 	wakeup(&(queue->raidPtr->iodone));
   1946 
   1947 	simple_unlock(&queue->raidPtr->iodone_lock);
   1948 
   1949 	splx(s);
   1950 }
   1951 
   1952 
   1953 
   1954 /*
   1955  * initialize a buf structure for doing an I/O in the kernel.
   1956  */
   1957 static void
   1958 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
   1959        logBytesPerSector, b_proc)
   1960 	struct buf *bp;
   1961 	struct vnode *b_vp;
   1962 	unsigned rw_flag;
   1963 	dev_t dev;
   1964 	RF_SectorNum_t startSect;
   1965 	RF_SectorCount_t numSect;
   1966 	caddr_t buf;
   1967 	void (*cbFunc) (struct buf *);
   1968 	void *cbArg;
   1969 	int logBytesPerSector;
   1970 	struct proc *b_proc;
   1971 {
   1972 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1973 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1974 	bp->b_bcount = numSect << logBytesPerSector;
   1975 	bp->b_bufsize = bp->b_bcount;
   1976 	bp->b_error = 0;
   1977 	bp->b_dev = dev;
   1978 	bp->b_data = buf;
   1979 	bp->b_blkno = startSect;
   1980 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1981 	if (bp->b_bcount == 0) {
   1982 		panic("bp->b_bcount is zero in InitBP!!");
   1983 	}
   1984 	bp->b_proc = b_proc;
   1985 	bp->b_iodone = cbFunc;
   1986 	bp->b_vp = b_vp;
   1987 
   1988 }
   1989 
   1990 static void
   1991 raidgetdefaultlabel(raidPtr, rs, lp)
   1992 	RF_Raid_t *raidPtr;
   1993 	struct raid_softc *rs;
   1994 	struct disklabel *lp;
   1995 {
   1996 	db1_printf(("Building a default label...\n"));
   1997 	memset(lp, 0, sizeof(*lp));
   1998 
   1999 	/* fabricate a label... */
   2000 	lp->d_secperunit = raidPtr->totalSectors;
   2001 	lp->d_secsize = raidPtr->bytesPerSector;
   2002 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2003 	lp->d_ntracks = 4 * raidPtr->numCol;
   2004 	lp->d_ncylinders = raidPtr->totalSectors /
   2005 		(lp->d_nsectors * lp->d_ntracks);
   2006 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   2007 
   2008 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2009 	lp->d_type = DTYPE_RAID;
   2010 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2011 	lp->d_rpm = 3600;
   2012 	lp->d_interleave = 1;
   2013 	lp->d_flags = 0;
   2014 
   2015 	lp->d_partitions[RAW_PART].p_offset = 0;
   2016 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2017 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2018 	lp->d_npartitions = RAW_PART + 1;
   2019 
   2020 	lp->d_magic = DISKMAGIC;
   2021 	lp->d_magic2 = DISKMAGIC;
   2022 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2023 
   2024 }
   2025 /*
   2026  * Read the disklabel from the raid device.  If one is not present, fake one
   2027  * up.
   2028  */
   2029 static void
   2030 raidgetdisklabel(dev)
   2031 	dev_t   dev;
   2032 {
   2033 	int     unit = raidunit(dev);
   2034 	struct raid_softc *rs = &raid_softc[unit];
   2035 	char   *errstring;
   2036 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2037 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2038 	RF_Raid_t *raidPtr;
   2039 
   2040 	db1_printf(("Getting the disklabel...\n"));
   2041 
   2042 	memset(clp, 0, sizeof(*clp));
   2043 
   2044 	raidPtr = raidPtrs[unit];
   2045 
   2046 	raidgetdefaultlabel(raidPtr, rs, lp);
   2047 
   2048 	/*
   2049 	 * Call the generic disklabel extraction routine.
   2050 	 */
   2051 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2052 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2053 	if (errstring)
   2054 		raidmakedisklabel(rs);
   2055 	else {
   2056 		int     i;
   2057 		struct partition *pp;
   2058 
   2059 		/*
   2060 		 * Sanity check whether the found disklabel is valid.
   2061 		 *
   2062 		 * This is necessary since total size of the raid device
   2063 		 * may vary when an interleave is changed even though exactly
   2064 		 * same componets are used, and old disklabel may used
   2065 		 * if that is found.
   2066 		 */
   2067 		if (lp->d_secperunit != rs->sc_size)
   2068 			printf("raid%d: WARNING: %s: "
   2069 			    "total sector size in disklabel (%d) != "
   2070 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
   2071 			    lp->d_secperunit, (long) rs->sc_size);
   2072 		for (i = 0; i < lp->d_npartitions; i++) {
   2073 			pp = &lp->d_partitions[i];
   2074 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2075 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2076 				       "exceeds the size of raid (%ld)\n",
   2077 				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2078 		}
   2079 	}
   2080 
   2081 }
   2082 /*
   2083  * Take care of things one might want to take care of in the event
   2084  * that a disklabel isn't present.
   2085  */
   2086 static void
   2087 raidmakedisklabel(rs)
   2088 	struct raid_softc *rs;
   2089 {
   2090 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2091 	db1_printf(("Making a label..\n"));
   2092 
   2093 	/*
   2094 	 * For historical reasons, if there's no disklabel present
   2095 	 * the raw partition must be marked FS_BSDFFS.
   2096 	 */
   2097 
   2098 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2099 
   2100 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2101 
   2102 	lp->d_checksum = dkcksum(lp);
   2103 }
   2104 /*
   2105  * Lookup the provided name in the filesystem.  If the file exists,
   2106  * is a valid block device, and isn't being used by anyone else,
   2107  * set *vpp to the file's vnode.
   2108  * You'll find the original of this in ccd.c
   2109  */
   2110 int
   2111 raidlookup(path, p, vpp)
   2112 	char   *path;
   2113 	struct proc *p;
   2114 	struct vnode **vpp;	/* result */
   2115 {
   2116 	struct nameidata nd;
   2117 	struct vnode *vp;
   2118 	struct vattr va;
   2119 	int     error;
   2120 
   2121 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2122 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2123 #if 0
   2124 		printf("RAIDframe: vn_open returned %d\n", error);
   2125 #endif
   2126 		return (error);
   2127 	}
   2128 	vp = nd.ni_vp;
   2129 	if (vp->v_usecount > 1) {
   2130 		VOP_UNLOCK(vp, 0);
   2131 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2132 		return (EBUSY);
   2133 	}
   2134 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2135 		VOP_UNLOCK(vp, 0);
   2136 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2137 		return (error);
   2138 	}
   2139 	/* XXX: eventually we should handle VREG, too. */
   2140 	if (va.va_type != VBLK) {
   2141 		VOP_UNLOCK(vp, 0);
   2142 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2143 		return (ENOTBLK);
   2144 	}
   2145 	VOP_UNLOCK(vp, 0);
   2146 	*vpp = vp;
   2147 	return (0);
   2148 }
   2149 /*
   2150  * Wait interruptibly for an exclusive lock.
   2151  *
   2152  * XXX
   2153  * Several drivers do this; it should be abstracted and made MP-safe.
   2154  * (Hmm... where have we seen this warning before :->  GO )
   2155  */
   2156 static int
   2157 raidlock(rs)
   2158 	struct raid_softc *rs;
   2159 {
   2160 	int     error;
   2161 
   2162 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2163 		rs->sc_flags |= RAIDF_WANTED;
   2164 		if ((error =
   2165 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2166 			return (error);
   2167 	}
   2168 	rs->sc_flags |= RAIDF_LOCKED;
   2169 	return (0);
   2170 }
   2171 /*
   2172  * Unlock and wake up any waiters.
   2173  */
   2174 static void
   2175 raidunlock(rs)
   2176 	struct raid_softc *rs;
   2177 {
   2178 
   2179 	rs->sc_flags &= ~RAIDF_LOCKED;
   2180 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2181 		rs->sc_flags &= ~RAIDF_WANTED;
   2182 		wakeup(rs);
   2183 	}
   2184 }
   2185 
   2186 
   2187 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2188 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2189 
   2190 int
   2191 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2192 {
   2193 	RF_ComponentLabel_t clabel;
   2194 	raidread_component_label(dev, b_vp, &clabel);
   2195 	clabel.mod_counter = mod_counter;
   2196 	clabel.clean = RF_RAID_CLEAN;
   2197 	raidwrite_component_label(dev, b_vp, &clabel);
   2198 	return(0);
   2199 }
   2200 
   2201 
   2202 int
   2203 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2204 {
   2205 	RF_ComponentLabel_t clabel;
   2206 	raidread_component_label(dev, b_vp, &clabel);
   2207 	clabel.mod_counter = mod_counter;
   2208 	clabel.clean = RF_RAID_DIRTY;
   2209 	raidwrite_component_label(dev, b_vp, &clabel);
   2210 	return(0);
   2211 }
   2212 
   2213 /* ARGSUSED */
   2214 int
   2215 raidread_component_label(dev, b_vp, clabel)
   2216 	dev_t dev;
   2217 	struct vnode *b_vp;
   2218 	RF_ComponentLabel_t *clabel;
   2219 {
   2220 	struct buf *bp;
   2221 	const struct bdevsw *bdev;
   2222 	int error;
   2223 
   2224 	/* XXX should probably ensure that we don't try to do this if
   2225 	   someone has changed rf_protected_sectors. */
   2226 
   2227 	if (b_vp == NULL) {
   2228 		/* For whatever reason, this component is not valid.
   2229 		   Don't try to read a component label from it. */
   2230 		return(EINVAL);
   2231 	}
   2232 
   2233 	/* get a block of the appropriate size... */
   2234 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2235 	bp->b_dev = dev;
   2236 
   2237 	/* get our ducks in a row for the read */
   2238 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2239 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2240 	bp->b_flags |= B_READ;
   2241  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2242 
   2243 	bdev = bdevsw_lookup(bp->b_dev);
   2244 	if (bdev == NULL)
   2245 		return (ENXIO);
   2246 	(*bdev->d_strategy)(bp);
   2247 
   2248 	error = biowait(bp);
   2249 
   2250 	if (!error) {
   2251 		memcpy(clabel, bp->b_data,
   2252 		       sizeof(RF_ComponentLabel_t));
   2253 #if 0
   2254 		rf_print_component_label( clabel );
   2255 #endif
   2256         } else {
   2257 #if 0
   2258 		printf("Failed to read RAID component label!\n");
   2259 #endif
   2260 	}
   2261 
   2262 	brelse(bp);
   2263 	return(error);
   2264 }
   2265 /* ARGSUSED */
   2266 int
   2267 raidwrite_component_label(dev, b_vp, clabel)
   2268 	dev_t dev;
   2269 	struct vnode *b_vp;
   2270 	RF_ComponentLabel_t *clabel;
   2271 {
   2272 	struct buf *bp;
   2273 	const struct bdevsw *bdev;
   2274 	int error;
   2275 
   2276 	/* get a block of the appropriate size... */
   2277 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2278 	bp->b_dev = dev;
   2279 
   2280 	/* get our ducks in a row for the write */
   2281 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2282 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2283 	bp->b_flags |= B_WRITE;
   2284  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2285 
   2286 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2287 
   2288 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2289 
   2290 	bdev = bdevsw_lookup(bp->b_dev);
   2291 	if (bdev == NULL)
   2292 		return (ENXIO);
   2293 	(*bdev->d_strategy)(bp);
   2294 	error = biowait(bp);
   2295 	brelse(bp);
   2296 	if (error) {
   2297 #if 1
   2298 		printf("Failed to write RAID component info!\n");
   2299 #endif
   2300 	}
   2301 
   2302 	return(error);
   2303 }
   2304 
   2305 void
   2306 rf_markalldirty(raidPtr)
   2307 	RF_Raid_t *raidPtr;
   2308 {
   2309 	RF_ComponentLabel_t clabel;
   2310 	int r,c;
   2311 
   2312 	raidPtr->mod_counter++;
   2313 	for (r = 0; r < raidPtr->numRow; r++) {
   2314 		for (c = 0; c < raidPtr->numCol; c++) {
   2315 			/* we don't want to touch (at all) a disk that has
   2316 			   failed */
   2317 			if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
   2318 				raidread_component_label(
   2319 					raidPtr->Disks[r][c].dev,
   2320 					raidPtr->raid_cinfo[r][c].ci_vp,
   2321 					&clabel);
   2322 				if (clabel.status == rf_ds_spared) {
   2323 					/* XXX do something special...
   2324 					 but whatever you do, don't
   2325 					 try to access it!! */
   2326 				} else {
   2327 #if 0
   2328 				clabel.status =
   2329 					raidPtr->Disks[r][c].status;
   2330 				raidwrite_component_label(
   2331 					raidPtr->Disks[r][c].dev,
   2332 					raidPtr->raid_cinfo[r][c].ci_vp,
   2333 					&clabel);
   2334 #endif
   2335 				raidmarkdirty(
   2336 				       raidPtr->Disks[r][c].dev,
   2337 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2338 				       raidPtr->mod_counter);
   2339 				}
   2340 			}
   2341 		}
   2342 	}
   2343 	/* printf("Component labels marked dirty.\n"); */
   2344 #if 0
   2345 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2346 		sparecol = raidPtr->numCol + c;
   2347 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2348 			/*
   2349 
   2350 			   XXX this is where we get fancy and map this spare
   2351 			   into it's correct spot in the array.
   2352 
   2353 			 */
   2354 			/*
   2355 
   2356 			   we claim this disk is "optimal" if it's
   2357 			   rf_ds_used_spare, as that means it should be
   2358 			   directly substitutable for the disk it replaced.
   2359 			   We note that too...
   2360 
   2361 			 */
   2362 
   2363 			for(i=0;i<raidPtr->numRow;i++) {
   2364 				for(j=0;j<raidPtr->numCol;j++) {
   2365 					if ((raidPtr->Disks[i][j].spareRow ==
   2366 					     r) &&
   2367 					    (raidPtr->Disks[i][j].spareCol ==
   2368 					     sparecol)) {
   2369 						srow = r;
   2370 						scol = sparecol;
   2371 						break;
   2372 					}
   2373 				}
   2374 			}
   2375 
   2376 			raidread_component_label(
   2377 				      raidPtr->Disks[r][sparecol].dev,
   2378 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2379 				      &clabel);
   2380 			/* make sure status is noted */
   2381 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2382 			clabel.mod_counter = raidPtr->mod_counter;
   2383 			clabel.serial_number = raidPtr->serial_number;
   2384 			clabel.row = srow;
   2385 			clabel.column = scol;
   2386 			clabel.num_rows = raidPtr->numRow;
   2387 			clabel.num_columns = raidPtr->numCol;
   2388 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2389 			clabel.status = rf_ds_optimal;
   2390 			raidwrite_component_label(
   2391 				      raidPtr->Disks[r][sparecol].dev,
   2392 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2393 				      &clabel);
   2394 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2395 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2396 		}
   2397 	}
   2398 
   2399 #endif
   2400 }
   2401 
   2402 
   2403 void
   2404 rf_update_component_labels(raidPtr, final)
   2405 	RF_Raid_t *raidPtr;
   2406 	int final;
   2407 {
   2408 	RF_ComponentLabel_t clabel;
   2409 	int sparecol;
   2410 	int r,c;
   2411 	int i,j;
   2412 	int srow, scol;
   2413 
   2414 	srow = -1;
   2415 	scol = -1;
   2416 
   2417 	/* XXX should do extra checks to make sure things really are clean,
   2418 	   rather than blindly setting the clean bit... */
   2419 
   2420 	raidPtr->mod_counter++;
   2421 
   2422 	for (r = 0; r < raidPtr->numRow; r++) {
   2423 		for (c = 0; c < raidPtr->numCol; c++) {
   2424 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2425 				raidread_component_label(
   2426 					raidPtr->Disks[r][c].dev,
   2427 					raidPtr->raid_cinfo[r][c].ci_vp,
   2428 					&clabel);
   2429 				/* make sure status is noted */
   2430 				clabel.status = rf_ds_optimal;
   2431 				/* bump the counter */
   2432 				clabel.mod_counter = raidPtr->mod_counter;
   2433 
   2434 				raidwrite_component_label(
   2435 					raidPtr->Disks[r][c].dev,
   2436 					raidPtr->raid_cinfo[r][c].ci_vp,
   2437 					&clabel);
   2438 				if (final == RF_FINAL_COMPONENT_UPDATE) {
   2439 					if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2440 						raidmarkclean(
   2441 							      raidPtr->Disks[r][c].dev,
   2442 							      raidPtr->raid_cinfo[r][c].ci_vp,
   2443 							      raidPtr->mod_counter);
   2444 					}
   2445 				}
   2446 			}
   2447 			/* else we don't touch it.. */
   2448 		}
   2449 	}
   2450 
   2451 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2452 		sparecol = raidPtr->numCol + c;
   2453 		/* Need to ensure that the reconstruct actually completed! */
   2454 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2455 			/*
   2456 
   2457 			   we claim this disk is "optimal" if it's
   2458 			   rf_ds_used_spare, as that means it should be
   2459 			   directly substitutable for the disk it replaced.
   2460 			   We note that too...
   2461 
   2462 			 */
   2463 
   2464 			for(i=0;i<raidPtr->numRow;i++) {
   2465 				for(j=0;j<raidPtr->numCol;j++) {
   2466 					if ((raidPtr->Disks[i][j].spareRow ==
   2467 					     0) &&
   2468 					    (raidPtr->Disks[i][j].spareCol ==
   2469 					     sparecol)) {
   2470 						srow = i;
   2471 						scol = j;
   2472 						break;
   2473 					}
   2474 				}
   2475 			}
   2476 
   2477 			/* XXX shouldn't *really* need this... */
   2478 			raidread_component_label(
   2479 				      raidPtr->Disks[0][sparecol].dev,
   2480 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2481 				      &clabel);
   2482 			/* make sure status is noted */
   2483 
   2484 			raid_init_component_label(raidPtr, &clabel);
   2485 
   2486 			clabel.mod_counter = raidPtr->mod_counter;
   2487 			clabel.row = srow;
   2488 			clabel.column = scol;
   2489 			clabel.status = rf_ds_optimal;
   2490 
   2491 			raidwrite_component_label(
   2492 				      raidPtr->Disks[0][sparecol].dev,
   2493 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2494 				      &clabel);
   2495 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2496 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2497 					raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2498 						       raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2499 						       raidPtr->mod_counter);
   2500 				}
   2501 			}
   2502 		}
   2503 	}
   2504 	/* 	printf("Component labels updated\n"); */
   2505 }
   2506 
   2507 void
   2508 rf_close_component(raidPtr, vp, auto_configured)
   2509 	RF_Raid_t *raidPtr;
   2510 	struct vnode *vp;
   2511 	int auto_configured;
   2512 {
   2513 	struct proc *p;
   2514 
   2515 	p = raidPtr->engine_thread;
   2516 
   2517 	if (vp != NULL) {
   2518 		if (auto_configured == 1) {
   2519 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2520 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2521 			vput(vp);
   2522 
   2523 		} else {
   2524 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2525 		}
   2526 	} else {
   2527 #if 0
   2528 		printf("vnode was NULL\n");
   2529 #endif
   2530 	}
   2531 }
   2532 
   2533 
   2534 void
   2535 rf_UnconfigureVnodes(raidPtr)
   2536 	RF_Raid_t *raidPtr;
   2537 {
   2538 	int r,c;
   2539 	struct vnode *vp;
   2540 	int acd;
   2541 
   2542 
   2543 	/* We take this opportunity to close the vnodes like we should.. */
   2544 
   2545 	for (r = 0; r < raidPtr->numRow; r++) {
   2546 		for (c = 0; c < raidPtr->numCol; c++) {
   2547 #if 0
   2548 			printf("raid%d: Closing vnode for row: %d col: %d\n",
   2549 			       raidPtr->raidid, r, c);
   2550 #endif
   2551 			vp = raidPtr->raid_cinfo[r][c].ci_vp;
   2552 			acd = raidPtr->Disks[r][c].auto_configured;
   2553 			rf_close_component(raidPtr, vp, acd);
   2554 			raidPtr->raid_cinfo[r][c].ci_vp = NULL;
   2555 			raidPtr->Disks[r][c].auto_configured = 0;
   2556 		}
   2557 	}
   2558 	for (r = 0; r < raidPtr->numSpare; r++) {
   2559 #if 0
   2560 		printf("raid%d: Closing vnode for spare: %d\n",
   2561 		       raidPtr->raidid, r);
   2562 #endif
   2563 		vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
   2564 		acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
   2565 		rf_close_component(raidPtr, vp, acd);
   2566 		raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
   2567 		raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
   2568 	}
   2569 }
   2570 
   2571 
   2572 void
   2573 rf_ReconThread(req)
   2574 	struct rf_recon_req *req;
   2575 {
   2576 	int     s;
   2577 	RF_Raid_t *raidPtr;
   2578 
   2579 	s = splbio();
   2580 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2581 	raidPtr->recon_in_progress = 1;
   2582 
   2583 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2584 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2585 
   2586 	/* XXX get rid of this! we don't need it at all.. */
   2587 	RF_Free(req, sizeof(*req));
   2588 
   2589 	raidPtr->recon_in_progress = 0;
   2590 	splx(s);
   2591 
   2592 	/* That's all... */
   2593 	kthread_exit(0);        /* does not return */
   2594 }
   2595 
   2596 void
   2597 rf_RewriteParityThread(raidPtr)
   2598 	RF_Raid_t *raidPtr;
   2599 {
   2600 	int retcode;
   2601 	int s;
   2602 
   2603 	raidPtr->parity_rewrite_in_progress = 1;
   2604 	s = splbio();
   2605 	retcode = rf_RewriteParity(raidPtr);
   2606 	splx(s);
   2607 	if (retcode) {
   2608 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2609 	} else {
   2610 		/* set the clean bit!  If we shutdown correctly,
   2611 		   the clean bit on each component label will get
   2612 		   set */
   2613 		raidPtr->parity_good = RF_RAID_CLEAN;
   2614 	}
   2615 	raidPtr->parity_rewrite_in_progress = 0;
   2616 
   2617 	/* Anyone waiting for us to stop?  If so, inform them... */
   2618 	if (raidPtr->waitShutdown) {
   2619 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2620 	}
   2621 
   2622 	/* That's all... */
   2623 	kthread_exit(0);        /* does not return */
   2624 }
   2625 
   2626 
   2627 void
   2628 rf_CopybackThread(raidPtr)
   2629 	RF_Raid_t *raidPtr;
   2630 {
   2631 	int s;
   2632 
   2633 	raidPtr->copyback_in_progress = 1;
   2634 	s = splbio();
   2635 	rf_CopybackReconstructedData(raidPtr);
   2636 	splx(s);
   2637 	raidPtr->copyback_in_progress = 0;
   2638 
   2639 	/* That's all... */
   2640 	kthread_exit(0);        /* does not return */
   2641 }
   2642 
   2643 
   2644 void
   2645 rf_ReconstructInPlaceThread(req)
   2646 	struct rf_recon_req *req;
   2647 {
   2648 	int retcode;
   2649 	int s;
   2650 	RF_Raid_t *raidPtr;
   2651 
   2652 	s = splbio();
   2653 	raidPtr = req->raidPtr;
   2654 	raidPtr->recon_in_progress = 1;
   2655 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2656 	RF_Free(req, sizeof(*req));
   2657 	raidPtr->recon_in_progress = 0;
   2658 	splx(s);
   2659 
   2660 	/* That's all... */
   2661 	kthread_exit(0);        /* does not return */
   2662 }
   2663 
   2664 RF_AutoConfig_t *
   2665 rf_find_raid_components()
   2666 {
   2667 	struct vnode *vp;
   2668 	struct disklabel label;
   2669 	struct device *dv;
   2670 	dev_t dev;
   2671 	int bmajor;
   2672 	int error;
   2673 	int i;
   2674 	int good_one;
   2675 	RF_ComponentLabel_t *clabel;
   2676 	RF_AutoConfig_t *ac_list;
   2677 	RF_AutoConfig_t *ac;
   2678 
   2679 
   2680 	/* initialize the AutoConfig list */
   2681 	ac_list = NULL;
   2682 
   2683 	/* we begin by trolling through *all* the devices on the system */
   2684 
   2685 	for (dv = alldevs.tqh_first; dv != NULL;
   2686 	     dv = dv->dv_list.tqe_next) {
   2687 
   2688 		/* we are only interested in disks... */
   2689 		if (dv->dv_class != DV_DISK)
   2690 			continue;
   2691 
   2692 		/* we don't care about floppies... */
   2693 		if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
   2694 			continue;
   2695 		}
   2696 
   2697 		/* we don't care about CD's... */
   2698 		if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
   2699 			continue;
   2700 		}
   2701 
   2702 		/* hdfd is the Atari/Hades floppy driver */
   2703 		if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
   2704 			continue;
   2705 		}
   2706 		/* fdisa is the Atari/Milan floppy driver */
   2707 		if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
   2708 			continue;
   2709 		}
   2710 
   2711 		/* need to find the device_name_to_block_device_major stuff */
   2712 		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
   2713 
   2714 		/* get a vnode for the raw partition of this disk */
   2715 
   2716 		dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
   2717 		if (bdevvp(dev, &vp))
   2718 			panic("RAID can't alloc vnode");
   2719 
   2720 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2721 
   2722 		if (error) {
   2723 			/* "Who cares."  Continue looking
   2724 			   for something that exists*/
   2725 			vput(vp);
   2726 			continue;
   2727 		}
   2728 
   2729 		/* Ok, the disk exists.  Go get the disklabel. */
   2730 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2731 				  FREAD, NOCRED, 0);
   2732 		if (error) {
   2733 			/*
   2734 			 * XXX can't happen - open() would
   2735 			 * have errored out (or faked up one)
   2736 			 */
   2737 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2738 			       dv->dv_xname, 'a' + RAW_PART, error);
   2739 		}
   2740 
   2741 		/* don't need this any more.  We'll allocate it again
   2742 		   a little later if we really do... */
   2743 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2744 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2745 		vput(vp);
   2746 
   2747 		for (i=0; i < label.d_npartitions; i++) {
   2748 			/* We only support partitions marked as RAID */
   2749 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2750 				continue;
   2751 
   2752 			dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
   2753 			if (bdevvp(dev, &vp))
   2754 				panic("RAID can't alloc vnode");
   2755 
   2756 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2757 			if (error) {
   2758 				/* Whatever... */
   2759 				vput(vp);
   2760 				continue;
   2761 			}
   2762 
   2763 			good_one = 0;
   2764 
   2765 			clabel = (RF_ComponentLabel_t *)
   2766 				malloc(sizeof(RF_ComponentLabel_t),
   2767 				       M_RAIDFRAME, M_NOWAIT);
   2768 			if (clabel == NULL) {
   2769 				/* XXX CLEANUP HERE */
   2770 				printf("RAID auto config: out of memory!\n");
   2771 				return(NULL); /* XXX probably should panic? */
   2772 			}
   2773 
   2774 			if (!raidread_component_label(dev, vp, clabel)) {
   2775 				/* Got the label.  Does it look reasonable? */
   2776 				if (rf_reasonable_label(clabel) &&
   2777 				    (clabel->partitionSize <=
   2778 				     label.d_partitions[i].p_size)) {
   2779 #if DEBUG
   2780 					printf("Component on: %s%c: %d\n",
   2781 					       dv->dv_xname, 'a'+i,
   2782 					       label.d_partitions[i].p_size);
   2783 					rf_print_component_label(clabel);
   2784 #endif
   2785 					/* if it's reasonable, add it,
   2786 					   else ignore it. */
   2787 					ac = (RF_AutoConfig_t *)
   2788 						malloc(sizeof(RF_AutoConfig_t),
   2789 						       M_RAIDFRAME,
   2790 						       M_NOWAIT);
   2791 					if (ac == NULL) {
   2792 						/* XXX should panic?? */
   2793 						return(NULL);
   2794 					}
   2795 
   2796 					sprintf(ac->devname, "%s%c",
   2797 						dv->dv_xname, 'a'+i);
   2798 					ac->dev = dev;
   2799 					ac->vp = vp;
   2800 					ac->clabel = clabel;
   2801 					ac->next = ac_list;
   2802 					ac_list = ac;
   2803 					good_one = 1;
   2804 				}
   2805 			}
   2806 			if (!good_one) {
   2807 				/* cleanup */
   2808 				free(clabel, M_RAIDFRAME);
   2809 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2810 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2811 				vput(vp);
   2812 			}
   2813 		}
   2814 	}
   2815 	return(ac_list);
   2816 }
   2817 
   2818 static int
   2819 rf_reasonable_label(clabel)
   2820 	RF_ComponentLabel_t *clabel;
   2821 {
   2822 
   2823 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2824 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2825 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2826 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2827 	    clabel->row >=0 &&
   2828 	    clabel->column >= 0 &&
   2829 	    clabel->num_rows > 0 &&
   2830 	    clabel->num_columns > 0 &&
   2831 	    clabel->row < clabel->num_rows &&
   2832 	    clabel->column < clabel->num_columns &&
   2833 	    clabel->blockSize > 0 &&
   2834 	    clabel->numBlocks > 0) {
   2835 		/* label looks reasonable enough... */
   2836 		return(1);
   2837 	}
   2838 	return(0);
   2839 }
   2840 
   2841 
#if DEBUG
/*
 * Debug helper: dump the fields of a component label to the console
 * with printf().  Only compiled when DEBUG is set.
 */
void
rf_print_component_label(clabel)
	RF_ComponentLabel_t *clabel;
{
	const char *clean_str;
	const char *autoconf_str;
	const char *root_str;

	/* Render the boolean-like fields as Yes/No once, up front. */
	clean_str = clabel->clean ? "Yes" : "No";
	autoconf_str = clabel->autoconfigure ? "Yes" : "No";
	root_str = clabel->root_partition ? "Yes" : "No";

	/* Position and geometry. */
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);

	/* Identity of the set this component belongs to. */
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n", clean_str, clabel->status);

	/* Layout parameters. */
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);

	/* Autoconfiguration state. */
	printf("   Autoconfig: %s\n", autoconf_str);
	printf("   Contains root partition: %s\n", root_str);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif
}
#endif
   2870 
   2871 RF_ConfigSet_t *
   2872 rf_create_auto_sets(ac_list)
   2873 	RF_AutoConfig_t *ac_list;
   2874 {
   2875 	RF_AutoConfig_t *ac;
   2876 	RF_ConfigSet_t *config_sets;
   2877 	RF_ConfigSet_t *cset;
   2878 	RF_AutoConfig_t *ac_next;
   2879 
   2880 
   2881 	config_sets = NULL;
   2882 
   2883 	/* Go through the AutoConfig list, and figure out which components
   2884 	   belong to what sets.  */
   2885 	ac = ac_list;
   2886 	while(ac!=NULL) {
   2887 		/* we're going to putz with ac->next, so save it here
   2888 		   for use at the end of the loop */
   2889 		ac_next = ac->next;
   2890 
   2891 		if (config_sets == NULL) {
   2892 			/* will need at least this one... */
   2893 			config_sets = (RF_ConfigSet_t *)
   2894 				malloc(sizeof(RF_ConfigSet_t),
   2895 				       M_RAIDFRAME, M_NOWAIT);
   2896 			if (config_sets == NULL) {
   2897 				panic("rf_create_auto_sets: No memory!");
   2898 			}
   2899 			/* this one is easy :) */
   2900 			config_sets->ac = ac;
   2901 			config_sets->next = NULL;
   2902 			config_sets->rootable = 0;
   2903 			ac->next = NULL;
   2904 		} else {
   2905 			/* which set does this component fit into? */
   2906 			cset = config_sets;
   2907 			while(cset!=NULL) {
   2908 				if (rf_does_it_fit(cset, ac)) {
   2909 					/* looks like it matches... */
   2910 					ac->next = cset->ac;
   2911 					cset->ac = ac;
   2912 					break;
   2913 				}
   2914 				cset = cset->next;
   2915 			}
   2916 			if (cset==NULL) {
   2917 				/* didn't find a match above... new set..*/
   2918 				cset = (RF_ConfigSet_t *)
   2919 					malloc(sizeof(RF_ConfigSet_t),
   2920 					       M_RAIDFRAME, M_NOWAIT);
   2921 				if (cset == NULL) {
   2922 					panic("rf_create_auto_sets: No memory!");
   2923 				}
   2924 				cset->ac = ac;
   2925 				ac->next = NULL;
   2926 				cset->next = config_sets;
   2927 				cset->rootable = 0;
   2928 				config_sets = cset;
   2929 			}
   2930 		}
   2931 		ac = ac_next;
   2932 	}
   2933 
   2934 
   2935 	return(config_sets);
   2936 }
   2937 
   2938 static int
   2939 rf_does_it_fit(cset, ac)
   2940 	RF_ConfigSet_t *cset;
   2941 	RF_AutoConfig_t *ac;
   2942 {
   2943 	RF_ComponentLabel_t *clabel1, *clabel2;
   2944 
   2945 	/* If this one matches the *first* one in the set, that's good
   2946 	   enough, since the other members of the set would have been
   2947 	   through here too... */
   2948 	/* note that we are not checking partitionSize here..
   2949 
   2950 	   Note that we are also not checking the mod_counters here.
   2951 	   If everything else matches execpt the mod_counter, that's
   2952 	   good enough for this test.  We will deal with the mod_counters
   2953 	   a little later in the autoconfiguration process.
   2954 
   2955 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2956 
   2957 	   The reason we don't check for this is that failed disks
   2958 	   will have lower modification counts.  If those disks are
   2959 	   not added to the set they used to belong to, then they will
   2960 	   form their own set, which may result in 2 different sets,
   2961 	   for example, competing to be configured at raid0, and
   2962 	   perhaps competing to be the root filesystem set.  If the
   2963 	   wrong ones get configured, or both attempt to become /,
   2964 	   weird behaviour and or serious lossage will occur.  Thus we
   2965 	   need to bring them into the fold here, and kick them out at
   2966 	   a later point.
   2967 
   2968 	*/
   2969 
   2970 	clabel1 = cset->ac->clabel;
   2971 	clabel2 = ac->clabel;
   2972 	if ((clabel1->version == clabel2->version) &&
   2973 	    (clabel1->serial_number == clabel2->serial_number) &&
   2974 	    (clabel1->num_rows == clabel2->num_rows) &&
   2975 	    (clabel1->num_columns == clabel2->num_columns) &&
   2976 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2977 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2978 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2979 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2980 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2981 	    (clabel1->blockSize == clabel2->blockSize) &&
   2982 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2983 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2984 	    (clabel1->root_partition == clabel2->root_partition) &&
   2985 	    (clabel1->last_unit == clabel2->last_unit) &&
   2986 	    (clabel1->config_order == clabel2->config_order)) {
   2987 		/* if it get's here, it almost *has* to be a match */
   2988 	} else {
   2989 		/* it's not consistent with somebody in the set..
   2990 		   punt */
   2991 		return(0);
   2992 	}
   2993 	/* all was fine.. it must fit... */
   2994 	return(1);
   2995 }
   2996 
   2997 int
   2998 rf_have_enough_components(cset)
   2999 	RF_ConfigSet_t *cset;
   3000 {
   3001 	RF_AutoConfig_t *ac;
   3002 	RF_AutoConfig_t *auto_config;
   3003 	RF_ComponentLabel_t *clabel;
   3004 	int r,c;
   3005 	int num_rows;
   3006 	int num_cols;
   3007 	int num_missing;
   3008 	int mod_counter;
   3009 	int mod_counter_found;
   3010 	int even_pair_failed;
   3011 	char parity_type;
   3012 
   3013 
   3014 	/* check to see that we have enough 'live' components
   3015 	   of this set.  If so, we can configure it if necessary */
   3016 
   3017 	num_rows = cset->ac->clabel->num_rows;
   3018 	num_cols = cset->ac->clabel->num_columns;
   3019 	parity_type = cset->ac->clabel->parityConfig;
   3020 
   3021 	/* XXX Check for duplicate components!?!?!? */
   3022 
   3023 	/* Determine what the mod_counter is supposed to be for this set. */
   3024 
   3025 	mod_counter_found = 0;
   3026 	mod_counter = 0;
   3027 	ac = cset->ac;
   3028 	while(ac!=NULL) {
   3029 		if (mod_counter_found==0) {
   3030 			mod_counter = ac->clabel->mod_counter;
   3031 			mod_counter_found = 1;
   3032 		} else {
   3033 			if (ac->clabel->mod_counter > mod_counter) {
   3034 				mod_counter = ac->clabel->mod_counter;
   3035 			}
   3036 		}
   3037 		ac = ac->next;
   3038 	}
   3039 
   3040 	num_missing = 0;
   3041 	auto_config = cset->ac;
   3042 
   3043 	for(r=0; r<num_rows; r++) {
   3044 		even_pair_failed = 0;
   3045 		for(c=0; c<num_cols; c++) {
   3046 			ac = auto_config;
   3047 			while(ac!=NULL) {
   3048 				if ((ac->clabel->row == r) &&
   3049 				    (ac->clabel->column == c) &&
   3050 				    (ac->clabel->mod_counter == mod_counter)) {
   3051 					/* it's this one... */
   3052 #if DEBUG
   3053 					printf("Found: %s at %d,%d\n",
   3054 					       ac->devname,r,c);
   3055 #endif
   3056 					break;
   3057 				}
   3058 				ac=ac->next;
   3059 			}
   3060 			if (ac==NULL) {
   3061 				/* Didn't find one here! */
   3062 				/* special case for RAID 1, especially
   3063 				   where there are more than 2
   3064 				   components (where RAIDframe treats
   3065 				   things a little differently :( ) */
   3066 				if (parity_type == '1') {
   3067 					if (c%2 == 0) { /* even component */
   3068 						even_pair_failed = 1;
   3069 					} else { /* odd component.  If
   3070                                                     we're failed, and
   3071                                                     so is the even
   3072                                                     component, it's
   3073                                                     "Good Night, Charlie" */
   3074 						if (even_pair_failed == 1) {
   3075 							return(0);
   3076 						}
   3077 					}
   3078 				} else {
   3079 					/* normal accounting */
   3080 					num_missing++;
   3081 				}
   3082 			}
   3083 			if ((parity_type == '1') && (c%2 == 1)) {
   3084 				/* Just did an even component, and we didn't
   3085 				   bail.. reset the even_pair_failed flag,
   3086 				   and go on to the next component.... */
   3087 				even_pair_failed = 0;
   3088 			}
   3089 		}
   3090 	}
   3091 
   3092 	clabel = cset->ac->clabel;
   3093 
   3094 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3095 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3096 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3097 		/* XXX this needs to be made *much* more general */
   3098 		/* Too many failures */
   3099 		return(0);
   3100 	}
   3101 	/* otherwise, all is well, and we've got enough to take a kick
   3102 	   at autoconfiguring this set */
   3103 	return(1);
   3104 }
   3105 
   3106 void
   3107 rf_create_configuration(ac,config,raidPtr)
   3108 	RF_AutoConfig_t *ac;
   3109 	RF_Config_t *config;
   3110 	RF_Raid_t *raidPtr;
   3111 {
   3112 	RF_ComponentLabel_t *clabel;
   3113 	int i;
   3114 
   3115 	clabel = ac->clabel;
   3116 
   3117 	/* 1. Fill in the common stuff */
   3118 	config->numRow = clabel->num_rows;
   3119 	config->numCol = clabel->num_columns;
   3120 	config->numSpare = 0; /* XXX should this be set here? */
   3121 	config->sectPerSU = clabel->sectPerSU;
   3122 	config->SUsPerPU = clabel->SUsPerPU;
   3123 	config->SUsPerRU = clabel->SUsPerRU;
   3124 	config->parityConfig = clabel->parityConfig;
   3125 	/* XXX... */
   3126 	strcpy(config->diskQueueType,"fifo");
   3127 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3128 	config->layoutSpecificSize = 0; /* XXX ?? */
   3129 
   3130 	while(ac!=NULL) {
   3131 		/* row/col values will be in range due to the checks
   3132 		   in reasonable_label() */
   3133 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   3134 		       ac->devname);
   3135 		ac = ac->next;
   3136 	}
   3137 
   3138 	for(i=0;i<RF_MAXDBGV;i++) {
   3139 		config->debugVars[i][0] = NULL;
   3140 	}
   3141 }
   3142 
   3143 int
   3144 rf_set_autoconfig(raidPtr, new_value)
   3145 	RF_Raid_t *raidPtr;
   3146 	int new_value;
   3147 {
   3148 	RF_ComponentLabel_t clabel;
   3149 	struct vnode *vp;
   3150 	dev_t dev;
   3151 	int row, column;
   3152 
   3153 	raidPtr->autoconfigure = new_value;
   3154 	for(row=0; row<raidPtr->numRow; row++) {
   3155 		for(column=0; column<raidPtr->numCol; column++) {
   3156 			if (raidPtr->Disks[row][column].status ==
   3157 			    rf_ds_optimal) {
   3158 				dev = raidPtr->Disks[row][column].dev;
   3159 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3160 				raidread_component_label(dev, vp, &clabel);
   3161 				clabel.autoconfigure = new_value;
   3162 				raidwrite_component_label(dev, vp, &clabel);
   3163 			}
   3164 		}
   3165 	}
   3166 	return(new_value);
   3167 }
   3168 
   3169 int
   3170 rf_set_rootpartition(raidPtr, new_value)
   3171 	RF_Raid_t *raidPtr;
   3172 	int new_value;
   3173 {
   3174 	RF_ComponentLabel_t clabel;
   3175 	struct vnode *vp;
   3176 	dev_t dev;
   3177 	int row, column;
   3178 
   3179 	raidPtr->root_partition = new_value;
   3180 	for(row=0; row<raidPtr->numRow; row++) {
   3181 		for(column=0; column<raidPtr->numCol; column++) {
   3182 			if (raidPtr->Disks[row][column].status ==
   3183 			    rf_ds_optimal) {
   3184 				dev = raidPtr->Disks[row][column].dev;
   3185 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3186 				raidread_component_label(dev, vp, &clabel);
   3187 				clabel.root_partition = new_value;
   3188 				raidwrite_component_label(dev, vp, &clabel);
   3189 			}
   3190 		}
   3191 	}
   3192 	return(new_value);
   3193 }
   3194 
   3195 void
   3196 rf_release_all_vps(cset)
   3197 	RF_ConfigSet_t *cset;
   3198 {
   3199 	RF_AutoConfig_t *ac;
   3200 
   3201 	ac = cset->ac;
   3202 	while(ac!=NULL) {
   3203 		/* Close the vp, and give it back */
   3204 		if (ac->vp) {
   3205 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3206 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3207 			vput(ac->vp);
   3208 			ac->vp = NULL;
   3209 		}
   3210 		ac = ac->next;
   3211 	}
   3212 }
   3213 
   3214 
   3215 void
   3216 rf_cleanup_config_set(cset)
   3217 	RF_ConfigSet_t *cset;
   3218 {
   3219 	RF_AutoConfig_t *ac;
   3220 	RF_AutoConfig_t *next_ac;
   3221 
   3222 	ac = cset->ac;
   3223 	while(ac!=NULL) {
   3224 		next_ac = ac->next;
   3225 		/* nuke the label */
   3226 		free(ac->clabel, M_RAIDFRAME);
   3227 		/* cleanup the config structure */
   3228 		free(ac, M_RAIDFRAME);
   3229 		/* "next.." */
   3230 		ac = next_ac;
   3231 	}
   3232 	/* and, finally, nuke the config set */
   3233 	free(cset, M_RAIDFRAME);
   3234 }
   3235 
   3236 
   3237 void
   3238 raid_init_component_label(raidPtr, clabel)
   3239 	RF_Raid_t *raidPtr;
   3240 	RF_ComponentLabel_t *clabel;
   3241 {
   3242 	/* current version number */
   3243 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3244 	clabel->serial_number = raidPtr->serial_number;
   3245 	clabel->mod_counter = raidPtr->mod_counter;
   3246 	clabel->num_rows = raidPtr->numRow;
   3247 	clabel->num_columns = raidPtr->numCol;
   3248 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3249 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3250 
   3251 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3252 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3253 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3254 
   3255 	clabel->blockSize = raidPtr->bytesPerSector;
   3256 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3257 
   3258 	/* XXX not portable */
   3259 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3260 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3261 	clabel->autoconfigure = raidPtr->autoconfigure;
   3262 	clabel->root_partition = raidPtr->root_partition;
   3263 	clabel->last_unit = raidPtr->raidid;
   3264 	clabel->config_order = raidPtr->config_order;
   3265 }
   3266 
   3267 int
   3268 rf_auto_config_set(cset,unit)
   3269 	RF_ConfigSet_t *cset;
   3270 	int *unit;
   3271 {
   3272 	RF_Raid_t *raidPtr;
   3273 	RF_Config_t *config;
   3274 	int raidID;
   3275 	int retcode;
   3276 
   3277 #if DEBUG
   3278 	printf("RAID autoconfigure\n");
   3279 #endif
   3280 
   3281 	retcode = 0;
   3282 	*unit = -1;
   3283 
   3284 	/* 1. Create a config structure */
   3285 
   3286 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3287 				       M_RAIDFRAME,
   3288 				       M_NOWAIT);
   3289 	if (config==NULL) {
   3290 		printf("Out of mem!?!?\n");
   3291 				/* XXX do something more intelligent here. */
   3292 		return(1);
   3293 	}
   3294 
   3295 	memset(config, 0, sizeof(RF_Config_t));
   3296 
   3297 	/*
   3298 	   2. Figure out what RAID ID this one is supposed to live at
   3299 	   See if we can get the same RAID dev that it was configured
   3300 	   on last time..
   3301 	*/
   3302 
   3303 	raidID = cset->ac->clabel->last_unit;
   3304 	if ((raidID < 0) || (raidID >= numraid)) {
   3305 		/* let's not wander off into lala land. */
   3306 		raidID = numraid - 1;
   3307 	}
   3308 	if (raidPtrs[raidID]->valid != 0) {
   3309 
   3310 		/*
   3311 		   Nope... Go looking for an alternative...
   3312 		   Start high so we don't immediately use raid0 if that's
   3313 		   not taken.
   3314 		*/
   3315 
   3316 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3317 			if (raidPtrs[raidID]->valid == 0) {
   3318 				/* can use this one! */
   3319 				break;
   3320 			}
   3321 		}
   3322 	}
   3323 
   3324 	if (raidID < 0) {
   3325 		/* punt... */
   3326 		printf("Unable to auto configure this set!\n");
   3327 		printf("(Out of RAID devs!)\n");
   3328 		return(1);
   3329 	}
   3330 
   3331 #if DEBUG
   3332 	printf("Configuring raid%d:\n",raidID);
   3333 #endif
   3334 
   3335 	raidPtr = raidPtrs[raidID];
   3336 
   3337 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3338 	raidPtr->raidid = raidID;
   3339 	raidPtr->openings = RAIDOUTSTANDING;
   3340 
   3341 	/* 3. Build the configuration structure */
   3342 	rf_create_configuration(cset->ac, config, raidPtr);
   3343 
   3344 	/* 4. Do the configuration */
   3345 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3346 
   3347 	if (retcode == 0) {
   3348 
   3349 		raidinit(raidPtrs[raidID]);
   3350 
   3351 		rf_markalldirty(raidPtrs[raidID]);
   3352 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3353 		if (cset->ac->clabel->root_partition==1) {
   3354 			/* everything configured just fine.  Make a note
   3355 			   that this set is eligible to be root. */
   3356 			cset->rootable = 1;
   3357 			/* XXX do this here? */
   3358 			raidPtrs[raidID]->root_partition = 1;
   3359 		}
   3360 	}
   3361 
   3362 	/* 5. Cleanup */
   3363 	free(config, M_RAIDFRAME);
   3364 
   3365 	*unit = raidID;
   3366 	return(retcode);
   3367 }
   3368 
   3369 void
   3370 rf_disk_unbusy(desc)
   3371 	RF_RaidAccessDesc_t *desc;
   3372 {
   3373 	struct buf *bp;
   3374 
   3375 	bp = (struct buf *)desc->bp;
   3376 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3377 	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
   3378 }
   3379