Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.117.6.1
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.117.6.1 2002/05/16 11:47:15 gehenna Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/cdefs.h>
    117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.117.6.1 2002/05/16 11:47:15 gehenna Exp $");
    118 
    119 #include <sys/param.h>
    120 #include <sys/errno.h>
    121 #include <sys/pool.h>
    122 #include <sys/queue.h>
    123 #include <sys/disk.h>
    124 #include <sys/device.h>
    125 #include <sys/stat.h>
    126 #include <sys/ioctl.h>
    127 #include <sys/fcntl.h>
    128 #include <sys/systm.h>
    129 #include <sys/namei.h>
    130 #include <sys/vnode.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 #include <sys/reboot.h>
    137 
    138 #include <dev/raidframe/raidframevar.h>
    139 #include <dev/raidframe/raidframeio.h>
    140 #include "raid.h"
    141 #include "opt_raid_autoconfig.h"
    142 #include "rf_raid.h"
    143 #include "rf_copyback.h"
    144 #include "rf_dag.h"
    145 #include "rf_dagflags.h"
    146 #include "rf_desc.h"
    147 #include "rf_diskqueue.h"
    148 #include "rf_acctrace.h"
    149 #include "rf_etimer.h"
    150 #include "rf_general.h"
    151 #include "rf_debugMem.h"
    152 #include "rf_kintf.h"
    153 #include "rf_options.h"
    154 #include "rf_driver.h"
    155 #include "rf_parityscan.h"
    156 #include "rf_debugprint.h"
    157 #include "rf_threadstuff.h"
    158 
/*
 * Verbosity of the db1_printf() debugging output.  Only consulted when
 * the kernel is built with DEBUG; db1_printf() prints when it is > 0.
 */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, ...)) -- debugging printf.  The argument carries its
 * own parentheses, e.g. db1_printf(("unit %d\n", unit));
 *
 * Both variants expand to exactly one do { } while (0) statement so the
 * macro can be used as the unbraced body of an if/else.  A bare "if"
 * would capture a following "else", and a bare "{ }" followed by ';'
 * would terminate the enclosing if statement early.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    166 
    167 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    168 
    169 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    170 
    171 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    172 						 * spare table */
    173 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    174 						 * installation process */
    175 
    176 /* prototypes */
    177 static void KernelWakeupFunc(struct buf * bp);
    178 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    179 		   dev_t dev, RF_SectorNum_t startSect,
    180 		   RF_SectorCount_t numSect, caddr_t buf,
    181 		   void (*cbFunc) (struct buf *), void *cbArg,
    182 		   int logBytesPerSector, struct proc * b_proc);
    183 static void raidinit(RF_Raid_t *);
    184 
    185 void raidattach(int);
    186 
    187 dev_type_open(raidopen);
    188 dev_type_close(raidclose);
    189 dev_type_read(raidread);
    190 dev_type_write(raidwrite);
    191 dev_type_ioctl(raidioctl);
    192 dev_type_strategy(raidstrategy);
    193 dev_type_dump(raiddump);
    194 dev_type_size(raidsize);
    195 
    196 const struct bdevsw raid_bdevsw = {
    197 	raidopen, raidclose, raidstrategy, raidioctl,
    198 	raiddump, raidsize, D_DISK
    199 };
    200 
    201 const struct cdevsw raid_cdevsw = {
    202 	raidopen, raidclose, raidread, raidwrite, raidioctl,
    203 	nostop, notty, nopoll, nommap, D_DISK
    204 };
    205 
    206 /*
    207  * Pilfered from ccd.c
    208  */
    209 
    210 struct raidbuf {
    211 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
    212 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    213 	int     rf_flags;	/* misc. flags */
    214 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
    215 };
    216 
    217 /* component buffer pool */
    218 struct pool raidframe_cbufpool;
    219 
    220 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
    221 #define	RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
    222 
    223 /* XXX Not sure if the following should be replacing the raidPtrs above,
    224    or if it should be used in conjunction with that...
    225 */
    226 
    227 struct raid_softc {
    228 	int     sc_flags;	/* flags */
    229 	int     sc_cflags;	/* configuration flags */
    230 	size_t  sc_size;        /* size of the raid device */
    231 	char    sc_xname[20];	/* XXX external name */
    232 	struct disk sc_dkdev;	/* generic disk device info */
    233 	struct buf_queue buf_queue;	/* used for the device queue */
    234 };
    235 /* sc_flags */
    236 #define RAIDF_INITED	0x01	/* unit has been initialized */
    237 #define RAIDF_WLABEL	0x02	/* label area is writable */
    238 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    239 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    240 #define RAIDF_LOCKED	0x80	/* unit is locked */
    241 
    242 #define	raidunit(x)	DISKUNIT(x)
    243 int numraid = 0;
    244 
    245 /*
    246  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    247  * Be aware that large numbers can allow the driver to consume a lot of
    248  * kernel memory, especially on writes, and in degraded mode reads.
    249  *
    250  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    251  * a single 64K write will typically require 64K for the old data,
    252  * 64K for the old parity, and 64K for the new parity, for a total
    253  * of 192K (if the parity buffer is not re-used immediately).
    254  * Even if it is used immediately, that's still 128K, which when multiplied
    255  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    256  *
    257  * Now in degraded mode, for example, a 64K read on the above setup may
    258  * require data reconstruction, which will require *all* of the 4 remaining
    259  * disks to participate -- 4 * 32K/disk == 128K again.
    260  */
    261 
    262 #ifndef RAIDOUTSTANDING
    263 #define RAIDOUTSTANDING   6
    264 #endif
    265 
    266 #define RAIDLABELDEV(dev)	\
    267 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    268 
    269 /* declared here, and made public, for the benefit of KVM stuff.. */
    270 struct raid_softc *raid_softc;
    271 
    272 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    273 				     struct disklabel *);
    274 static void raidgetdisklabel(dev_t);
    275 static void raidmakedisklabel(struct raid_softc *);
    276 
    277 static int raidlock(struct raid_softc *);
    278 static void raidunlock(struct raid_softc *);
    279 
    280 static void rf_markalldirty(RF_Raid_t *);
    281 void rf_mountroot_hook(struct device *);
    282 
    283 struct device *raidrootdev;
    284 
    285 void rf_ReconThread(struct rf_recon_req *);
    286 /* XXX what I want is: */
    287 /*void rf_ReconThread(RF_Raid_t *raidPtr);  */
    288 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    289 void rf_CopybackThread(RF_Raid_t *raidPtr);
    290 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    291 void rf_buildroothack(void *);
    292 
    293 RF_AutoConfig_t *rf_find_raid_components(void);
    294 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    295 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    296 static int rf_reasonable_label(RF_ComponentLabel_t *);
    297 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    298 int rf_set_autoconfig(RF_Raid_t *, int);
    299 int rf_set_rootpartition(RF_Raid_t *, int);
    300 void rf_release_all_vps(RF_ConfigSet_t *);
    301 void rf_cleanup_config_set(RF_ConfigSet_t *);
    302 int rf_have_enough_components(RF_ConfigSet_t *);
    303 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    304 
    305 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    306 				  allow autoconfig to take place.
    307 			          Note that this is overridden by having
    308 			          RAID_AUTOCONFIG as an option in the
    309 			          kernel config file.  */
    310 
    311 void
    312 raidattach(num)
    313 	int     num;
    314 {
    315 	int raidID;
    316 	int i, rc;
    317 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    318 	RF_ConfigSet_t *config_sets;
    319 
    320 #ifdef DEBUG
    321 	printf("raidattach: Asked for %d units\n", num);
    322 #endif
    323 
    324 	if (num <= 0) {
    325 #ifdef DIAGNOSTIC
    326 		panic("raidattach: count <= 0");
    327 #endif
    328 		return;
    329 	}
    330 	/* This is where all the initialization stuff gets done. */
    331 
    332 	numraid = num;
    333 
    334 	/* Make some space for requested number of units... */
    335 
    336 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    337 	if (raidPtrs == NULL) {
    338 		panic("raidPtrs is NULL!!\n");
    339 	}
    340 
    341 	/* Initialize the component buffer pool. */
    342 	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
    343 	    0, 0, "raidpl", NULL);
    344 
    345 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    346 	if (rc) {
    347 		RF_PANIC();
    348 	}
    349 
    350 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    351 
    352 	for (i = 0; i < num; i++)
    353 		raidPtrs[i] = NULL;
    354 	rc = rf_BootRaidframe();
    355 	if (rc == 0)
    356 		printf("Kernelized RAIDframe activated\n");
    357 	else
    358 		panic("Serious error booting RAID!!\n");
    359 
    360 	/* put together some datastructures like the CCD device does.. This
    361 	 * lets us lock the device and what-not when it gets opened. */
    362 
    363 	raid_softc = (struct raid_softc *)
    364 		malloc(num * sizeof(struct raid_softc),
    365 		       M_RAIDFRAME, M_NOWAIT);
    366 	if (raid_softc == NULL) {
    367 		printf("WARNING: no memory for RAIDframe driver\n");
    368 		return;
    369 	}
    370 
    371 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    372 
    373 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    374 					      M_RAIDFRAME, M_NOWAIT);
    375 	if (raidrootdev == NULL) {
    376 		panic("No memory for RAIDframe driver!!?!?!\n");
    377 	}
    378 
    379 	for (raidID = 0; raidID < num; raidID++) {
    380 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    381 
    382 		raidrootdev[raidID].dv_class  = DV_DISK;
    383 		raidrootdev[raidID].dv_cfdata = NULL;
    384 		raidrootdev[raidID].dv_unit   = raidID;
    385 		raidrootdev[raidID].dv_parent = NULL;
    386 		raidrootdev[raidID].dv_flags  = 0;
    387 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    388 
    389 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    390 			  (RF_Raid_t *));
    391 		if (raidPtrs[raidID] == NULL) {
    392 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    393 			numraid = raidID;
    394 			return;
    395 		}
    396 	}
    397 
    398 #ifdef RAID_AUTOCONFIG
    399 	raidautoconfig = 1;
    400 #endif
    401 
    402 if (raidautoconfig) {
    403 	/* 1. locate all RAID components on the system */
    404 
    405 #if DEBUG
    406 	printf("Searching for raid components...\n");
    407 #endif
    408 	ac_list = rf_find_raid_components();
    409 
    410 	/* 2. sort them into their respective sets */
    411 
    412 	config_sets = rf_create_auto_sets(ac_list);
    413 
    414 	/* 3. evaluate each set and configure the valid ones
    415 	   This gets done in rf_buildroothack() */
    416 
    417 	/* schedule the creation of the thread to do the
    418 	   "/ on RAID" stuff */
    419 
    420 	kthread_create(rf_buildroothack,config_sets);
    421 
    422 #if 0
    423 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    424 #endif
    425 }
    426 
    427 }
    428 
    429 void
    430 rf_buildroothack(arg)
    431 	void *arg;
    432 {
    433 	RF_ConfigSet_t *config_sets = arg;
    434 	RF_ConfigSet_t *cset;
    435 	RF_ConfigSet_t *next_cset;
    436 	int retcode;
    437 	int raidID;
    438 	int rootID;
    439 	int num_root;
    440 
    441 	rootID = 0;
    442 	num_root = 0;
    443 	cset = config_sets;
    444 	while(cset != NULL ) {
    445 		next_cset = cset->next;
    446 		if (rf_have_enough_components(cset) &&
    447 		    cset->ac->clabel->autoconfigure==1) {
    448 			retcode = rf_auto_config_set(cset,&raidID);
    449 			if (!retcode) {
    450 				if (cset->rootable) {
    451 					rootID = raidID;
    452 					num_root++;
    453 				}
    454 			} else {
    455 				/* The autoconfig didn't work :( */
    456 #if DEBUG
    457 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    458 #endif
    459 				rf_release_all_vps(cset);
    460 			}
    461 		} else {
    462 			/* we're not autoconfiguring this set...
    463 			   release the associated resources */
    464 			rf_release_all_vps(cset);
    465 		}
    466 		/* cleanup */
    467 		rf_cleanup_config_set(cset);
    468 		cset = next_cset;
    469 	}
    470 	if (boothowto & RB_ASKNAME) {
    471 		/* We don't auto-config... */
    472 	} else {
    473 		/* They didn't ask, and we found something bootable... */
    474 
    475 		if (num_root == 1) {
    476 			booted_device = &raidrootdev[rootID];
    477 		} else if (num_root > 1) {
    478 			/* we can't guess.. require the user to answer... */
    479 			boothowto |= RB_ASKNAME;
    480 		}
    481 	}
    482 }
    483 
    484 
    485 int
    486 raidsize(dev)
    487 	dev_t   dev;
    488 {
    489 	struct raid_softc *rs;
    490 	struct disklabel *lp;
    491 	int     part, unit, omask, size;
    492 
    493 	unit = raidunit(dev);
    494 	if (unit >= numraid)
    495 		return (-1);
    496 	rs = &raid_softc[unit];
    497 
    498 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    499 		return (-1);
    500 
    501 	part = DISKPART(dev);
    502 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    503 	lp = rs->sc_dkdev.dk_label;
    504 
    505 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    506 		return (-1);
    507 
    508 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    509 		size = -1;
    510 	else
    511 		size = lp->d_partitions[part].p_size *
    512 		    (lp->d_secsize / DEV_BSIZE);
    513 
    514 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    515 		return (-1);
    516 
    517 	return (size);
    518 
    519 }
    520 
    521 int
    522 raiddump(dev, blkno, va, size)
    523 	dev_t   dev;
    524 	daddr_t blkno;
    525 	caddr_t va;
    526 	size_t  size;
    527 {
    528 	/* Not implemented. */
    529 	return ENXIO;
    530 }
    531 /* ARGSUSED */
    532 int
    533 raidopen(dev, flags, fmt, p)
    534 	dev_t   dev;
    535 	int     flags, fmt;
    536 	struct proc *p;
    537 {
    538 	int     unit = raidunit(dev);
    539 	struct raid_softc *rs;
    540 	struct disklabel *lp;
    541 	int     part, pmask;
    542 	int     error = 0;
    543 
    544 	if (unit >= numraid)
    545 		return (ENXIO);
    546 	rs = &raid_softc[unit];
    547 
    548 	if ((error = raidlock(rs)) != 0)
    549 		return (error);
    550 	lp = rs->sc_dkdev.dk_label;
    551 
    552 	part = DISKPART(dev);
    553 	pmask = (1 << part);
    554 
    555 	db1_printf(("Opening raid device number: %d partition: %d\n",
    556 		unit, part));
    557 
    558 
    559 	if ((rs->sc_flags & RAIDF_INITED) &&
    560 	    (rs->sc_dkdev.dk_openmask == 0))
    561 		raidgetdisklabel(dev);
    562 
    563 	/* make sure that this partition exists */
    564 
    565 	if (part != RAW_PART) {
    566 		db1_printf(("Not a raw partition..\n"));
    567 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    568 		    ((part >= lp->d_npartitions) ||
    569 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    570 			error = ENXIO;
    571 			raidunlock(rs);
    572 			db1_printf(("Bailing out...\n"));
    573 			return (error);
    574 		}
    575 	}
    576 	/* Prevent this unit from being unconfigured while open. */
    577 	switch (fmt) {
    578 	case S_IFCHR:
    579 		rs->sc_dkdev.dk_copenmask |= pmask;
    580 		break;
    581 
    582 	case S_IFBLK:
    583 		rs->sc_dkdev.dk_bopenmask |= pmask;
    584 		break;
    585 	}
    586 
    587 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    588 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    589 		/* First one... mark things as dirty... Note that we *MUST*
    590 		 have done a configure before this.  I DO NOT WANT TO BE
    591 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    592 		 THAT THEY BELONG TOGETHER!!!!! */
    593 		/* XXX should check to see if we're only open for reading
    594 		   here... If so, we needn't do this, but then need some
    595 		   other way of keeping track of what's happened.. */
    596 
    597 		rf_markalldirty( raidPtrs[unit] );
    598 	}
    599 
    600 
    601 	rs->sc_dkdev.dk_openmask =
    602 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    603 
    604 	raidunlock(rs);
    605 
    606 	return (error);
    607 
    608 
    609 }
    610 /* ARGSUSED */
    611 int
    612 raidclose(dev, flags, fmt, p)
    613 	dev_t   dev;
    614 	int     flags, fmt;
    615 	struct proc *p;
    616 {
    617 	int     unit = raidunit(dev);
    618 	struct raid_softc *rs;
    619 	int     error = 0;
    620 	int     part;
    621 
    622 	if (unit >= numraid)
    623 		return (ENXIO);
    624 	rs = &raid_softc[unit];
    625 
    626 	if ((error = raidlock(rs)) != 0)
    627 		return (error);
    628 
    629 	part = DISKPART(dev);
    630 
    631 	/* ...that much closer to allowing unconfiguration... */
    632 	switch (fmt) {
    633 	case S_IFCHR:
    634 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    635 		break;
    636 
    637 	case S_IFBLK:
    638 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    639 		break;
    640 	}
    641 	rs->sc_dkdev.dk_openmask =
    642 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    643 
    644 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    645 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    646 		/* Last one... device is not unconfigured yet.
    647 		   Device shutdown has taken care of setting the
    648 		   clean bits if RAIDF_INITED is not set
    649 		   mark things as clean... */
    650 #if 0
    651 		printf("Last one on raid%d.  Updating status.\n",unit);
    652 #endif
    653 		rf_update_component_labels(raidPtrs[unit],
    654 						 RF_FINAL_COMPONENT_UPDATE);
    655 		if (doing_shutdown) {
    656 			/* last one, and we're going down, so
    657 			   lights out for this RAID set too. */
    658 			error = rf_Shutdown(raidPtrs[unit]);
    659 
    660 			/* It's no longer initialized... */
    661 			rs->sc_flags &= ~RAIDF_INITED;
    662 
    663 			/* Detach the disk. */
    664 			disk_detach(&rs->sc_dkdev);
    665 		}
    666 	}
    667 
    668 	raidunlock(rs);
    669 	return (0);
    670 
    671 }
    672 
    673 void
    674 raidstrategy(bp)
    675 	struct buf *bp;
    676 {
    677 	int s;
    678 
    679 	unsigned int raidID = raidunit(bp->b_dev);
    680 	RF_Raid_t *raidPtr;
    681 	struct raid_softc *rs = &raid_softc[raidID];
    682 	struct disklabel *lp;
    683 	int     wlabel;
    684 
    685 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    686 		bp->b_error = ENXIO;
    687 		bp->b_flags |= B_ERROR;
    688 		bp->b_resid = bp->b_bcount;
    689 		biodone(bp);
    690 		return;
    691 	}
    692 	if (raidID >= numraid || !raidPtrs[raidID]) {
    693 		bp->b_error = ENODEV;
    694 		bp->b_flags |= B_ERROR;
    695 		bp->b_resid = bp->b_bcount;
    696 		biodone(bp);
    697 		return;
    698 	}
    699 	raidPtr = raidPtrs[raidID];
    700 	if (!raidPtr->valid) {
    701 		bp->b_error = ENODEV;
    702 		bp->b_flags |= B_ERROR;
    703 		bp->b_resid = bp->b_bcount;
    704 		biodone(bp);
    705 		return;
    706 	}
    707 	if (bp->b_bcount == 0) {
    708 		db1_printf(("b_bcount is zero..\n"));
    709 		biodone(bp);
    710 		return;
    711 	}
    712 	lp = rs->sc_dkdev.dk_label;
    713 
    714 	/*
    715 	 * Do bounds checking and adjust transfer.  If there's an
    716 	 * error, the bounds check will flag that for us.
    717 	 */
    718 
    719 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    720 	if (DISKPART(bp->b_dev) != RAW_PART)
    721 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    722 			db1_printf(("Bounds check failed!!:%d %d\n",
    723 				(int) bp->b_blkno, (int) wlabel));
    724 			biodone(bp);
    725 			return;
    726 		}
    727 	s = splbio();
    728 
    729 	bp->b_resid = 0;
    730 
    731 	/* stuff it onto our queue */
    732 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    733 
    734 	raidstart(raidPtrs[raidID]);
    735 
    736 	splx(s);
    737 }
    738 /* ARGSUSED */
    739 int
    740 raidread(dev, uio, flags)
    741 	dev_t   dev;
    742 	struct uio *uio;
    743 	int     flags;
    744 {
    745 	int     unit = raidunit(dev);
    746 	struct raid_softc *rs;
    747 	int     part;
    748 
    749 	if (unit >= numraid)
    750 		return (ENXIO);
    751 	rs = &raid_softc[unit];
    752 
    753 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    754 		return (ENXIO);
    755 	part = DISKPART(dev);
    756 
    757 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    758 
    759 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    760 
    761 }
    762 /* ARGSUSED */
    763 int
    764 raidwrite(dev, uio, flags)
    765 	dev_t   dev;
    766 	struct uio *uio;
    767 	int     flags;
    768 {
    769 	int     unit = raidunit(dev);
    770 	struct raid_softc *rs;
    771 
    772 	if (unit >= numraid)
    773 		return (ENXIO);
    774 	rs = &raid_softc[unit];
    775 
    776 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    777 		return (ENXIO);
    778 	db1_printf(("raidwrite\n"));
    779 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    780 
    781 }
    782 
    783 int
    784 raidioctl(dev, cmd, data, flag, p)
    785 	dev_t   dev;
    786 	u_long  cmd;
    787 	caddr_t data;
    788 	int     flag;
    789 	struct proc *p;
    790 {
    791 	int     unit = raidunit(dev);
    792 	int     error = 0;
    793 	int     part, pmask;
    794 	struct raid_softc *rs;
    795 	RF_Config_t *k_cfg, *u_cfg;
    796 	RF_Raid_t *raidPtr;
    797 	RF_RaidDisk_t *diskPtr;
    798 	RF_AccTotals_t *totals;
    799 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    800 	u_char *specific_buf;
    801 	int retcode = 0;
    802 	int row;
    803 	int column;
    804 	struct rf_recon_req *rrcopy, *rr;
    805 	RF_ComponentLabel_t *clabel;
    806 	RF_ComponentLabel_t ci_label;
    807 	RF_ComponentLabel_t **clabel_ptr;
    808 	RF_SingleComponent_t *sparePtr,*componentPtr;
    809 	RF_SingleComponent_t hot_spare;
    810 	RF_SingleComponent_t component;
    811 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    812 	int i, j, d;
    813 #ifdef __HAVE_OLD_DISKLABEL
    814 	struct disklabel newlabel;
    815 #endif
    816 
    817 	if (unit >= numraid)
    818 		return (ENXIO);
    819 	rs = &raid_softc[unit];
    820 	raidPtr = raidPtrs[unit];
    821 
    822 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    823 		(int) DISKPART(dev), (int) unit, (int) cmd));
    824 
    825 	/* Must be open for writes for these commands... */
    826 	switch (cmd) {
    827 	case DIOCSDINFO:
    828 	case DIOCWDINFO:
    829 #ifdef __HAVE_OLD_DISKLABEL
    830 	case ODIOCWDINFO:
    831 	case ODIOCSDINFO:
    832 #endif
    833 	case DIOCWLABEL:
    834 		if ((flag & FWRITE) == 0)
    835 			return (EBADF);
    836 	}
    837 
    838 	/* Must be initialized for these... */
    839 	switch (cmd) {
    840 	case DIOCGDINFO:
    841 	case DIOCSDINFO:
    842 	case DIOCWDINFO:
    843 #ifdef __HAVE_OLD_DISKLABEL
    844 	case ODIOCGDINFO:
    845 	case ODIOCWDINFO:
    846 	case ODIOCSDINFO:
    847 	case ODIOCGDEFLABEL:
    848 #endif
    849 	case DIOCGPART:
    850 	case DIOCWLABEL:
    851 	case DIOCGDEFLABEL:
    852 	case RAIDFRAME_SHUTDOWN:
    853 	case RAIDFRAME_REWRITEPARITY:
    854 	case RAIDFRAME_GET_INFO:
    855 	case RAIDFRAME_RESET_ACCTOTALS:
    856 	case RAIDFRAME_GET_ACCTOTALS:
    857 	case RAIDFRAME_KEEP_ACCTOTALS:
    858 	case RAIDFRAME_GET_SIZE:
    859 	case RAIDFRAME_FAIL_DISK:
    860 	case RAIDFRAME_COPYBACK:
    861 	case RAIDFRAME_CHECK_RECON_STATUS:
    862 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
    863 	case RAIDFRAME_GET_COMPONENT_LABEL:
    864 	case RAIDFRAME_SET_COMPONENT_LABEL:
    865 	case RAIDFRAME_ADD_HOT_SPARE:
    866 	case RAIDFRAME_REMOVE_HOT_SPARE:
    867 	case RAIDFRAME_INIT_LABELS:
    868 	case RAIDFRAME_REBUILD_IN_PLACE:
    869 	case RAIDFRAME_CHECK_PARITY:
    870 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    871 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
    872 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    873 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
    874 	case RAIDFRAME_SET_AUTOCONFIG:
    875 	case RAIDFRAME_SET_ROOT:
    876 	case RAIDFRAME_DELETE_COMPONENT:
    877 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
    878 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    879 			return (ENXIO);
    880 	}
    881 
    882 	switch (cmd) {
    883 
    884 		/* configure the system */
    885 	case RAIDFRAME_CONFIGURE:
    886 
    887 		if (raidPtr->valid) {
    888 			/* There is a valid RAID set running on this unit! */
    889 			printf("raid%d: Device already configured!\n",unit);
    890 			return(EINVAL);
    891 		}
    892 
    893 		/* copy-in the configuration information */
    894 		/* data points to a pointer to the configuration structure */
    895 
    896 		u_cfg = *((RF_Config_t **) data);
    897 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    898 		if (k_cfg == NULL) {
    899 			return (ENOMEM);
    900 		}
    901 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    902 		    sizeof(RF_Config_t));
    903 		if (retcode) {
    904 			RF_Free(k_cfg, sizeof(RF_Config_t));
    905 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    906 				retcode));
    907 			return (retcode);
    908 		}
    909 		/* allocate a buffer for the layout-specific data, and copy it
    910 		 * in */
    911 		if (k_cfg->layoutSpecificSize) {
    912 			if (k_cfg->layoutSpecificSize > 10000) {
    913 				/* sanity check */
    914 				RF_Free(k_cfg, sizeof(RF_Config_t));
    915 				return (EINVAL);
    916 			}
    917 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    918 			    (u_char *));
    919 			if (specific_buf == NULL) {
    920 				RF_Free(k_cfg, sizeof(RF_Config_t));
    921 				return (ENOMEM);
    922 			}
    923 			retcode = copyin(k_cfg->layoutSpecific,
    924 			    (caddr_t) specific_buf,
    925 			    k_cfg->layoutSpecificSize);
    926 			if (retcode) {
    927 				RF_Free(k_cfg, sizeof(RF_Config_t));
    928 				RF_Free(specific_buf,
    929 					k_cfg->layoutSpecificSize);
    930 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    931 					retcode));
    932 				return (retcode);
    933 			}
    934 		} else
    935 			specific_buf = NULL;
    936 		k_cfg->layoutSpecific = specific_buf;
    937 
    938 		/* should do some kind of sanity check on the configuration.
    939 		 * Store the sum of all the bytes in the last byte? */
    940 
    941 		/* configure the system */
    942 
    943 		/*
    944 		 * Clear the entire RAID descriptor, just to make sure
    945 		 *  there is no stale data left in the case of a
    946 		 *  reconfiguration
    947 		 */
    948 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
    949 		raidPtr->raidid = unit;
    950 
    951 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    952 
    953 		if (retcode == 0) {
    954 
    955 			/* allow this many simultaneous IO's to
    956 			   this RAID device */
    957 			raidPtr->openings = RAIDOUTSTANDING;
    958 
    959 			raidinit(raidPtr);
    960 			rf_markalldirty(raidPtr);
    961 		}
    962 		/* free the buffers.  No return code here. */
    963 		if (k_cfg->layoutSpecificSize) {
    964 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    965 		}
    966 		RF_Free(k_cfg, sizeof(RF_Config_t));
    967 
    968 		return (retcode);
    969 
    970 		/* shutdown the system */
    971 	case RAIDFRAME_SHUTDOWN:
    972 
    973 		if ((error = raidlock(rs)) != 0)
    974 			return (error);
    975 
    976 		/*
    977 		 * If somebody has a partition mounted, we shouldn't
    978 		 * shutdown.
    979 		 */
    980 
    981 		part = DISKPART(dev);
    982 		pmask = (1 << part);
    983 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    984 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    985 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    986 			raidunlock(rs);
    987 			return (EBUSY);
    988 		}
    989 
    990 		retcode = rf_Shutdown(raidPtr);
    991 
    992 		/* It's no longer initialized... */
    993 		rs->sc_flags &= ~RAIDF_INITED;
    994 
    995 		/* Detach the disk. */
    996 		disk_detach(&rs->sc_dkdev);
    997 
    998 		raidunlock(rs);
    999 
   1000 		return (retcode);
   1001 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1002 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1003 		/* need to read the component label for the disk indicated
   1004 		   by row,column in clabel */
   1005 
   1006 		/* For practice, let's get it directly fromdisk, rather
   1007 		   than from the in-core copy */
   1008 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
   1009 			   (RF_ComponentLabel_t *));
   1010 		if (clabel == NULL)
   1011 			return (ENOMEM);
   1012 
   1013 		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
   1014 
   1015 		retcode = copyin( *clabel_ptr, clabel,
   1016 				  sizeof(RF_ComponentLabel_t));
   1017 
   1018 		if (retcode) {
   1019 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1020 			return(retcode);
   1021 		}
   1022 
   1023 		row = clabel->row;
   1024 		column = clabel->column;
   1025 
   1026 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1027 		    (column < 0) || (column >= raidPtr->numCol +
   1028 				     raidPtr->numSpare)) {
   1029 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1030 			return(EINVAL);
   1031 		}
   1032 
   1033 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1034 				raidPtr->raid_cinfo[row][column].ci_vp,
   1035 				clabel );
   1036 
   1037 		retcode = copyout((caddr_t) clabel,
   1038 				  (caddr_t) *clabel_ptr,
   1039 				  sizeof(RF_ComponentLabel_t));
   1040 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1041 		return (retcode);
   1042 
   1043 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1044 		clabel = (RF_ComponentLabel_t *) data;
   1045 
   1046 		/* XXX check the label for valid stuff... */
   1047 		/* Note that some things *should not* get modified --
   1048 		   the user should be re-initing the labels instead of
   1049 		   trying to patch things.
   1050 		   */
   1051 
   1052 		printf("Got component label:\n");
   1053 		printf("Version: %d\n",clabel->version);
   1054 		printf("Serial Number: %d\n",clabel->serial_number);
   1055 		printf("Mod counter: %d\n",clabel->mod_counter);
   1056 		printf("Row: %d\n", clabel->row);
   1057 		printf("Column: %d\n", clabel->column);
   1058 		printf("Num Rows: %d\n", clabel->num_rows);
   1059 		printf("Num Columns: %d\n", clabel->num_columns);
   1060 		printf("Clean: %d\n", clabel->clean);
   1061 		printf("Status: %d\n", clabel->status);
   1062 
   1063 		row = clabel->row;
   1064 		column = clabel->column;
   1065 
   1066 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1067 		    (column < 0) || (column >= raidPtr->numCol)) {
   1068 			return(EINVAL);
   1069 		}
   1070 
   1071 		/* XXX this isn't allowed to do anything for now :-) */
   1072 
   1073 		/* XXX and before it is, we need to fill in the rest
   1074 		   of the fields!?!?!?! */
   1075 #if 0
   1076 		raidwrite_component_label(
   1077                             raidPtr->Disks[row][column].dev,
   1078 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1079 			    clabel );
   1080 #endif
   1081 		return (0);
   1082 
   1083 	case RAIDFRAME_INIT_LABELS:
   1084 		clabel = (RF_ComponentLabel_t *) data;
   1085 		/*
   1086 		   we only want the serial number from
   1087 		   the above.  We get all the rest of the information
   1088 		   from the config that was used to create this RAID
   1089 		   set.
   1090 		   */
   1091 
   1092 		raidPtr->serial_number = clabel->serial_number;
   1093 
   1094 		raid_init_component_label(raidPtr, &ci_label);
   1095 		ci_label.serial_number = clabel->serial_number;
   1096 
   1097 		for(row=0;row<raidPtr->numRow;row++) {
   1098 			ci_label.row = row;
   1099 			for(column=0;column<raidPtr->numCol;column++) {
   1100 				diskPtr = &raidPtr->Disks[row][column];
   1101 				if (!RF_DEAD_DISK(diskPtr->status)) {
   1102 					ci_label.partitionSize = diskPtr->partitionSize;
   1103 					ci_label.column = column;
   1104 					raidwrite_component_label(
   1105 					  raidPtr->Disks[row][column].dev,
   1106 					  raidPtr->raid_cinfo[row][column].ci_vp,
   1107 					  &ci_label );
   1108 				}
   1109 			}
   1110 		}
   1111 
   1112 		return (retcode);
   1113 	case RAIDFRAME_SET_AUTOCONFIG:
   1114 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1115 		printf("New autoconfig value is: %d\n", d);
   1116 		*(int *) data = d;
   1117 		return (retcode);
   1118 
   1119 	case RAIDFRAME_SET_ROOT:
   1120 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1121 		printf("New rootpartition value is: %d\n", d);
   1122 		*(int *) data = d;
   1123 		return (retcode);
   1124 
   1125 		/* initialize all parity */
   1126 	case RAIDFRAME_REWRITEPARITY:
   1127 
   1128 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1129 			/* Parity for RAID 0 is trivially correct */
   1130 			raidPtr->parity_good = RF_RAID_CLEAN;
   1131 			return(0);
   1132 		}
   1133 
   1134 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1135 			/* Re-write is already in progress! */
   1136 			return(EINVAL);
   1137 		}
   1138 
   1139 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1140 					   rf_RewriteParityThread,
   1141 					   raidPtr,"raid_parity");
   1142 		return (retcode);
   1143 
   1144 
   1145 	case RAIDFRAME_ADD_HOT_SPARE:
   1146 		sparePtr = (RF_SingleComponent_t *) data;
   1147 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1148 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1149 		return(retcode);
   1150 
   1151 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1152 		return(retcode);
   1153 
   1154 	case RAIDFRAME_DELETE_COMPONENT:
   1155 		componentPtr = (RF_SingleComponent_t *)data;
   1156 		memcpy( &component, componentPtr,
   1157 			sizeof(RF_SingleComponent_t));
   1158 		retcode = rf_delete_component(raidPtr, &component);
   1159 		return(retcode);
   1160 
   1161 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1162 		componentPtr = (RF_SingleComponent_t *)data;
   1163 		memcpy( &component, componentPtr,
   1164 			sizeof(RF_SingleComponent_t));
   1165 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1166 		return(retcode);
   1167 
   1168 	case RAIDFRAME_REBUILD_IN_PLACE:
   1169 
   1170 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1171 			/* Can't do this on a RAID 0!! */
   1172 			return(EINVAL);
   1173 		}
   1174 
   1175 		if (raidPtr->recon_in_progress == 1) {
   1176 			/* a reconstruct is already in progress! */
   1177 			return(EINVAL);
   1178 		}
   1179 
   1180 		componentPtr = (RF_SingleComponent_t *) data;
   1181 		memcpy( &component, componentPtr,
   1182 			sizeof(RF_SingleComponent_t));
   1183 		row = component.row;
   1184 		column = component.column;
   1185 		printf("Rebuild: %d %d\n",row, column);
   1186 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1187 		    (column < 0) || (column >= raidPtr->numCol)) {
   1188 			return(EINVAL);
   1189 		}
   1190 
   1191 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1192 		if (rrcopy == NULL)
   1193 			return(ENOMEM);
   1194 
   1195 		rrcopy->raidPtr = (void *) raidPtr;
   1196 		rrcopy->row = row;
   1197 		rrcopy->col = column;
   1198 
   1199 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1200 					   rf_ReconstructInPlaceThread,
   1201 					   rrcopy,"raid_reconip");
   1202 		return(retcode);
   1203 
   1204 	case RAIDFRAME_GET_INFO:
   1205 		if (!raidPtr->valid)
   1206 			return (ENODEV);
   1207 		ucfgp = (RF_DeviceConfig_t **) data;
   1208 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1209 			  (RF_DeviceConfig_t *));
   1210 		if (d_cfg == NULL)
   1211 			return (ENOMEM);
   1212 		memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
   1213 		d_cfg->rows = raidPtr->numRow;
   1214 		d_cfg->cols = raidPtr->numCol;
   1215 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1216 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1217 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1218 			return (ENOMEM);
   1219 		}
   1220 		d_cfg->nspares = raidPtr->numSpare;
   1221 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1222 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1223 			return (ENOMEM);
   1224 		}
   1225 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1226 		d = 0;
   1227 		for (i = 0; i < d_cfg->rows; i++) {
   1228 			for (j = 0; j < d_cfg->cols; j++) {
   1229 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1230 				d++;
   1231 			}
   1232 		}
   1233 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1234 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1235 		}
   1236 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1237 				  sizeof(RF_DeviceConfig_t));
   1238 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1239 
   1240 		return (retcode);
   1241 
   1242 	case RAIDFRAME_CHECK_PARITY:
   1243 		*(int *) data = raidPtr->parity_good;
   1244 		return (0);
   1245 
   1246 	case RAIDFRAME_RESET_ACCTOTALS:
   1247 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1248 		return (0);
   1249 
   1250 	case RAIDFRAME_GET_ACCTOTALS:
   1251 		totals = (RF_AccTotals_t *) data;
   1252 		*totals = raidPtr->acc_totals;
   1253 		return (0);
   1254 
   1255 	case RAIDFRAME_KEEP_ACCTOTALS:
   1256 		raidPtr->keep_acc_totals = *(int *)data;
   1257 		return (0);
   1258 
   1259 	case RAIDFRAME_GET_SIZE:
   1260 		*(int *) data = raidPtr->totalSectors;
   1261 		return (0);
   1262 
   1263 		/* fail a disk & optionally start reconstruction */
   1264 	case RAIDFRAME_FAIL_DISK:
   1265 
   1266 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1267 			/* Can't do this on a RAID 0!! */
   1268 			return(EINVAL);
   1269 		}
   1270 
   1271 		rr = (struct rf_recon_req *) data;
   1272 
   1273 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1274 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1275 			return (EINVAL);
   1276 
   1277 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1278 		       unit, rr->row, rr->col);
   1279 
   1280 		/* make a copy of the recon request so that we don't rely on
   1281 		 * the user's buffer */
   1282 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1283 		if (rrcopy == NULL)
   1284 			return(ENOMEM);
   1285 		bcopy(rr, rrcopy, sizeof(*rr));
   1286 		rrcopy->raidPtr = (void *) raidPtr;
   1287 
   1288 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1289 					   rf_ReconThread,
   1290 					   rrcopy,"raid_recon");
   1291 		return (0);
   1292 
   1293 		/* invoke a copyback operation after recon on whatever disk
   1294 		 * needs it, if any */
   1295 	case RAIDFRAME_COPYBACK:
   1296 
   1297 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1298 			/* This makes no sense on a RAID 0!! */
   1299 			return(EINVAL);
   1300 		}
   1301 
   1302 		if (raidPtr->copyback_in_progress == 1) {
   1303 			/* Copyback is already in progress! */
   1304 			return(EINVAL);
   1305 		}
   1306 
   1307 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1308 					   rf_CopybackThread,
   1309 					   raidPtr,"raid_copyback");
   1310 		return (retcode);
   1311 
   1312 		/* return the percentage completion of reconstruction */
   1313 	case RAIDFRAME_CHECK_RECON_STATUS:
   1314 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1315 			/* This makes no sense on a RAID 0, so tell the
   1316 			   user it's done. */
   1317 			*(int *) data = 100;
   1318 			return(0);
   1319 		}
   1320 		row = 0; /* XXX we only consider a single row... */
   1321 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1322 			*(int *) data = 100;
   1323 		else
   1324 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1325 		return (0);
   1326 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1327 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1328 		row = 0; /* XXX we only consider a single row... */
   1329 		if (raidPtr->status[row] != rf_rs_reconstructing) {
   1330 			progressInfo.remaining = 0;
   1331 			progressInfo.completed = 100;
   1332 			progressInfo.total = 100;
   1333 		} else {
   1334 			progressInfo.total =
   1335 				raidPtr->reconControl[row]->numRUsTotal;
   1336 			progressInfo.completed =
   1337 				raidPtr->reconControl[row]->numRUsComplete;
   1338 			progressInfo.remaining = progressInfo.total -
   1339 				progressInfo.completed;
   1340 		}
   1341 		retcode = copyout((caddr_t) &progressInfo,
   1342 				  (caddr_t) *progressInfoPtr,
   1343 				  sizeof(RF_ProgressInfo_t));
   1344 		return (retcode);
   1345 
   1346 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1347 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1348 			/* This makes no sense on a RAID 0, so tell the
   1349 			   user it's done. */
   1350 			*(int *) data = 100;
   1351 			return(0);
   1352 		}
   1353 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1354 			*(int *) data = 100 *
   1355 				raidPtr->parity_rewrite_stripes_done /
   1356 				raidPtr->Layout.numStripe;
   1357 		} else {
   1358 			*(int *) data = 100;
   1359 		}
   1360 		return (0);
   1361 
   1362 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1363 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1364 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1365 			progressInfo.total = raidPtr->Layout.numStripe;
   1366 			progressInfo.completed =
   1367 				raidPtr->parity_rewrite_stripes_done;
   1368 			progressInfo.remaining = progressInfo.total -
   1369 				progressInfo.completed;
   1370 		} else {
   1371 			progressInfo.remaining = 0;
   1372 			progressInfo.completed = 100;
   1373 			progressInfo.total = 100;
   1374 		}
   1375 		retcode = copyout((caddr_t) &progressInfo,
   1376 				  (caddr_t) *progressInfoPtr,
   1377 				  sizeof(RF_ProgressInfo_t));
   1378 		return (retcode);
   1379 
   1380 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1381 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1382 			/* This makes no sense on a RAID 0 */
   1383 			*(int *) data = 100;
   1384 			return(0);
   1385 		}
   1386 		if (raidPtr->copyback_in_progress == 1) {
   1387 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1388 				raidPtr->Layout.numStripe;
   1389 		} else {
   1390 			*(int *) data = 100;
   1391 		}
   1392 		return (0);
   1393 
   1394 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1395 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1396 		if (raidPtr->copyback_in_progress == 1) {
   1397 			progressInfo.total = raidPtr->Layout.numStripe;
   1398 			progressInfo.completed =
   1399 				raidPtr->copyback_stripes_done;
   1400 			progressInfo.remaining = progressInfo.total -
   1401 				progressInfo.completed;
   1402 		} else {
   1403 			progressInfo.remaining = 0;
   1404 			progressInfo.completed = 100;
   1405 			progressInfo.total = 100;
   1406 		}
   1407 		retcode = copyout((caddr_t) &progressInfo,
   1408 				  (caddr_t) *progressInfoPtr,
   1409 				  sizeof(RF_ProgressInfo_t));
   1410 		return (retcode);
   1411 
   1412 		/* the sparetable daemon calls this to wait for the kernel to
   1413 		 * need a spare table. this ioctl does not return until a
   1414 		 * spare table is needed. XXX -- calling mpsleep here in the
   1415 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1416 		 * -- I should either compute the spare table in the kernel,
   1417 		 * or have a different -- XXX XXX -- interface (a different
   1418 		 * character device) for delivering the table     -- XXX */
   1419 #if 0
   1420 	case RAIDFRAME_SPARET_WAIT:
   1421 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1422 		while (!rf_sparet_wait_queue)
   1423 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1424 		waitreq = rf_sparet_wait_queue;
   1425 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1426 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1427 
   1428 		/* structure assignment */
   1429 		*((RF_SparetWait_t *) data) = *waitreq;
   1430 
   1431 		RF_Free(waitreq, sizeof(*waitreq));
   1432 		return (0);
   1433 
   1434 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1435 		 * code in it that will cause the dameon to exit */
   1436 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1437 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1438 		waitreq->fcol = -1;
   1439 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1440 		waitreq->next = rf_sparet_wait_queue;
   1441 		rf_sparet_wait_queue = waitreq;
   1442 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1443 		wakeup(&rf_sparet_wait_queue);
   1444 		return (0);
   1445 
   1446 		/* used by the spare table daemon to deliver a spare table
   1447 		 * into the kernel */
   1448 	case RAIDFRAME_SEND_SPARET:
   1449 
   1450 		/* install the spare table */
   1451 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1452 
   1453 		/* respond to the requestor.  the return status of the spare
   1454 		 * table installation is passed in the "fcol" field */
   1455 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1456 		waitreq->fcol = retcode;
   1457 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1458 		waitreq->next = rf_sparet_resp_queue;
   1459 		rf_sparet_resp_queue = waitreq;
   1460 		wakeup(&rf_sparet_resp_queue);
   1461 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1462 
   1463 		return (retcode);
   1464 #endif
   1465 
   1466 	default:
   1467 		break; /* fall through to the os-specific code below */
   1468 
   1469 	}
   1470 
   1471 	if (!raidPtr->valid)
   1472 		return (EINVAL);
   1473 
   1474 	/*
   1475 	 * Add support for "regular" device ioctls here.
   1476 	 */
   1477 
   1478 	switch (cmd) {
   1479 	case DIOCGDINFO:
   1480 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1481 		break;
   1482 #ifdef __HAVE_OLD_DISKLABEL
   1483 	case ODIOCGDINFO:
   1484 		newlabel = *(rs->sc_dkdev.dk_label);
   1485 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1486 			return ENOTTY;
   1487 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1488 		break;
   1489 #endif
   1490 
   1491 	case DIOCGPART:
   1492 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1493 		((struct partinfo *) data)->part =
   1494 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1495 		break;
   1496 
   1497 	case DIOCWDINFO:
   1498 	case DIOCSDINFO:
   1499 #ifdef __HAVE_OLD_DISKLABEL
   1500 	case ODIOCWDINFO:
   1501 	case ODIOCSDINFO:
   1502 #endif
   1503 	{
   1504 		struct disklabel *lp;
   1505 #ifdef __HAVE_OLD_DISKLABEL
   1506 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1507 			memset(&newlabel, 0, sizeof newlabel);
   1508 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1509 			lp = &newlabel;
   1510 		} else
   1511 #endif
   1512 		lp = (struct disklabel *)data;
   1513 
   1514 		if ((error = raidlock(rs)) != 0)
   1515 			return (error);
   1516 
   1517 		rs->sc_flags |= RAIDF_LABELLING;
   1518 
   1519 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1520 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1521 		if (error == 0) {
   1522 			if (cmd == DIOCWDINFO
   1523 #ifdef __HAVE_OLD_DISKLABEL
   1524 			    || cmd == ODIOCWDINFO
   1525 #endif
   1526 			   )
   1527 				error = writedisklabel(RAIDLABELDEV(dev),
   1528 				    raidstrategy, rs->sc_dkdev.dk_label,
   1529 				    rs->sc_dkdev.dk_cpulabel);
   1530 		}
   1531 		rs->sc_flags &= ~RAIDF_LABELLING;
   1532 
   1533 		raidunlock(rs);
   1534 
   1535 		if (error)
   1536 			return (error);
   1537 		break;
   1538 	}
   1539 
   1540 	case DIOCWLABEL:
   1541 		if (*(int *) data != 0)
   1542 			rs->sc_flags |= RAIDF_WLABEL;
   1543 		else
   1544 			rs->sc_flags &= ~RAIDF_WLABEL;
   1545 		break;
   1546 
   1547 	case DIOCGDEFLABEL:
   1548 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1549 		break;
   1550 
   1551 #ifdef __HAVE_OLD_DISKLABEL
   1552 	case ODIOCGDEFLABEL:
   1553 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1554 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1555 			return ENOTTY;
   1556 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1557 		break;
   1558 #endif
   1559 
   1560 	default:
   1561 		retcode = ENOTTY;
   1562 	}
   1563 	return (retcode);
   1564 
   1565 }
   1566 
   1567 
   1568 /* raidinit -- complete the rest of the initialization for the
   1569    RAIDframe device.  */
   1570 
   1571 
   1572 static void
   1573 raidinit(raidPtr)
   1574 	RF_Raid_t *raidPtr;
   1575 {
   1576 	struct raid_softc *rs;
   1577 	int     unit;
   1578 
   1579 	unit = raidPtr->raidid;
   1580 
   1581 	rs = &raid_softc[unit];
   1582 
   1583 	/* XXX should check return code first... */
   1584 	rs->sc_flags |= RAIDF_INITED;
   1585 
   1586 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1587 
   1588 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1589 
   1590 	/* disk_attach actually creates space for the CPU disklabel, among
   1591 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1592 	 * with disklabels. */
   1593 
   1594 	disk_attach(&rs->sc_dkdev);
   1595 
   1596 	/* XXX There may be a weird interaction here between this, and
   1597 	 * protectedSectors, as used in RAIDframe.  */
   1598 
   1599 	rs->sc_size = raidPtr->totalSectors;
   1600 
   1601 }
   1602 
   1603 /* wake up the daemon & tell it to get us a spare table
   1604  * XXX
   1605  * the entries in the queues should be tagged with the raidPtr
   1606  * so that in the extremely rare case that two recons happen at once,
   1607  * we know for which device we're requesting a spare table
   1608  * XXX
   1609  *
   1610  * XXX This code is not currently used. GO
   1611  */
   1612 int
   1613 rf_GetSpareTableFromDaemon(req)
   1614 	RF_SparetWait_t *req;
   1615 {
   1616 	int     retcode;
   1617 
   1618 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1619 	req->next = rf_sparet_wait_queue;
   1620 	rf_sparet_wait_queue = req;
   1621 	wakeup(&rf_sparet_wait_queue);
   1622 
   1623 	/* mpsleep unlocks the mutex */
   1624 	while (!rf_sparet_resp_queue) {
   1625 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1626 		    "raidframe getsparetable", 0);
   1627 	}
   1628 	req = rf_sparet_resp_queue;
   1629 	rf_sparet_resp_queue = req->next;
   1630 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1631 
   1632 	retcode = req->fcol;
   1633 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1634 					 * alloc'd */
   1635 	return (retcode);
   1636 }
   1637 
   1638 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1639  * bp & passes it down.
   1640  * any calls originating in the kernel must use non-blocking I/O
   1641  * do some extra sanity checking to return "appropriate" error values for
   1642  * certain conditions (to make some standard utilities work)
   1643  *
   1644  * Formerly known as: rf_DoAccessKernel
   1645  */
   1646 void
   1647 raidstart(raidPtr)
   1648 	RF_Raid_t *raidPtr;
   1649 {
   1650 	RF_SectorCount_t num_blocks, pb, sum;
   1651 	RF_RaidAddr_t raid_addr;
   1652 	int     retcode;
   1653 	struct partition *pp;
   1654 	daddr_t blocknum;
   1655 	int     unit;
   1656 	struct raid_softc *rs;
   1657 	int     do_async;
   1658 	struct buf *bp;
   1659 
   1660 	unit = raidPtr->raidid;
   1661 	rs = &raid_softc[unit];
   1662 
   1663 	/* quick check to see if anything has died recently */
   1664 	RF_LOCK_MUTEX(raidPtr->mutex);
   1665 	if (raidPtr->numNewFailures > 0) {
   1666 		rf_update_component_labels(raidPtr,
   1667 					   RF_NORMAL_COMPONENT_UPDATE);
   1668 		raidPtr->numNewFailures--;
   1669 	}
   1670 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1671 
   1672 	/* Check to see if we're at the limit... */
   1673 	RF_LOCK_MUTEX(raidPtr->mutex);
   1674 	while (raidPtr->openings > 0) {
   1675 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1676 
   1677 		/* get the next item, if any, from the queue */
   1678 		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
   1679 			/* nothing more to do */
   1680 			return;
   1681 		}
   1682 		BUFQ_REMOVE(&rs->buf_queue, bp);
   1683 
   1684 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1685 		 * partition.. Need to make it absolute to the underlying
   1686 		 * device.. */
   1687 
   1688 		blocknum = bp->b_blkno;
   1689 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1690 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1691 			blocknum += pp->p_offset;
   1692 		}
   1693 
   1694 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1695 			    (int) blocknum));
   1696 
   1697 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1698 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1699 
   1700 		/* *THIS* is where we adjust what block we're going to...
   1701 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1702 		raid_addr = blocknum;
   1703 
   1704 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1705 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1706 		sum = raid_addr + num_blocks + pb;
   1707 		if (1 || rf_debugKernelAccess) {
   1708 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1709 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1710 				    (int) pb, (int) bp->b_resid));
   1711 		}
   1712 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1713 		    || (sum < num_blocks) || (sum < pb)) {
   1714 			bp->b_error = ENOSPC;
   1715 			bp->b_flags |= B_ERROR;
   1716 			bp->b_resid = bp->b_bcount;
   1717 			biodone(bp);
   1718 			RF_LOCK_MUTEX(raidPtr->mutex);
   1719 			continue;
   1720 		}
   1721 		/*
   1722 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1723 		 */
   1724 
   1725 		if (bp->b_bcount & raidPtr->sectorMask) {
   1726 			bp->b_error = EINVAL;
   1727 			bp->b_flags |= B_ERROR;
   1728 			bp->b_resid = bp->b_bcount;
   1729 			biodone(bp);
   1730 			RF_LOCK_MUTEX(raidPtr->mutex);
   1731 			continue;
   1732 
   1733 		}
   1734 		db1_printf(("Calling DoAccess..\n"));
   1735 
   1736 
   1737 		RF_LOCK_MUTEX(raidPtr->mutex);
   1738 		raidPtr->openings--;
   1739 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1740 
   1741 		/*
   1742 		 * Everything is async.
   1743 		 */
   1744 		do_async = 1;
   1745 
   1746 		disk_busy(&rs->sc_dkdev);
   1747 
   1748 		/* XXX we're still at splbio() here... do we *really*
   1749 		   need to be? */
   1750 
   1751 		/* don't ever condition on bp->b_flags & B_WRITE.
   1752 		 * always condition on B_READ instead */
   1753 
   1754 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1755 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1756 				      do_async, raid_addr, num_blocks,
   1757 				      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   1758 
   1759 		RF_LOCK_MUTEX(raidPtr->mutex);
   1760 	}
   1761 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1762 }
   1763 
   1764 
   1765 
   1766 
   1767 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1768 
   1769 int
   1770 rf_DispatchKernelIO(queue, req)
   1771 	RF_DiskQueue_t *queue;
   1772 	RF_DiskQueueData_t *req;
   1773 {
   1774 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1775 	struct buf *bp;
   1776 	struct raidbuf *raidbp = NULL;
   1777 	struct raid_softc *rs;
   1778 	int     unit;
   1779 	int s;
   1780 
   1781 	s=0;
   1782 	/* s = splbio();*/ /* want to test this */
   1783 	/* XXX along with the vnode, we also need the softc associated with
   1784 	 * this device.. */
   1785 
   1786 	req->queue = queue;
   1787 
   1788 	unit = queue->raidPtr->raidid;
   1789 
   1790 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1791 
   1792 	if (unit >= numraid) {
   1793 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1794 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1795 	}
   1796 	rs = &raid_softc[unit];
   1797 
   1798 	bp = req->bp;
   1799 #if 1
   1800 	/* XXX when there is a physical disk failure, someone is passing us a
   1801 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1802 	 * without taking a performance hit... (not sure where the real bug
   1803 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1804 
   1805 	if (bp->b_flags & B_ERROR) {
   1806 		bp->b_flags &= ~B_ERROR;
   1807 	}
   1808 	if (bp->b_error != 0) {
   1809 		bp->b_error = 0;
   1810 	}
   1811 #endif
   1812 	raidbp = RAIDGETBUF(rs);
   1813 
   1814 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1815 
   1816 	/*
   1817 	 * context for raidiodone
   1818 	 */
   1819 	raidbp->rf_obp = bp;
   1820 	raidbp->req = req;
   1821 
   1822 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1823 
   1824 	switch (req->type) {
   1825 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1826 		/* XXX need to do something extra here.. */
   1827 		/* I'm leaving this in, as I've never actually seen it used,
   1828 		 * and I'd like folks to report it... GO */
   1829 		printf(("WAKEUP CALLED\n"));
   1830 		queue->numOutstanding++;
   1831 
   1832 		/* XXX need to glue the original buffer into this??  */
   1833 
   1834 		KernelWakeupFunc(&raidbp->rf_buf);
   1835 		break;
   1836 
   1837 	case RF_IO_TYPE_READ:
   1838 	case RF_IO_TYPE_WRITE:
   1839 
   1840 		if (req->tracerec) {
   1841 			RF_ETIMER_START(req->tracerec->timer);
   1842 		}
   1843 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1844 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1845 		    req->sectorOffset, req->numSector,
   1846 		    req->buf, KernelWakeupFunc, (void *) req,
   1847 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1848 
   1849 		if (rf_debugKernelAccess) {
   1850 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1851 				(long) bp->b_blkno));
   1852 		}
   1853 		queue->numOutstanding++;
   1854 		queue->last_deq_sector = req->sectorOffset;
   1855 		/* acc wouldn't have been let in if there were any pending
   1856 		 * reqs at any other priority */
   1857 		queue->curPriority = req->priority;
   1858 
   1859 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1860 			req->type, unit, queue->row, queue->col));
   1861 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1862 			(int) req->sectorOffset, (int) req->numSector,
   1863 			(int) (req->numSector <<
   1864 			    queue->raidPtr->logBytesPerSector),
   1865 			(int) queue->raidPtr->logBytesPerSector));
   1866 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1867 			raidbp->rf_buf.b_vp->v_numoutput++;
   1868 		}
   1869 		VOP_STRATEGY(&raidbp->rf_buf);
   1870 
   1871 		break;
   1872 
   1873 	default:
   1874 		panic("bad req->type in rf_DispatchKernelIO");
   1875 	}
   1876 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1877 	/* splx(s); */ /* want to test this */
   1878 	return (0);
   1879 }
   1880 /* this is the callback function associated with a I/O invoked from
   1881    kernel code.
   1882  */
   1883 static void
   1884 KernelWakeupFunc(vbp)
   1885 	struct buf *vbp;
   1886 {
   1887 	RF_DiskQueueData_t *req = NULL;
   1888 	RF_DiskQueue_t *queue;
   1889 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1890 	struct buf *bp;
   1891 	struct raid_softc *rs;
   1892 	int     unit;
   1893 	int s;
   1894 
   1895 	s = splbio();
   1896 	db1_printf(("recovering the request queue:\n"));
   1897 	req = raidbp->req;
   1898 
   1899 	bp = raidbp->rf_obp;
   1900 
   1901 	queue = (RF_DiskQueue_t *) req->queue;
   1902 
   1903 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1904 		bp->b_flags |= B_ERROR;
   1905 		bp->b_error = raidbp->rf_buf.b_error ?
   1906 		    raidbp->rf_buf.b_error : EIO;
   1907 	}
   1908 
   1909 	/* XXX methinks this could be wrong... */
   1910 #if 1
   1911 	bp->b_resid = raidbp->rf_buf.b_resid;
   1912 #endif
   1913 
   1914 	if (req->tracerec) {
   1915 		RF_ETIMER_STOP(req->tracerec->timer);
   1916 		RF_ETIMER_EVAL(req->tracerec->timer);
   1917 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1918 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1919 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1920 		req->tracerec->num_phys_ios++;
   1921 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1922 	}
   1923 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1924 
   1925 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1926 
   1927 
   1928 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1929 	 * ballistic, and mark the component as hosed... */
   1930 
   1931 	if (bp->b_flags & B_ERROR) {
   1932 		/* Mark the disk as dead */
   1933 		/* but only mark it once... */
   1934 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1935 		    rf_ds_optimal) {
   1936 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1937 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1938 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1939 			    rf_ds_failed;
   1940 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1941 			queue->raidPtr->numFailures++;
   1942 			queue->raidPtr->numNewFailures++;
   1943 		} else {	/* Disk is already dead... */
   1944 			/* printf("Disk already marked as dead!\n"); */
   1945 		}
   1946 
   1947 	}
   1948 
   1949 	rs = &raid_softc[unit];
   1950 	RAIDPUTBUF(rs, raidbp);
   1951 
   1952 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1953 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1954 
   1955 	splx(s);
   1956 }
   1957 
   1958 
   1959 
   1960 /*
   1961  * initialize a buf structure for doing an I/O in the kernel.
   1962  */
   1963 static void
   1964 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
   1965        logBytesPerSector, b_proc)
   1966 	struct buf *bp;
   1967 	struct vnode *b_vp;
   1968 	unsigned rw_flag;
   1969 	dev_t dev;
   1970 	RF_SectorNum_t startSect;
   1971 	RF_SectorCount_t numSect;
   1972 	caddr_t buf;
   1973 	void (*cbFunc) (struct buf *);
   1974 	void *cbArg;
   1975 	int logBytesPerSector;
   1976 	struct proc *b_proc;
   1977 {
   1978 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1979 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1980 	bp->b_bcount = numSect << logBytesPerSector;
   1981 	bp->b_bufsize = bp->b_bcount;
   1982 	bp->b_error = 0;
   1983 	bp->b_dev = dev;
   1984 	bp->b_data = buf;
   1985 	bp->b_blkno = startSect;
   1986 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1987 	if (bp->b_bcount == 0) {
   1988 		panic("bp->b_bcount is zero in InitBP!!\n");
   1989 	}
   1990 	bp->b_proc = b_proc;
   1991 	bp->b_iodone = cbFunc;
   1992 	bp->b_vp = b_vp;
   1993 
   1994 }
   1995 
   1996 static void
   1997 raidgetdefaultlabel(raidPtr, rs, lp)
   1998 	RF_Raid_t *raidPtr;
   1999 	struct raid_softc *rs;
   2000 	struct disklabel *lp;
   2001 {
   2002 	db1_printf(("Building a default label...\n"));
   2003 	memset(lp, 0, sizeof(*lp));
   2004 
   2005 	/* fabricate a label... */
   2006 	lp->d_secperunit = raidPtr->totalSectors;
   2007 	lp->d_secsize = raidPtr->bytesPerSector;
   2008 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2009 	lp->d_ntracks = 4 * raidPtr->numCol;
   2010 	lp->d_ncylinders = raidPtr->totalSectors /
   2011 		(lp->d_nsectors * lp->d_ntracks);
   2012 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   2013 
   2014 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2015 	lp->d_type = DTYPE_RAID;
   2016 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2017 	lp->d_rpm = 3600;
   2018 	lp->d_interleave = 1;
   2019 	lp->d_flags = 0;
   2020 
   2021 	lp->d_partitions[RAW_PART].p_offset = 0;
   2022 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2023 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2024 	lp->d_npartitions = RAW_PART + 1;
   2025 
   2026 	lp->d_magic = DISKMAGIC;
   2027 	lp->d_magic2 = DISKMAGIC;
   2028 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2029 
   2030 }
   2031 /*
   2032  * Read the disklabel from the raid device.  If one is not present, fake one
   2033  * up.
   2034  */
   2035 static void
   2036 raidgetdisklabel(dev)
   2037 	dev_t   dev;
   2038 {
   2039 	int     unit = raidunit(dev);
   2040 	struct raid_softc *rs = &raid_softc[unit];
   2041 	char   *errstring;
   2042 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2043 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2044 	RF_Raid_t *raidPtr;
   2045 
   2046 	db1_printf(("Getting the disklabel...\n"));
   2047 
   2048 	memset(clp, 0, sizeof(*clp));
   2049 
   2050 	raidPtr = raidPtrs[unit];
   2051 
   2052 	raidgetdefaultlabel(raidPtr, rs, lp);
   2053 
   2054 	/*
   2055 	 * Call the generic disklabel extraction routine.
   2056 	 */
   2057 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2058 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2059 	if (errstring)
   2060 		raidmakedisklabel(rs);
   2061 	else {
   2062 		int     i;
   2063 		struct partition *pp;
   2064 
   2065 		/*
   2066 		 * Sanity check whether the found disklabel is valid.
   2067 		 *
   2068 		 * This is necessary since total size of the raid device
   2069 		 * may vary when an interleave is changed even though exactly
   2070 		 * same componets are used, and old disklabel may used
   2071 		 * if that is found.
   2072 		 */
   2073 		if (lp->d_secperunit != rs->sc_size)
   2074 			printf("WARNING: %s: "
   2075 			    "total sector size in disklabel (%d) != "
   2076 			    "the size of raid (%ld)\n", rs->sc_xname,
   2077 			    lp->d_secperunit, (long) rs->sc_size);
   2078 		for (i = 0; i < lp->d_npartitions; i++) {
   2079 			pp = &lp->d_partitions[i];
   2080 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2081 				printf("WARNING: %s: end of partition `%c' "
   2082 				    "exceeds the size of raid (%ld)\n",
   2083 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2084 		}
   2085 	}
   2086 
   2087 }
   2088 /*
   2089  * Take care of things one might want to take care of in the event
   2090  * that a disklabel isn't present.
   2091  */
   2092 static void
   2093 raidmakedisklabel(rs)
   2094 	struct raid_softc *rs;
   2095 {
   2096 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2097 	db1_printf(("Making a label..\n"));
   2098 
   2099 	/*
   2100 	 * For historical reasons, if there's no disklabel present
   2101 	 * the raw partition must be marked FS_BSDFFS.
   2102 	 */
   2103 
   2104 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2105 
   2106 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2107 
   2108 	lp->d_checksum = dkcksum(lp);
   2109 }
   2110 /*
   2111  * Lookup the provided name in the filesystem.  If the file exists,
   2112  * is a valid block device, and isn't being used by anyone else,
   2113  * set *vpp to the file's vnode.
   2114  * You'll find the original of this in ccd.c
   2115  */
   2116 int
   2117 raidlookup(path, p, vpp)
   2118 	char   *path;
   2119 	struct proc *p;
   2120 	struct vnode **vpp;	/* result */
   2121 {
   2122 	struct nameidata nd;
   2123 	struct vnode *vp;
   2124 	struct vattr va;
   2125 	int     error;
   2126 
   2127 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2128 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2129 #ifdef DEBUG
   2130 		printf("RAIDframe: vn_open returned %d\n", error);
   2131 #endif
   2132 		return (error);
   2133 	}
   2134 	vp = nd.ni_vp;
   2135 	if (vp->v_usecount > 1) {
   2136 		VOP_UNLOCK(vp, 0);
   2137 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2138 		return (EBUSY);
   2139 	}
   2140 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2141 		VOP_UNLOCK(vp, 0);
   2142 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2143 		return (error);
   2144 	}
   2145 	/* XXX: eventually we should handle VREG, too. */
   2146 	if (va.va_type != VBLK) {
   2147 		VOP_UNLOCK(vp, 0);
   2148 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2149 		return (ENOTBLK);
   2150 	}
   2151 	VOP_UNLOCK(vp, 0);
   2152 	*vpp = vp;
   2153 	return (0);
   2154 }
   2155 /*
   2156  * Wait interruptibly for an exclusive lock.
   2157  *
   2158  * XXX
   2159  * Several drivers do this; it should be abstracted and made MP-safe.
   2160  * (Hmm... where have we seen this warning before :->  GO )
   2161  */
   2162 static int
   2163 raidlock(rs)
   2164 	struct raid_softc *rs;
   2165 {
   2166 	int     error;
   2167 
   2168 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2169 		rs->sc_flags |= RAIDF_WANTED;
   2170 		if ((error =
   2171 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2172 			return (error);
   2173 	}
   2174 	rs->sc_flags |= RAIDF_LOCKED;
   2175 	return (0);
   2176 }
   2177 /*
   2178  * Unlock and wake up any waiters.
   2179  */
   2180 static void
   2181 raidunlock(rs)
   2182 	struct raid_softc *rs;
   2183 {
   2184 
   2185 	rs->sc_flags &= ~RAIDF_LOCKED;
   2186 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2187 		rs->sc_flags &= ~RAIDF_WANTED;
   2188 		wakeup(rs);
   2189 	}
   2190 }
   2191 
   2192 
/*
 * Placement of the component label on each component, in bytes.
 * raidread_component_label() and raidwrite_component_label() transfer
 * RF_COMPONENT_INFO_SIZE bytes starting at block
 * RF_COMPONENT_INFO_OFFSET / DEV_BSIZE.
 */
#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2195 
   2196 int
   2197 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2198 {
   2199 	RF_ComponentLabel_t clabel;
   2200 	raidread_component_label(dev, b_vp, &clabel);
   2201 	clabel.mod_counter = mod_counter;
   2202 	clabel.clean = RF_RAID_CLEAN;
   2203 	raidwrite_component_label(dev, b_vp, &clabel);
   2204 	return(0);
   2205 }
   2206 
   2207 
   2208 int
   2209 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2210 {
   2211 	RF_ComponentLabel_t clabel;
   2212 	raidread_component_label(dev, b_vp, &clabel);
   2213 	clabel.mod_counter = mod_counter;
   2214 	clabel.clean = RF_RAID_DIRTY;
   2215 	raidwrite_component_label(dev, b_vp, &clabel);
   2216 	return(0);
   2217 }
   2218 
   2219 /* ARGSUSED */
   2220 int
   2221 raidread_component_label(dev, b_vp, clabel)
   2222 	dev_t dev;
   2223 	struct vnode *b_vp;
   2224 	RF_ComponentLabel_t *clabel;
   2225 {
   2226 	struct buf *bp;
   2227 	const struct bdevsw *bdev;
   2228 	int error;
   2229 
   2230 	/* XXX should probably ensure that we don't try to do this if
   2231 	   someone has changed rf_protected_sectors. */
   2232 
   2233 	if (b_vp == NULL) {
   2234 		/* For whatever reason, this component is not valid.
   2235 		   Don't try to read a component label from it. */
   2236 		return(EINVAL);
   2237 	}
   2238 
   2239 	/* get a block of the appropriate size... */
   2240 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2241 	bp->b_dev = dev;
   2242 
   2243 	/* get our ducks in a row for the read */
   2244 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2245 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2246 	bp->b_flags |= B_READ;
   2247  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2248 
   2249 	bdev = bdevsw_lookup(bp->b_dev);
   2250 	if (bdev == NULL)
   2251 		return (ENXIO);
   2252 	(*bdev->d_strategy)(bp);
   2253 
   2254 	error = biowait(bp);
   2255 
   2256 	if (!error) {
   2257 		memcpy(clabel, bp->b_data,
   2258 		       sizeof(RF_ComponentLabel_t));
   2259 #if 0
   2260 		rf_print_component_label( clabel );
   2261 #endif
   2262         } else {
   2263 #if 0
   2264 		printf("Failed to read RAID component label!\n");
   2265 #endif
   2266 	}
   2267 
   2268 	brelse(bp);
   2269 	return(error);
   2270 }
   2271 /* ARGSUSED */
   2272 int
   2273 raidwrite_component_label(dev, b_vp, clabel)
   2274 	dev_t dev;
   2275 	struct vnode *b_vp;
   2276 	RF_ComponentLabel_t *clabel;
   2277 {
   2278 	struct buf *bp;
   2279 	const struct bdevsw *bdev;
   2280 	int error;
   2281 
   2282 	/* get a block of the appropriate size... */
   2283 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2284 	bp->b_dev = dev;
   2285 
   2286 	/* get our ducks in a row for the write */
   2287 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2288 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2289 	bp->b_flags |= B_WRITE;
   2290  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2291 
   2292 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2293 
   2294 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2295 
   2296 	bdev = bdevsw_lookup(bp->b_dev);
   2297 	if (bdev == NULL)
   2298 		return (ENXIO);
   2299 	(*bdev->d_strategy)(bp);
   2300 	error = biowait(bp);
   2301 	brelse(bp);
   2302 	if (error) {
   2303 #if 1
   2304 		printf("Failed to write RAID component info!\n");
   2305 #endif
   2306 	}
   2307 
   2308 	return(error);
   2309 }
   2310 
   2311 void
   2312 rf_markalldirty(raidPtr)
   2313 	RF_Raid_t *raidPtr;
   2314 {
   2315 	RF_ComponentLabel_t clabel;
   2316 	int r,c;
   2317 
   2318 	raidPtr->mod_counter++;
   2319 	for (r = 0; r < raidPtr->numRow; r++) {
   2320 		for (c = 0; c < raidPtr->numCol; c++) {
   2321 			/* we don't want to touch (at all) a disk that has
   2322 			   failed */
   2323 			if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
   2324 				raidread_component_label(
   2325 					raidPtr->Disks[r][c].dev,
   2326 					raidPtr->raid_cinfo[r][c].ci_vp,
   2327 					&clabel);
   2328 				if (clabel.status == rf_ds_spared) {
   2329 					/* XXX do something special...
   2330 					 but whatever you do, don't
   2331 					 try to access it!! */
   2332 				} else {
   2333 #if 0
   2334 				clabel.status =
   2335 					raidPtr->Disks[r][c].status;
   2336 				raidwrite_component_label(
   2337 					raidPtr->Disks[r][c].dev,
   2338 					raidPtr->raid_cinfo[r][c].ci_vp,
   2339 					&clabel);
   2340 #endif
   2341 				raidmarkdirty(
   2342 				       raidPtr->Disks[r][c].dev,
   2343 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2344 				       raidPtr->mod_counter);
   2345 				}
   2346 			}
   2347 		}
   2348 	}
   2349 	/* printf("Component labels marked dirty.\n"); */
   2350 #if 0
   2351 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2352 		sparecol = raidPtr->numCol + c;
   2353 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2354 			/*
   2355 
   2356 			   XXX this is where we get fancy and map this spare
   2357 			   into it's correct spot in the array.
   2358 
   2359 			 */
   2360 			/*
   2361 
   2362 			   we claim this disk is "optimal" if it's
   2363 			   rf_ds_used_spare, as that means it should be
   2364 			   directly substitutable for the disk it replaced.
   2365 			   We note that too...
   2366 
   2367 			 */
   2368 
   2369 			for(i=0;i<raidPtr->numRow;i++) {
   2370 				for(j=0;j<raidPtr->numCol;j++) {
   2371 					if ((raidPtr->Disks[i][j].spareRow ==
   2372 					     r) &&
   2373 					    (raidPtr->Disks[i][j].spareCol ==
   2374 					     sparecol)) {
   2375 						srow = r;
   2376 						scol = sparecol;
   2377 						break;
   2378 					}
   2379 				}
   2380 			}
   2381 
   2382 			raidread_component_label(
   2383 				      raidPtr->Disks[r][sparecol].dev,
   2384 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2385 				      &clabel);
   2386 			/* make sure status is noted */
   2387 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2388 			clabel.mod_counter = raidPtr->mod_counter;
   2389 			clabel.serial_number = raidPtr->serial_number;
   2390 			clabel.row = srow;
   2391 			clabel.column = scol;
   2392 			clabel.num_rows = raidPtr->numRow;
   2393 			clabel.num_columns = raidPtr->numCol;
   2394 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2395 			clabel.status = rf_ds_optimal;
   2396 			raidwrite_component_label(
   2397 				      raidPtr->Disks[r][sparecol].dev,
   2398 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2399 				      &clabel);
   2400 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2401 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2402 		}
   2403 	}
   2404 
   2405 #endif
   2406 }
   2407 
   2408 
   2409 void
   2410 rf_update_component_labels(raidPtr, final)
   2411 	RF_Raid_t *raidPtr;
   2412 	int final;
   2413 {
   2414 	RF_ComponentLabel_t clabel;
   2415 	int sparecol;
   2416 	int r,c;
   2417 	int i,j;
   2418 	int srow, scol;
   2419 
   2420 	srow = -1;
   2421 	scol = -1;
   2422 
   2423 	/* XXX should do extra checks to make sure things really are clean,
   2424 	   rather than blindly setting the clean bit... */
   2425 
   2426 	raidPtr->mod_counter++;
   2427 
   2428 	for (r = 0; r < raidPtr->numRow; r++) {
   2429 		for (c = 0; c < raidPtr->numCol; c++) {
   2430 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2431 				raidread_component_label(
   2432 					raidPtr->Disks[r][c].dev,
   2433 					raidPtr->raid_cinfo[r][c].ci_vp,
   2434 					&clabel);
   2435 				/* make sure status is noted */
   2436 				clabel.status = rf_ds_optimal;
   2437 				/* bump the counter */
   2438 				clabel.mod_counter = raidPtr->mod_counter;
   2439 
   2440 				raidwrite_component_label(
   2441 					raidPtr->Disks[r][c].dev,
   2442 					raidPtr->raid_cinfo[r][c].ci_vp,
   2443 					&clabel);
   2444 				if (final == RF_FINAL_COMPONENT_UPDATE) {
   2445 					if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2446 						raidmarkclean(
   2447 							      raidPtr->Disks[r][c].dev,
   2448 							      raidPtr->raid_cinfo[r][c].ci_vp,
   2449 							      raidPtr->mod_counter);
   2450 					}
   2451 				}
   2452 			}
   2453 			/* else we don't touch it.. */
   2454 		}
   2455 	}
   2456 
   2457 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2458 		sparecol = raidPtr->numCol + c;
   2459 		/* Need to ensure that the reconstruct actually completed! */
   2460 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2461 			/*
   2462 
   2463 			   we claim this disk is "optimal" if it's
   2464 			   rf_ds_used_spare, as that means it should be
   2465 			   directly substitutable for the disk it replaced.
   2466 			   We note that too...
   2467 
   2468 			 */
   2469 
   2470 			for(i=0;i<raidPtr->numRow;i++) {
   2471 				for(j=0;j<raidPtr->numCol;j++) {
   2472 					if ((raidPtr->Disks[i][j].spareRow ==
   2473 					     0) &&
   2474 					    (raidPtr->Disks[i][j].spareCol ==
   2475 					     sparecol)) {
   2476 						srow = i;
   2477 						scol = j;
   2478 						break;
   2479 					}
   2480 				}
   2481 			}
   2482 
   2483 			/* XXX shouldn't *really* need this... */
   2484 			raidread_component_label(
   2485 				      raidPtr->Disks[0][sparecol].dev,
   2486 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2487 				      &clabel);
   2488 			/* make sure status is noted */
   2489 
   2490 			raid_init_component_label(raidPtr, &clabel);
   2491 
   2492 			clabel.mod_counter = raidPtr->mod_counter;
   2493 			clabel.row = srow;
   2494 			clabel.column = scol;
   2495 			clabel.status = rf_ds_optimal;
   2496 
   2497 			raidwrite_component_label(
   2498 				      raidPtr->Disks[0][sparecol].dev,
   2499 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2500 				      &clabel);
   2501 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2502 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2503 					raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2504 						       raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2505 						       raidPtr->mod_counter);
   2506 				}
   2507 			}
   2508 		}
   2509 	}
   2510 	/* 	printf("Component labels updated\n"); */
   2511 }
   2512 
   2513 void
   2514 rf_close_component(raidPtr, vp, auto_configured)
   2515 	RF_Raid_t *raidPtr;
   2516 	struct vnode *vp;
   2517 	int auto_configured;
   2518 {
   2519 	struct proc *p;
   2520 
   2521 	p = raidPtr->engine_thread;
   2522 
   2523 	if (vp != NULL) {
   2524 		if (auto_configured == 1) {
   2525 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2526 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2527 			vput(vp);
   2528 
   2529 		} else {
   2530 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2531 		}
   2532 	} else {
   2533 		printf("vnode was NULL\n");
   2534 	}
   2535 }
   2536 
   2537 
   2538 void
   2539 rf_UnconfigureVnodes(raidPtr)
   2540 	RF_Raid_t *raidPtr;
   2541 {
   2542 	int r,c;
   2543 	struct proc *p;
   2544 	struct vnode *vp;
   2545 	int acd;
   2546 
   2547 
   2548 	/* We take this opportunity to close the vnodes like we should.. */
   2549 
   2550 	p = raidPtr->engine_thread;
   2551 
   2552 	for (r = 0; r < raidPtr->numRow; r++) {
   2553 		for (c = 0; c < raidPtr->numCol; c++) {
   2554 			printf("Closing vnode for row: %d col: %d\n", r, c);
   2555 			vp = raidPtr->raid_cinfo[r][c].ci_vp;
   2556 			acd = raidPtr->Disks[r][c].auto_configured;
   2557 			rf_close_component(raidPtr, vp, acd);
   2558 			raidPtr->raid_cinfo[r][c].ci_vp = NULL;
   2559 			raidPtr->Disks[r][c].auto_configured = 0;
   2560 		}
   2561 	}
   2562 	for (r = 0; r < raidPtr->numSpare; r++) {
   2563 		printf("Closing vnode for spare: %d\n", r);
   2564 		vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
   2565 		acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
   2566 		rf_close_component(raidPtr, vp, acd);
   2567 		raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
   2568 		raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
   2569 	}
   2570 }
   2571 
   2572 
   2573 void
   2574 rf_ReconThread(req)
   2575 	struct rf_recon_req *req;
   2576 {
   2577 	int     s;
   2578 	RF_Raid_t *raidPtr;
   2579 
   2580 	s = splbio();
   2581 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2582 	raidPtr->recon_in_progress = 1;
   2583 
   2584 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2585 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2586 
   2587 	/* XXX get rid of this! we don't need it at all.. */
   2588 	RF_Free(req, sizeof(*req));
   2589 
   2590 	raidPtr->recon_in_progress = 0;
   2591 	splx(s);
   2592 
   2593 	/* That's all... */
   2594 	kthread_exit(0);        /* does not return */
   2595 }
   2596 
   2597 void
   2598 rf_RewriteParityThread(raidPtr)
   2599 	RF_Raid_t *raidPtr;
   2600 {
   2601 	int retcode;
   2602 	int s;
   2603 
   2604 	raidPtr->parity_rewrite_in_progress = 1;
   2605 	s = splbio();
   2606 	retcode = rf_RewriteParity(raidPtr);
   2607 	splx(s);
   2608 	if (retcode) {
   2609 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2610 	} else {
   2611 		/* set the clean bit!  If we shutdown correctly,
   2612 		   the clean bit on each component label will get
   2613 		   set */
   2614 		raidPtr->parity_good = RF_RAID_CLEAN;
   2615 	}
   2616 	raidPtr->parity_rewrite_in_progress = 0;
   2617 
   2618 	/* Anyone waiting for us to stop?  If so, inform them... */
   2619 	if (raidPtr->waitShutdown) {
   2620 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2621 	}
   2622 
   2623 	/* That's all... */
   2624 	kthread_exit(0);        /* does not return */
   2625 }
   2626 
   2627 
   2628 void
   2629 rf_CopybackThread(raidPtr)
   2630 	RF_Raid_t *raidPtr;
   2631 {
   2632 	int s;
   2633 
   2634 	raidPtr->copyback_in_progress = 1;
   2635 	s = splbio();
   2636 	rf_CopybackReconstructedData(raidPtr);
   2637 	splx(s);
   2638 	raidPtr->copyback_in_progress = 0;
   2639 
   2640 	/* That's all... */
   2641 	kthread_exit(0);        /* does not return */
   2642 }
   2643 
   2644 
   2645 void
   2646 rf_ReconstructInPlaceThread(req)
   2647 	struct rf_recon_req *req;
   2648 {
   2649 	int retcode;
   2650 	int s;
   2651 	RF_Raid_t *raidPtr;
   2652 
   2653 	s = splbio();
   2654 	raidPtr = req->raidPtr;
   2655 	raidPtr->recon_in_progress = 1;
   2656 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2657 	RF_Free(req, sizeof(*req));
   2658 	raidPtr->recon_in_progress = 0;
   2659 	splx(s);
   2660 
   2661 	/* That's all... */
   2662 	kthread_exit(0);        /* does not return */
   2663 }
   2664 
/*
 * rf_mountroot_hook --
 *	Hook taking a device pointer; intentionally does nothing.
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* nothing to do */
}
   2671 
   2672 
   2673 RF_AutoConfig_t *
   2674 rf_find_raid_components()
   2675 {
   2676 	struct vnode *vp;
   2677 	struct disklabel label;
   2678 	struct device *dv;
   2679 	dev_t dev;
   2680 	int bmajor;
   2681 	int error;
   2682 	int i;
   2683 	int good_one;
   2684 	RF_ComponentLabel_t *clabel;
   2685 	RF_AutoConfig_t *ac_list;
   2686 	RF_AutoConfig_t *ac;
   2687 
   2688 
   2689 	/* initialize the AutoConfig list */
   2690 	ac_list = NULL;
   2691 
   2692 	/* we begin by trolling through *all* the devices on the system */
   2693 
   2694 	for (dv = alldevs.tqh_first; dv != NULL;
   2695 	     dv = dv->dv_list.tqe_next) {
   2696 
   2697 		/* we are only interested in disks... */
   2698 		if (dv->dv_class != DV_DISK)
   2699 			continue;
   2700 
   2701 		/* we don't care about floppies... */
   2702 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2703 			continue;
   2704 		}
   2705 
   2706 		/* need to find the device_name_to_block_device_major stuff */
   2707 		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
   2708 
   2709 		/* get a vnode for the raw partition of this disk */
   2710 
   2711 		dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
   2712 		if (bdevvp(dev, &vp))
   2713 			panic("RAID can't alloc vnode");
   2714 
   2715 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2716 
   2717 		if (error) {
   2718 			/* "Who cares."  Continue looking
   2719 			   for something that exists*/
   2720 			vput(vp);
   2721 			continue;
   2722 		}
   2723 
   2724 		/* Ok, the disk exists.  Go get the disklabel. */
   2725 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2726 				  FREAD, NOCRED, 0);
   2727 		if (error) {
   2728 			/*
   2729 			 * XXX can't happen - open() would
   2730 			 * have errored out (or faked up one)
   2731 			 */
   2732 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2733 			       dv->dv_xname, 'a' + RAW_PART, error);
   2734 		}
   2735 
   2736 		/* don't need this any more.  We'll allocate it again
   2737 		   a little later if we really do... */
   2738 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2739 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2740 		vput(vp);
   2741 
   2742 		for (i=0; i < label.d_npartitions; i++) {
   2743 			/* We only support partitions marked as RAID */
   2744 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2745 				continue;
   2746 
   2747 			dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
   2748 			if (bdevvp(dev, &vp))
   2749 				panic("RAID can't alloc vnode");
   2750 
   2751 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2752 			if (error) {
   2753 				/* Whatever... */
   2754 				vput(vp);
   2755 				continue;
   2756 			}
   2757 
   2758 			good_one = 0;
   2759 
   2760 			clabel = (RF_ComponentLabel_t *)
   2761 				malloc(sizeof(RF_ComponentLabel_t),
   2762 				       M_RAIDFRAME, M_NOWAIT);
   2763 			if (clabel == NULL) {
   2764 				/* XXX CLEANUP HERE */
   2765 				printf("RAID auto config: out of memory!\n");
   2766 				return(NULL); /* XXX probably should panic? */
   2767 			}
   2768 
   2769 			if (!raidread_component_label(dev, vp, clabel)) {
   2770 				/* Got the label.  Does it look reasonable? */
   2771 				if (rf_reasonable_label(clabel) &&
   2772 				    (clabel->partitionSize <=
   2773 				     label.d_partitions[i].p_size)) {
   2774 #if DEBUG
   2775 					printf("Component on: %s%c: %d\n",
   2776 					       dv->dv_xname, 'a'+i,
   2777 					       label.d_partitions[i].p_size);
   2778 					rf_print_component_label(clabel);
   2779 #endif
   2780 					/* if it's reasonable, add it,
   2781 					   else ignore it. */
   2782 					ac = (RF_AutoConfig_t *)
   2783 						malloc(sizeof(RF_AutoConfig_t),
   2784 						       M_RAIDFRAME,
   2785 						       M_NOWAIT);
   2786 					if (ac == NULL) {
   2787 						/* XXX should panic?? */
   2788 						return(NULL);
   2789 					}
   2790 
   2791 					sprintf(ac->devname, "%s%c",
   2792 						dv->dv_xname, 'a'+i);
   2793 					ac->dev = dev;
   2794 					ac->vp = vp;
   2795 					ac->clabel = clabel;
   2796 					ac->next = ac_list;
   2797 					ac_list = ac;
   2798 					good_one = 1;
   2799 				}
   2800 			}
   2801 			if (!good_one) {
   2802 				/* cleanup */
   2803 				free(clabel, M_RAIDFRAME);
   2804 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2805 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2806 				vput(vp);
   2807 			}
   2808 		}
   2809 	}
   2810 	return(ac_list);
   2811 }
   2812 
   2813 static int
   2814 rf_reasonable_label(clabel)
   2815 	RF_ComponentLabel_t *clabel;
   2816 {
   2817 
   2818 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2819 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2820 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2821 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2822 	    clabel->row >=0 &&
   2823 	    clabel->column >= 0 &&
   2824 	    clabel->num_rows > 0 &&
   2825 	    clabel->num_columns > 0 &&
   2826 	    clabel->row < clabel->num_rows &&
   2827 	    clabel->column < clabel->num_columns &&
   2828 	    clabel->blockSize > 0 &&
   2829 	    clabel->numBlocks > 0) {
   2830 		/* label looks reasonable enough... */
   2831 		return(1);
   2832 	}
   2833 	return(0);
   2834 }
   2835 
   2836 
   2837 void
   2838 rf_print_component_label(clabel)
   2839 	RF_ComponentLabel_t *clabel;
   2840 {
   2841 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2842 	       clabel->row, clabel->column,
   2843 	       clabel->num_rows, clabel->num_columns);
   2844 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2845 	       clabel->version, clabel->serial_number,
   2846 	       clabel->mod_counter);
   2847 	printf("   Clean: %s Status: %d\n",
   2848 	       clabel->clean ? "Yes" : "No", clabel->status );
   2849 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2850 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2851 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2852 	       (char) clabel->parityConfig, clabel->blockSize,
   2853 	       clabel->numBlocks);
   2854 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2855 	printf("   Contains root partition: %s\n",
   2856 	       clabel->root_partition ? "Yes" : "No" );
   2857 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2858 #if 0
   2859 	   printf("   Config order: %d\n", clabel->config_order);
   2860 #endif
   2861 
   2862 }
   2863 
   2864 RF_ConfigSet_t *
   2865 rf_create_auto_sets(ac_list)
   2866 	RF_AutoConfig_t *ac_list;
   2867 {
   2868 	RF_AutoConfig_t *ac;
   2869 	RF_ConfigSet_t *config_sets;
   2870 	RF_ConfigSet_t *cset;
   2871 	RF_AutoConfig_t *ac_next;
   2872 
   2873 
   2874 	config_sets = NULL;
   2875 
   2876 	/* Go through the AutoConfig list, and figure out which components
   2877 	   belong to what sets.  */
   2878 	ac = ac_list;
   2879 	while(ac!=NULL) {
   2880 		/* we're going to putz with ac->next, so save it here
   2881 		   for use at the end of the loop */
   2882 		ac_next = ac->next;
   2883 
   2884 		if (config_sets == NULL) {
   2885 			/* will need at least this one... */
   2886 			config_sets = (RF_ConfigSet_t *)
   2887 				malloc(sizeof(RF_ConfigSet_t),
   2888 				       M_RAIDFRAME, M_NOWAIT);
   2889 			if (config_sets == NULL) {
   2890 				panic("rf_create_auto_sets: No memory!\n");
   2891 			}
   2892 			/* this one is easy :) */
   2893 			config_sets->ac = ac;
   2894 			config_sets->next = NULL;
   2895 			config_sets->rootable = 0;
   2896 			ac->next = NULL;
   2897 		} else {
   2898 			/* which set does this component fit into? */
   2899 			cset = config_sets;
   2900 			while(cset!=NULL) {
   2901 				if (rf_does_it_fit(cset, ac)) {
   2902 					/* looks like it matches... */
   2903 					ac->next = cset->ac;
   2904 					cset->ac = ac;
   2905 					break;
   2906 				}
   2907 				cset = cset->next;
   2908 			}
   2909 			if (cset==NULL) {
   2910 				/* didn't find a match above... new set..*/
   2911 				cset = (RF_ConfigSet_t *)
   2912 					malloc(sizeof(RF_ConfigSet_t),
   2913 					       M_RAIDFRAME, M_NOWAIT);
   2914 				if (cset == NULL) {
   2915 					panic("rf_create_auto_sets: No memory!\n");
   2916 				}
   2917 				cset->ac = ac;
   2918 				ac->next = NULL;
   2919 				cset->next = config_sets;
   2920 				cset->rootable = 0;
   2921 				config_sets = cset;
   2922 			}
   2923 		}
   2924 		ac = ac_next;
   2925 	}
   2926 
   2927 
   2928 	return(config_sets);
   2929 }
   2930 
   2931 static int
   2932 rf_does_it_fit(cset, ac)
   2933 	RF_ConfigSet_t *cset;
   2934 	RF_AutoConfig_t *ac;
   2935 {
   2936 	RF_ComponentLabel_t *clabel1, *clabel2;
   2937 
   2938 	/* If this one matches the *first* one in the set, that's good
   2939 	   enough, since the other members of the set would have been
   2940 	   through here too... */
   2941 	/* note that we are not checking partitionSize here..
   2942 
   2943 	   Note that we are also not checking the mod_counters here.
   2944 	   If everything else matches execpt the mod_counter, that's
   2945 	   good enough for this test.  We will deal with the mod_counters
   2946 	   a little later in the autoconfiguration process.
   2947 
   2948 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2949 
   2950 	   The reason we don't check for this is that failed disks
   2951 	   will have lower modification counts.  If those disks are
   2952 	   not added to the set they used to belong to, then they will
   2953 	   form their own set, which may result in 2 different sets,
   2954 	   for example, competing to be configured at raid0, and
   2955 	   perhaps competing to be the root filesystem set.  If the
   2956 	   wrong ones get configured, or both attempt to become /,
   2957 	   weird behaviour and or serious lossage will occur.  Thus we
   2958 	   need to bring them into the fold here, and kick them out at
   2959 	   a later point.
   2960 
   2961 	*/
   2962 
   2963 	clabel1 = cset->ac->clabel;
   2964 	clabel2 = ac->clabel;
   2965 	if ((clabel1->version == clabel2->version) &&
   2966 	    (clabel1->serial_number == clabel2->serial_number) &&
   2967 	    (clabel1->num_rows == clabel2->num_rows) &&
   2968 	    (clabel1->num_columns == clabel2->num_columns) &&
   2969 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2970 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2971 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2972 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2973 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2974 	    (clabel1->blockSize == clabel2->blockSize) &&
   2975 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2976 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2977 	    (clabel1->root_partition == clabel2->root_partition) &&
   2978 	    (clabel1->last_unit == clabel2->last_unit) &&
   2979 	    (clabel1->config_order == clabel2->config_order)) {
   2980 		/* if it get's here, it almost *has* to be a match */
   2981 	} else {
   2982 		/* it's not consistent with somebody in the set..
   2983 		   punt */
   2984 		return(0);
   2985 	}
   2986 	/* all was fine.. it must fit... */
   2987 	return(1);
   2988 }
   2989 
   2990 int
   2991 rf_have_enough_components(cset)
   2992 	RF_ConfigSet_t *cset;
   2993 {
   2994 	RF_AutoConfig_t *ac;
   2995 	RF_AutoConfig_t *auto_config;
   2996 	RF_ComponentLabel_t *clabel;
   2997 	int r,c;
   2998 	int num_rows;
   2999 	int num_cols;
   3000 	int num_missing;
   3001 	int mod_counter;
   3002 	int mod_counter_found;
   3003 	int even_pair_failed;
   3004 	char parity_type;
   3005 
   3006 
   3007 	/* check to see that we have enough 'live' components
   3008 	   of this set.  If so, we can configure it if necessary */
   3009 
   3010 	num_rows = cset->ac->clabel->num_rows;
   3011 	num_cols = cset->ac->clabel->num_columns;
   3012 	parity_type = cset->ac->clabel->parityConfig;
   3013 
   3014 	/* XXX Check for duplicate components!?!?!? */
   3015 
   3016 	/* Determine what the mod_counter is supposed to be for this set. */
   3017 
   3018 	mod_counter_found = 0;
   3019 	mod_counter = 0;
   3020 	ac = cset->ac;
   3021 	while(ac!=NULL) {
   3022 		if (mod_counter_found==0) {
   3023 			mod_counter = ac->clabel->mod_counter;
   3024 			mod_counter_found = 1;
   3025 		} else {
   3026 			if (ac->clabel->mod_counter > mod_counter) {
   3027 				mod_counter = ac->clabel->mod_counter;
   3028 			}
   3029 		}
   3030 		ac = ac->next;
   3031 	}
   3032 
   3033 	num_missing = 0;
   3034 	auto_config = cset->ac;
   3035 
   3036 	for(r=0; r<num_rows; r++) {
   3037 		even_pair_failed = 0;
   3038 		for(c=0; c<num_cols; c++) {
   3039 			ac = auto_config;
   3040 			while(ac!=NULL) {
   3041 				if ((ac->clabel->row == r) &&
   3042 				    (ac->clabel->column == c) &&
   3043 				    (ac->clabel->mod_counter == mod_counter)) {
   3044 					/* it's this one... */
   3045 #if DEBUG
   3046 					printf("Found: %s at %d,%d\n",
   3047 					       ac->devname,r,c);
   3048 #endif
   3049 					break;
   3050 				}
   3051 				ac=ac->next;
   3052 			}
   3053 			if (ac==NULL) {
   3054 				/* Didn't find one here! */
   3055 				/* special case for RAID 1, especially
   3056 				   where there are more than 2
   3057 				   components (where RAIDframe treats
   3058 				   things a little differently :( ) */
   3059 				if (parity_type == '1') {
   3060 					if (c%2 == 0) { /* even component */
   3061 						even_pair_failed = 1;
   3062 					} else { /* odd component.  If
   3063                                                     we're failed, and
   3064                                                     so is the even
   3065                                                     component, it's
   3066                                                     "Good Night, Charlie" */
   3067 						if (even_pair_failed == 1) {
   3068 							return(0);
   3069 						}
   3070 					}
   3071 				} else {
   3072 					/* normal accounting */
   3073 					num_missing++;
   3074 				}
   3075 			}
   3076 			if ((parity_type == '1') && (c%2 == 1)) {
   3077 				/* Just did an even component, and we didn't
   3078 				   bail.. reset the even_pair_failed flag,
   3079 				   and go on to the next component.... */
   3080 				even_pair_failed = 0;
   3081 			}
   3082 		}
   3083 	}
   3084 
   3085 	clabel = cset->ac->clabel;
   3086 
   3087 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3088 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3089 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3090 		/* XXX this needs to be made *much* more general */
   3091 		/* Too many failures */
   3092 		return(0);
   3093 	}
   3094 	/* otherwise, all is well, and we've got enough to take a kick
   3095 	   at autoconfiguring this set */
   3096 	return(1);
   3097 }
   3098 
   3099 void
   3100 rf_create_configuration(ac,config,raidPtr)
   3101 	RF_AutoConfig_t *ac;
   3102 	RF_Config_t *config;
   3103 	RF_Raid_t *raidPtr;
   3104 {
   3105 	RF_ComponentLabel_t *clabel;
   3106 	int i;
   3107 
   3108 	clabel = ac->clabel;
   3109 
   3110 	/* 1. Fill in the common stuff */
   3111 	config->numRow = clabel->num_rows;
   3112 	config->numCol = clabel->num_columns;
   3113 	config->numSpare = 0; /* XXX should this be set here? */
   3114 	config->sectPerSU = clabel->sectPerSU;
   3115 	config->SUsPerPU = clabel->SUsPerPU;
   3116 	config->SUsPerRU = clabel->SUsPerRU;
   3117 	config->parityConfig = clabel->parityConfig;
   3118 	/* XXX... */
   3119 	strcpy(config->diskQueueType,"fifo");
   3120 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3121 	config->layoutSpecificSize = 0; /* XXX ?? */
   3122 
   3123 	while(ac!=NULL) {
   3124 		/* row/col values will be in range due to the checks
   3125 		   in reasonable_label() */
   3126 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   3127 		       ac->devname);
   3128 		ac = ac->next;
   3129 	}
   3130 
   3131 	for(i=0;i<RF_MAXDBGV;i++) {
   3132 		config->debugVars[i][0] = NULL;
   3133 	}
   3134 }
   3135 
   3136 int
   3137 rf_set_autoconfig(raidPtr, new_value)
   3138 	RF_Raid_t *raidPtr;
   3139 	int new_value;
   3140 {
   3141 	RF_ComponentLabel_t clabel;
   3142 	struct vnode *vp;
   3143 	dev_t dev;
   3144 	int row, column;
   3145 
   3146 	raidPtr->autoconfigure = new_value;
   3147 	for(row=0; row<raidPtr->numRow; row++) {
   3148 		for(column=0; column<raidPtr->numCol; column++) {
   3149 			if (raidPtr->Disks[row][column].status ==
   3150 			    rf_ds_optimal) {
   3151 				dev = raidPtr->Disks[row][column].dev;
   3152 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3153 				raidread_component_label(dev, vp, &clabel);
   3154 				clabel.autoconfigure = new_value;
   3155 				raidwrite_component_label(dev, vp, &clabel);
   3156 			}
   3157 		}
   3158 	}
   3159 	return(new_value);
   3160 }
   3161 
   3162 int
   3163 rf_set_rootpartition(raidPtr, new_value)
   3164 	RF_Raid_t *raidPtr;
   3165 	int new_value;
   3166 {
   3167 	RF_ComponentLabel_t clabel;
   3168 	struct vnode *vp;
   3169 	dev_t dev;
   3170 	int row, column;
   3171 
   3172 	raidPtr->root_partition = new_value;
   3173 	for(row=0; row<raidPtr->numRow; row++) {
   3174 		for(column=0; column<raidPtr->numCol; column++) {
   3175 			if (raidPtr->Disks[row][column].status ==
   3176 			    rf_ds_optimal) {
   3177 				dev = raidPtr->Disks[row][column].dev;
   3178 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3179 				raidread_component_label(dev, vp, &clabel);
   3180 				clabel.root_partition = new_value;
   3181 				raidwrite_component_label(dev, vp, &clabel);
   3182 			}
   3183 		}
   3184 	}
   3185 	return(new_value);
   3186 }
   3187 
   3188 void
   3189 rf_release_all_vps(cset)
   3190 	RF_ConfigSet_t *cset;
   3191 {
   3192 	RF_AutoConfig_t *ac;
   3193 
   3194 	ac = cset->ac;
   3195 	while(ac!=NULL) {
   3196 		/* Close the vp, and give it back */
   3197 		if (ac->vp) {
   3198 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3199 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3200 			vput(ac->vp);
   3201 			ac->vp = NULL;
   3202 		}
   3203 		ac = ac->next;
   3204 	}
   3205 }
   3206 
   3207 
   3208 void
   3209 rf_cleanup_config_set(cset)
   3210 	RF_ConfigSet_t *cset;
   3211 {
   3212 	RF_AutoConfig_t *ac;
   3213 	RF_AutoConfig_t *next_ac;
   3214 
   3215 	ac = cset->ac;
   3216 	while(ac!=NULL) {
   3217 		next_ac = ac->next;
   3218 		/* nuke the label */
   3219 		free(ac->clabel, M_RAIDFRAME);
   3220 		/* cleanup the config structure */
   3221 		free(ac, M_RAIDFRAME);
   3222 		/* "next.." */
   3223 		ac = next_ac;
   3224 	}
   3225 	/* and, finally, nuke the config set */
   3226 	free(cset, M_RAIDFRAME);
   3227 }
   3228 
   3229 
   3230 void
   3231 raid_init_component_label(raidPtr, clabel)
   3232 	RF_Raid_t *raidPtr;
   3233 	RF_ComponentLabel_t *clabel;
   3234 {
   3235 	/* current version number */
   3236 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3237 	clabel->serial_number = raidPtr->serial_number;
   3238 	clabel->mod_counter = raidPtr->mod_counter;
   3239 	clabel->num_rows = raidPtr->numRow;
   3240 	clabel->num_columns = raidPtr->numCol;
   3241 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3242 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3243 
   3244 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3245 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3246 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3247 
   3248 	clabel->blockSize = raidPtr->bytesPerSector;
   3249 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3250 
   3251 	/* XXX not portable */
   3252 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3253 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3254 	clabel->autoconfigure = raidPtr->autoconfigure;
   3255 	clabel->root_partition = raidPtr->root_partition;
   3256 	clabel->last_unit = raidPtr->raidid;
   3257 	clabel->config_order = raidPtr->config_order;
   3258 }
   3259 
   3260 int
   3261 rf_auto_config_set(cset,unit)
   3262 	RF_ConfigSet_t *cset;
   3263 	int *unit;
   3264 {
   3265 	RF_Raid_t *raidPtr;
   3266 	RF_Config_t *config;
   3267 	int raidID;
   3268 	int retcode;
   3269 
   3270 	printf("RAID autoconfigure\n");
   3271 
   3272 	retcode = 0;
   3273 	*unit = -1;
   3274 
   3275 	/* 1. Create a config structure */
   3276 
   3277 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3278 				       M_RAIDFRAME,
   3279 				       M_NOWAIT);
   3280 	if (config==NULL) {
   3281 		printf("Out of mem!?!?\n");
   3282 				/* XXX do something more intelligent here. */
   3283 		return(1);
   3284 	}
   3285 
   3286 	memset(config, 0, sizeof(RF_Config_t));
   3287 
   3288 	/* XXX raidID needs to be set correctly.. */
   3289 
   3290 	/*
   3291 	   2. Figure out what RAID ID this one is supposed to live at
   3292 	   See if we can get the same RAID dev that it was configured
   3293 	   on last time..
   3294 	*/
   3295 
   3296 	raidID = cset->ac->clabel->last_unit;
   3297 	if ((raidID < 0) || (raidID >= numraid)) {
   3298 		/* let's not wander off into lala land. */
   3299 		raidID = numraid - 1;
   3300 	}
   3301 	if (raidPtrs[raidID]->valid != 0) {
   3302 
   3303 		/*
   3304 		   Nope... Go looking for an alternative...
   3305 		   Start high so we don't immediately use raid0 if that's
   3306 		   not taken.
   3307 		*/
   3308 
   3309 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3310 			if (raidPtrs[raidID]->valid == 0) {
   3311 				/* can use this one! */
   3312 				break;
   3313 			}
   3314 		}
   3315 	}
   3316 
   3317 	if (raidID < 0) {
   3318 		/* punt... */
   3319 		printf("Unable to auto configure this set!\n");
   3320 		printf("(Out of RAID devs!)\n");
   3321 		return(1);
   3322 	}
   3323 	printf("Configuring raid%d:\n",raidID);
   3324 	raidPtr = raidPtrs[raidID];
   3325 
   3326 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3327 	raidPtr->raidid = raidID;
   3328 	raidPtr->openings = RAIDOUTSTANDING;
   3329 
   3330 	/* 3. Build the configuration structure */
   3331 	rf_create_configuration(cset->ac, config, raidPtr);
   3332 
   3333 	/* 4. Do the configuration */
   3334 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3335 
   3336 	if (retcode == 0) {
   3337 
   3338 		raidinit(raidPtrs[raidID]);
   3339 
   3340 		rf_markalldirty(raidPtrs[raidID]);
   3341 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3342 		if (cset->ac->clabel->root_partition==1) {
   3343 			/* everything configured just fine.  Make a note
   3344 			   that this set is eligible to be root. */
   3345 			cset->rootable = 1;
   3346 			/* XXX do this here? */
   3347 			raidPtrs[raidID]->root_partition = 1;
   3348 		}
   3349 	}
   3350 
   3351 	/* 5. Cleanup */
   3352 	free(config, M_RAIDFRAME);
   3353 
   3354 	*unit = raidID;
   3355 	return(retcode);
   3356 }
   3357 
   3358 void
   3359 rf_disk_unbusy(desc)
   3360 	RF_RaidAccessDesc_t *desc;
   3361 {
   3362 	struct buf *bp;
   3363 
   3364 	bp = (struct buf *)desc->bp;
   3365 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3366 			    (bp->b_bcount - bp->b_resid));
   3367 }
   3368