Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.104.2.5
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.104.2.5 2001/11/18 13:22:09 scw Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/cdefs.h>
    117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.104.2.5 2001/11/18 13:22:09 scw Exp $");
    118 
    119 #include <sys/errno.h>
    120 #include <sys/param.h>
    121 #include <sys/pool.h>
    122 #include <sys/lwp.h>
    123 #include <sys/proc.h>
    124 #include <sys/queue.h>
    125 #include <sys/disk.h>
    126 #include <sys/device.h>
    127 #include <sys/stat.h>
    128 #include <sys/ioctl.h>
    129 #include <sys/fcntl.h>
    130 #include <sys/systm.h>
    131 #include <sys/namei.h>
    132 #include <sys/vnode.h>
    133 #include <sys/param.h>
    134 #include <sys/types.h>
    135 #include <machine/types.h>
    136 #include <sys/disklabel.h>
    137 #include <sys/conf.h>
    138 #include <sys/lock.h>
    139 #include <sys/buf.h>
    140 #include <sys/user.h>
    141 #include <sys/reboot.h>
    142 
    143 #include <dev/raidframe/raidframevar.h>
    144 #include <dev/raidframe/raidframeio.h>
    145 #include "raid.h"
    146 #include "opt_raid_autoconfig.h"
    147 #include "rf_raid.h"
    148 #include "rf_copyback.h"
    149 #include "rf_dag.h"
    150 #include "rf_dagflags.h"
    151 #include "rf_desc.h"
    152 #include "rf_diskqueue.h"
    153 #include "rf_acctrace.h"
    154 #include "rf_etimer.h"
    155 #include "rf_general.h"
    156 #include "rf_debugMem.h"
    157 #include "rf_kintf.h"
    158 #include "rf_options.h"
    159 #include "rf_driver.h"
    160 #include "rf_parityscan.h"
    161 #include "rf_debugprint.h"
    162 #include "rf_threadstuff.h"
    163 
/* Debug verbosity: db1_printf() output is produced only when this is > 0. */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- level-1 debug printf.  Note the double
 * parentheses: the whole printf argument list is passed as one macro
 * argument.
 *
 * Both variants expand to exactly one statement (do { } while (0)), so
 * the macro followed by ';' can be used as the unbraced body of an
 * if/else without capturing the `else' or producing an empty statement
 * in front of it.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    171 
    172 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    173 
    174 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    175 
    176 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    177 						 * spare table */
    178 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    179 						 * installation process */
    180 
    181 /* prototypes */
    182 static void KernelWakeupFunc(struct buf * bp);
    183 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    184 		   dev_t dev, RF_SectorNum_t startSect,
    185 		   RF_SectorCount_t numSect, caddr_t buf,
    186 		   void (*cbFunc) (struct buf *), void *cbArg,
    187 		   int logBytesPerSector, struct proc * b_proc);
    188 static void raidinit(RF_Raid_t *);
    189 
    190 void raidattach(int);
    191 int raidsize(dev_t);
    192 int raidopen(dev_t, int, int, struct proc *);
    193 int raidclose(dev_t, int, int, struct proc *);
    194 int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
    195 int raidwrite(dev_t, struct uio *, int);
    196 int raidread(dev_t, struct uio *, int);
    197 void raidstrategy(struct buf *);
    198 int raiddump(dev_t, daddr_t, caddr_t, size_t);
    199 
    200 /*
    201  * Pilfered from ccd.c
    202  */
    203 
    204 struct raidbuf {
    205 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
    206 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    207 	int     rf_flags;	/* misc. flags */
    208 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
    209 };
    210 
    211 
    212 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
    213 #define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    214 
    215 /* XXX Not sure if the following should be replacing the raidPtrs above,
    216    or if it should be used in conjunction with that...
    217 */
    218 
    219 struct raid_softc {
    220 	int     sc_flags;	/* flags */
    221 	int     sc_cflags;	/* configuration flags */
    222 	size_t  sc_size;        /* size of the raid device */
    223 	char    sc_xname[20];	/* XXX external name */
    224 	struct disk sc_dkdev;	/* generic disk device info */
    225 	struct pool sc_cbufpool;	/* component buffer pool */
    226 	struct buf_queue buf_queue;	/* used for the device queue */
    227 };
    228 /* sc_flags */
    229 #define RAIDF_INITED	0x01	/* unit has been initialized */
    230 #define RAIDF_WLABEL	0x02	/* label area is writable */
    231 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    232 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    233 #define RAIDF_LOCKED	0x80	/* unit is locked */
    234 
    235 #define	raidunit(x)	DISKUNIT(x)
    236 int numraid = 0;
    237 
/*
 * RAIDOUTSTANDING is the number of simultaneous I/Os allowed to a RAID
 * device.  Be aware that large values can let the driver consume a lot
 * of kernel memory, especially on writes and on degraded-mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is re-used immediately, that's still 128K, which when
 * multiplied by, say, 10 requests is 1280K, *on top* of the 640K of
 * incoming data.
 *
 * In degraded mode, a 64K read on the above setup may require data
 * reconstruction, which needs *all* of the 4 remaining disks to
 * participate -- 4 * 32K/disk == 128K again.
 *
 * The value may be overridden by defining RAIDOUTSTANDING beforehand.
 */
#if !defined(RAIDOUTSTANDING)
#define	RAIDOUTSTANDING	6
#endif

/* dev_t naming RAW_PART on the same major number and unit as `dev'. */
#define	RAIDLABELDEV(dev) \
	(MAKEDISKDEV(major(dev), raidunit(dev), RAW_PART))
    261 
    262 /* declared here, and made public, for the benefit of KVM stuff.. */
    263 struct raid_softc *raid_softc;
    264 
    265 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    266 				     struct disklabel *);
    267 static void raidgetdisklabel(dev_t);
    268 static void raidmakedisklabel(struct raid_softc *);
    269 
    270 static int raidlock(struct raid_softc *);
    271 static void raidunlock(struct raid_softc *);
    272 
    273 static void rf_markalldirty(RF_Raid_t *);
    274 void rf_mountroot_hook(struct device *);
    275 
    276 struct device *raidrootdev;
    277 
    278 void rf_ReconThread(struct rf_recon_req *);
    279 /* XXX what I want is: */
    280 /*void rf_ReconThread(RF_Raid_t *raidPtr);  */
    281 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    282 void rf_CopybackThread(RF_Raid_t *raidPtr);
    283 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    284 void rf_buildroothack(void *);
    285 
    286 RF_AutoConfig_t *rf_find_raid_components(void);
    287 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    288 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    289 static int rf_reasonable_label(RF_ComponentLabel_t *);
    290 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    291 int rf_set_autoconfig(RF_Raid_t *, int);
    292 int rf_set_rootpartition(RF_Raid_t *, int);
    293 void rf_release_all_vps(RF_ConfigSet_t *);
    294 void rf_cleanup_config_set(RF_ConfigSet_t *);
    295 int rf_have_enough_components(RF_ConfigSet_t *);
    296 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    297 
    298 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    299 				  allow autoconfig to take place.
    300 			          Note that this is overridden by having
    301 			          RAID_AUTOCONFIG as an option in the
    302 			          kernel config file.  */
    303 
    304 void
    305 raidattach(num)
    306 	int     num;
    307 {
    308 	int raidID;
    309 	int i, rc;
    310 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    311 	RF_ConfigSet_t *config_sets;
    312 
    313 #ifdef DEBUG
    314 	printf("raidattach: Asked for %d units\n", num);
    315 #endif
    316 
    317 	if (num <= 0) {
    318 #ifdef DIAGNOSTIC
    319 		panic("raidattach: count <= 0");
    320 #endif
    321 		return;
    322 	}
    323 	/* This is where all the initialization stuff gets done. */
    324 
    325 	numraid = num;
    326 
    327 	/* Make some space for requested number of units... */
    328 
    329 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    330 	if (raidPtrs == NULL) {
    331 		panic("raidPtrs is NULL!!\n");
    332 	}
    333 
    334 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    335 	if (rc) {
    336 		RF_PANIC();
    337 	}
    338 
    339 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    340 
    341 	for (i = 0; i < num; i++)
    342 		raidPtrs[i] = NULL;
    343 	rc = rf_BootRaidframe();
    344 	if (rc == 0)
    345 		printf("Kernelized RAIDframe activated\n");
    346 	else
    347 		panic("Serious error booting RAID!!\n");
    348 
    349 	/* put together some datastructures like the CCD device does.. This
    350 	 * lets us lock the device and what-not when it gets opened. */
    351 
    352 	raid_softc = (struct raid_softc *)
    353 		malloc(num * sizeof(struct raid_softc),
    354 		       M_RAIDFRAME, M_NOWAIT);
    355 	if (raid_softc == NULL) {
    356 		printf("WARNING: no memory for RAIDframe driver\n");
    357 		return;
    358 	}
    359 
    360 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    361 
    362 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    363 					      M_RAIDFRAME, M_NOWAIT);
    364 	if (raidrootdev == NULL) {
    365 		panic("No memory for RAIDframe driver!!?!?!\n");
    366 	}
    367 
    368 	for (raidID = 0; raidID < num; raidID++) {
    369 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    370 
    371 		raidrootdev[raidID].dv_class  = DV_DISK;
    372 		raidrootdev[raidID].dv_cfdata = NULL;
    373 		raidrootdev[raidID].dv_unit   = raidID;
    374 		raidrootdev[raidID].dv_parent = NULL;
    375 		raidrootdev[raidID].dv_flags  = 0;
    376 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    377 
    378 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    379 			  (RF_Raid_t *));
    380 		if (raidPtrs[raidID] == NULL) {
    381 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    382 			numraid = raidID;
    383 			return;
    384 		}
    385 	}
    386 
    387 #if RAID_AUTOCONFIG
    388 	raidautoconfig = 1;
    389 #endif
    390 
    391 if (raidautoconfig) {
    392 	/* 1. locate all RAID components on the system */
    393 
    394 #if DEBUG
    395 	printf("Searching for raid components...\n");
    396 #endif
    397 	ac_list = rf_find_raid_components();
    398 
    399 	/* 2. sort them into their respective sets */
    400 
    401 	config_sets = rf_create_auto_sets(ac_list);
    402 
    403 	/* 3. evaluate each set and configure the valid ones
    404 	   This gets done in rf_buildroothack() */
    405 
    406 	/* schedule the creation of the thread to do the
    407 	   "/ on RAID" stuff */
    408 
    409 	kthread_create(rf_buildroothack,config_sets);
    410 
    411 #if 0
    412 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    413 #endif
    414 }
    415 
    416 }
    417 
    418 void
    419 rf_buildroothack(arg)
    420 	void *arg;
    421 {
    422 	RF_ConfigSet_t *config_sets = arg;
    423 	RF_ConfigSet_t *cset;
    424 	RF_ConfigSet_t *next_cset;
    425 	int retcode;
    426 	int raidID;
    427 	int rootID;
    428 	int num_root;
    429 
    430 	rootID = 0;
    431 	num_root = 0;
    432 	cset = config_sets;
    433 	while(cset != NULL ) {
    434 		next_cset = cset->next;
    435 		if (rf_have_enough_components(cset) &&
    436 		    cset->ac->clabel->autoconfigure==1) {
    437 			retcode = rf_auto_config_set(cset,&raidID);
    438 			if (!retcode) {
    439 				if (cset->rootable) {
    440 					rootID = raidID;
    441 					num_root++;
    442 				}
    443 			} else {
    444 				/* The autoconfig didn't work :( */
    445 #if DEBUG
    446 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    447 #endif
    448 				rf_release_all_vps(cset);
    449 			}
    450 		} else {
    451 			/* we're not autoconfiguring this set...
    452 			   release the associated resources */
    453 			rf_release_all_vps(cset);
    454 		}
    455 		/* cleanup */
    456 		rf_cleanup_config_set(cset);
    457 		cset = next_cset;
    458 	}
    459 	if (boothowto & RB_ASKNAME) {
    460 		/* We don't auto-config... */
    461 	} else {
    462 		/* They didn't ask, and we found something bootable... */
    463 
    464 		if (num_root == 1) {
    465 			booted_device = &raidrootdev[rootID];
    466 		} else if (num_root > 1) {
    467 			/* we can't guess.. require the user to answer... */
    468 			boothowto |= RB_ASKNAME;
    469 		}
    470 	}
    471 }
    472 
    473 
    474 int
    475 raidsize(dev)
    476 	dev_t   dev;
    477 {
    478 	struct raid_softc *rs;
    479 	struct disklabel *lp;
    480 	int     part, unit, omask, size;
    481 
    482 	unit = raidunit(dev);
    483 	if (unit >= numraid)
    484 		return (-1);
    485 	rs = &raid_softc[unit];
    486 
    487 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    488 		return (-1);
    489 
    490 	part = DISKPART(dev);
    491 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    492 	lp = rs->sc_dkdev.dk_label;
    493 
    494 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc->l_proc))
    495 		return (-1);
    496 
    497 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    498 		size = -1;
    499 	else
    500 		size = lp->d_partitions[part].p_size *
    501 		    (lp->d_secsize / DEV_BSIZE);
    502 
    503 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc->l_proc))
    504 		return (-1);
    505 
    506 	return (size);
    507 
    508 }
    509 
    510 int
    511 raiddump(dev, blkno, va, size)
    512 	dev_t   dev;
    513 	daddr_t blkno;
    514 	caddr_t va;
    515 	size_t  size;
    516 {
    517 	/* Not implemented. */
    518 	return ENXIO;
    519 }
    520 /* ARGSUSED */
    521 int
    522 raidopen(dev, flags, fmt, p)
    523 	dev_t   dev;
    524 	int     flags, fmt;
    525 	struct proc *p;
    526 {
    527 	int     unit = raidunit(dev);
    528 	struct raid_softc *rs;
    529 	struct disklabel *lp;
    530 	int     part, pmask;
    531 	int     error = 0;
    532 
    533 	if (unit >= numraid)
    534 		return (ENXIO);
    535 	rs = &raid_softc[unit];
    536 
    537 	if ((error = raidlock(rs)) != 0)
    538 		return (error);
    539 	lp = rs->sc_dkdev.dk_label;
    540 
    541 	part = DISKPART(dev);
    542 	pmask = (1 << part);
    543 
    544 	db1_printf(("Opening raid device number: %d partition: %d\n",
    545 		unit, part));
    546 
    547 
    548 	if ((rs->sc_flags & RAIDF_INITED) &&
    549 	    (rs->sc_dkdev.dk_openmask == 0))
    550 		raidgetdisklabel(dev);
    551 
    552 	/* make sure that this partition exists */
    553 
    554 	if (part != RAW_PART) {
    555 		db1_printf(("Not a raw partition..\n"));
    556 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    557 		    ((part >= lp->d_npartitions) ||
    558 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    559 			error = ENXIO;
    560 			raidunlock(rs);
    561 			db1_printf(("Bailing out...\n"));
    562 			return (error);
    563 		}
    564 	}
    565 	/* Prevent this unit from being unconfigured while open. */
    566 	switch (fmt) {
    567 	case S_IFCHR:
    568 		rs->sc_dkdev.dk_copenmask |= pmask;
    569 		break;
    570 
    571 	case S_IFBLK:
    572 		rs->sc_dkdev.dk_bopenmask |= pmask;
    573 		break;
    574 	}
    575 
    576 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    577 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    578 		/* First one... mark things as dirty... Note that we *MUST*
    579 		 have done a configure before this.  I DO NOT WANT TO BE
    580 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    581 		 THAT THEY BELONG TOGETHER!!!!! */
    582 		/* XXX should check to see if we're only open for reading
    583 		   here... If so, we needn't do this, but then need some
    584 		   other way of keeping track of what's happened.. */
    585 
    586 		rf_markalldirty( raidPtrs[unit] );
    587 	}
    588 
    589 
    590 	rs->sc_dkdev.dk_openmask =
    591 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    592 
    593 	raidunlock(rs);
    594 
    595 	return (error);
    596 
    597 
    598 }
    599 /* ARGSUSED */
    600 int
    601 raidclose(dev, flags, fmt, p)
    602 	dev_t   dev;
    603 	int     flags, fmt;
    604 	struct proc *p;
    605 {
    606 	int     unit = raidunit(dev);
    607 	struct raid_softc *rs;
    608 	int     error = 0;
    609 	int     part;
    610 
    611 	if (unit >= numraid)
    612 		return (ENXIO);
    613 	rs = &raid_softc[unit];
    614 
    615 	if ((error = raidlock(rs)) != 0)
    616 		return (error);
    617 
    618 	part = DISKPART(dev);
    619 
    620 	/* ...that much closer to allowing unconfiguration... */
    621 	switch (fmt) {
    622 	case S_IFCHR:
    623 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    624 		break;
    625 
    626 	case S_IFBLK:
    627 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    628 		break;
    629 	}
    630 	rs->sc_dkdev.dk_openmask =
    631 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    632 
    633 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    634 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    635 		/* Last one... device is not unconfigured yet.
    636 		   Device shutdown has taken care of setting the
    637 		   clean bits if RAIDF_INITED is not set
    638 		   mark things as clean... */
    639 #if 0
    640 		printf("Last one on raid%d.  Updating status.\n",unit);
    641 #endif
    642 		rf_update_component_labels(raidPtrs[unit],
    643 						 RF_FINAL_COMPONENT_UPDATE);
    644 		if (doing_shutdown) {
    645 			/* last one, and we're going down, so
    646 			   lights out for this RAID set too. */
    647 			error = rf_Shutdown(raidPtrs[unit]);
    648 			pool_destroy(&rs->sc_cbufpool);
    649 
    650 			/* It's no longer initialized... */
    651 			rs->sc_flags &= ~RAIDF_INITED;
    652 
    653 			/* Detach the disk. */
    654 			disk_detach(&rs->sc_dkdev);
    655 		}
    656 	}
    657 
    658 	raidunlock(rs);
    659 	return (0);
    660 
    661 }
    662 
    663 void
    664 raidstrategy(bp)
    665 	struct buf *bp;
    666 {
    667 	int s;
    668 
    669 	unsigned int raidID = raidunit(bp->b_dev);
    670 	RF_Raid_t *raidPtr;
    671 	struct raid_softc *rs = &raid_softc[raidID];
    672 	struct disklabel *lp;
    673 	int     wlabel;
    674 
    675 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    676 		bp->b_error = ENXIO;
    677 		bp->b_flags |= B_ERROR;
    678 		bp->b_resid = bp->b_bcount;
    679 		biodone(bp);
    680 		return;
    681 	}
    682 	if (raidID >= numraid || !raidPtrs[raidID]) {
    683 		bp->b_error = ENODEV;
    684 		bp->b_flags |= B_ERROR;
    685 		bp->b_resid = bp->b_bcount;
    686 		biodone(bp);
    687 		return;
    688 	}
    689 	raidPtr = raidPtrs[raidID];
    690 	if (!raidPtr->valid) {
    691 		bp->b_error = ENODEV;
    692 		bp->b_flags |= B_ERROR;
    693 		bp->b_resid = bp->b_bcount;
    694 		biodone(bp);
    695 		return;
    696 	}
    697 	if (bp->b_bcount == 0) {
    698 		db1_printf(("b_bcount is zero..\n"));
    699 		biodone(bp);
    700 		return;
    701 	}
    702 	lp = rs->sc_dkdev.dk_label;
    703 
    704 	/*
    705 	 * Do bounds checking and adjust transfer.  If there's an
    706 	 * error, the bounds check will flag that for us.
    707 	 */
    708 
    709 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    710 	if (DISKPART(bp->b_dev) != RAW_PART)
    711 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    712 			db1_printf(("Bounds check failed!!:%d %d\n",
    713 				(int) bp->b_blkno, (int) wlabel));
    714 			biodone(bp);
    715 			return;
    716 		}
    717 	s = splbio();
    718 
    719 	bp->b_resid = 0;
    720 
    721 	/* stuff it onto our queue */
    722 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    723 
    724 	raidstart(raidPtrs[raidID]);
    725 
    726 	splx(s);
    727 }
    728 /* ARGSUSED */
    729 int
    730 raidread(dev, uio, flags)
    731 	dev_t   dev;
    732 	struct uio *uio;
    733 	int     flags;
    734 {
    735 	int     unit = raidunit(dev);
    736 	struct raid_softc *rs;
    737 	int     part;
    738 
    739 	if (unit >= numraid)
    740 		return (ENXIO);
    741 	rs = &raid_softc[unit];
    742 
    743 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    744 		return (ENXIO);
    745 	part = DISKPART(dev);
    746 
    747 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    748 
    749 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    750 
    751 }
    752 /* ARGSUSED */
    753 int
    754 raidwrite(dev, uio, flags)
    755 	dev_t   dev;
    756 	struct uio *uio;
    757 	int     flags;
    758 {
    759 	int     unit = raidunit(dev);
    760 	struct raid_softc *rs;
    761 
    762 	if (unit >= numraid)
    763 		return (ENXIO);
    764 	rs = &raid_softc[unit];
    765 
    766 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    767 		return (ENXIO);
    768 	db1_printf(("raidwrite\n"));
    769 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    770 
    771 }
    772 
    773 int
    774 raidioctl(dev, cmd, data, flag, p)
    775 	dev_t   dev;
    776 	u_long  cmd;
    777 	caddr_t data;
    778 	int     flag;
    779 	struct proc *p;
    780 {
    781 	int     unit = raidunit(dev);
    782 	int     error = 0;
    783 	int     part, pmask;
    784 	struct raid_softc *rs;
    785 	RF_Config_t *k_cfg, *u_cfg;
    786 	RF_Raid_t *raidPtr;
    787 	RF_RaidDisk_t *diskPtr;
    788 	RF_AccTotals_t *totals;
    789 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    790 	u_char *specific_buf;
    791 	int retcode = 0;
    792 	int row;
    793 	int column;
    794 	struct rf_recon_req *rrcopy, *rr;
    795 	RF_ComponentLabel_t *clabel;
    796 	RF_ComponentLabel_t ci_label;
    797 	RF_ComponentLabel_t **clabel_ptr;
    798 	RF_SingleComponent_t *sparePtr,*componentPtr;
    799 	RF_SingleComponent_t hot_spare;
    800 	RF_SingleComponent_t component;
    801 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    802 	int i, j, d;
    803 #ifdef __HAVE_OLD_DISKLABEL
    804 	struct disklabel newlabel;
    805 #endif
    806 
    807 	if (unit >= numraid)
    808 		return (ENXIO);
    809 	rs = &raid_softc[unit];
    810 	raidPtr = raidPtrs[unit];
    811 
    812 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    813 		(int) DISKPART(dev), (int) unit, (int) cmd));
    814 
    815 	/* Must be open for writes for these commands... */
    816 	switch (cmd) {
    817 	case DIOCSDINFO:
    818 	case DIOCWDINFO:
    819 #ifdef __HAVE_OLD_DISKLABEL
    820 	case ODIOCWDINFO:
    821 	case ODIOCSDINFO:
    822 #endif
    823 	case DIOCWLABEL:
    824 		if ((flag & FWRITE) == 0)
    825 			return (EBADF);
    826 	}
    827 
    828 	/* Must be initialized for these... */
    829 	switch (cmd) {
    830 	case DIOCGDINFO:
    831 	case DIOCSDINFO:
    832 	case DIOCWDINFO:
    833 #ifdef __HAVE_OLD_DISKLABEL
    834 	case ODIOCGDINFO:
    835 	case ODIOCWDINFO:
    836 	case ODIOCSDINFO:
    837 	case ODIOCGDEFLABEL:
    838 #endif
    839 	case DIOCGPART:
    840 	case DIOCWLABEL:
    841 	case DIOCGDEFLABEL:
    842 	case RAIDFRAME_SHUTDOWN:
    843 	case RAIDFRAME_REWRITEPARITY:
    844 	case RAIDFRAME_GET_INFO:
    845 	case RAIDFRAME_RESET_ACCTOTALS:
    846 	case RAIDFRAME_GET_ACCTOTALS:
    847 	case RAIDFRAME_KEEP_ACCTOTALS:
    848 	case RAIDFRAME_GET_SIZE:
    849 	case RAIDFRAME_FAIL_DISK:
    850 	case RAIDFRAME_COPYBACK:
    851 	case RAIDFRAME_CHECK_RECON_STATUS:
    852 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
    853 	case RAIDFRAME_GET_COMPONENT_LABEL:
    854 	case RAIDFRAME_SET_COMPONENT_LABEL:
    855 	case RAIDFRAME_ADD_HOT_SPARE:
    856 	case RAIDFRAME_REMOVE_HOT_SPARE:
    857 	case RAIDFRAME_INIT_LABELS:
    858 	case RAIDFRAME_REBUILD_IN_PLACE:
    859 	case RAIDFRAME_CHECK_PARITY:
    860 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    861 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
    862 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    863 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
    864 	case RAIDFRAME_SET_AUTOCONFIG:
    865 	case RAIDFRAME_SET_ROOT:
    866 	case RAIDFRAME_DELETE_COMPONENT:
    867 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
    868 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    869 			return (ENXIO);
    870 	}
    871 
    872 	switch (cmd) {
    873 
    874 		/* configure the system */
    875 	case RAIDFRAME_CONFIGURE:
    876 
    877 		if (raidPtr->valid) {
    878 			/* There is a valid RAID set running on this unit! */
    879 			printf("raid%d: Device already configured!\n",unit);
    880 			return(EINVAL);
    881 		}
    882 
    883 		/* copy-in the configuration information */
    884 		/* data points to a pointer to the configuration structure */
    885 
    886 		u_cfg = *((RF_Config_t **) data);
    887 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    888 		if (k_cfg == NULL) {
    889 			return (ENOMEM);
    890 		}
    891 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    892 		    sizeof(RF_Config_t));
    893 		if (retcode) {
    894 			RF_Free(k_cfg, sizeof(RF_Config_t));
    895 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    896 				retcode));
    897 			return (retcode);
    898 		}
    899 		/* allocate a buffer for the layout-specific data, and copy it
    900 		 * in */
    901 		if (k_cfg->layoutSpecificSize) {
    902 			if (k_cfg->layoutSpecificSize > 10000) {
    903 				/* sanity check */
    904 				RF_Free(k_cfg, sizeof(RF_Config_t));
    905 				return (EINVAL);
    906 			}
    907 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    908 			    (u_char *));
    909 			if (specific_buf == NULL) {
    910 				RF_Free(k_cfg, sizeof(RF_Config_t));
    911 				return (ENOMEM);
    912 			}
    913 			retcode = copyin(k_cfg->layoutSpecific,
    914 			    (caddr_t) specific_buf,
    915 			    k_cfg->layoutSpecificSize);
    916 			if (retcode) {
    917 				RF_Free(k_cfg, sizeof(RF_Config_t));
    918 				RF_Free(specific_buf,
    919 					k_cfg->layoutSpecificSize);
    920 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    921 					retcode));
    922 				return (retcode);
    923 			}
    924 		} else
    925 			specific_buf = NULL;
    926 		k_cfg->layoutSpecific = specific_buf;
    927 
    928 		/* should do some kind of sanity check on the configuration.
    929 		 * Store the sum of all the bytes in the last byte? */
    930 
    931 		/* configure the system */
    932 
    933 		/*
    934 		 * Clear the entire RAID descriptor, just to make sure
    935 		 *  there is no stale data left in the case of a
    936 		 *  reconfiguration
    937 		 */
    938 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
    939 		raidPtr->raidid = unit;
    940 
    941 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    942 
    943 		if (retcode == 0) {
    944 
    945 			/* allow this many simultaneous IO's to
    946 			   this RAID device */
    947 			raidPtr->openings = RAIDOUTSTANDING;
    948 
    949 			raidinit(raidPtr);
    950 			rf_markalldirty(raidPtr);
    951 		}
    952 		/* free the buffers.  No return code here. */
    953 		if (k_cfg->layoutSpecificSize) {
    954 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    955 		}
    956 		RF_Free(k_cfg, sizeof(RF_Config_t));
    957 
    958 		return (retcode);
    959 
    960 		/* shutdown the system */
    961 	case RAIDFRAME_SHUTDOWN:
    962 
    963 		if ((error = raidlock(rs)) != 0)
    964 			return (error);
    965 
    966 		/*
    967 		 * If somebody has a partition mounted, we shouldn't
    968 		 * shutdown.
    969 		 */
    970 
    971 		part = DISKPART(dev);
    972 		pmask = (1 << part);
    973 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    974 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    975 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    976 			raidunlock(rs);
    977 			return (EBUSY);
    978 		}
    979 
    980 		retcode = rf_Shutdown(raidPtr);
    981 
    982 		pool_destroy(&rs->sc_cbufpool);
    983 
    984 		/* It's no longer initialized... */
    985 		rs->sc_flags &= ~RAIDF_INITED;
    986 
    987 		/* Detach the disk. */
    988 		disk_detach(&rs->sc_dkdev);
    989 
    990 		raidunlock(rs);
    991 
    992 		return (retcode);
    993 	case RAIDFRAME_GET_COMPONENT_LABEL:
    994 		clabel_ptr = (RF_ComponentLabel_t **) data;
    995 		/* need to read the component label for the disk indicated
    996 		   by row,column in clabel */
    997 
    998 		/* For practice, let's get it directly fromdisk, rather
    999 		   than from the in-core copy */
   1000 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
   1001 			   (RF_ComponentLabel_t *));
   1002 		if (clabel == NULL)
   1003 			return (ENOMEM);
   1004 
   1005 		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
   1006 
   1007 		retcode = copyin( *clabel_ptr, clabel,
   1008 				  sizeof(RF_ComponentLabel_t));
   1009 
   1010 		if (retcode) {
   1011 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1012 			return(retcode);
   1013 		}
   1014 
   1015 		row = clabel->row;
   1016 		column = clabel->column;
   1017 
   1018 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1019 		    (column < 0) || (column >= raidPtr->numCol +
   1020 				     raidPtr->numSpare)) {
   1021 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1022 			return(EINVAL);
   1023 		}
   1024 
   1025 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1026 				raidPtr->raid_cinfo[row][column].ci_vp,
   1027 				clabel );
   1028 
   1029 		retcode = copyout((caddr_t) clabel,
   1030 				  (caddr_t) *clabel_ptr,
   1031 				  sizeof(RF_ComponentLabel_t));
   1032 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1033 		return (retcode);
   1034 
   1035 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1036 		clabel = (RF_ComponentLabel_t *) data;
   1037 
   1038 		/* XXX check the label for valid stuff... */
   1039 		/* Note that some things *should not* get modified --
   1040 		   the user should be re-initing the labels instead of
   1041 		   trying to patch things.
   1042 		   */
   1043 
   1044 		printf("Got component label:\n");
   1045 		printf("Version: %d\n",clabel->version);
   1046 		printf("Serial Number: %d\n",clabel->serial_number);
   1047 		printf("Mod counter: %d\n",clabel->mod_counter);
   1048 		printf("Row: %d\n", clabel->row);
   1049 		printf("Column: %d\n", clabel->column);
   1050 		printf("Num Rows: %d\n", clabel->num_rows);
   1051 		printf("Num Columns: %d\n", clabel->num_columns);
   1052 		printf("Clean: %d\n", clabel->clean);
   1053 		printf("Status: %d\n", clabel->status);
   1054 
   1055 		row = clabel->row;
   1056 		column = clabel->column;
   1057 
   1058 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1059 		    (column < 0) || (column >= raidPtr->numCol)) {
   1060 			return(EINVAL);
   1061 		}
   1062 
   1063 		/* XXX this isn't allowed to do anything for now :-) */
   1064 
   1065 		/* XXX and before it is, we need to fill in the rest
   1066 		   of the fields!?!?!?! */
   1067 #if 0
   1068 		raidwrite_component_label(
   1069                             raidPtr->Disks[row][column].dev,
   1070 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1071 			    clabel );
   1072 #endif
   1073 		return (0);
   1074 
   1075 	case RAIDFRAME_INIT_LABELS:
   1076 		clabel = (RF_ComponentLabel_t *) data;
   1077 		/*
   1078 		   we only want the serial number from
   1079 		   the above.  We get all the rest of the information
   1080 		   from the config that was used to create this RAID
   1081 		   set.
   1082 		   */
   1083 
   1084 		raidPtr->serial_number = clabel->serial_number;
   1085 
   1086 		raid_init_component_label(raidPtr, &ci_label);
   1087 		ci_label.serial_number = clabel->serial_number;
   1088 
   1089 		for(row=0;row<raidPtr->numRow;row++) {
   1090 			ci_label.row = row;
   1091 			for(column=0;column<raidPtr->numCol;column++) {
   1092 				diskPtr = &raidPtr->Disks[row][column];
   1093 				if (!RF_DEAD_DISK(diskPtr->status)) {
   1094 					ci_label.partitionSize = diskPtr->partitionSize;
   1095 					ci_label.column = column;
   1096 					raidwrite_component_label(
   1097 					  raidPtr->Disks[row][column].dev,
   1098 					  raidPtr->raid_cinfo[row][column].ci_vp,
   1099 					  &ci_label );
   1100 				}
   1101 			}
   1102 		}
   1103 
   1104 		return (retcode);
   1105 	case RAIDFRAME_SET_AUTOCONFIG:
   1106 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1107 		printf("New autoconfig value is: %d\n", d);
   1108 		*(int *) data = d;
   1109 		return (retcode);
   1110 
   1111 	case RAIDFRAME_SET_ROOT:
   1112 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1113 		printf("New rootpartition value is: %d\n", d);
   1114 		*(int *) data = d;
   1115 		return (retcode);
   1116 
   1117 		/* initialize all parity */
   1118 	case RAIDFRAME_REWRITEPARITY:
   1119 
   1120 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1121 			/* Parity for RAID 0 is trivially correct */
   1122 			raidPtr->parity_good = RF_RAID_CLEAN;
   1123 			return(0);
   1124 		}
   1125 
   1126 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1127 			/* Re-write is already in progress! */
   1128 			return(EINVAL);
   1129 		}
   1130 
   1131 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1132 					   rf_RewriteParityThread,
   1133 					   raidPtr,"raid_parity");
   1134 		return (retcode);
   1135 
   1136 
   1137 	case RAIDFRAME_ADD_HOT_SPARE:
   1138 		sparePtr = (RF_SingleComponent_t *) data;
   1139 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1140 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1141 		return(retcode);
   1142 
   1143 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1144 		return(retcode);
   1145 
   1146 	case RAIDFRAME_DELETE_COMPONENT:
   1147 		componentPtr = (RF_SingleComponent_t *)data;
   1148 		memcpy( &component, componentPtr,
   1149 			sizeof(RF_SingleComponent_t));
   1150 		retcode = rf_delete_component(raidPtr, &component);
   1151 		return(retcode);
   1152 
   1153 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1154 		componentPtr = (RF_SingleComponent_t *)data;
   1155 		memcpy( &component, componentPtr,
   1156 			sizeof(RF_SingleComponent_t));
   1157 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1158 		return(retcode);
   1159 
   1160 	case RAIDFRAME_REBUILD_IN_PLACE:
   1161 
   1162 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1163 			/* Can't do this on a RAID 0!! */
   1164 			return(EINVAL);
   1165 		}
   1166 
   1167 		if (raidPtr->recon_in_progress == 1) {
   1168 			/* a reconstruct is already in progress! */
   1169 			return(EINVAL);
   1170 		}
   1171 
   1172 		componentPtr = (RF_SingleComponent_t *) data;
   1173 		memcpy( &component, componentPtr,
   1174 			sizeof(RF_SingleComponent_t));
   1175 		row = component.row;
   1176 		column = component.column;
   1177 		printf("Rebuild: %d %d\n",row, column);
   1178 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1179 		    (column < 0) || (column >= raidPtr->numCol)) {
   1180 			return(EINVAL);
   1181 		}
   1182 
   1183 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1184 		if (rrcopy == NULL)
   1185 			return(ENOMEM);
   1186 
   1187 		rrcopy->raidPtr = (void *) raidPtr;
   1188 		rrcopy->row = row;
   1189 		rrcopy->col = column;
   1190 
   1191 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1192 					   rf_ReconstructInPlaceThread,
   1193 					   rrcopy,"raid_reconip");
   1194 		return(retcode);
   1195 
   1196 	case RAIDFRAME_GET_INFO:
   1197 		if (!raidPtr->valid)
   1198 			return (ENODEV);
   1199 		ucfgp = (RF_DeviceConfig_t **) data;
   1200 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1201 			  (RF_DeviceConfig_t *));
   1202 		if (d_cfg == NULL)
   1203 			return (ENOMEM);
   1204 		memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
   1205 		d_cfg->rows = raidPtr->numRow;
   1206 		d_cfg->cols = raidPtr->numCol;
   1207 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1208 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1209 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1210 			return (ENOMEM);
   1211 		}
   1212 		d_cfg->nspares = raidPtr->numSpare;
   1213 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1214 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1215 			return (ENOMEM);
   1216 		}
   1217 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1218 		d = 0;
   1219 		for (i = 0; i < d_cfg->rows; i++) {
   1220 			for (j = 0; j < d_cfg->cols; j++) {
   1221 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1222 				d++;
   1223 			}
   1224 		}
   1225 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1226 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1227 		}
   1228 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1229 				  sizeof(RF_DeviceConfig_t));
   1230 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1231 
   1232 		return (retcode);
   1233 
   1234 	case RAIDFRAME_CHECK_PARITY:
   1235 		*(int *) data = raidPtr->parity_good;
   1236 		return (0);
   1237 
   1238 	case RAIDFRAME_RESET_ACCTOTALS:
   1239 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1240 		return (0);
   1241 
   1242 	case RAIDFRAME_GET_ACCTOTALS:
   1243 		totals = (RF_AccTotals_t *) data;
   1244 		*totals = raidPtr->acc_totals;
   1245 		return (0);
   1246 
   1247 	case RAIDFRAME_KEEP_ACCTOTALS:
   1248 		raidPtr->keep_acc_totals = *(int *)data;
   1249 		return (0);
   1250 
   1251 	case RAIDFRAME_GET_SIZE:
   1252 		*(int *) data = raidPtr->totalSectors;
   1253 		return (0);
   1254 
   1255 		/* fail a disk & optionally start reconstruction */
   1256 	case RAIDFRAME_FAIL_DISK:
   1257 
   1258 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1259 			/* Can't do this on a RAID 0!! */
   1260 			return(EINVAL);
   1261 		}
   1262 
   1263 		rr = (struct rf_recon_req *) data;
   1264 
   1265 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1266 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1267 			return (EINVAL);
   1268 
   1269 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1270 		       unit, rr->row, rr->col);
   1271 
   1272 		/* make a copy of the recon request so that we don't rely on
   1273 		 * the user's buffer */
   1274 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1275 		if (rrcopy == NULL)
   1276 			return(ENOMEM);
   1277 		bcopy(rr, rrcopy, sizeof(*rr));
   1278 		rrcopy->raidPtr = (void *) raidPtr;
   1279 
   1280 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1281 					   rf_ReconThread,
   1282 					   rrcopy,"raid_recon");
   1283 		return (0);
   1284 
   1285 		/* invoke a copyback operation after recon on whatever disk
   1286 		 * needs it, if any */
   1287 	case RAIDFRAME_COPYBACK:
   1288 
   1289 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1290 			/* This makes no sense on a RAID 0!! */
   1291 			return(EINVAL);
   1292 		}
   1293 
   1294 		if (raidPtr->copyback_in_progress == 1) {
   1295 			/* Copyback is already in progress! */
   1296 			return(EINVAL);
   1297 		}
   1298 
   1299 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1300 					   rf_CopybackThread,
   1301 					   raidPtr,"raid_copyback");
   1302 		return (retcode);
   1303 
   1304 		/* return the percentage completion of reconstruction */
   1305 	case RAIDFRAME_CHECK_RECON_STATUS:
   1306 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1307 			/* This makes no sense on a RAID 0, so tell the
   1308 			   user it's done. */
   1309 			*(int *) data = 100;
   1310 			return(0);
   1311 		}
   1312 		row = 0; /* XXX we only consider a single row... */
   1313 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1314 			*(int *) data = 100;
   1315 		else
   1316 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1317 		return (0);
   1318 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1319 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1320 		row = 0; /* XXX we only consider a single row... */
   1321 		if (raidPtr->status[row] != rf_rs_reconstructing) {
   1322 			progressInfo.remaining = 0;
   1323 			progressInfo.completed = 100;
   1324 			progressInfo.total = 100;
   1325 		} else {
   1326 			progressInfo.total =
   1327 				raidPtr->reconControl[row]->numRUsTotal;
   1328 			progressInfo.completed =
   1329 				raidPtr->reconControl[row]->numRUsComplete;
   1330 			progressInfo.remaining = progressInfo.total -
   1331 				progressInfo.completed;
   1332 		}
   1333 		retcode = copyout((caddr_t) &progressInfo,
   1334 				  (caddr_t) *progressInfoPtr,
   1335 				  sizeof(RF_ProgressInfo_t));
   1336 		return (retcode);
   1337 
   1338 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1339 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1340 			/* This makes no sense on a RAID 0, so tell the
   1341 			   user it's done. */
   1342 			*(int *) data = 100;
   1343 			return(0);
   1344 		}
   1345 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1346 			*(int *) data = 100 *
   1347 				raidPtr->parity_rewrite_stripes_done /
   1348 				raidPtr->Layout.numStripe;
   1349 		} else {
   1350 			*(int *) data = 100;
   1351 		}
   1352 		return (0);
   1353 
   1354 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1355 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1356 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1357 			progressInfo.total = raidPtr->Layout.numStripe;
   1358 			progressInfo.completed =
   1359 				raidPtr->parity_rewrite_stripes_done;
   1360 			progressInfo.remaining = progressInfo.total -
   1361 				progressInfo.completed;
   1362 		} else {
   1363 			progressInfo.remaining = 0;
   1364 			progressInfo.completed = 100;
   1365 			progressInfo.total = 100;
   1366 		}
   1367 		retcode = copyout((caddr_t) &progressInfo,
   1368 				  (caddr_t) *progressInfoPtr,
   1369 				  sizeof(RF_ProgressInfo_t));
   1370 		return (retcode);
   1371 
   1372 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1373 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1374 			/* This makes no sense on a RAID 0 */
   1375 			*(int *) data = 100;
   1376 			return(0);
   1377 		}
   1378 		if (raidPtr->copyback_in_progress == 1) {
   1379 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1380 				raidPtr->Layout.numStripe;
   1381 		} else {
   1382 			*(int *) data = 100;
   1383 		}
   1384 		return (0);
   1385 
   1386 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1387 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1388 		if (raidPtr->copyback_in_progress == 1) {
   1389 			progressInfo.total = raidPtr->Layout.numStripe;
   1390 			progressInfo.completed =
   1391 				raidPtr->copyback_stripes_done;
   1392 			progressInfo.remaining = progressInfo.total -
   1393 				progressInfo.completed;
   1394 		} else {
   1395 			progressInfo.remaining = 0;
   1396 			progressInfo.completed = 100;
   1397 			progressInfo.total = 100;
   1398 		}
   1399 		retcode = copyout((caddr_t) &progressInfo,
   1400 				  (caddr_t) *progressInfoPtr,
   1401 				  sizeof(RF_ProgressInfo_t));
   1402 		return (retcode);
   1403 
   1404 		/* the sparetable daemon calls this to wait for the kernel to
   1405 		 * need a spare table. this ioctl does not return until a
   1406 		 * spare table is needed. XXX -- calling mpsleep here in the
   1407 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1408 		 * -- I should either compute the spare table in the kernel,
   1409 		 * or have a different -- XXX XXX -- interface (a different
   1410 		 * character device) for delivering the table     -- XXX */
   1411 #if 0
   1412 	case RAIDFRAME_SPARET_WAIT:
   1413 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1414 		while (!rf_sparet_wait_queue)
   1415 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1416 		waitreq = rf_sparet_wait_queue;
   1417 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1418 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1419 
   1420 		/* structure assignment */
   1421 		*((RF_SparetWait_t *) data) = *waitreq;
   1422 
   1423 		RF_Free(waitreq, sizeof(*waitreq));
   1424 		return (0);
   1425 
   1426 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1427 		 * code in it that will cause the dameon to exit */
   1428 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1429 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1430 		waitreq->fcol = -1;
   1431 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1432 		waitreq->next = rf_sparet_wait_queue;
   1433 		rf_sparet_wait_queue = waitreq;
   1434 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1435 		wakeup(&rf_sparet_wait_queue);
   1436 		return (0);
   1437 
   1438 		/* used by the spare table daemon to deliver a spare table
   1439 		 * into the kernel */
   1440 	case RAIDFRAME_SEND_SPARET:
   1441 
   1442 		/* install the spare table */
   1443 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1444 
   1445 		/* respond to the requestor.  the return status of the spare
   1446 		 * table installation is passed in the "fcol" field */
   1447 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1448 		waitreq->fcol = retcode;
   1449 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1450 		waitreq->next = rf_sparet_resp_queue;
   1451 		rf_sparet_resp_queue = waitreq;
   1452 		wakeup(&rf_sparet_resp_queue);
   1453 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1454 
   1455 		return (retcode);
   1456 #endif
   1457 
   1458 	default:
   1459 		break; /* fall through to the os-specific code below */
   1460 
   1461 	}
   1462 
   1463 	if (!raidPtr->valid)
   1464 		return (EINVAL);
   1465 
   1466 	/*
   1467 	 * Add support for "regular" device ioctls here.
   1468 	 */
   1469 
   1470 	switch (cmd) {
   1471 	case DIOCGDINFO:
   1472 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1473 		break;
   1474 #ifdef __HAVE_OLD_DISKLABEL
   1475 	case ODIOCGDINFO:
   1476 		newlabel = *(rs->sc_dkdev.dk_label);
   1477 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1478 			return ENOTTY;
   1479 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1480 		break;
   1481 #endif
   1482 
   1483 	case DIOCGPART:
   1484 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1485 		((struct partinfo *) data)->part =
   1486 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1487 		break;
   1488 
   1489 	case DIOCWDINFO:
   1490 	case DIOCSDINFO:
   1491 #ifdef __HAVE_OLD_DISKLABEL
   1492 	case ODIOCWDINFO:
   1493 	case ODIOCSDINFO:
   1494 #endif
   1495 	{
   1496 		struct disklabel *lp;
   1497 #ifdef __HAVE_OLD_DISKLABEL
   1498 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1499 			memset(&newlabel, 0, sizeof newlabel);
   1500 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1501 			lp = &newlabel;
   1502 		} else
   1503 #endif
   1504 		lp = (struct disklabel *)data;
   1505 
   1506 		if ((error = raidlock(rs)) != 0)
   1507 			return (error);
   1508 
   1509 		rs->sc_flags |= RAIDF_LABELLING;
   1510 
   1511 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1512 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1513 		if (error == 0) {
   1514 			if (cmd == DIOCWDINFO
   1515 #ifdef __HAVE_OLD_DISKLABEL
   1516 			    || cmd == ODIOCWDINFO
   1517 #endif
   1518 			   )
   1519 				error = writedisklabel(RAIDLABELDEV(dev),
   1520 				    raidstrategy, rs->sc_dkdev.dk_label,
   1521 				    rs->sc_dkdev.dk_cpulabel);
   1522 		}
   1523 		rs->sc_flags &= ~RAIDF_LABELLING;
   1524 
   1525 		raidunlock(rs);
   1526 
   1527 		if (error)
   1528 			return (error);
   1529 		break;
   1530 	}
   1531 
   1532 	case DIOCWLABEL:
   1533 		if (*(int *) data != 0)
   1534 			rs->sc_flags |= RAIDF_WLABEL;
   1535 		else
   1536 			rs->sc_flags &= ~RAIDF_WLABEL;
   1537 		break;
   1538 
   1539 	case DIOCGDEFLABEL:
   1540 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1541 		break;
   1542 
   1543 #ifdef __HAVE_OLD_DISKLABEL
   1544 	case ODIOCGDEFLABEL:
   1545 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1546 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1547 			return ENOTTY;
   1548 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1549 		break;
   1550 #endif
   1551 
   1552 	default:
   1553 		retcode = ENOTTY;
   1554 	}
   1555 	return (retcode);
   1556 
   1557 }
   1558 
   1559 
   1560 /* raidinit -- complete the rest of the initialization for the
   1561    RAIDframe device.  */
   1562 
   1563 
   1564 static void
   1565 raidinit(raidPtr)
   1566 	RF_Raid_t *raidPtr;
   1567 {
   1568 	struct raid_softc *rs;
   1569 	int     unit;
   1570 
   1571 	unit = raidPtr->raidid;
   1572 
   1573 	rs = &raid_softc[unit];
   1574 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1575 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1576 
   1577 
   1578 	/* XXX should check return code first... */
   1579 	rs->sc_flags |= RAIDF_INITED;
   1580 
   1581 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1582 
   1583 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1584 
   1585 	/* disk_attach actually creates space for the CPU disklabel, among
   1586 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1587 	 * with disklabels. */
   1588 
   1589 	disk_attach(&rs->sc_dkdev);
   1590 
   1591 	/* XXX There may be a weird interaction here between this, and
   1592 	 * protectedSectors, as used in RAIDframe.  */
   1593 
   1594 	rs->sc_size = raidPtr->totalSectors;
   1595 
   1596 }
   1597 
   1598 /* wake up the daemon & tell it to get us a spare table
   1599  * XXX
   1600  * the entries in the queues should be tagged with the raidPtr
   1601  * so that in the extremely rare case that two recons happen at once,
   1602  * we know for which device were requesting a spare table
   1603  * XXX
   1604  *
   1605  * XXX This code is not currently used. GO
   1606  */
   1607 int
   1608 rf_GetSpareTableFromDaemon(req)
   1609 	RF_SparetWait_t *req;
   1610 {
   1611 	int     retcode;
   1612 
   1613 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1614 	req->next = rf_sparet_wait_queue;
   1615 	rf_sparet_wait_queue = req;
   1616 	wakeup(&rf_sparet_wait_queue);
   1617 
   1618 	/* mpsleep unlocks the mutex */
   1619 	while (!rf_sparet_resp_queue) {
   1620 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1621 		    "raidframe getsparetable", 0);
   1622 	}
   1623 	req = rf_sparet_resp_queue;
   1624 	rf_sparet_resp_queue = req->next;
   1625 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1626 
   1627 	retcode = req->fcol;
   1628 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1629 					 * alloc'd */
   1630 	return (retcode);
   1631 }
   1632 
   1633 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1634  * bp & passes it down.
   1635  * any calls originating in the kernel must use non-blocking I/O
   1636  * do some extra sanity checking to return "appropriate" error values for
   1637  * certain conditions (to make some standard utilities work)
   1638  *
   1639  * Formerly known as: rf_DoAccessKernel
   1640  */
   1641 void
   1642 raidstart(raidPtr)
   1643 	RF_Raid_t *raidPtr;
   1644 {
   1645 	RF_SectorCount_t num_blocks, pb, sum;
   1646 	RF_RaidAddr_t raid_addr;
   1647 	int     retcode;
   1648 	struct partition *pp;
   1649 	daddr_t blocknum;
   1650 	int     unit;
   1651 	struct raid_softc *rs;
   1652 	int     do_async;
   1653 	struct buf *bp;
   1654 
   1655 	unit = raidPtr->raidid;
   1656 	rs = &raid_softc[unit];
   1657 
   1658 	/* quick check to see if anything has died recently */
   1659 	RF_LOCK_MUTEX(raidPtr->mutex);
   1660 	if (raidPtr->numNewFailures > 0) {
   1661 		rf_update_component_labels(raidPtr,
   1662 					   RF_NORMAL_COMPONENT_UPDATE);
   1663 		raidPtr->numNewFailures--;
   1664 	}
   1665 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1666 
   1667 	/* Check to see if we're at the limit... */
   1668 	RF_LOCK_MUTEX(raidPtr->mutex);
   1669 	while (raidPtr->openings > 0) {
   1670 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1671 
   1672 		/* get the next item, if any, from the queue */
   1673 		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
   1674 			/* nothing more to do */
   1675 			return;
   1676 		}
   1677 		BUFQ_REMOVE(&rs->buf_queue, bp);
   1678 
   1679 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1680 		 * partition.. Need to make it absolute to the underlying
   1681 		 * device.. */
   1682 
   1683 		blocknum = bp->b_blkno;
   1684 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1685 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1686 			blocknum += pp->p_offset;
   1687 		}
   1688 
   1689 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1690 			    (int) blocknum));
   1691 
   1692 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1693 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1694 
   1695 		/* *THIS* is where we adjust what block we're going to...
   1696 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1697 		raid_addr = blocknum;
   1698 
   1699 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1700 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1701 		sum = raid_addr + num_blocks + pb;
   1702 		if (1 || rf_debugKernelAccess) {
   1703 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1704 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1705 				    (int) pb, (int) bp->b_resid));
   1706 		}
   1707 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1708 		    || (sum < num_blocks) || (sum < pb)) {
   1709 			bp->b_error = ENOSPC;
   1710 			bp->b_flags |= B_ERROR;
   1711 			bp->b_resid = bp->b_bcount;
   1712 			biodone(bp);
   1713 			RF_LOCK_MUTEX(raidPtr->mutex);
   1714 			continue;
   1715 		}
   1716 		/*
   1717 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1718 		 */
   1719 
   1720 		if (bp->b_bcount & raidPtr->sectorMask) {
   1721 			bp->b_error = EINVAL;
   1722 			bp->b_flags |= B_ERROR;
   1723 			bp->b_resid = bp->b_bcount;
   1724 			biodone(bp);
   1725 			RF_LOCK_MUTEX(raidPtr->mutex);
   1726 			continue;
   1727 
   1728 		}
   1729 		db1_printf(("Calling DoAccess..\n"));
   1730 
   1731 
   1732 		RF_LOCK_MUTEX(raidPtr->mutex);
   1733 		raidPtr->openings--;
   1734 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1735 
   1736 		/*
   1737 		 * Everything is async.
   1738 		 */
   1739 		do_async = 1;
   1740 
   1741 		disk_busy(&rs->sc_dkdev);
   1742 
   1743 		/* XXX we're still at splbio() here... do we *really*
   1744 		   need to be? */
   1745 
   1746 		/* don't ever condition on bp->b_flags & B_WRITE.
   1747 		 * always condition on B_READ instead */
   1748 
   1749 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1750 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1751 				      do_async, raid_addr, num_blocks,
   1752 				      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   1753 
   1754 		RF_LOCK_MUTEX(raidPtr->mutex);
   1755 	}
   1756 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1757 }
   1758 
   1759 
   1760 
   1761 
   1762 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1763 
   1764 int
   1765 rf_DispatchKernelIO(queue, req)
   1766 	RF_DiskQueue_t *queue;
   1767 	RF_DiskQueueData_t *req;
   1768 {
   1769 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1770 	struct buf *bp;
   1771 	struct raidbuf *raidbp = NULL;
   1772 	struct raid_softc *rs;
   1773 	int     unit;
   1774 	int s;
   1775 
   1776 	s=0;
   1777 	/* s = splbio();*/ /* want to test this */
   1778 	/* XXX along with the vnode, we also need the softc associated with
   1779 	 * this device.. */
   1780 
   1781 	req->queue = queue;
   1782 
   1783 	unit = queue->raidPtr->raidid;
   1784 
   1785 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1786 
   1787 	if (unit >= numraid) {
   1788 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1789 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1790 	}
   1791 	rs = &raid_softc[unit];
   1792 
   1793 	bp = req->bp;
   1794 #if 1
   1795 	/* XXX when there is a physical disk failure, someone is passing us a
   1796 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1797 	 * without taking a performance hit... (not sure where the real bug
   1798 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1799 
   1800 	if (bp->b_flags & B_ERROR) {
   1801 		bp->b_flags &= ~B_ERROR;
   1802 	}
   1803 	if (bp->b_error != 0) {
   1804 		bp->b_error = 0;
   1805 	}
   1806 #endif
   1807 	raidbp = RAIDGETBUF(rs);
   1808 
   1809 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1810 
   1811 	/*
   1812 	 * context for raidiodone
   1813 	 */
   1814 	raidbp->rf_obp = bp;
   1815 	raidbp->req = req;
   1816 
   1817 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1818 
   1819 	switch (req->type) {
   1820 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1821 		/* XXX need to do something extra here.. */
   1822 		/* I'm leaving this in, as I've never actually seen it used,
   1823 		 * and I'd like folks to report it... GO */
   1824 		printf(("WAKEUP CALLED\n"));
   1825 		queue->numOutstanding++;
   1826 
   1827 		/* XXX need to glue the original buffer into this??  */
   1828 
   1829 		KernelWakeupFunc(&raidbp->rf_buf);
   1830 		break;
   1831 
   1832 	case RF_IO_TYPE_READ:
   1833 	case RF_IO_TYPE_WRITE:
   1834 
   1835 		if (req->tracerec) {
   1836 			RF_ETIMER_START(req->tracerec->timer);
   1837 		}
   1838 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1839 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1840 		    req->sectorOffset, req->numSector,
   1841 		    req->buf, KernelWakeupFunc, (void *) req,
   1842 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1843 
   1844 		if (rf_debugKernelAccess) {
   1845 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1846 				(long) bp->b_blkno));
   1847 		}
   1848 		queue->numOutstanding++;
   1849 		queue->last_deq_sector = req->sectorOffset;
   1850 		/* acc wouldn't have been let in if there were any pending
   1851 		 * reqs at any other priority */
   1852 		queue->curPriority = req->priority;
   1853 
   1854 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1855 			req->type, unit, queue->row, queue->col));
   1856 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1857 			(int) req->sectorOffset, (int) req->numSector,
   1858 			(int) (req->numSector <<
   1859 			    queue->raidPtr->logBytesPerSector),
   1860 			(int) queue->raidPtr->logBytesPerSector));
   1861 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1862 			raidbp->rf_buf.b_vp->v_numoutput++;
   1863 		}
   1864 		VOP_STRATEGY(&raidbp->rf_buf);
   1865 
   1866 		break;
   1867 
   1868 	default:
   1869 		panic("bad req->type in rf_DispatchKernelIO");
   1870 	}
   1871 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1872 	/* splx(s); */ /* want to test this */
   1873 	return (0);
   1874 }
   1875 /* this is the callback function associated with a I/O invoked from
   1876    kernel code.
   1877  */
   1878 static void
   1879 KernelWakeupFunc(vbp)
   1880 	struct buf *vbp;
   1881 {
   1882 	RF_DiskQueueData_t *req = NULL;
   1883 	RF_DiskQueue_t *queue;
   1884 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1885 	struct buf *bp;
   1886 	struct raid_softc *rs;
   1887 	int     unit;
   1888 	int s;
   1889 
   1890 	s = splbio();
   1891 	db1_printf(("recovering the request queue:\n"));
   1892 	req = raidbp->req;
   1893 
   1894 	bp = raidbp->rf_obp;
   1895 
   1896 	queue = (RF_DiskQueue_t *) req->queue;
   1897 
   1898 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1899 		bp->b_flags |= B_ERROR;
   1900 		bp->b_error = raidbp->rf_buf.b_error ?
   1901 		    raidbp->rf_buf.b_error : EIO;
   1902 	}
   1903 
   1904 	/* XXX methinks this could be wrong... */
   1905 #if 1
   1906 	bp->b_resid = raidbp->rf_buf.b_resid;
   1907 #endif
   1908 
   1909 	if (req->tracerec) {
   1910 		RF_ETIMER_STOP(req->tracerec->timer);
   1911 		RF_ETIMER_EVAL(req->tracerec->timer);
   1912 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1913 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1914 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1915 		req->tracerec->num_phys_ios++;
   1916 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1917 	}
   1918 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1919 
   1920 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1921 
   1922 
   1923 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1924 	 * ballistic, and mark the component as hosed... */
   1925 
   1926 	if (bp->b_flags & B_ERROR) {
   1927 		/* Mark the disk as dead */
   1928 		/* but only mark it once... */
   1929 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1930 		    rf_ds_optimal) {
   1931 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1932 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1933 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1934 			    rf_ds_failed;
   1935 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1936 			queue->raidPtr->numFailures++;
   1937 			queue->raidPtr->numNewFailures++;
   1938 		} else {	/* Disk is already dead... */
   1939 			/* printf("Disk already marked as dead!\n"); */
   1940 		}
   1941 
   1942 	}
   1943 
   1944 	rs = &raid_softc[unit];
   1945 	RAIDPUTBUF(rs, raidbp);
   1946 
   1947 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1948 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1949 
   1950 	splx(s);
   1951 }
   1952 
   1953 
   1954 
   1955 /*
   1956  * initialize a buf structure for doing an I/O in the kernel.
   1957  */
   1958 static void
   1959 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
   1960        logBytesPerSector, b_proc)
   1961 	struct buf *bp;
   1962 	struct vnode *b_vp;
   1963 	unsigned rw_flag;
   1964 	dev_t dev;
   1965 	RF_SectorNum_t startSect;
   1966 	RF_SectorCount_t numSect;
   1967 	caddr_t buf;
   1968 	void (*cbFunc) (struct buf *);
   1969 	void *cbArg;
   1970 	int logBytesPerSector;
   1971 	struct proc *b_proc;
   1972 {
   1973 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1974 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1975 	bp->b_bcount = numSect << logBytesPerSector;
   1976 	bp->b_bufsize = bp->b_bcount;
   1977 	bp->b_error = 0;
   1978 	bp->b_dev = dev;
   1979 	bp->b_data = buf;
   1980 	bp->b_blkno = startSect;
   1981 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1982 	if (bp->b_bcount == 0) {
   1983 		panic("bp->b_bcount is zero in InitBP!!\n");
   1984 	}
   1985 	bp->b_proc = b_proc;
   1986 	bp->b_iodone = cbFunc;
   1987 	bp->b_vp = b_vp;
   1988 
   1989 }
   1990 
   1991 static void
   1992 raidgetdefaultlabel(raidPtr, rs, lp)
   1993 	RF_Raid_t *raidPtr;
   1994 	struct raid_softc *rs;
   1995 	struct disklabel *lp;
   1996 {
   1997 	db1_printf(("Building a default label...\n"));
   1998 	memset(lp, 0, sizeof(*lp));
   1999 
   2000 	/* fabricate a label... */
   2001 	lp->d_secperunit = raidPtr->totalSectors;
   2002 	lp->d_secsize = raidPtr->bytesPerSector;
   2003 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2004 	lp->d_ntracks = 4 * raidPtr->numCol;
   2005 	lp->d_ncylinders = raidPtr->totalSectors /
   2006 		(lp->d_nsectors * lp->d_ntracks);
   2007 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   2008 
   2009 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2010 	lp->d_type = DTYPE_RAID;
   2011 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2012 	lp->d_rpm = 3600;
   2013 	lp->d_interleave = 1;
   2014 	lp->d_flags = 0;
   2015 
   2016 	lp->d_partitions[RAW_PART].p_offset = 0;
   2017 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2018 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2019 	lp->d_npartitions = RAW_PART + 1;
   2020 
   2021 	lp->d_magic = DISKMAGIC;
   2022 	lp->d_magic2 = DISKMAGIC;
   2023 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2024 
   2025 }
   2026 /*
   2027  * Read the disklabel from the raid device.  If one is not present, fake one
   2028  * up.
   2029  */
   2030 static void
   2031 raidgetdisklabel(dev)
   2032 	dev_t   dev;
   2033 {
   2034 	int     unit = raidunit(dev);
   2035 	struct raid_softc *rs = &raid_softc[unit];
   2036 	char   *errstring;
   2037 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2038 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2039 	RF_Raid_t *raidPtr;
   2040 
   2041 	db1_printf(("Getting the disklabel...\n"));
   2042 
   2043 	memset(clp, 0, sizeof(*clp));
   2044 
   2045 	raidPtr = raidPtrs[unit];
   2046 
   2047 	raidgetdefaultlabel(raidPtr, rs, lp);
   2048 
   2049 	/*
   2050 	 * Call the generic disklabel extraction routine.
   2051 	 */
   2052 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2053 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2054 	if (errstring)
   2055 		raidmakedisklabel(rs);
   2056 	else {
   2057 		int     i;
   2058 		struct partition *pp;
   2059 
   2060 		/*
   2061 		 * Sanity check whether the found disklabel is valid.
   2062 		 *
   2063 		 * This is necessary since total size of the raid device
   2064 		 * may vary when an interleave is changed even though exactly
   2065 		 * same componets are used, and old disklabel may used
   2066 		 * if that is found.
   2067 		 */
   2068 		if (lp->d_secperunit != rs->sc_size)
   2069 			printf("WARNING: %s: "
   2070 			    "total sector size in disklabel (%d) != "
   2071 			    "the size of raid (%ld)\n", rs->sc_xname,
   2072 			    lp->d_secperunit, (long) rs->sc_size);
   2073 		for (i = 0; i < lp->d_npartitions; i++) {
   2074 			pp = &lp->d_partitions[i];
   2075 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2076 				printf("WARNING: %s: end of partition `%c' "
   2077 				    "exceeds the size of raid (%ld)\n",
   2078 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2079 		}
   2080 	}
   2081 
   2082 }
   2083 /*
   2084  * Take care of things one might want to take care of in the event
   2085  * that a disklabel isn't present.
   2086  */
   2087 static void
   2088 raidmakedisklabel(rs)
   2089 	struct raid_softc *rs;
   2090 {
   2091 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2092 	db1_printf(("Making a label..\n"));
   2093 
   2094 	/*
   2095 	 * For historical reasons, if there's no disklabel present
   2096 	 * the raw partition must be marked FS_BSDFFS.
   2097 	 */
   2098 
   2099 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2100 
   2101 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2102 
   2103 	lp->d_checksum = dkcksum(lp);
   2104 }
   2105 /*
   2106  * Lookup the provided name in the filesystem.  If the file exists,
   2107  * is a valid block device, and isn't being used by anyone else,
   2108  * set *vpp to the file's vnode.
   2109  * You'll find the original of this in ccd.c
   2110  */
   2111 int
   2112 raidlookup(path, p, vpp)
   2113 	char   *path;
   2114 	struct proc *p;
   2115 	struct vnode **vpp;	/* result */
   2116 {
   2117 	struct nameidata nd;
   2118 	struct vnode *vp;
   2119 	struct vattr va;
   2120 	int     error;
   2121 
   2122 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2123 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2124 #ifdef DEBUG
   2125 		printf("RAIDframe: vn_open returned %d\n", error);
   2126 #endif
   2127 		return (error);
   2128 	}
   2129 	vp = nd.ni_vp;
   2130 	if (vp->v_usecount > 1) {
   2131 		VOP_UNLOCK(vp, 0);
   2132 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2133 		return (EBUSY);
   2134 	}
   2135 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2136 		VOP_UNLOCK(vp, 0);
   2137 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2138 		return (error);
   2139 	}
   2140 	/* XXX: eventually we should handle VREG, too. */
   2141 	if (va.va_type != VBLK) {
   2142 		VOP_UNLOCK(vp, 0);
   2143 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2144 		return (ENOTBLK);
   2145 	}
   2146 	VOP_UNLOCK(vp, 0);
   2147 	*vpp = vp;
   2148 	return (0);
   2149 }
   2150 /*
   2151  * Wait interruptibly for an exclusive lock.
   2152  *
   2153  * XXX
   2154  * Several drivers do this; it should be abstracted and made MP-safe.
   2155  * (Hmm... where have we seen this warning before :->  GO )
   2156  */
   2157 static int
   2158 raidlock(rs)
   2159 	struct raid_softc *rs;
   2160 {
   2161 	int     error;
   2162 
   2163 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2164 		rs->sc_flags |= RAIDF_WANTED;
   2165 		if ((error =
   2166 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2167 			return (error);
   2168 	}
   2169 	rs->sc_flags |= RAIDF_LOCKED;
   2170 	return (0);
   2171 }
   2172 /*
   2173  * Unlock and wake up any waiters.
   2174  */
   2175 static void
   2176 raidunlock(rs)
   2177 	struct raid_softc *rs;
   2178 {
   2179 
   2180 	rs->sc_flags &= ~RAIDF_LOCKED;
   2181 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2182 		rs->sc_flags &= ~RAIDF_WANTED;
   2183 		wakeup(rs);
   2184 	}
   2185 }
   2186 
   2187 
   2188 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2189 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2190 
   2191 int
   2192 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2193 {
   2194 	RF_ComponentLabel_t clabel;
   2195 	raidread_component_label(dev, b_vp, &clabel);
   2196 	clabel.mod_counter = mod_counter;
   2197 	clabel.clean = RF_RAID_CLEAN;
   2198 	raidwrite_component_label(dev, b_vp, &clabel);
   2199 	return(0);
   2200 }
   2201 
   2202 
   2203 int
   2204 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2205 {
   2206 	RF_ComponentLabel_t clabel;
   2207 	raidread_component_label(dev, b_vp, &clabel);
   2208 	clabel.mod_counter = mod_counter;
   2209 	clabel.clean = RF_RAID_DIRTY;
   2210 	raidwrite_component_label(dev, b_vp, &clabel);
   2211 	return(0);
   2212 }
   2213 
   2214 /* ARGSUSED */
   2215 int
   2216 raidread_component_label(dev, b_vp, clabel)
   2217 	dev_t dev;
   2218 	struct vnode *b_vp;
   2219 	RF_ComponentLabel_t *clabel;
   2220 {
   2221 	struct buf *bp;
   2222 	int error;
   2223 
   2224 	/* XXX should probably ensure that we don't try to do this if
   2225 	   someone has changed rf_protected_sectors. */
   2226 
   2227 	if (b_vp == NULL) {
   2228 		/* For whatever reason, this component is not valid.
   2229 		   Don't try to read a component label from it. */
   2230 		return(EINVAL);
   2231 	}
   2232 
   2233 	/* get a block of the appropriate size... */
   2234 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2235 	bp->b_dev = dev;
   2236 
   2237 	/* get our ducks in a row for the read */
   2238 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2239 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2240 	bp->b_flags |= B_READ;
   2241  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2242 
   2243 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2244 
   2245 	error = biowait(bp);
   2246 
   2247 	if (!error) {
   2248 		memcpy(clabel, bp->b_data,
   2249 		       sizeof(RF_ComponentLabel_t));
   2250 #if 0
   2251 		rf_print_component_label( clabel );
   2252 #endif
   2253         } else {
   2254 #if 0
   2255 		printf("Failed to read RAID component label!\n");
   2256 #endif
   2257 	}
   2258 
   2259 	brelse(bp);
   2260 	return(error);
   2261 }
   2262 /* ARGSUSED */
   2263 int
   2264 raidwrite_component_label(dev, b_vp, clabel)
   2265 	dev_t dev;
   2266 	struct vnode *b_vp;
   2267 	RF_ComponentLabel_t *clabel;
   2268 {
   2269 	struct buf *bp;
   2270 	int error;
   2271 
   2272 	/* get a block of the appropriate size... */
   2273 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2274 	bp->b_dev = dev;
   2275 
   2276 	/* get our ducks in a row for the write */
   2277 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2278 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2279 	bp->b_flags |= B_WRITE;
   2280  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2281 
   2282 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2283 
   2284 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2285 
   2286 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2287 	error = biowait(bp);
   2288 	brelse(bp);
   2289 	if (error) {
   2290 #if 1
   2291 		printf("Failed to write RAID component info!\n");
   2292 #endif
   2293 	}
   2294 
   2295 	return(error);
   2296 }
   2297 
   2298 void
   2299 rf_markalldirty(raidPtr)
   2300 	RF_Raid_t *raidPtr;
   2301 {
   2302 	RF_ComponentLabel_t clabel;
   2303 	int r,c;
   2304 
   2305 	raidPtr->mod_counter++;
   2306 	for (r = 0; r < raidPtr->numRow; r++) {
   2307 		for (c = 0; c < raidPtr->numCol; c++) {
   2308 			/* we don't want to touch (at all) a disk that has
   2309 			   failed */
   2310 			if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
   2311 				raidread_component_label(
   2312 					raidPtr->Disks[r][c].dev,
   2313 					raidPtr->raid_cinfo[r][c].ci_vp,
   2314 					&clabel);
   2315 				if (clabel.status == rf_ds_spared) {
   2316 					/* XXX do something special...
   2317 					 but whatever you do, don't
   2318 					 try to access it!! */
   2319 				} else {
   2320 #if 0
   2321 				clabel.status =
   2322 					raidPtr->Disks[r][c].status;
   2323 				raidwrite_component_label(
   2324 					raidPtr->Disks[r][c].dev,
   2325 					raidPtr->raid_cinfo[r][c].ci_vp,
   2326 					&clabel);
   2327 #endif
   2328 				raidmarkdirty(
   2329 				       raidPtr->Disks[r][c].dev,
   2330 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2331 				       raidPtr->mod_counter);
   2332 				}
   2333 			}
   2334 		}
   2335 	}
   2336 	/* printf("Component labels marked dirty.\n"); */
   2337 #if 0
   2338 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2339 		sparecol = raidPtr->numCol + c;
   2340 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2341 			/*
   2342 
   2343 			   XXX this is where we get fancy and map this spare
   2344 			   into it's correct spot in the array.
   2345 
   2346 			 */
   2347 			/*
   2348 
   2349 			   we claim this disk is "optimal" if it's
   2350 			   rf_ds_used_spare, as that means it should be
   2351 			   directly substitutable for the disk it replaced.
   2352 			   We note that too...
   2353 
   2354 			 */
   2355 
   2356 			for(i=0;i<raidPtr->numRow;i++) {
   2357 				for(j=0;j<raidPtr->numCol;j++) {
   2358 					if ((raidPtr->Disks[i][j].spareRow ==
   2359 					     r) &&
   2360 					    (raidPtr->Disks[i][j].spareCol ==
   2361 					     sparecol)) {
   2362 						srow = r;
   2363 						scol = sparecol;
   2364 						break;
   2365 					}
   2366 				}
   2367 			}
   2368 
   2369 			raidread_component_label(
   2370 				      raidPtr->Disks[r][sparecol].dev,
   2371 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2372 				      &clabel);
   2373 			/* make sure status is noted */
   2374 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2375 			clabel.mod_counter = raidPtr->mod_counter;
   2376 			clabel.serial_number = raidPtr->serial_number;
   2377 			clabel.row = srow;
   2378 			clabel.column = scol;
   2379 			clabel.num_rows = raidPtr->numRow;
   2380 			clabel.num_columns = raidPtr->numCol;
   2381 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2382 			clabel.status = rf_ds_optimal;
   2383 			raidwrite_component_label(
   2384 				      raidPtr->Disks[r][sparecol].dev,
   2385 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2386 				      &clabel);
   2387 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2388 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2389 		}
   2390 	}
   2391 
   2392 #endif
   2393 }
   2394 
   2395 
   2396 void
   2397 rf_update_component_labels(raidPtr, final)
   2398 	RF_Raid_t *raidPtr;
   2399 	int final;
   2400 {
   2401 	RF_ComponentLabel_t clabel;
   2402 	int sparecol;
   2403 	int r,c;
   2404 	int i,j;
   2405 	int srow, scol;
   2406 
   2407 	srow = -1;
   2408 	scol = -1;
   2409 
   2410 	/* XXX should do extra checks to make sure things really are clean,
   2411 	   rather than blindly setting the clean bit... */
   2412 
   2413 	raidPtr->mod_counter++;
   2414 
   2415 	for (r = 0; r < raidPtr->numRow; r++) {
   2416 		for (c = 0; c < raidPtr->numCol; c++) {
   2417 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2418 				raidread_component_label(
   2419 					raidPtr->Disks[r][c].dev,
   2420 					raidPtr->raid_cinfo[r][c].ci_vp,
   2421 					&clabel);
   2422 				/* make sure status is noted */
   2423 				clabel.status = rf_ds_optimal;
   2424 				/* bump the counter */
   2425 				clabel.mod_counter = raidPtr->mod_counter;
   2426 
   2427 				raidwrite_component_label(
   2428 					raidPtr->Disks[r][c].dev,
   2429 					raidPtr->raid_cinfo[r][c].ci_vp,
   2430 					&clabel);
   2431 				if (final == RF_FINAL_COMPONENT_UPDATE) {
   2432 					if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2433 						raidmarkclean(
   2434 							      raidPtr->Disks[r][c].dev,
   2435 							      raidPtr->raid_cinfo[r][c].ci_vp,
   2436 							      raidPtr->mod_counter);
   2437 					}
   2438 				}
   2439 			}
   2440 			/* else we don't touch it.. */
   2441 		}
   2442 	}
   2443 
   2444 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2445 		sparecol = raidPtr->numCol + c;
   2446 		/* Need to ensure that the reconstruct actually completed! */
   2447 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2448 			/*
   2449 
   2450 			   we claim this disk is "optimal" if it's
   2451 			   rf_ds_used_spare, as that means it should be
   2452 			   directly substitutable for the disk it replaced.
   2453 			   We note that too...
   2454 
   2455 			 */
   2456 
   2457 			for(i=0;i<raidPtr->numRow;i++) {
   2458 				for(j=0;j<raidPtr->numCol;j++) {
   2459 					if ((raidPtr->Disks[i][j].spareRow ==
   2460 					     0) &&
   2461 					    (raidPtr->Disks[i][j].spareCol ==
   2462 					     sparecol)) {
   2463 						srow = i;
   2464 						scol = j;
   2465 						break;
   2466 					}
   2467 				}
   2468 			}
   2469 
   2470 			/* XXX shouldn't *really* need this... */
   2471 			raidread_component_label(
   2472 				      raidPtr->Disks[0][sparecol].dev,
   2473 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2474 				      &clabel);
   2475 			/* make sure status is noted */
   2476 
   2477 			raid_init_component_label(raidPtr, &clabel);
   2478 
   2479 			clabel.mod_counter = raidPtr->mod_counter;
   2480 			clabel.row = srow;
   2481 			clabel.column = scol;
   2482 			clabel.status = rf_ds_optimal;
   2483 
   2484 			raidwrite_component_label(
   2485 				      raidPtr->Disks[0][sparecol].dev,
   2486 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2487 				      &clabel);
   2488 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2489 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2490 					raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2491 						       raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2492 						       raidPtr->mod_counter);
   2493 				}
   2494 			}
   2495 		}
   2496 	}
   2497 	/* 	printf("Component labels updated\n"); */
   2498 }
   2499 
   2500 void
   2501 rf_close_component(raidPtr, vp, auto_configured)
   2502 	RF_Raid_t *raidPtr;
   2503 	struct vnode *vp;
   2504 	int auto_configured;
   2505 {
   2506 	struct proc *p;
   2507 
   2508 	p = raidPtr->engine_thread;
   2509 
   2510 	if (vp != NULL) {
   2511 		if (auto_configured == 1) {
   2512 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2513 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2514 			vput(vp);
   2515 
   2516 		} else {
   2517 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2518 		}
   2519 	} else {
   2520 		printf("vnode was NULL\n");
   2521 	}
   2522 }
   2523 
   2524 
   2525 void
   2526 rf_UnconfigureVnodes(raidPtr)
   2527 	RF_Raid_t *raidPtr;
   2528 {
   2529 	int r,c;
   2530 	struct proc *p;
   2531 	struct vnode *vp;
   2532 	int acd;
   2533 
   2534 
   2535 	/* We take this opportunity to close the vnodes like we should.. */
   2536 
   2537 	p = raidPtr->engine_thread;
   2538 
   2539 	for (r = 0; r < raidPtr->numRow; r++) {
   2540 		for (c = 0; c < raidPtr->numCol; c++) {
   2541 			printf("Closing vnode for row: %d col: %d\n", r, c);
   2542 			vp = raidPtr->raid_cinfo[r][c].ci_vp;
   2543 			acd = raidPtr->Disks[r][c].auto_configured;
   2544 			rf_close_component(raidPtr, vp, acd);
   2545 			raidPtr->raid_cinfo[r][c].ci_vp = NULL;
   2546 			raidPtr->Disks[r][c].auto_configured = 0;
   2547 		}
   2548 	}
   2549 	for (r = 0; r < raidPtr->numSpare; r++) {
   2550 		printf("Closing vnode for spare: %d\n", r);
   2551 		vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
   2552 		acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
   2553 		rf_close_component(raidPtr, vp, acd);
   2554 		raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
   2555 		raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
   2556 	}
   2557 }
   2558 
   2559 
   2560 void
   2561 rf_ReconThread(req)
   2562 	struct rf_recon_req *req;
   2563 {
   2564 	int     s;
   2565 	RF_Raid_t *raidPtr;
   2566 
   2567 	s = splbio();
   2568 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2569 	raidPtr->recon_in_progress = 1;
   2570 
   2571 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2572 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2573 
   2574 	/* XXX get rid of this! we don't need it at all.. */
   2575 	RF_Free(req, sizeof(*req));
   2576 
   2577 	raidPtr->recon_in_progress = 0;
   2578 	splx(s);
   2579 
   2580 	/* That's all... */
   2581 	kthread_exit(0);        /* does not return */
   2582 }
   2583 
   2584 void
   2585 rf_RewriteParityThread(raidPtr)
   2586 	RF_Raid_t *raidPtr;
   2587 {
   2588 	int retcode;
   2589 	int s;
   2590 
   2591 	raidPtr->parity_rewrite_in_progress = 1;
   2592 	s = splbio();
   2593 	retcode = rf_RewriteParity(raidPtr);
   2594 	splx(s);
   2595 	if (retcode) {
   2596 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2597 	} else {
   2598 		/* set the clean bit!  If we shutdown correctly,
   2599 		   the clean bit on each component label will get
   2600 		   set */
   2601 		raidPtr->parity_good = RF_RAID_CLEAN;
   2602 	}
   2603 	raidPtr->parity_rewrite_in_progress = 0;
   2604 
   2605 	/* Anyone waiting for us to stop?  If so, inform them... */
   2606 	if (raidPtr->waitShutdown) {
   2607 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2608 	}
   2609 
   2610 	/* That's all... */
   2611 	kthread_exit(0);        /* does not return */
   2612 }
   2613 
   2614 
   2615 void
   2616 rf_CopybackThread(raidPtr)
   2617 	RF_Raid_t *raidPtr;
   2618 {
   2619 	int s;
   2620 
   2621 	raidPtr->copyback_in_progress = 1;
   2622 	s = splbio();
   2623 	rf_CopybackReconstructedData(raidPtr);
   2624 	splx(s);
   2625 	raidPtr->copyback_in_progress = 0;
   2626 
   2627 	/* That's all... */
   2628 	kthread_exit(0);        /* does not return */
   2629 }
   2630 
   2631 
   2632 void
   2633 rf_ReconstructInPlaceThread(req)
   2634 	struct rf_recon_req *req;
   2635 {
   2636 	int retcode;
   2637 	int s;
   2638 	RF_Raid_t *raidPtr;
   2639 
   2640 	s = splbio();
   2641 	raidPtr = req->raidPtr;
   2642 	raidPtr->recon_in_progress = 1;
   2643 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2644 	RF_Free(req, sizeof(*req));
   2645 	raidPtr->recon_in_progress = 0;
   2646 	splx(s);
   2647 
   2648 	/* That's all... */
   2649 	kthread_exit(0);        /* does not return */
   2650 }
   2651 
/*
 * Mount-root hook for the raid device.  Nothing needs to be done here at
 * present, so the body is intentionally empty.
 *
 *	dev	device the hook is invoked for; unused
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* nothing to do */
}
   2658 
   2659 
   2660 RF_AutoConfig_t *
   2661 rf_find_raid_components()
   2662 {
   2663 	struct devnametobdevmaj *dtobdm;
   2664 	struct vnode *vp;
   2665 	struct disklabel label;
   2666 	struct device *dv;
   2667 	char *cd_name;
   2668 	dev_t dev;
   2669 	int error;
   2670 	int i;
   2671 	int good_one;
   2672 	RF_ComponentLabel_t *clabel;
   2673 	RF_AutoConfig_t *ac_list;
   2674 	RF_AutoConfig_t *ac;
   2675 
   2676 
   2677 	/* initialize the AutoConfig list */
   2678 	ac_list = NULL;
   2679 
   2680 	/* we begin by trolling through *all* the devices on the system */
   2681 
   2682 	for (dv = alldevs.tqh_first; dv != NULL;
   2683 	     dv = dv->dv_list.tqe_next) {
   2684 
   2685 		/* we are only interested in disks... */
   2686 		if (dv->dv_class != DV_DISK)
   2687 			continue;
   2688 
   2689 		/* we don't care about floppies... */
   2690 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2691 			continue;
   2692 		}
   2693 
   2694 		/* need to find the device_name_to_block_device_major stuff */
   2695 		cd_name = dv->dv_cfdata->cf_driver->cd_name;
   2696 		dtobdm = dev_name2blk;
   2697 		while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
   2698 			dtobdm++;
   2699 		}
   2700 
   2701 		/* get a vnode for the raw partition of this disk */
   2702 
   2703 		dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
   2704 		if (bdevvp(dev, &vp))
   2705 			panic("RAID can't alloc vnode");
   2706 
   2707 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2708 
   2709 		if (error) {
   2710 			/* "Who cares."  Continue looking
   2711 			   for something that exists*/
   2712 			vput(vp);
   2713 			continue;
   2714 		}
   2715 
   2716 		/* Ok, the disk exists.  Go get the disklabel. */
   2717 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2718 				  FREAD, NOCRED, 0);
   2719 		if (error) {
   2720 			/*
   2721 			 * XXX can't happen - open() would
   2722 			 * have errored out (or faked up one)
   2723 			 */
   2724 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2725 			       dv->dv_xname, 'a' + RAW_PART, error);
   2726 		}
   2727 
   2728 		/* don't need this any more.  We'll allocate it again
   2729 		   a little later if we really do... */
   2730 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2731 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2732 		vput(vp);
   2733 
   2734 		for (i=0; i < label.d_npartitions; i++) {
   2735 			/* We only support partitions marked as RAID */
   2736 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2737 				continue;
   2738 
   2739 			dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
   2740 			if (bdevvp(dev, &vp))
   2741 				panic("RAID can't alloc vnode");
   2742 
   2743 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2744 			if (error) {
   2745 				/* Whatever... */
   2746 				vput(vp);
   2747 				continue;
   2748 			}
   2749 
   2750 			good_one = 0;
   2751 
   2752 			clabel = (RF_ComponentLabel_t *)
   2753 				malloc(sizeof(RF_ComponentLabel_t),
   2754 				       M_RAIDFRAME, M_NOWAIT);
   2755 			if (clabel == NULL) {
   2756 				/* XXX CLEANUP HERE */
   2757 				printf("RAID auto config: out of memory!\n");
   2758 				return(NULL); /* XXX probably should panic? */
   2759 			}
   2760 
   2761 			if (!raidread_component_label(dev, vp, clabel)) {
   2762 				/* Got the label.  Does it look reasonable? */
   2763 				if (rf_reasonable_label(clabel) &&
   2764 				    (clabel->partitionSize <=
   2765 				     label.d_partitions[i].p_size)) {
   2766 #if DEBUG
   2767 					printf("Component on: %s%c: %d\n",
   2768 					       dv->dv_xname, 'a'+i,
   2769 					       label.d_partitions[i].p_size);
   2770 					rf_print_component_label(clabel);
   2771 #endif
   2772 					/* if it's reasonable, add it,
   2773 					   else ignore it. */
   2774 					ac = (RF_AutoConfig_t *)
   2775 						malloc(sizeof(RF_AutoConfig_t),
   2776 						       M_RAIDFRAME,
   2777 						       M_NOWAIT);
   2778 					if (ac == NULL) {
   2779 						/* XXX should panic?? */
   2780 						return(NULL);
   2781 					}
   2782 
   2783 					sprintf(ac->devname, "%s%c",
   2784 						dv->dv_xname, 'a'+i);
   2785 					ac->dev = dev;
   2786 					ac->vp = vp;
   2787 					ac->clabel = clabel;
   2788 					ac->next = ac_list;
   2789 					ac_list = ac;
   2790 					good_one = 1;
   2791 				}
   2792 			}
   2793 			if (!good_one) {
   2794 				/* cleanup */
   2795 				free(clabel, M_RAIDFRAME);
   2796 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2797 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2798 				vput(vp);
   2799 			}
   2800 		}
   2801 	}
   2802 	return(ac_list);
   2803 }
   2804 
   2805 static int
   2806 rf_reasonable_label(clabel)
   2807 	RF_ComponentLabel_t *clabel;
   2808 {
   2809 
   2810 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2811 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2812 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2813 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2814 	    clabel->row >=0 &&
   2815 	    clabel->column >= 0 &&
   2816 	    clabel->num_rows > 0 &&
   2817 	    clabel->num_columns > 0 &&
   2818 	    clabel->row < clabel->num_rows &&
   2819 	    clabel->column < clabel->num_columns &&
   2820 	    clabel->blockSize > 0 &&
   2821 	    clabel->numBlocks > 0) {
   2822 		/* label looks reasonable enough... */
   2823 		return(1);
   2824 	}
   2825 	return(0);
   2826 }
   2827 
   2828 
   2829 void
   2830 rf_print_component_label(clabel)
   2831 	RF_ComponentLabel_t *clabel;
   2832 {
   2833 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2834 	       clabel->row, clabel->column,
   2835 	       clabel->num_rows, clabel->num_columns);
   2836 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2837 	       clabel->version, clabel->serial_number,
   2838 	       clabel->mod_counter);
   2839 	printf("   Clean: %s Status: %d\n",
   2840 	       clabel->clean ? "Yes" : "No", clabel->status );
   2841 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2842 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2843 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2844 	       (char) clabel->parityConfig, clabel->blockSize,
   2845 	       clabel->numBlocks);
   2846 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2847 	printf("   Contains root partition: %s\n",
   2848 	       clabel->root_partition ? "Yes" : "No" );
   2849 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2850 #if 0
   2851 	   printf("   Config order: %d\n", clabel->config_order);
   2852 #endif
   2853 
   2854 }
   2855 
   2856 RF_ConfigSet_t *
   2857 rf_create_auto_sets(ac_list)
   2858 	RF_AutoConfig_t *ac_list;
   2859 {
   2860 	RF_AutoConfig_t *ac;
   2861 	RF_ConfigSet_t *config_sets;
   2862 	RF_ConfigSet_t *cset;
   2863 	RF_AutoConfig_t *ac_next;
   2864 
   2865 
   2866 	config_sets = NULL;
   2867 
   2868 	/* Go through the AutoConfig list, and figure out which components
   2869 	   belong to what sets.  */
   2870 	ac = ac_list;
   2871 	while(ac!=NULL) {
   2872 		/* we're going to putz with ac->next, so save it here
   2873 		   for use at the end of the loop */
   2874 		ac_next = ac->next;
   2875 
   2876 		if (config_sets == NULL) {
   2877 			/* will need at least this one... */
   2878 			config_sets = (RF_ConfigSet_t *)
   2879 				malloc(sizeof(RF_ConfigSet_t),
   2880 				       M_RAIDFRAME, M_NOWAIT);
   2881 			if (config_sets == NULL) {
   2882 				panic("rf_create_auto_sets: No memory!\n");
   2883 			}
   2884 			/* this one is easy :) */
   2885 			config_sets->ac = ac;
   2886 			config_sets->next = NULL;
   2887 			config_sets->rootable = 0;
   2888 			ac->next = NULL;
   2889 		} else {
   2890 			/* which set does this component fit into? */
   2891 			cset = config_sets;
   2892 			while(cset!=NULL) {
   2893 				if (rf_does_it_fit(cset, ac)) {
   2894 					/* looks like it matches... */
   2895 					ac->next = cset->ac;
   2896 					cset->ac = ac;
   2897 					break;
   2898 				}
   2899 				cset = cset->next;
   2900 			}
   2901 			if (cset==NULL) {
   2902 				/* didn't find a match above... new set..*/
   2903 				cset = (RF_ConfigSet_t *)
   2904 					malloc(sizeof(RF_ConfigSet_t),
   2905 					       M_RAIDFRAME, M_NOWAIT);
   2906 				if (cset == NULL) {
   2907 					panic("rf_create_auto_sets: No memory!\n");
   2908 				}
   2909 				cset->ac = ac;
   2910 				ac->next = NULL;
   2911 				cset->next = config_sets;
   2912 				cset->rootable = 0;
   2913 				config_sets = cset;
   2914 			}
   2915 		}
   2916 		ac = ac_next;
   2917 	}
   2918 
   2919 
   2920 	return(config_sets);
   2921 }
   2922 
   2923 static int
   2924 rf_does_it_fit(cset, ac)
   2925 	RF_ConfigSet_t *cset;
   2926 	RF_AutoConfig_t *ac;
   2927 {
   2928 	RF_ComponentLabel_t *clabel1, *clabel2;
   2929 
   2930 	/* If this one matches the *first* one in the set, that's good
   2931 	   enough, since the other members of the set would have been
   2932 	   through here too... */
   2933 	/* note that we are not checking partitionSize here..
   2934 
   2935 	   Note that we are also not checking the mod_counters here.
   2936 	   If everything else matches execpt the mod_counter, that's
   2937 	   good enough for this test.  We will deal with the mod_counters
   2938 	   a little later in the autoconfiguration process.
   2939 
   2940 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2941 
   2942 	   The reason we don't check for this is that failed disks
   2943 	   will have lower modification counts.  If those disks are
   2944 	   not added to the set they used to belong to, then they will
   2945 	   form their own set, which may result in 2 different sets,
   2946 	   for example, competing to be configured at raid0, and
   2947 	   perhaps competing to be the root filesystem set.  If the
   2948 	   wrong ones get configured, or both attempt to become /,
   2949 	   weird behaviour and or serious lossage will occur.  Thus we
   2950 	   need to bring them into the fold here, and kick them out at
   2951 	   a later point.
   2952 
   2953 	*/
   2954 
   2955 	clabel1 = cset->ac->clabel;
   2956 	clabel2 = ac->clabel;
   2957 	if ((clabel1->version == clabel2->version) &&
   2958 	    (clabel1->serial_number == clabel2->serial_number) &&
   2959 	    (clabel1->num_rows == clabel2->num_rows) &&
   2960 	    (clabel1->num_columns == clabel2->num_columns) &&
   2961 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2962 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2963 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2964 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2965 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2966 	    (clabel1->blockSize == clabel2->blockSize) &&
   2967 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2968 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2969 	    (clabel1->root_partition == clabel2->root_partition) &&
   2970 	    (clabel1->last_unit == clabel2->last_unit) &&
   2971 	    (clabel1->config_order == clabel2->config_order)) {
   2972 		/* if it get's here, it almost *has* to be a match */
   2973 	} else {
   2974 		/* it's not consistent with somebody in the set..
   2975 		   punt */
   2976 		return(0);
   2977 	}
   2978 	/* all was fine.. it must fit... */
   2979 	return(1);
   2980 }
   2981 
   2982 int
   2983 rf_have_enough_components(cset)
   2984 	RF_ConfigSet_t *cset;
   2985 {
   2986 	RF_AutoConfig_t *ac;
   2987 	RF_AutoConfig_t *auto_config;
   2988 	RF_ComponentLabel_t *clabel;
   2989 	int r,c;
   2990 	int num_rows;
   2991 	int num_cols;
   2992 	int num_missing;
   2993 	int mod_counter;
   2994 	int mod_counter_found;
   2995 	int even_pair_failed;
   2996 	char parity_type;
   2997 
   2998 
   2999 	/* check to see that we have enough 'live' components
   3000 	   of this set.  If so, we can configure it if necessary */
   3001 
   3002 	num_rows = cset->ac->clabel->num_rows;
   3003 	num_cols = cset->ac->clabel->num_columns;
   3004 	parity_type = cset->ac->clabel->parityConfig;
   3005 
   3006 	/* XXX Check for duplicate components!?!?!? */
   3007 
   3008 	/* Determine what the mod_counter is supposed to be for this set. */
   3009 
   3010 	mod_counter_found = 0;
   3011 	mod_counter = 0;
   3012 	ac = cset->ac;
   3013 	while(ac!=NULL) {
   3014 		if (mod_counter_found==0) {
   3015 			mod_counter = ac->clabel->mod_counter;
   3016 			mod_counter_found = 1;
   3017 		} else {
   3018 			if (ac->clabel->mod_counter > mod_counter) {
   3019 				mod_counter = ac->clabel->mod_counter;
   3020 			}
   3021 		}
   3022 		ac = ac->next;
   3023 	}
   3024 
   3025 	num_missing = 0;
   3026 	auto_config = cset->ac;
   3027 
   3028 	for(r=0; r<num_rows; r++) {
   3029 		even_pair_failed = 0;
   3030 		for(c=0; c<num_cols; c++) {
   3031 			ac = auto_config;
   3032 			while(ac!=NULL) {
   3033 				if ((ac->clabel->row == r) &&
   3034 				    (ac->clabel->column == c) &&
   3035 				    (ac->clabel->mod_counter == mod_counter)) {
   3036 					/* it's this one... */
   3037 #if DEBUG
   3038 					printf("Found: %s at %d,%d\n",
   3039 					       ac->devname,r,c);
   3040 #endif
   3041 					break;
   3042 				}
   3043 				ac=ac->next;
   3044 			}
   3045 			if (ac==NULL) {
   3046 				/* Didn't find one here! */
   3047 				/* special case for RAID 1, especially
   3048 				   where there are more than 2
   3049 				   components (where RAIDframe treats
   3050 				   things a little differently :( ) */
   3051 				if (parity_type == '1') {
   3052 					if (c%2 == 0) { /* even component */
   3053 						even_pair_failed = 1;
   3054 					} else { /* odd component.  If
   3055                                                     we're failed, and
   3056                                                     so is the even
   3057                                                     component, it's
   3058                                                     "Good Night, Charlie" */
   3059 						if (even_pair_failed == 1) {
   3060 							return(0);
   3061 						}
   3062 					}
   3063 				} else {
   3064 					/* normal accounting */
   3065 					num_missing++;
   3066 				}
   3067 			}
   3068 			if ((parity_type == '1') && (c%2 == 1)) {
   3069 				/* Just did an even component, and we didn't
   3070 				   bail.. reset the even_pair_failed flag,
   3071 				   and go on to the next component.... */
   3072 				even_pair_failed = 0;
   3073 			}
   3074 		}
   3075 	}
   3076 
   3077 	clabel = cset->ac->clabel;
   3078 
   3079 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3080 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3081 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3082 		/* XXX this needs to be made *much* more general */
   3083 		/* Too many failures */
   3084 		return(0);
   3085 	}
   3086 	/* otherwise, all is well, and we've got enough to take a kick
   3087 	   at autoconfiguring this set */
   3088 	return(1);
   3089 }
   3090 
   3091 void
   3092 rf_create_configuration(ac,config,raidPtr)
   3093 	RF_AutoConfig_t *ac;
   3094 	RF_Config_t *config;
   3095 	RF_Raid_t *raidPtr;
   3096 {
   3097 	RF_ComponentLabel_t *clabel;
   3098 	int i;
   3099 
   3100 	clabel = ac->clabel;
   3101 
   3102 	/* 1. Fill in the common stuff */
   3103 	config->numRow = clabel->num_rows;
   3104 	config->numCol = clabel->num_columns;
   3105 	config->numSpare = 0; /* XXX should this be set here? */
   3106 	config->sectPerSU = clabel->sectPerSU;
   3107 	config->SUsPerPU = clabel->SUsPerPU;
   3108 	config->SUsPerRU = clabel->SUsPerRU;
   3109 	config->parityConfig = clabel->parityConfig;
   3110 	/* XXX... */
   3111 	strcpy(config->diskQueueType,"fifo");
   3112 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3113 	config->layoutSpecificSize = 0; /* XXX ?? */
   3114 
   3115 	while(ac!=NULL) {
   3116 		/* row/col values will be in range due to the checks
   3117 		   in reasonable_label() */
   3118 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   3119 		       ac->devname);
   3120 		ac = ac->next;
   3121 	}
   3122 
   3123 	for(i=0;i<RF_MAXDBGV;i++) {
   3124 		config->debugVars[i][0] = NULL;
   3125 	}
   3126 }
   3127 
   3128 int
   3129 rf_set_autoconfig(raidPtr, new_value)
   3130 	RF_Raid_t *raidPtr;
   3131 	int new_value;
   3132 {
   3133 	RF_ComponentLabel_t clabel;
   3134 	struct vnode *vp;
   3135 	dev_t dev;
   3136 	int row, column;
   3137 
   3138 	raidPtr->autoconfigure = new_value;
   3139 	for(row=0; row<raidPtr->numRow; row++) {
   3140 		for(column=0; column<raidPtr->numCol; column++) {
   3141 			if (raidPtr->Disks[row][column].status ==
   3142 			    rf_ds_optimal) {
   3143 				dev = raidPtr->Disks[row][column].dev;
   3144 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3145 				raidread_component_label(dev, vp, &clabel);
   3146 				clabel.autoconfigure = new_value;
   3147 				raidwrite_component_label(dev, vp, &clabel);
   3148 			}
   3149 		}
   3150 	}
   3151 	return(new_value);
   3152 }
   3153 
   3154 int
   3155 rf_set_rootpartition(raidPtr, new_value)
   3156 	RF_Raid_t *raidPtr;
   3157 	int new_value;
   3158 {
   3159 	RF_ComponentLabel_t clabel;
   3160 	struct vnode *vp;
   3161 	dev_t dev;
   3162 	int row, column;
   3163 
   3164 	raidPtr->root_partition = new_value;
   3165 	for(row=0; row<raidPtr->numRow; row++) {
   3166 		for(column=0; column<raidPtr->numCol; column++) {
   3167 			if (raidPtr->Disks[row][column].status ==
   3168 			    rf_ds_optimal) {
   3169 				dev = raidPtr->Disks[row][column].dev;
   3170 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3171 				raidread_component_label(dev, vp, &clabel);
   3172 				clabel.root_partition = new_value;
   3173 				raidwrite_component_label(dev, vp, &clabel);
   3174 			}
   3175 		}
   3176 	}
   3177 	return(new_value);
   3178 }
   3179 
   3180 void
   3181 rf_release_all_vps(cset)
   3182 	RF_ConfigSet_t *cset;
   3183 {
   3184 	RF_AutoConfig_t *ac;
   3185 
   3186 	ac = cset->ac;
   3187 	while(ac!=NULL) {
   3188 		/* Close the vp, and give it back */
   3189 		if (ac->vp) {
   3190 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3191 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3192 			vput(ac->vp);
   3193 			ac->vp = NULL;
   3194 		}
   3195 		ac = ac->next;
   3196 	}
   3197 }
   3198 
   3199 
   3200 void
   3201 rf_cleanup_config_set(cset)
   3202 	RF_ConfigSet_t *cset;
   3203 {
   3204 	RF_AutoConfig_t *ac;
   3205 	RF_AutoConfig_t *next_ac;
   3206 
   3207 	ac = cset->ac;
   3208 	while(ac!=NULL) {
   3209 		next_ac = ac->next;
   3210 		/* nuke the label */
   3211 		free(ac->clabel, M_RAIDFRAME);
   3212 		/* cleanup the config structure */
   3213 		free(ac, M_RAIDFRAME);
   3214 		/* "next.." */
   3215 		ac = next_ac;
   3216 	}
   3217 	/* and, finally, nuke the config set */
   3218 	free(cset, M_RAIDFRAME);
   3219 }
   3220 
   3221 
   3222 void
   3223 raid_init_component_label(raidPtr, clabel)
   3224 	RF_Raid_t *raidPtr;
   3225 	RF_ComponentLabel_t *clabel;
   3226 {
   3227 	/* current version number */
   3228 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3229 	clabel->serial_number = raidPtr->serial_number;
   3230 	clabel->mod_counter = raidPtr->mod_counter;
   3231 	clabel->num_rows = raidPtr->numRow;
   3232 	clabel->num_columns = raidPtr->numCol;
   3233 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3234 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3235 
   3236 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3237 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3238 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3239 
   3240 	clabel->blockSize = raidPtr->bytesPerSector;
   3241 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3242 
   3243 	/* XXX not portable */
   3244 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3245 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3246 	clabel->autoconfigure = raidPtr->autoconfigure;
   3247 	clabel->root_partition = raidPtr->root_partition;
   3248 	clabel->last_unit = raidPtr->raidid;
   3249 	clabel->config_order = raidPtr->config_order;
   3250 }
   3251 
   3252 int
   3253 rf_auto_config_set(cset,unit)
   3254 	RF_ConfigSet_t *cset;
   3255 	int *unit;
   3256 {
   3257 	RF_Raid_t *raidPtr;
   3258 	RF_Config_t *config;
   3259 	int raidID;
   3260 	int retcode;
   3261 
   3262 	printf("RAID autoconfigure\n");
   3263 
   3264 	retcode = 0;
   3265 	*unit = -1;
   3266 
   3267 	/* 1. Create a config structure */
   3268 
   3269 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3270 				       M_RAIDFRAME,
   3271 				       M_NOWAIT);
   3272 	if (config==NULL) {
   3273 		printf("Out of mem!?!?\n");
   3274 				/* XXX do something more intelligent here. */
   3275 		return(1);
   3276 	}
   3277 
   3278 	memset(config, 0, sizeof(RF_Config_t));
   3279 
   3280 	/* XXX raidID needs to be set correctly.. */
   3281 
   3282 	/*
   3283 	   2. Figure out what RAID ID this one is supposed to live at
   3284 	   See if we can get the same RAID dev that it was configured
   3285 	   on last time..
   3286 	*/
   3287 
   3288 	raidID = cset->ac->clabel->last_unit;
   3289 	if ((raidID < 0) || (raidID >= numraid)) {
   3290 		/* let's not wander off into lala land. */
   3291 		raidID = numraid - 1;
   3292 	}
   3293 	if (raidPtrs[raidID]->valid != 0) {
   3294 
   3295 		/*
   3296 		   Nope... Go looking for an alternative...
   3297 		   Start high so we don't immediately use raid0 if that's
   3298 		   not taken.
   3299 		*/
   3300 
   3301 		for(raidID = numraid; raidID >= 0; raidID--) {
   3302 			if (raidPtrs[raidID]->valid == 0) {
   3303 				/* can use this one! */
   3304 				break;
   3305 			}
   3306 		}
   3307 	}
   3308 
   3309 	if (raidID < 0) {
   3310 		/* punt... */
   3311 		printf("Unable to auto configure this set!\n");
   3312 		printf("(Out of RAID devs!)\n");
   3313 		return(1);
   3314 	}
   3315 	printf("Configuring raid%d:\n",raidID);
   3316 	raidPtr = raidPtrs[raidID];
   3317 
   3318 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3319 	raidPtr->raidid = raidID;
   3320 	raidPtr->openings = RAIDOUTSTANDING;
   3321 
   3322 	/* 3. Build the configuration structure */
   3323 	rf_create_configuration(cset->ac, config, raidPtr);
   3324 
   3325 	/* 4. Do the configuration */
   3326 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3327 
   3328 	if (retcode == 0) {
   3329 
   3330 		raidinit(raidPtrs[raidID]);
   3331 
   3332 		rf_markalldirty(raidPtrs[raidID]);
   3333 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3334 		if (cset->ac->clabel->root_partition==1) {
   3335 			/* everything configured just fine.  Make a note
   3336 			   that this set is eligible to be root. */
   3337 			cset->rootable = 1;
   3338 			/* XXX do this here? */
   3339 			raidPtrs[raidID]->root_partition = 1;
   3340 		}
   3341 	}
   3342 
   3343 	/* 5. Cleanup */
   3344 	free(config, M_RAIDFRAME);
   3345 
   3346 	*unit = raidID;
   3347 	return(retcode);
   3348 }
   3349 
   3350 void
   3351 rf_disk_unbusy(desc)
   3352 	RF_RaidAccessDesc_t *desc;
   3353 {
   3354 	struct buf *bp;
   3355 
   3356 	bp = (struct buf *)desc->bp;
   3357 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3358 			    (bp->b_bcount - bp->b_resid));
   3359 }
   3360