Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.34
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.34 1999/12/12 20:51:41 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "rf_raid.h"
    139 #include "rf_raidframe.h"
    140 #include "rf_dag.h"
    141 #include "rf_dagflags.h"
    142 #include "rf_diskqueue.h"
    143 #include "rf_acctrace.h"
    144 #include "rf_etimer.h"
    145 #include "rf_general.h"
    146 #include "rf_debugMem.h"
    147 #include "rf_kintf.h"
    148 #include "rf_options.h"
    149 #include "rf_driver.h"
    150 #include "rf_parityscan.h"
    151 #include "rf_debugprint.h"
    152 #include "rf_threadstuff.h"
    153 
/* Debug verbosity knob: dbN_printf() fires when rf_kdebug_level >= N. */
int     rf_kdebug_level = 0;

/*
 * Leveled debug printf macros.  Usage: dbN_printf(("fmt", args...)).
 * Each expands to exactly one statement (do { ... } while (0)) so the
 * macros are safe in unbraced if/else bodies; the previous bare-`if'
 * and `{ }' expansions were dangling-else and empty-statement hazards.
 */
#ifdef DEBUG
#define db0_printf(a) printf a
#define db_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#define db2_printf(a) do { if (rf_kdebug_level > 1) printf a; } while (0)
#define db3_printf(a) do { if (rf_kdebug_level > 2) printf a; } while (0)
#define db4_printf(a) do { if (rf_kdebug_level > 3) printf a; } while (0)
#define db5_printf(a) do { if (rf_kdebug_level > 4) printf a; } while (0)
#else				/* DEBUG */
#define db0_printf(a) printf a
#define db1_printf(a) do { } while (0)
#define db2_printf(a) do { } while (0)
#define db3_printf(a) do { } while (0)
#define db4_printf(a) do { } while (0)
#define db5_printf(a) do { } while (0)
#endif				/* DEBUG */
    172 
static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/* Guards the spare-table request/response queues below.
 * (NOTE(review): inferred from the name -- confirm against the users
 * of rf_sparet_wait_queue elsewhere in this file.) */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

static struct rf_recon_req *recon_queue = NULL;	/* used to communicate
						 * reconstruction
						 * requests */


/* recon_queue_mutex protects recon_queue; always use the wrappers. */
decl_simple_lock_data(, recon_queue_mutex)
#define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
#define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
    190 
/* prototypes */

/* I/O completion + buf setup helpers (defined later in this file). */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);

/* Queue-debugging printfs, gated on rf_queueDebug. */
#define Dprintf0(s)       if (rf_queueDebug) \
     rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf1(s,a)     if (rf_queueDebug) \
     rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf2(s,a,b)   if (rf_queueDebug) \
     rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf3(s,a,b,c) if (rf_queueDebug) \
     rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)

/* Component-label clean/dirty marking. */
int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);

/* Autoconf / pseudo-device entry points. */
void raidattach __P((int));
int raidsize __P((dev_t));

void    rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
void    rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
static int raidinit __P((dev_t, RF_Raid_t *, int));

/* Block/character device switch entry points. */
int raidopen __P((dev_t, int, int, struct proc *));
int raidclose __P((dev_t, int, int, struct proc *));
int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int raidwrite __P((dev_t, struct uio *, int));
int raidread __P((dev_t, struct uio *, int));
void raidstrategy __P((struct buf *));
int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    229 /*
    230  * Pilfered from ccd.c
    231  */
    232 
/*
 * Per-component I/O wrapper, allocated from the unit's sc_cbufpool for
 * each physical I/O issued on behalf of a RAID request.  rf_buf must
 * be the first member so that the struct buf * handed to the disk
 * driver can be cast back to its containing raidbuf.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


/* Allocate/release a raidbuf from the unit's component buffer pool.
 * RAIDGETBUF may return NULL (PR_NOWAIT => no sleeping). */
#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    243 
    244 /* XXX Not sure if the following should be replacing the raidPtrs above,
    245    or if it should be used in conjunction with that... */
    246 
/* Per-unit software state, one entry per configured RAID device.
 * The array itself (raid_softc, numraid entries) is allocated in
 * raidattach(). */
struct raid_softc {
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;        /* size of the raid device */
	dev_t   sc_dev;	        /* our device.. */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf buf_queue;   /* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Map a device minor number to its RAID unit number. */
#define	raidunit(x)	DISKUNIT(x)
static int numraid = 0;		/* units allocated; set by raidattach() */
    266 
    267 /*
    268  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    269  * Be aware that large numbers can allow the driver to consume a lot of
    270  * kernel memory, especially on writes, and in degraded mode reads.
    271  *
    272  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    273  * a single 64K write will typically require 64K for the old data,
    274  * 64K for the old parity, and 64K for the new parity, for a total
    275  * of 192K (if the parity buffer is not re-used immediately).
    276  * Even it if is used immedately, that's still 128K, which when multiplied
    277  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    278  *
    279  * Now in degraded mode, for example, a 64K read on the above setup may
    280  * require data reconstruction, which will require *all* of the 4 remaining
    281  * disks to participate -- 4 * 32K/disk == 128K again.
    282  */
    283 
/* Default cap on simultaneous I/Os per RAID device (see the memory
 * discussion above); overridable from the kernel config. */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* The raw-partition block device for unit's disklabel I/O. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

/* Disklabel handling helpers. */
static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
				     struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

/* Per-unit lock (RAIDF_LOCKED/RAIDF_WANTED) and component lookup. */
static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));
int raidlookup __P((char *, struct proc * p, struct vnode **));

static void rf_markalldirty __P((RF_Raid_t *));
    304 
    305 void
    306 raidattach(num)
    307 	int     num;
    308 {
    309 	int raidID;
    310 	int i, rc;
    311 
    312 #ifdef DEBUG
    313 	printf("raidattach: Asked for %d units\n", num);
    314 #endif
    315 
    316 	if (num <= 0) {
    317 #ifdef DIAGNOSTIC
    318 		panic("raidattach: count <= 0");
    319 #endif
    320 		return;
    321 	}
    322 	/* This is where all the initialization stuff gets done. */
    323 
    324 	/* Make some space for requested number of units... */
    325 
    326 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    327 	if (raidPtrs == NULL) {
    328 		panic("raidPtrs is NULL!!\n");
    329 	}
    330 
    331 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    332 	if (rc) {
    333 		RF_PANIC();
    334 	}
    335 
    336 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    337 	recon_queue = NULL;
    338 
    339 	for (i = 0; i < numraid; i++)
    340 		raidPtrs[i] = NULL;
    341 	rc = rf_BootRaidframe();
    342 	if (rc == 0)
    343 		printf("Kernelized RAIDframe activated\n");
    344 	else
    345 		panic("Serious error booting RAID!!\n");
    346 
    347 	/* put together some datastructures like the CCD device does.. This
    348 	 * lets us lock the device and what-not when it gets opened. */
    349 
    350 	raid_softc = (struct raid_softc *)
    351 	    malloc(num * sizeof(struct raid_softc),
    352 	    M_RAIDFRAME, M_NOWAIT);
    353 	if (raid_softc == NULL) {
    354 		printf("WARNING: no memory for RAIDframe driver\n");
    355 		return;
    356 	}
    357 	numraid = num;
    358 	bzero(raid_softc, num * sizeof(struct raid_softc));
    359 	raid_softc->buf_queue.b_actf = NULL;
    360 	raid_softc->buf_queue.b_actb = &raid_softc->buf_queue.b_actf;
    361 
    362 	for (raidID = 0; raidID < num; raidID++) {
    363 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    364 			  (RF_Raid_t *));
    365 		if (raidPtrs[raidID] == NULL) {
    366 			printf("raidPtrs[%d] is NULL\n", raidID);
    367 		}
    368 	}
    369 }
    370 
    371 
    372 int
    373 raidsize(dev)
    374 	dev_t   dev;
    375 {
    376 	struct raid_softc *rs;
    377 	struct disklabel *lp;
    378 	int     part, unit, omask, size;
    379 
    380 	unit = raidunit(dev);
    381 	if (unit >= numraid)
    382 		return (-1);
    383 	rs = &raid_softc[unit];
    384 
    385 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    386 		return (-1);
    387 
    388 	part = DISKPART(dev);
    389 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    390 	lp = rs->sc_dkdev.dk_label;
    391 
    392 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    393 		return (-1);
    394 
    395 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    396 		size = -1;
    397 	else
    398 		size = lp->d_partitions[part].p_size *
    399 		    (lp->d_secsize / DEV_BSIZE);
    400 
    401 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    402 		return (-1);
    403 
    404 	return (size);
    405 
    406 }
    407 
    408 int
    409 raiddump(dev, blkno, va, size)
    410 	dev_t   dev;
    411 	daddr_t blkno;
    412 	caddr_t va;
    413 	size_t  size;
    414 {
    415 	/* Not implemented. */
    416 	return ENXIO;
    417 }
    418 /* ARGSUSED */
/*
 * raidopen: open entry point for both block and character devices.
 * Takes the per-unit lock, (re)reads the disklabel on the first open
 * of a configured unit, verifies the partition exists, records the
 * open in the appropriate openmask, and marks all components dirty on
 * the very first open so an unclean shutdown is detectable.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against configuration/unconfiguration. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	/* First open of a configured unit: refresh the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	/* dk_openmask is only recomputed below, so it is still 0 here
	 * iff this is the very first open of the unit. */
	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
    497 /* ARGSUSED */
    498 int
    499 raidclose(dev, flags, fmt, p)
    500 	dev_t   dev;
    501 	int     flags, fmt;
    502 	struct proc *p;
    503 {
    504 	int     unit = raidunit(dev);
    505 	struct raid_softc *rs;
    506 	int     error = 0;
    507 	int     part;
    508 
    509 	if (unit >= numraid)
    510 		return (ENXIO);
    511 	rs = &raid_softc[unit];
    512 
    513 	if ((error = raidlock(rs)) != 0)
    514 		return (error);
    515 
    516 	part = DISKPART(dev);
    517 
    518 	/* ...that much closer to allowing unconfiguration... */
    519 	switch (fmt) {
    520 	case S_IFCHR:
    521 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    522 		break;
    523 
    524 	case S_IFBLK:
    525 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    526 		break;
    527 	}
    528 	rs->sc_dkdev.dk_openmask =
    529 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    530 
    531 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    532 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    533 		/* Last one... device is not unconfigured yet.
    534 		   Device shutdown has taken care of setting the
    535 		   clean bits if RAIDF_INITED is not set
    536 		   mark things as clean... */
    537 		rf_update_component_labels( raidPtrs[unit] );
    538 	}
    539 
    540 	raidunlock(rs);
    541 	return (0);
    542 
    543 }
    544 
    545 void
    546 raidstrategy(bp)
    547 	register struct buf *bp;
    548 {
    549 	register int s;
    550 
    551 	unsigned int raidID = raidunit(bp->b_dev);
    552 	RF_Raid_t *raidPtr;
    553 	struct raid_softc *rs = &raid_softc[raidID];
    554 	struct disklabel *lp;
    555 	struct buf *dp;
    556 	int     wlabel;
    557 
    558 #if 0
    559 	db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
    560 	db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
    561 	db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
    562 	db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
    563 	db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
    564 
    565 	if (bp->b_flags & B_READ)
    566 		db1_printf(("READ\n"));
    567 	else
    568 		db1_printf(("WRITE\n"));
    569 #endif
    570 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    571 		bp->b_error = ENXIO;
    572 		bp->b_flags = B_ERROR;
    573 		bp->b_resid = bp->b_bcount;
    574 		biodone(bp);
    575 		return;
    576 	}
    577 	if (raidID >= numraid || !raidPtrs[raidID]) {
    578 		bp->b_error = ENODEV;
    579 		bp->b_flags |= B_ERROR;
    580 		bp->b_resid = bp->b_bcount;
    581 		biodone(bp);
    582 		return;
    583 	}
    584 	raidPtr = raidPtrs[raidID];
    585 	if (!raidPtr->valid) {
    586 		bp->b_error = ENODEV;
    587 		bp->b_flags |= B_ERROR;
    588 		bp->b_resid = bp->b_bcount;
    589 		biodone(bp);
    590 		return;
    591 	}
    592 	if (bp->b_bcount == 0) {
    593 		db1_printf(("b_bcount is zero..\n"));
    594 		biodone(bp);
    595 		return;
    596 	}
    597 	lp = rs->sc_dkdev.dk_label;
    598 
    599 	/*
    600 	 * Do bounds checking and adjust transfer.  If there's an
    601 	 * error, the bounds check will flag that for us.
    602 	 */
    603 
    604 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    605 	if (DISKPART(bp->b_dev) != RAW_PART)
    606 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    607 			db1_printf(("Bounds check failed!!:%d %d\n",
    608 				(int) bp->b_blkno, (int) wlabel));
    609 			biodone(bp);
    610 			return;
    611 		}
    612 	s = splbio();
    613 
    614 	bp->b_resid = 0;
    615 
    616 	/* stuff it onto our queue */
    617 
    618 	dp = &rs->buf_queue;
    619 	bp->b_actf = NULL;
    620 	bp->b_actb = dp->b_actb;
    621 	*dp->b_actb = bp;
    622 	dp->b_actb = &bp->b_actf;
    623 
    624 	raidstart(raidPtrs[raidID]);
    625 
    626 	splx(s);
    627 }
    628 /* ARGSUSED */
    629 int
    630 raidread(dev, uio, flags)
    631 	dev_t   dev;
    632 	struct uio *uio;
    633 	int     flags;
    634 {
    635 	int     unit = raidunit(dev);
    636 	struct raid_softc *rs;
    637 	int     part;
    638 
    639 	if (unit >= numraid)
    640 		return (ENXIO);
    641 	rs = &raid_softc[unit];
    642 
    643 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    644 		return (ENXIO);
    645 	part = DISKPART(dev);
    646 
    647 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    648 
    649 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    650 
    651 }
    652 /* ARGSUSED */
    653 int
    654 raidwrite(dev, uio, flags)
    655 	dev_t   dev;
    656 	struct uio *uio;
    657 	int     flags;
    658 {
    659 	int     unit = raidunit(dev);
    660 	struct raid_softc *rs;
    661 
    662 	if (unit >= numraid)
    663 		return (ENXIO);
    664 	rs = &raid_softc[unit];
    665 
    666 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    667 		return (ENXIO);
    668 	db1_printf(("raidwrite\n"));
    669 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    670 
    671 }
    672 
    673 int
    674 raidioctl(dev, cmd, data, flag, p)
    675 	dev_t   dev;
    676 	u_long  cmd;
    677 	caddr_t data;
    678 	int     flag;
    679 	struct proc *p;
    680 {
    681 	int     unit = raidunit(dev);
    682 	int     error = 0;
    683 	int     part, pmask;
    684 	struct raid_softc *rs;
    685 	RF_Config_t *k_cfg, *u_cfg;
    686 	u_char *specific_buf;
    687 	int retcode = 0;
    688 	int row;
    689 	int column;
    690 	int s;
    691 	struct rf_recon_req *rrcopy, *rr;
    692 	RF_ComponentLabel_t *component_label;
    693 	RF_ComponentLabel_t ci_label;
    694 	RF_ComponentLabel_t **c_label_ptr;
    695 	RF_SingleComponent_t *sparePtr,*componentPtr;
    696 	RF_SingleComponent_t hot_spare;
    697 	RF_SingleComponent_t component;
    698 
    699 	if (unit >= numraid)
    700 		return (ENXIO);
    701 	rs = &raid_softc[unit];
    702 
    703 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    704 		(int) DISKPART(dev), (int) unit, (int) cmd));
    705 
    706 	/* Must be open for writes for these commands... */
    707 	switch (cmd) {
    708 	case DIOCSDINFO:
    709 	case DIOCWDINFO:
    710 	case DIOCWLABEL:
    711 		if ((flag & FWRITE) == 0)
    712 			return (EBADF);
    713 	}
    714 
    715 	/* Must be initialized for these... */
    716 	switch (cmd) {
    717 	case DIOCGDINFO:
    718 	case DIOCSDINFO:
    719 	case DIOCWDINFO:
    720 	case DIOCGPART:
    721 	case DIOCWLABEL:
    722 	case DIOCGDEFLABEL:
    723 	case RAIDFRAME_SHUTDOWN:
    724 	case RAIDFRAME_REWRITEPARITY:
    725 	case RAIDFRAME_GET_INFO:
    726 	case RAIDFRAME_RESET_ACCTOTALS:
    727 	case RAIDFRAME_GET_ACCTOTALS:
    728 	case RAIDFRAME_KEEP_ACCTOTALS:
    729 	case RAIDFRAME_GET_SIZE:
    730 	case RAIDFRAME_FAIL_DISK:
    731 	case RAIDFRAME_COPYBACK:
    732 	case RAIDFRAME_CHECKRECON:
    733 	case RAIDFRAME_GET_COMPONENT_LABEL:
    734 	case RAIDFRAME_SET_COMPONENT_LABEL:
    735 	case RAIDFRAME_ADD_HOT_SPARE:
    736 	case RAIDFRAME_REMOVE_HOT_SPARE:
    737 	case RAIDFRAME_INIT_LABELS:
    738 	case RAIDFRAME_REBUILD_IN_PLACE:
    739 	case RAIDFRAME_CHECK_PARITY:
    740 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    741 			return (ENXIO);
    742 	}
    743 
    744 	switch (cmd) {
    745 
    746 
    747 		/* configure the system */
    748 	case RAIDFRAME_CONFIGURE:
    749 
    750 		db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
    751 		/* copy-in the configuration information */
    752 		/* data points to a pointer to the configuration structure */
    753 		u_cfg = *((RF_Config_t **) data);
    754 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    755 		if (k_cfg == NULL) {
    756 			db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
    757 			return (ENOMEM);
    758 		}
    759 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    760 		    sizeof(RF_Config_t));
    761 		if (retcode) {
    762 			RF_Free(k_cfg, sizeof(RF_Config_t));
    763 			db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
    764 				retcode));
    765 			return (retcode);
    766 		}
    767 		/* allocate a buffer for the layout-specific data, and copy it
    768 		 * in */
    769 		if (k_cfg->layoutSpecificSize) {
    770 			if (k_cfg->layoutSpecificSize > 10000) {
    771 				/* sanity check */
    772 				RF_Free(k_cfg, sizeof(RF_Config_t));
    773 				db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
    774 				return (EINVAL);
    775 			}
    776 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    777 			    (u_char *));
    778 			if (specific_buf == NULL) {
    779 				RF_Free(k_cfg, sizeof(RF_Config_t));
    780 				db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
    781 				return (ENOMEM);
    782 			}
    783 			retcode = copyin(k_cfg->layoutSpecific,
    784 			    (caddr_t) specific_buf,
    785 			    k_cfg->layoutSpecificSize);
    786 			if (retcode) {
    787 				RF_Free(k_cfg, sizeof(RF_Config_t));
    788 				RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    789 				db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
    790 					retcode));
    791 				return (retcode);
    792 			}
    793 		} else
    794 			specific_buf = NULL;
    795 		k_cfg->layoutSpecific = specific_buf;
    796 
    797 		/* should do some kind of sanity check on the configuration.
    798 		 * Store the sum of all the bytes in the last byte? */
    799 
    800 		/* configure the system */
    801 
    802 		raidPtrs[unit]->raidid = unit;
    803 
    804 		retcode = rf_Configure(raidPtrs[unit], k_cfg);
    805 
    806 		/* allow this many simultaneous IO's to this RAID device */
    807 		raidPtrs[unit]->openings = RAIDOUTSTANDING;
    808 
    809 		if (retcode == 0) {
    810 			retcode = raidinit(dev, raidPtrs[unit], unit);
    811 			rf_markalldirty( raidPtrs[unit] );
    812 		}
    813 		/* free the buffers.  No return code here. */
    814 		if (k_cfg->layoutSpecificSize) {
    815 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    816 		}
    817 		RF_Free(k_cfg, sizeof(RF_Config_t));
    818 
    819 		db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
    820 			retcode));
    821 
    822 		return (retcode);
    823 
    824 		/* shutdown the system */
    825 	case RAIDFRAME_SHUTDOWN:
    826 
    827 		if ((error = raidlock(rs)) != 0)
    828 			return (error);
    829 
    830 		/*
    831 		 * If somebody has a partition mounted, we shouldn't
    832 		 * shutdown.
    833 		 */
    834 
    835 		part = DISKPART(dev);
    836 		pmask = (1 << part);
    837 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    838 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    839 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    840 			raidunlock(rs);
    841 			return (EBUSY);
    842 		}
    843 
    844 		if (rf_debugKernelAccess) {
    845 			printf("call shutdown\n");
    846 		}
    847 
    848 		retcode = rf_Shutdown(raidPtrs[unit]);
    849 
    850 		db1_printf(("Done main shutdown\n"));
    851 
    852 		pool_destroy(&rs->sc_cbufpool);
    853 		db1_printf(("Done freeing component buffer freelist\n"));
    854 
    855 		/* It's no longer initialized... */
    856 		rs->sc_flags &= ~RAIDF_INITED;
    857 
    858 		/* Detach the disk. */
    859 		disk_detach(&rs->sc_dkdev);
    860 
    861 		raidunlock(rs);
    862 
    863 		return (retcode);
    864 	case RAIDFRAME_GET_COMPONENT_LABEL:
    865 		c_label_ptr = (RF_ComponentLabel_t **) data;
    866 		/* need to read the component label for the disk indicated
    867 		   by row,column in component_label
    868 		   XXX need to sanity check these values!!!
    869 		   */
    870 
    871 		/* For practice, let's get it directly fromdisk, rather
    872 		   than from the in-core copy */
    873 		RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
    874 			   (RF_ComponentLabel_t *));
    875 		if (component_label == NULL)
    876 			return (ENOMEM);
    877 
    878 		bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
    879 
    880 		retcode = copyin( *c_label_ptr, component_label,
    881 				  sizeof(RF_ComponentLabel_t));
    882 
    883 		if (retcode) {
    884 			RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    885 			return(retcode);
    886 		}
    887 
    888 		row = component_label->row;
    889 		column = component_label->column;
    890 
    891 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
    892 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
    893 			RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    894 			return(EINVAL);
    895 		}
    896 
    897 		raidread_component_label(
    898                               raidPtrs[unit]->Disks[row][column].dev,
    899 			      raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    900 			      component_label );
    901 
    902 		retcode = copyout((caddr_t) component_label,
    903 				  (caddr_t) *c_label_ptr,
    904 				  sizeof(RF_ComponentLabel_t));
    905 		RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    906 		return (retcode);
    907 
    908 	case RAIDFRAME_SET_COMPONENT_LABEL:
    909 		component_label = (RF_ComponentLabel_t *) data;
    910 
    911 		/* XXX check the label for valid stuff... */
    912 		/* Note that some things *should not* get modified --
    913 		   the user should be re-initing the labels instead of
    914 		   trying to patch things.
    915 		   */
    916 
    917 		printf("Got component label:\n");
    918 		printf("Version: %d\n",component_label->version);
    919 		printf("Serial Number: %d\n",component_label->serial_number);
    920 		printf("Mod counter: %d\n",component_label->mod_counter);
    921 		printf("Row: %d\n", component_label->row);
    922 		printf("Column: %d\n", component_label->column);
    923 		printf("Num Rows: %d\n", component_label->num_rows);
    924 		printf("Num Columns: %d\n", component_label->num_columns);
    925 		printf("Clean: %d\n", component_label->clean);
    926 		printf("Status: %d\n", component_label->status);
    927 
    928 		row = component_label->row;
    929 		column = component_label->column;
    930 
    931 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
    932 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
    933 			return(EINVAL);
    934 		}
    935 
    936 		/* XXX this isn't allowed to do anything for now :-) */
    937 #if 0
    938 		raidwrite_component_label(
    939                             raidPtrs[unit]->Disks[row][column].dev,
    940 			    raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    941 			    component_label );
    942 #endif
    943 		return (0);
    944 
    945 	case RAIDFRAME_INIT_LABELS:
    946 		component_label = (RF_ComponentLabel_t *) data;
    947 		/*
    948 		   we only want the serial number from
    949 		   the above.  We get all the rest of the information
    950 		   from the config that was used to create this RAID
    951 		   set.
    952 		   */
    953 
    954 		raidPtrs[unit]->serial_number = component_label->serial_number;
    955 		/* current version number */
    956 		ci_label.version = RF_COMPONENT_LABEL_VERSION;
    957 		ci_label.serial_number = component_label->serial_number;
    958 		ci_label.mod_counter = raidPtrs[unit]->mod_counter;
    959 		ci_label.num_rows = raidPtrs[unit]->numRow;
    960 		ci_label.num_columns = raidPtrs[unit]->numCol;
    961 		ci_label.clean = RF_RAID_DIRTY; /* not clean */
    962 		ci_label.status = rf_ds_optimal; /* "It's good!" */
    963 
    964 		for(row=0;row<raidPtrs[unit]->numRow;row++) {
    965 			ci_label.row = row;
    966 			for(column=0;column<raidPtrs[unit]->numCol;column++) {
    967 				ci_label.column = column;
    968 				raidwrite_component_label(
    969 				  raidPtrs[unit]->Disks[row][column].dev,
    970 				  raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    971 				  &ci_label );
    972 			}
    973 		}
    974 
    975 		return (retcode);
    976 
    977 		/* initialize all parity */
    978 	case RAIDFRAME_REWRITEPARITY:
    979 
    980 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
    981 			/* Parity for RAID 0 is trivially correct */
    982 			raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
    983 			return(0);
    984 		}
    985 
    986 		/* borrow the thread of the requesting process */
    987 
    988 		s = splbio();
    989 		retcode = rf_RewriteParity(raidPtrs[unit]);
    990 		splx(s);
    991 		/* return I/O Error if the parity rewrite fails */
    992 
    993 		if (retcode) {
    994 			retcode = EIO;
    995 		} else {
    996 			/* set the clean bit!  If we shutdown correctly,
    997 			 the clean bit on each component label will get
    998 			 set */
    999 			raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
   1000 		}
   1001 		return (retcode);
   1002 
   1003 
   1004 	case RAIDFRAME_ADD_HOT_SPARE:
   1005 		sparePtr = (RF_SingleComponent_t *) data;
   1006 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1007 		printf("Adding spare\n");
   1008 		retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
   1009 		return(retcode);
   1010 
   1011 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1012 		return(retcode);
   1013 
   1014 	case RAIDFRAME_REBUILD_IN_PLACE:
   1015 
   1016 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1017 			/* Can't do this on a RAID 0!! */
   1018 			return(EINVAL);
   1019 		}
   1020 
   1021 		componentPtr = (RF_SingleComponent_t *) data;
   1022 		memcpy( &component, componentPtr,
   1023 			sizeof(RF_SingleComponent_t));
   1024 		row = component.row;
   1025 		column = component.column;
   1026 		printf("Rebuild: %d %d\n",row, column);
   1027 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
   1028 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
   1029 			return(EINVAL);
   1030 		}
   1031 		printf("Attempting a rebuild in place\n");
   1032 		s = splbio();
   1033 		retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
   1034 		splx(s);
   1035 		return(retcode);
   1036 
   1037 	case RAIDFRAME_GET_INFO:
   1038 		{
   1039 			RF_Raid_t *raid = raidPtrs[unit];
   1040 			RF_DeviceConfig_t *cfg, **ucfgp;
   1041 			int     i, j, d;
   1042 
   1043 			if (!raid->valid)
   1044 				return (ENODEV);
   1045 			ucfgp = (RF_DeviceConfig_t **) data;
   1046 			RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
   1047 				  (RF_DeviceConfig_t *));
   1048 			if (cfg == NULL)
   1049 				return (ENOMEM);
   1050 			bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
   1051 			cfg->rows = raid->numRow;
   1052 			cfg->cols = raid->numCol;
   1053 			cfg->ndevs = raid->numRow * raid->numCol;
   1054 			if (cfg->ndevs >= RF_MAX_DISKS) {
   1055 				RF_Free(cfg, sizeof(RF_DeviceConfig_t));
   1056 				return (ENOMEM);
   1057 			}
   1058 			cfg->nspares = raid->numSpare;
   1059 			if (cfg->nspares >= RF_MAX_DISKS) {
   1060 				RF_Free(cfg, sizeof(RF_DeviceConfig_t));
   1061 				return (ENOMEM);
   1062 			}
   1063 			cfg->maxqdepth = raid->maxQueueDepth;
   1064 			d = 0;
   1065 			for (i = 0; i < cfg->rows; i++) {
   1066 				for (j = 0; j < cfg->cols; j++) {
   1067 					cfg->devs[d] = raid->Disks[i][j];
   1068 					d++;
   1069 				}
   1070 			}
   1071 			for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
   1072 				cfg->spares[i] = raid->Disks[0][j];
   1073 			}
   1074 			retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
   1075 					  sizeof(RF_DeviceConfig_t));
   1076 			RF_Free(cfg, sizeof(RF_DeviceConfig_t));
   1077 
   1078 			return (retcode);
   1079 		}
   1080 		break;
   1081 	case RAIDFRAME_CHECK_PARITY:
   1082 		*(int *) data = raidPtrs[unit]->parity_good;
   1083 		return (0);
   1084 	case RAIDFRAME_RESET_ACCTOTALS:
   1085 		{
   1086 			RF_Raid_t *raid = raidPtrs[unit];
   1087 
   1088 			bzero(&raid->acc_totals, sizeof(raid->acc_totals));
   1089 			return (0);
   1090 		}
   1091 		break;
   1092 
   1093 	case RAIDFRAME_GET_ACCTOTALS:
   1094 		{
   1095 			RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
   1096 			RF_Raid_t *raid = raidPtrs[unit];
   1097 
   1098 			*totals = raid->acc_totals;
   1099 			return (0);
   1100 		}
   1101 		break;
   1102 
   1103 	case RAIDFRAME_KEEP_ACCTOTALS:
   1104 		{
   1105 			RF_Raid_t *raid = raidPtrs[unit];
   1106 			int    *keep = (int *) data;
   1107 
   1108 			raid->keep_acc_totals = *keep;
   1109 			return (0);
   1110 		}
   1111 		break;
   1112 
   1113 	case RAIDFRAME_GET_SIZE:
   1114 		*(int *) data = raidPtrs[unit]->totalSectors;
   1115 		return (0);
   1116 
   1117 #define RAIDFRAME_RECON 1
   1118 		/* XXX The above should probably be set somewhere else!! GO */
   1119 #if RAIDFRAME_RECON > 0
   1120 
   1121 		/* fail a disk & optionally start reconstruction */
   1122 	case RAIDFRAME_FAIL_DISK:
   1123 
   1124 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1125 			/* Can't do this on a RAID 0!! */
   1126 			return(EINVAL);
   1127 		}
   1128 
   1129 		rr = (struct rf_recon_req *) data;
   1130 
   1131 		if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
   1132 		    || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
   1133 			return (EINVAL);
   1134 
   1135 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1136 		       unit, rr->row, rr->col);
   1137 
   1138 		/* make a copy of the recon request so that we don't rely on
   1139 		 * the user's buffer */
   1140 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1141 		bcopy(rr, rrcopy, sizeof(*rr));
   1142 		rrcopy->raidPtr = (void *) raidPtrs[unit];
   1143 
   1144 		LOCK_RECON_Q_MUTEX();
   1145 		rrcopy->next = recon_queue;
   1146 		recon_queue = rrcopy;
   1147 		wakeup(&recon_queue);
   1148 		UNLOCK_RECON_Q_MUTEX();
   1149 
   1150 		return (0);
   1151 
   1152 		/* invoke a copyback operation after recon on whatever disk
   1153 		 * needs it, if any */
   1154 	case RAIDFRAME_COPYBACK:
   1155 
   1156 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1157 			/* This makes no sense on a RAID 0!! */
   1158 			return(EINVAL);
   1159 		}
   1160 
   1161 		/* borrow the current thread to get this done */
   1162 
   1163 		s = splbio();
   1164 		rf_CopybackReconstructedData(raidPtrs[unit]);
   1165 		splx(s);
   1166 		return (0);
   1167 
   1168 		/* return the percentage completion of reconstruction */
   1169 	case RAIDFRAME_CHECKRECON:
   1170 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1171 			/* This makes no sense on a RAID 0 */
   1172 			return(EINVAL);
   1173 		}
   1174 
   1175 		row = *(int *) data;
   1176 		if (row < 0 || row >= raidPtrs[unit]->numRow)
   1177 			return (EINVAL);
   1178 		if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
   1179 			*(int *) data = 100;
   1180 		else
   1181 			*(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
   1182 		return (0);
   1183 
   1184 		/* the sparetable daemon calls this to wait for the kernel to
   1185 		 * need a spare table. this ioctl does not return until a
   1186 		 * spare table is needed. XXX -- calling mpsleep here in the
   1187 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1188 		 * -- I should either compute the spare table in the kernel,
   1189 		 * or have a different -- XXX XXX -- interface (a different
   1190 		 * character device) for delivering the table          -- XXX */
   1191 #if 0
   1192 	case RAIDFRAME_SPARET_WAIT:
   1193 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1194 		while (!rf_sparet_wait_queue)
   1195 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1196 		waitreq = rf_sparet_wait_queue;
   1197 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1198 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1199 
   1200 		*((RF_SparetWait_t *) data) = *waitreq;	/* structure assignment */
   1201 
   1202 		RF_Free(waitreq, sizeof(*waitreq));
   1203 		return (0);
   1204 
   1205 
   1206 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1207 		 * code in it that will cause the dameon to exit */
   1208 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1209 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1210 		waitreq->fcol = -1;
   1211 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1212 		waitreq->next = rf_sparet_wait_queue;
   1213 		rf_sparet_wait_queue = waitreq;
   1214 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1215 		wakeup(&rf_sparet_wait_queue);
   1216 		return (0);
   1217 
   1218 		/* used by the spare table daemon to deliver a spare table
   1219 		 * into the kernel */
   1220 	case RAIDFRAME_SEND_SPARET:
   1221 
   1222 		/* install the spare table */
   1223 		retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
   1224 
   1225 		/* respond to the requestor.  the return status of the spare
   1226 		 * table installation is passed in the "fcol" field */
   1227 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1228 		waitreq->fcol = retcode;
   1229 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1230 		waitreq->next = rf_sparet_resp_queue;
   1231 		rf_sparet_resp_queue = waitreq;
   1232 		wakeup(&rf_sparet_resp_queue);
   1233 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1234 
   1235 		return (retcode);
   1236 #endif
   1237 
   1238 
   1239 #endif				/* RAIDFRAME_RECON > 0 */
   1240 
   1241 	default:
   1242 		break;		/* fall through to the os-specific code below */
   1243 
   1244 	}
   1245 
   1246 	if (!raidPtrs[unit]->valid)
   1247 		return (EINVAL);
   1248 
   1249 	/*
   1250 	 * Add support for "regular" device ioctls here.
   1251 	 */
   1252 
   1253 	switch (cmd) {
   1254 	case DIOCGDINFO:
   1255 		db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
   1256 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1257 		break;
   1258 
   1259 	case DIOCGPART:
   1260 		db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
   1261 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1262 		((struct partinfo *) data)->part =
   1263 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1264 		break;
   1265 
   1266 	case DIOCWDINFO:
   1267 		db1_printf(("DIOCWDINFO\n"));
   1268 	case DIOCSDINFO:
   1269 		db1_printf(("DIOCSDINFO\n"));
   1270 		if ((error = raidlock(rs)) != 0)
   1271 			return (error);
   1272 
   1273 		rs->sc_flags |= RAIDF_LABELLING;
   1274 
   1275 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1276 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1277 		if (error == 0) {
   1278 			if (cmd == DIOCWDINFO)
   1279 				error = writedisklabel(RAIDLABELDEV(dev),
   1280 				    raidstrategy, rs->sc_dkdev.dk_label,
   1281 				    rs->sc_dkdev.dk_cpulabel);
   1282 		}
   1283 		rs->sc_flags &= ~RAIDF_LABELLING;
   1284 
   1285 		raidunlock(rs);
   1286 
   1287 		if (error)
   1288 			return (error);
   1289 		break;
   1290 
   1291 	case DIOCWLABEL:
   1292 		db1_printf(("DIOCWLABEL\n"));
   1293 		if (*(int *) data != 0)
   1294 			rs->sc_flags |= RAIDF_WLABEL;
   1295 		else
   1296 			rs->sc_flags &= ~RAIDF_WLABEL;
   1297 		break;
   1298 
   1299 	case DIOCGDEFLABEL:
   1300 		db1_printf(("DIOCGDEFLABEL\n"));
   1301 		raidgetdefaultlabel(raidPtrs[unit], rs,
   1302 		    (struct disklabel *) data);
   1303 		break;
   1304 
   1305 	default:
   1306 		retcode = ENOTTY;	/* XXXX ?? OR EINVAL ? */
   1307 	}
   1308 	return (retcode);
   1309 
   1310 }
   1311 
   1312 
   1313 /* raidinit -- complete the rest of the initialization for the
   1314    RAIDframe device.  */
   1315 
   1316 
   1317 static int
   1318 raidinit(dev, raidPtr, unit)
   1319 	dev_t   dev;
   1320 	RF_Raid_t *raidPtr;
   1321 	int     unit;
   1322 {
   1323 	int     retcode;
   1324 	/* int ix; */
   1325 	/* struct raidbuf *raidbp; */
   1326 	struct raid_softc *rs;
   1327 
   1328 	retcode = 0;
   1329 
   1330 	rs = &raid_softc[unit];
   1331 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1332 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1333 
   1334 
   1335 	/* XXX should check return code first... */
   1336 	rs->sc_flags |= RAIDF_INITED;
   1337 
   1338 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1339 
   1340 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1341 
   1342 	/* disk_attach actually creates space for the CPU disklabel, among
   1343 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1344 	 * with disklabels. */
   1345 
   1346 	disk_attach(&rs->sc_dkdev);
   1347 
   1348 	/* XXX There may be a weird interaction here between this, and
   1349 	 * protectedSectors, as used in RAIDframe.  */
   1350 
   1351 	rs->sc_size = raidPtr->totalSectors;
   1352 	rs->sc_dev = dev;
   1353 
   1354 	return (retcode);
   1355 }
   1356 
   1357 /*
   1358  * This kernel thread never exits.  It is created once, and persists
   1359  * until the system reboots.
   1360  */
   1361 
   1362 void
   1363 rf_ReconKernelThread()
   1364 {
   1365 	struct rf_recon_req *req;
   1366 	int     s;
   1367 
   1368 	/* XXX not sure what spl() level we should be at here... probably
   1369 	 * splbio() */
   1370 	s = splbio();
   1371 
   1372 	while (1) {
   1373 		/* grab the next reconstruction request from the queue */
   1374 		LOCK_RECON_Q_MUTEX();
   1375 		while (!recon_queue) {
   1376 			UNLOCK_RECON_Q_MUTEX();
   1377 			tsleep(&recon_queue, PRIBIO,
   1378 			       "raidframe recon", 0);
   1379 			LOCK_RECON_Q_MUTEX();
   1380 		}
   1381 		req = recon_queue;
   1382 		recon_queue = recon_queue->next;
   1383 		UNLOCK_RECON_Q_MUTEX();
   1384 
   1385 		/*
   1386 	         * If flags specifies that we should start recon, this call
   1387 	         * will not return until reconstruction completes, fails,
   1388 		 * or is aborted.
   1389 	         */
   1390 		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   1391 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   1392 
   1393 		RF_Free(req, sizeof(*req));
   1394 	}
   1395 }
/* Wake up the spare-table daemon and block until it delivers a spare
 * table; returns the status the daemon placed in the response's fcol
 * field.
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode;

	/* Post our request on the wait queue and wake the daemon, which
	 * sleeps on rf_sparet_wait_queue (see RAIDFRAME_SPARET_WAIT). */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* NOTE(review): the original mpsleep released the mutex while
	 * asleep; tsleep does not, so rf_sparet_wait_mutex appears to be
	 * held across this sleep -- confirm RF_LOCK_MUTEX is benign here
	 * (e.g. a no-op on this platform). */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The daemon passes the installation status back in fcol. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Drains the per-unit buffer queue (rs->buf_queue) while the RAID set
 * still has openings, translating each buf's partition-relative block
 * number to an absolute RAID address and dispatching it asynchronously.
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int     retcode;
	struct partition *pp;
	daddr_t blocknum;
	int     unit;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;
	struct buf *dp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Check to see if we're at the limit... */
	RF_LOCK_MUTEX(raidPtr->mutex);
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		dp = &rs->buf_queue;
		bp = dp->b_actf;
		if (bp == NULL) {
			/* nothing more to do */
			return;
		}

		/* Unlink bp from the doubly-linked b_actf/b_actb queue,
		 * fixing up the tail pointer when bp was the last entry. */
		dp = bp->b_actf;
		if (dp != NULL) {
			dp->b_actb = bp->b_actb;
		} else {
			rs->buf_queue.b_actb = bp->b_actb;
		}
		*bp->b_actb = dp;

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			/* add the partition's start offset */
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* pb is 1 if the request has a partial trailing sector */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* NOTE(review): "1 ||" forces this debug printf on
		 * unconditionally -- presumably a leftover; confirm. */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Reject requests past the end of the array; the "sum <"
		 * comparisons also catch wraparound (overflow). */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that aren't a multiple of the sector
		 * size */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* claim one of the openings before dispatching */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* NOTE(review): retcode is not examined after this call;
		 * completion/errors are reported via the async path. */
		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				      do_async, raid_addr, num_blocks,
				      bp->b_un.b_addr, bp, NULL, NULL,
				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);


		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
   1555 
   1556 
   1557 
   1558 
/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry.
 * Wraps the request in a struct raidbuf and hands it to the underlying
 * component via VOP_STRATEGY(); KernelWakeupFunc() runs at completion.
 * Always returns 0. */

int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int     unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* NOTE(review): RAIDGETBUF() result is used unchecked -- confirm
	 * it cannot return NULL (e.g. pool with wait semantics). */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): extra parens are harmless -- the string is
		 * still the format argument. */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this??  */

		/* complete immediately -- no real I/O for a NOP */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* fill in the buf for the component I/O; KernelWakeupFunc
		 * is installed as the b_iodone callback */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must bump the vnode's output counter */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code (installed as b_iodone by InitBP).  Propagates status from
   the component buf back to the original buf, marks the component failed
   on error, releases the raidbuf, and notifies the disk queue.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* vbp is really the raidbuf we dispatched (rf_buf is its first
	 * member), so this cast recovers our context */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int     unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate an error on the component I/O to the original buf */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* account the physical I/O time in the trace record, if any */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* return the raidbuf to the unit's pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* tell the disk queue we're done, then run the upper-layer
	 * completion function with the error status */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
   1776 
   1777 
   1778 
   1779 /*
   1780  * initialize a buf structure for doing an I/O in the kernel.
   1781  */
   1782 static void
   1783 InitBP(
   1784     struct buf * bp,
   1785     struct vnode * b_vp,
   1786     unsigned rw_flag,
   1787     dev_t dev,
   1788     RF_SectorNum_t startSect,
   1789     RF_SectorCount_t numSect,
   1790     caddr_t buf,
   1791     void (*cbFunc) (struct buf *),
   1792     void *cbArg,
   1793     int logBytesPerSector,
   1794     struct proc * b_proc)
   1795 {
   1796 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1797 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1798 	bp->b_bcount = numSect << logBytesPerSector;
   1799 	bp->b_bufsize = bp->b_bcount;
   1800 	bp->b_error = 0;
   1801 	bp->b_dev = dev;
   1802 	db1_printf(("bp->b_dev is %d\n", dev));
   1803 	bp->b_un.b_addr = buf;
   1804 #if 0
   1805 	db1_printf(("bp->b_data=0x%x\n", bp->b_data));
   1806 #endif
   1807 	bp->b_blkno = startSect;
   1808 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1809 	db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
   1810 	if (bp->b_bcount == 0) {
   1811 		panic("bp->b_bcount is zero in InitBP!!\n");
   1812 	}
   1813 	bp->b_proc = b_proc;
   1814 	bp->b_iodone = cbFunc;
   1815 	bp->b_vp = b_vp;
   1816 
   1817 }
   1818 
   1819 static void
   1820 raidgetdefaultlabel(raidPtr, rs, lp)
   1821 	RF_Raid_t *raidPtr;
   1822 	struct raid_softc *rs;
   1823 	struct disklabel *lp;
   1824 {
   1825 	db1_printf(("Building a default label...\n"));
   1826 	bzero(lp, sizeof(*lp));
   1827 
   1828 	/* fabricate a label... */
   1829 	lp->d_secperunit = raidPtr->totalSectors;
   1830 	lp->d_secsize = raidPtr->bytesPerSector;
   1831 	lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
   1832 	lp->d_ntracks = 1;
   1833 	lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
   1834 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1835 
   1836 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1837 	lp->d_type = DTYPE_RAID;
   1838 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1839 	lp->d_rpm = 3600;
   1840 	lp->d_interleave = 1;
   1841 	lp->d_flags = 0;
   1842 
   1843 	lp->d_partitions[RAW_PART].p_offset = 0;
   1844 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1845 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1846 	lp->d_npartitions = RAW_PART + 1;
   1847 
   1848 	lp->d_magic = DISKMAGIC;
   1849 	lp->d_magic2 = DISKMAGIC;
   1850 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1851 
   1852 }
   1853 /*
   1854  * Read the disklabel from the raid device.  If one is not present, fake one
   1855  * up.
   1856  */
static void
raidgetdisklabel(dev)
	dev_t   dev;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char   *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	/* Start with a clean machine-dependent label. */
	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* Pre-load a fabricated default; readdisklabel() below will
	 * replace it if a real on-disk label is found. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 * readdisklabel() returns NULL on success, or an error string.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int     i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since the total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * the same components are used, and an old disklabel may be
		 * used if one is found.  Mismatches are only warned about,
		 * never corrected here.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
   1910 /*
   1911  * Take care of things one might want to take care of in the event
   1912  * that a disklabel isn't present.
   1913  */
   1914 static void
   1915 raidmakedisklabel(rs)
   1916 	struct raid_softc *rs;
   1917 {
   1918 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1919 	db1_printf(("Making a label..\n"));
   1920 
   1921 	/*
   1922 	 * For historical reasons, if there's no disklabel present
   1923 	 * the raw partition must be marked FS_BSDFFS.
   1924 	 */
   1925 
   1926 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1927 
   1928 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1929 
   1930 	lp->d_checksum = dkcksum(lp);
   1931 }
   1932 /*
   1933  * Lookup the provided name in the filesystem.  If the file exists,
   1934  * is a valid block device, and isn't being used by anyone else,
   1935  * set *vpp to the file's vnode.
   1936  * You'll find the original of this in ccd.c
   1937  */
   1938 int
   1939 raidlookup(path, p, vpp)
   1940 	char   *path;
   1941 	struct proc *p;
   1942 	struct vnode **vpp;	/* result */
   1943 {
   1944 	struct nameidata nd;
   1945 	struct vnode *vp;
   1946 	struct vattr va;
   1947 	int     error;
   1948 
   1949 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   1950 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   1951 #ifdef DEBUG
   1952 		printf("RAIDframe: vn_open returned %d\n", error);
   1953 #endif
   1954 		return (error);
   1955 	}
   1956 	vp = nd.ni_vp;
   1957 	if (vp->v_usecount > 1) {
   1958 		VOP_UNLOCK(vp, 0);
   1959 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1960 		return (EBUSY);
   1961 	}
   1962 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   1963 		VOP_UNLOCK(vp, 0);
   1964 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1965 		return (error);
   1966 	}
   1967 	/* XXX: eventually we should handle VREG, too. */
   1968 	if (va.va_type != VBLK) {
   1969 		VOP_UNLOCK(vp, 0);
   1970 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1971 		return (ENOTBLK);
   1972 	}
   1973 	VOP_UNLOCK(vp, 0);
   1974 	*vpp = vp;
   1975 	return (0);
   1976 }
   1977 /*
   1978  * Wait interruptibly for an exclusive lock.
   1979  *
   1980  * XXX
   1981  * Several drivers do this; it should be abstracted and made MP-safe.
   1982  * (Hmm... where have we seen this warning before :->  GO )
   1983  */
   1984 static int
   1985 raidlock(rs)
   1986 	struct raid_softc *rs;
   1987 {
   1988 	int     error;
   1989 
   1990 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   1991 		rs->sc_flags |= RAIDF_WANTED;
   1992 		if ((error =
   1993 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   1994 			return (error);
   1995 	}
   1996 	rs->sc_flags |= RAIDF_LOCKED;
   1997 	return (0);
   1998 }
   1999 /*
   2000  * Unlock and wake up any waiters.
   2001  */
   2002 static void
   2003 raidunlock(rs)
   2004 	struct raid_softc *rs;
   2005 {
   2006 
   2007 	rs->sc_flags &= ~RAIDF_LOCKED;
   2008 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2009 		rs->sc_flags &= ~RAIDF_WANTED;
   2010 		wakeup(rs);
   2011 	}
   2012 }
   2013 
   2014 
   2015 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2016 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2017 
   2018 int
   2019 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2020 {
   2021 	RF_ComponentLabel_t component_label;
   2022 	raidread_component_label(dev, b_vp, &component_label);
   2023 	component_label.mod_counter = mod_counter;
   2024 	component_label.clean = RF_RAID_CLEAN;
   2025 	raidwrite_component_label(dev, b_vp, &component_label);
   2026 	return(0);
   2027 }
   2028 
   2029 
   2030 int
   2031 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2032 {
   2033 	RF_ComponentLabel_t component_label;
   2034 	raidread_component_label(dev, b_vp, &component_label);
   2035 	component_label.mod_counter = mod_counter;
   2036 	component_label.clean = RF_RAID_DIRTY;
   2037 	raidwrite_component_label(dev, b_vp, &component_label);
   2038 	return(0);
   2039 }
   2040 
   2041 /* ARGSUSED */
/*
 * Read the RAIDframe component label from the given device into
 * *component_label.  Does a raw, synchronous read of
 * RF_COMPONENT_INFO_SIZE bytes at byte offset RF_COMPONENT_INFO_OFFSET
 * through the device's strategy routine.  Returns 0 on success or the
 * error from biowait().  b_vp is unused here (hence ARGSUSED above).
 */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is normally in bytes, but this sets it to
	   a block count (SIZE / DEV_BSIZE) -- verify intent. */
 	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* Hand the buffer straight to the block device's strategy
	 * routine and wait for the I/O to complete. */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		/* Only the label-sized prefix of the block is meaningful. */
		memcpy(component_label, bp->b_un.b_addr,
		       sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
        } else {
		printf("Failed to read RAID component label!\n");
	}

	/* Throw the buffer away rather than caching stale label data. */
        bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
   2091 /* ARGSUSED */
/*
 * Write *component_label to the component label area of the given
 * device: a raw, synchronous write of RF_COMPONENT_INFO_SIZE bytes at
 * byte offset RF_COMPONENT_INFO_OFFSET, with the area beyond the label
 * struct zero-filled.  Returns 0 on success or the error from
 * biowait().  b_vp is unused here (hence ARGSUSED above).
 */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid is normally in bytes, but this sets it to
	   a block count (SIZE / DEV_BSIZE) -- verify intent. */
 	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* Zero the whole block so the tail past the label is clean. */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* Discard the buffer; don't cache label data. */
        bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
   2125 
/*
 * Bump the array's mod_counter and mark the component label of every
 * non-failed component as dirty on disk (via raidmarkdirty()).
 * Components whose on-disk label says rf_ds_spared are skipped
 * entirely -- see the XXX below.  The trailing #if 0 block is dead
 * code for spare handling; note it references variables (sparecol, r,
 * i, j, srow, scol) that are not declared here, so it will not compile
 * if simply re-enabled.
 */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				/* Read the current label so we can check
				 * whether this slot is a spared component. */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
   2220 
   2221 
/*
 * Rewrite the on-disk component labels to reflect the array's current
 * state: bump mod_counter, re-stamp every rf_ds_optimal component's
 * label, and rebuild the labels of any in-use spares.  If the array's
 * parity is known good (parity_good == RF_RAID_CLEAN) each updated
 * component is additionally marked clean via raidmarkclean().
 * Components that are not optimal (and not in-use spares) are left
 * untouched -- see the #if 0 block.
 */
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	/* Sentinel values: if the spare-mapping search below finds no
	 * match, these (-1) end up in the written label. */
	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				/* Read-modify-write the label so only the
				 * status field is refreshed here. */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			else if (raidPtr->Disks[r][c].status !=
				   rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	/* Spares live past numCol in row 0; this loop only handles
	 * single-row arrays (note the Disks[0][sparecol] accesses). */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which (row, col) this spare stands in for,
			 * so its label records the replaced position. */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
					       raidPtr->mod_counter);
			}
		}
	}
	/* 	printf("Component labels updated\n"); */
}
   2338