/* rf_netbsdkintf.c revision 1.32 (NetBSD source browser export) */
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.32 1999/12/03 02:43:22 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "rf_raid.h"
    139 #include "rf_raidframe.h"
    140 #include "rf_dag.h"
    141 #include "rf_dagflags.h"
    142 #include "rf_diskqueue.h"
    143 #include "rf_acctrace.h"
    144 #include "rf_etimer.h"
    145 #include "rf_general.h"
    146 #include "rf_debugMem.h"
    147 #include "rf_kintf.h"
    148 #include "rf_options.h"
    149 #include "rf_driver.h"
    150 #include "rf_parityscan.h"
    151 #include "rf_debugprint.h"
    152 #include "rf_threadstuff.h"
    153 
    154 int     rf_kdebug_level = 0;
    155 
    156 #ifdef DEBUG
    157 #define db0_printf(a) printf a
    158 #define db_printf(a) if (rf_kdebug_level > 0) printf a
    159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    160 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
    161 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
    162 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
    163 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
    164 #else				/* DEBUG */
    165 #define db0_printf(a) printf a
    166 #define db1_printf(a) { }
    167 #define db2_printf(a) { }
    168 #define db3_printf(a) { }
    169 #define db4_printf(a) { }
    170 #define db5_printf(a) { }
    171 #endif				/* DEBUG */
    172 
    173 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    174 
    175 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    176 
    177 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    178 						 * spare table */
    179 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    180 						 * installation process */
    181 
    182 static struct rf_recon_req *recon_queue = NULL;	/* used to communicate
    183 						 * reconstruction
    184 						 * requests */
    185 
    186 
    187 decl_simple_lock_data(, recon_queue_mutex)
    188 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
    189 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
    190 
    191 /* prototypes */
    192 static void KernelWakeupFunc(struct buf * bp);
    193 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    194 		   dev_t dev, RF_SectorNum_t startSect,
    195 		   RF_SectorCount_t numSect, caddr_t buf,
    196 		   void (*cbFunc) (struct buf *), void *cbArg,
    197 		   int logBytesPerSector, struct proc * b_proc);
    198 
    199 #define Dprintf0(s)       if (rf_queueDebug) \
    200      rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
    201 #define Dprintf1(s,a)     if (rf_queueDebug) \
    202      rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
    203 #define Dprintf2(s,a,b)   if (rf_queueDebug) \
    204      rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
    205 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
    206      rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
    207 
    208 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
    209 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
    210 
    211 void raidattach __P((int));
    212 int raidsize __P((dev_t));
    213 
    214 void    rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
    215 void    rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
    216 static int raidinit __P((dev_t, RF_Raid_t *, int));
    217 
    218 int raidopen __P((dev_t, int, int, struct proc *));
    219 int raidclose __P((dev_t, int, int, struct proc *));
    220 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
    221 int raidwrite __P((dev_t, struct uio *, int));
    222 int raidread __P((dev_t, struct uio *, int));
    223 void raidstrategy __P((struct buf *));
    224 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    225 
    226 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
    227 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
    228 void rf_update_component_labels( RF_Raid_t *);
    229 /*
    230  * Pilfered from ccd.c
    231  */
    232 
/*
 * Wrapper around the buf handed down to a component disk, carrying a
 * back-pointer to the original I/O and to the RAIDframe request it
 * belongs to.  Allocated from the per-unit sc_cbufpool (see RAIDGETBUF).
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
    239 
    240 
    241 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
    242 #define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    243 
    244 /* XXX Not sure if the following should be replacing the raidPtrs above,
    245    or if it should be used in conjunction with that... */
    246 
/*
 * Per-unit software state; one entry per configured RAID device lives
 * in the raid_softc[] array allocated by raidattach().
 */
struct raid_softc {
	int     sc_flags;	/* flags (RAIDF_*, below) */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;        /* size of the raid device */
	dev_t   sc_dev;	        /* our device.. */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
};
    256 /* sc_flags */
    257 #define RAIDF_INITED	0x01	/* unit has been initialized */
    258 #define RAIDF_WLABEL	0x02	/* label area is writable */
    259 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    260 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    261 #define RAIDF_LOCKED	0x80	/* unit is locked */
    262 
    263 #define	raidunit(x)	DISKUNIT(x)
    264 static int numraid = 0;
    265 
    266 /*
    267  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    268  * Be aware that large numbers can allow the driver to consume a lot of
    269  * kernel memory, especially on writes, and in degraded mode reads.
    270  *
    271  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    272  * a single 64K write will typically require 64K for the old data,
    273  * 64K for the old parity, and 64K for the new parity, for a total
    274  * of 192K (if the parity buffer is not re-used immediately).
    275  * Even it if is used immedately, that's still 128K, which when multiplied
    276  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    277  *
    278  * Now in degraded mode, for example, a 64K read on the above setup may
    279  * require data reconstruction, which will require *all* of the 4 remaining
    280  * disks to participate -- 4 * 32K/disk == 128K again.
    281  */
    282 
    283 #ifndef RAIDOUTSTANDING
    284 #define RAIDOUTSTANDING   6
    285 #endif
    286 
    287 #define RAIDLABELDEV(dev)	\
    288 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    289 
    290 /* declared here, and made public, for the benefit of KVM stuff.. */
    291 struct raid_softc *raid_softc;
    292 
    293 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
    294 				     struct disklabel *));
    295 static void raidgetdisklabel __P((dev_t));
    296 static void raidmakedisklabel __P((struct raid_softc *));
    297 
    298 static int raidlock __P((struct raid_softc *));
    299 static void raidunlock __P((struct raid_softc *));
    300 int raidlookup __P((char *, struct proc * p, struct vnode **));
    301 
    302 static void rf_markalldirty __P((RF_Raid_t *));
    303 
    304 void
    305 raidattach(num)
    306 	int     num;
    307 {
    308 	int raidID;
    309 	int i, rc;
    310 
    311 #ifdef DEBUG
    312 	printf("raidattach: Asked for %d units\n", num);
    313 #endif
    314 
    315 	if (num <= 0) {
    316 #ifdef DIAGNOSTIC
    317 		panic("raidattach: count <= 0");
    318 #endif
    319 		return;
    320 	}
    321 	/* This is where all the initialization stuff gets done. */
    322 
    323 	/* Make some space for requested number of units... */
    324 
    325 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    326 	if (raidPtrs == NULL) {
    327 		panic("raidPtrs is NULL!!\n");
    328 	}
    329 
    330 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    331 	if (rc) {
    332 		RF_PANIC();
    333 	}
    334 
    335 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    336 	recon_queue = NULL;
    337 
    338 	for (i = 0; i < numraid; i++)
    339 		raidPtrs[i] = NULL;
    340 	rc = rf_BootRaidframe();
    341 	if (rc == 0)
    342 		printf("Kernelized RAIDframe activated\n");
    343 	else
    344 		panic("Serious error booting RAID!!\n");
    345 
    346 	/* put together some datastructures like the CCD device does.. This
    347 	 * lets us lock the device and what-not when it gets opened. */
    348 
    349 	raid_softc = (struct raid_softc *)
    350 	    malloc(num * sizeof(struct raid_softc),
    351 	    M_RAIDFRAME, M_NOWAIT);
    352 	if (raid_softc == NULL) {
    353 		printf("WARNING: no memory for RAIDframe driver\n");
    354 		return;
    355 	}
    356 	numraid = num;
    357 	bzero(raid_softc, num * sizeof(struct raid_softc));
    358 
    359 	for (raidID = 0; raidID < num; raidID++) {
    360 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    361 			  (RF_Raid_t *));
    362 		if (raidPtrs[raidID] == NULL) {
    363 			printf("raidPtrs[%d] is NULL\n", raidID);
    364 		}
    365 	}
    366 }
    367 
    368 
    369 int
    370 raidsize(dev)
    371 	dev_t   dev;
    372 {
    373 	struct raid_softc *rs;
    374 	struct disklabel *lp;
    375 	int     part, unit, omask, size;
    376 
    377 	unit = raidunit(dev);
    378 	if (unit >= numraid)
    379 		return (-1);
    380 	rs = &raid_softc[unit];
    381 
    382 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    383 		return (-1);
    384 
    385 	part = DISKPART(dev);
    386 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    387 	lp = rs->sc_dkdev.dk_label;
    388 
    389 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    390 		return (-1);
    391 
    392 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    393 		size = -1;
    394 	else
    395 		size = lp->d_partitions[part].p_size *
    396 		    (lp->d_secsize / DEV_BSIZE);
    397 
    398 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    399 		return (-1);
    400 
    401 	return (size);
    402 
    403 }
    404 
/*
 * Crash-dump entry point.  Dumping to a RAID device is not supported,
 * so this always fails with ENXIO.
 */
int
raiddump(dev, blkno, va, size)
	dev_t   dev;
	daddr_t blkno;
	caddr_t va;
	size_t  size;
{
	/* Not implemented. */
	return ENXIO;
}
    415 /* ARGSUSED */
    416 int
    417 raidopen(dev, flags, fmt, p)
    418 	dev_t   dev;
    419 	int     flags, fmt;
    420 	struct proc *p;
    421 {
    422 	int     unit = raidunit(dev);
    423 	struct raid_softc *rs;
    424 	struct disklabel *lp;
    425 	int     part, pmask;
    426 	int     error = 0;
    427 
    428 	if (unit >= numraid)
    429 		return (ENXIO);
    430 	rs = &raid_softc[unit];
    431 
    432 	if ((error = raidlock(rs)) != 0)
    433 		return (error);
    434 	lp = rs->sc_dkdev.dk_label;
    435 
    436 	part = DISKPART(dev);
    437 	pmask = (1 << part);
    438 
    439 	db1_printf(("Opening raid device number: %d partition: %d\n",
    440 		unit, part));
    441 
    442 
    443 	if ((rs->sc_flags & RAIDF_INITED) &&
    444 	    (rs->sc_dkdev.dk_openmask == 0))
    445 		raidgetdisklabel(dev);
    446 
    447 	/* make sure that this partition exists */
    448 
    449 	if (part != RAW_PART) {
    450 		db1_printf(("Not a raw partition..\n"));
    451 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    452 		    ((part >= lp->d_npartitions) ||
    453 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    454 			error = ENXIO;
    455 			raidunlock(rs);
    456 			db1_printf(("Bailing out...\n"));
    457 			return (error);
    458 		}
    459 	}
    460 	/* Prevent this unit from being unconfigured while open. */
    461 	switch (fmt) {
    462 	case S_IFCHR:
    463 		rs->sc_dkdev.dk_copenmask |= pmask;
    464 		break;
    465 
    466 	case S_IFBLK:
    467 		rs->sc_dkdev.dk_bopenmask |= pmask;
    468 		break;
    469 	}
    470 
    471 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    472 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    473 		/* First one... mark things as dirty... Note that we *MUST*
    474 		 have done a configure before this.  I DO NOT WANT TO BE
    475 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    476 		 THAT THEY BELONG TOGETHER!!!!! */
    477 		/* XXX should check to see if we're only open for reading
    478 		   here... If so, we needn't do this, but then need some
    479 		   other way of keeping track of what's happened.. */
    480 
    481 		rf_markalldirty( raidPtrs[unit] );
    482 	}
    483 
    484 
    485 	rs->sc_dkdev.dk_openmask =
    486 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    487 
    488 	raidunlock(rs);
    489 
    490 	return (error);
    491 
    492 
    493 }
    494 /* ARGSUSED */
    495 int
    496 raidclose(dev, flags, fmt, p)
    497 	dev_t   dev;
    498 	int     flags, fmt;
    499 	struct proc *p;
    500 {
    501 	int     unit = raidunit(dev);
    502 	struct raid_softc *rs;
    503 	int     error = 0;
    504 	int     part;
    505 
    506 	if (unit >= numraid)
    507 		return (ENXIO);
    508 	rs = &raid_softc[unit];
    509 
    510 	if ((error = raidlock(rs)) != 0)
    511 		return (error);
    512 
    513 	part = DISKPART(dev);
    514 
    515 	/* ...that much closer to allowing unconfiguration... */
    516 	switch (fmt) {
    517 	case S_IFCHR:
    518 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    519 		break;
    520 
    521 	case S_IFBLK:
    522 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    523 		break;
    524 	}
    525 	rs->sc_dkdev.dk_openmask =
    526 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    527 
    528 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    529 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    530 		/* Last one... device is not unconfigured yet.
    531 		   Device shutdown has taken care of setting the
    532 		   clean bits if RAIDF_INITED is not set
    533 		   mark things as clean... */
    534 		rf_update_component_labels( raidPtrs[unit] );
    535 	}
    536 
    537 	raidunlock(rs);
    538 	return (0);
    539 
    540 }
    541 
    542 void
    543 raidstrategy(bp)
    544 	register struct buf *bp;
    545 {
    546 	register int s;
    547 
    548 	unsigned int raidID = raidunit(bp->b_dev);
    549 	RF_Raid_t *raidPtr;
    550 	struct raid_softc *rs = &raid_softc[raidID];
    551 	struct disklabel *lp;
    552 	int     wlabel;
    553 
    554 #if 0
    555 	db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
    556 	db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
    557 	db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
    558 	db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
    559 	db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
    560 
    561 	if (bp->b_flags & B_READ)
    562 		db1_printf(("READ\n"));
    563 	else
    564 		db1_printf(("WRITE\n"));
    565 #endif
    566 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    567 		bp->b_error = ENXIO;
    568 		bp->b_flags = B_ERROR;
    569 		bp->b_resid = bp->b_bcount;
    570 		biodone(bp);
    571 		return;
    572 	}
    573 	if (raidID >= numraid || !raidPtrs[raidID]) {
    574 		bp->b_error = ENODEV;
    575 		bp->b_flags |= B_ERROR;
    576 		bp->b_resid = bp->b_bcount;
    577 		biodone(bp);
    578 		return;
    579 	}
    580 	raidPtr = raidPtrs[raidID];
    581 	if (!raidPtr->valid) {
    582 		bp->b_error = ENODEV;
    583 		bp->b_flags |= B_ERROR;
    584 		bp->b_resid = bp->b_bcount;
    585 		biodone(bp);
    586 		return;
    587 	}
    588 	if (bp->b_bcount == 0) {
    589 		db1_printf(("b_bcount is zero..\n"));
    590 		biodone(bp);
    591 		return;
    592 	}
    593 	lp = rs->sc_dkdev.dk_label;
    594 
    595 	/*
    596 	 * Do bounds checking and adjust transfer.  If there's an
    597 	 * error, the bounds check will flag that for us.
    598 	 */
    599 
    600 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    601 	if (DISKPART(bp->b_dev) != RAW_PART)
    602 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    603 			db1_printf(("Bounds check failed!!:%d %d\n",
    604 				(int) bp->b_blkno, (int) wlabel));
    605 			biodone(bp);
    606 			return;
    607 		}
    608 	s = splbio();		/* XXX Needed? */
    609 	db1_printf(("Beginning strategy...\n"));
    610 
    611 	bp->b_resid = 0;
    612 	bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
    613 	    NULL, NULL, NULL);
    614 	if (bp->b_error) {
    615 		bp->b_flags |= B_ERROR;
    616 		db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
    617 			bp->b_error));
    618 	}
    619 	splx(s);
    620 #if 0
    621 	db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
    622 		bp, bp->b_data,
    623 		(int) bp->b_bcount, (int) bp->b_resid));
    624 #endif
    625 }
    626 /* ARGSUSED */
    627 int
    628 raidread(dev, uio, flags)
    629 	dev_t   dev;
    630 	struct uio *uio;
    631 	int     flags;
    632 {
    633 	int     unit = raidunit(dev);
    634 	struct raid_softc *rs;
    635 	int     part;
    636 
    637 	if (unit >= numraid)
    638 		return (ENXIO);
    639 	rs = &raid_softc[unit];
    640 
    641 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    642 		return (ENXIO);
    643 	part = DISKPART(dev);
    644 
    645 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    646 
    647 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    648 
    649 }
    650 /* ARGSUSED */
    651 int
    652 raidwrite(dev, uio, flags)
    653 	dev_t   dev;
    654 	struct uio *uio;
    655 	int     flags;
    656 {
    657 	int     unit = raidunit(dev);
    658 	struct raid_softc *rs;
    659 
    660 	if (unit >= numraid)
    661 		return (ENXIO);
    662 	rs = &raid_softc[unit];
    663 
    664 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    665 		return (ENXIO);
    666 	db1_printf(("raidwrite\n"));
    667 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    668 
    669 }
    670 
    671 int
    672 raidioctl(dev, cmd, data, flag, p)
    673 	dev_t   dev;
    674 	u_long  cmd;
    675 	caddr_t data;
    676 	int     flag;
    677 	struct proc *p;
    678 {
    679 	int     unit = raidunit(dev);
    680 	int     error = 0;
    681 	int     part, pmask;
    682 	struct raid_softc *rs;
    683 	RF_Config_t *k_cfg, *u_cfg;
    684 	u_char *specific_buf;
    685 	int retcode = 0;
    686 	int row;
    687 	int column;
    688 	int s;
    689 	struct rf_recon_req *rrcopy, *rr;
    690 	RF_ComponentLabel_t *component_label;
    691 	RF_ComponentLabel_t ci_label;
    692 	RF_ComponentLabel_t **c_label_ptr;
    693 	RF_SingleComponent_t *sparePtr,*componentPtr;
    694 	RF_SingleComponent_t hot_spare;
    695 	RF_SingleComponent_t component;
    696 
    697 	if (unit >= numraid)
    698 		return (ENXIO);
    699 	rs = &raid_softc[unit];
    700 
    701 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    702 		(int) DISKPART(dev), (int) unit, (int) cmd));
    703 
    704 	/* Must be open for writes for these commands... */
    705 	switch (cmd) {
    706 	case DIOCSDINFO:
    707 	case DIOCWDINFO:
    708 	case DIOCWLABEL:
    709 		if ((flag & FWRITE) == 0)
    710 			return (EBADF);
    711 	}
    712 
    713 	/* Must be initialized for these... */
    714 	switch (cmd) {
    715 	case DIOCGDINFO:
    716 	case DIOCSDINFO:
    717 	case DIOCWDINFO:
    718 	case DIOCGPART:
    719 	case DIOCWLABEL:
    720 	case DIOCGDEFLABEL:
    721 	case RAIDFRAME_SHUTDOWN:
    722 	case RAIDFRAME_REWRITEPARITY:
    723 	case RAIDFRAME_GET_INFO:
    724 	case RAIDFRAME_RESET_ACCTOTALS:
    725 	case RAIDFRAME_GET_ACCTOTALS:
    726 	case RAIDFRAME_KEEP_ACCTOTALS:
    727 	case RAIDFRAME_GET_SIZE:
    728 	case RAIDFRAME_FAIL_DISK:
    729 	case RAIDFRAME_COPYBACK:
    730 	case RAIDFRAME_CHECKRECON:
    731 	case RAIDFRAME_GET_COMPONENT_LABEL:
    732 	case RAIDFRAME_SET_COMPONENT_LABEL:
    733 	case RAIDFRAME_ADD_HOT_SPARE:
    734 	case RAIDFRAME_REMOVE_HOT_SPARE:
    735 	case RAIDFRAME_INIT_LABELS:
    736 	case RAIDFRAME_REBUILD_IN_PLACE:
    737 	case RAIDFRAME_CHECK_PARITY:
    738 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    739 			return (ENXIO);
    740 	}
    741 
    742 	switch (cmd) {
    743 
    744 
    745 		/* configure the system */
    746 	case RAIDFRAME_CONFIGURE:
    747 
    748 		db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
    749 		/* copy-in the configuration information */
    750 		/* data points to a pointer to the configuration structure */
    751 		u_cfg = *((RF_Config_t **) data);
    752 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    753 		if (k_cfg == NULL) {
    754 			db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
    755 			return (ENOMEM);
    756 		}
    757 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    758 		    sizeof(RF_Config_t));
    759 		if (retcode) {
    760 			db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
    761 				retcode));
    762 			return (retcode);
    763 		}
    764 		/* allocate a buffer for the layout-specific data, and copy it
    765 		 * in */
    766 		if (k_cfg->layoutSpecificSize) {
    767 			if (k_cfg->layoutSpecificSize > 10000) {
    768 				/* sanity check */
    769 				db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
    770 				return (EINVAL);
    771 			}
    772 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    773 			    (u_char *));
    774 			if (specific_buf == NULL) {
    775 				RF_Free(k_cfg, sizeof(RF_Config_t));
    776 				db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
    777 				return (ENOMEM);
    778 			}
    779 			retcode = copyin(k_cfg->layoutSpecific,
    780 			    (caddr_t) specific_buf,
    781 			    k_cfg->layoutSpecificSize);
    782 			if (retcode) {
    783 				db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
    784 					retcode));
    785 				return (retcode);
    786 			}
    787 		} else
    788 			specific_buf = NULL;
    789 		k_cfg->layoutSpecific = specific_buf;
    790 
    791 		/* should do some kind of sanity check on the configuration.
    792 		 * Store the sum of all the bytes in the last byte? */
    793 
    794 		/* configure the system */
    795 
    796 		raidPtrs[unit]->raidid = unit;
    797 
    798 		retcode = rf_Configure(raidPtrs[unit], k_cfg);
    799 
    800 		/* allow this many simultaneous IO's to this RAID device */
    801 		raidPtrs[unit]->openings = RAIDOUTSTANDING;
    802 
    803 		if (retcode == 0) {
    804 			retcode = raidinit(dev, raidPtrs[unit], unit);
    805 			rf_markalldirty( raidPtrs[unit] );
    806 		}
    807 		/* free the buffers.  No return code here. */
    808 		if (k_cfg->layoutSpecificSize) {
    809 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    810 		}
    811 		RF_Free(k_cfg, sizeof(RF_Config_t));
    812 
    813 		db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
    814 			retcode));
    815 
    816 		return (retcode);
    817 
    818 		/* shutdown the system */
    819 	case RAIDFRAME_SHUTDOWN:
    820 
    821 		if ((error = raidlock(rs)) != 0)
    822 			return (error);
    823 
    824 		/*
    825 		 * If somebody has a partition mounted, we shouldn't
    826 		 * shutdown.
    827 		 */
    828 
    829 		part = DISKPART(dev);
    830 		pmask = (1 << part);
    831 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    832 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    833 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    834 			raidunlock(rs);
    835 			return (EBUSY);
    836 		}
    837 
    838 		if (rf_debugKernelAccess) {
    839 			printf("call shutdown\n");
    840 		}
    841 
    842 		retcode = rf_Shutdown(raidPtrs[unit]);
    843 
    844 		db1_printf(("Done main shutdown\n"));
    845 
    846 		pool_destroy(&rs->sc_cbufpool);
    847 		db1_printf(("Done freeing component buffer freelist\n"));
    848 
    849 		/* It's no longer initialized... */
    850 		rs->sc_flags &= ~RAIDF_INITED;
    851 
    852 		/* Detach the disk. */
    853 		disk_detach(&rs->sc_dkdev);
    854 
    855 		raidunlock(rs);
    856 
    857 		return (retcode);
    858 	case RAIDFRAME_GET_COMPONENT_LABEL:
    859 		c_label_ptr = (RF_ComponentLabel_t **) data;
    860 		/* need to read the component label for the disk indicated
    861 		   by row,column in component_label
    862 		   XXX need to sanity check these values!!!
    863 		   */
    864 
    865 		/* For practice, let's get it directly fromdisk, rather
    866 		   than from the in-core copy */
    867 		RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
    868 			   (RF_ComponentLabel_t *));
    869 		if (component_label == NULL)
    870 			return (ENOMEM);
    871 
    872 		bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
    873 
    874 		retcode = copyin( *c_label_ptr, component_label,
    875 				  sizeof(RF_ComponentLabel_t));
    876 
    877 		if (retcode) {
    878 			return(retcode);
    879 		}
    880 
    881 		row = component_label->row;
    882 		column = component_label->column;
    883 
    884 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
    885 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
    886 			return(EINVAL);
    887 		}
    888 
    889 		raidread_component_label(
    890                               raidPtrs[unit]->Disks[row][column].dev,
    891 			      raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    892 			      component_label );
    893 
    894 		retcode = copyout((caddr_t) component_label,
    895 				  (caddr_t) *c_label_ptr,
    896 				  sizeof(RF_ComponentLabel_t));
    897 		RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    898 		return (retcode);
    899 
    900 	case RAIDFRAME_SET_COMPONENT_LABEL:
    901 		component_label = (RF_ComponentLabel_t *) data;
    902 
    903 		/* XXX check the label for valid stuff... */
    904 		/* Note that some things *should not* get modified --
    905 		   the user should be re-initing the labels instead of
    906 		   trying to patch things.
    907 		   */
    908 
    909 		printf("Got component label:\n");
    910 		printf("Version: %d\n",component_label->version);
    911 		printf("Serial Number: %d\n",component_label->serial_number);
    912 		printf("Mod counter: %d\n",component_label->mod_counter);
    913 		printf("Row: %d\n", component_label->row);
    914 		printf("Column: %d\n", component_label->column);
    915 		printf("Num Rows: %d\n", component_label->num_rows);
    916 		printf("Num Columns: %d\n", component_label->num_columns);
    917 		printf("Clean: %d\n", component_label->clean);
    918 		printf("Status: %d\n", component_label->status);
    919 
    920 		row = component_label->row;
    921 		column = component_label->column;
    922 
    923 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
    924 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
    925 			return(EINVAL);
    926 		}
    927 
    928 		/* XXX this isn't allowed to do anything for now :-) */
    929 #if 0
    930 		raidwrite_component_label(
    931                             raidPtrs[unit]->Disks[row][column].dev,
    932 			    raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    933 			    component_label );
    934 #endif
    935 		return (0);
    936 
    937 	case RAIDFRAME_INIT_LABELS:
    938 		component_label = (RF_ComponentLabel_t *) data;
    939 		/*
    940 		   we only want the serial number from
    941 		   the above.  We get all the rest of the information
    942 		   from the config that was used to create this RAID
    943 		   set.
    944 		   */
    945 
    946 		raidPtrs[unit]->serial_number = component_label->serial_number;
    947 		/* current version number */
    948 		ci_label.version = RF_COMPONENT_LABEL_VERSION;
    949 		ci_label.serial_number = component_label->serial_number;
    950 		ci_label.mod_counter = raidPtrs[unit]->mod_counter;
    951 		ci_label.num_rows = raidPtrs[unit]->numRow;
    952 		ci_label.num_columns = raidPtrs[unit]->numCol;
    953 		ci_label.clean = RF_RAID_DIRTY; /* not clean */
    954 		ci_label.status = rf_ds_optimal; /* "It's good!" */
    955 
    956 		for(row=0;row<raidPtrs[unit]->numRow;row++) {
    957 			ci_label.row = row;
    958 			for(column=0;column<raidPtrs[unit]->numCol;column++) {
    959 				ci_label.column = column;
    960 				raidwrite_component_label(
    961 				  raidPtrs[unit]->Disks[row][column].dev,
    962 				  raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    963 				  &ci_label );
    964 			}
    965 		}
    966 
    967 		return (retcode);
    968 
    969 		/* initialize all parity */
    970 	case RAIDFRAME_REWRITEPARITY:
    971 
    972 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
    973 			/* Parity for RAID 0 is trivially correct */
    974 			raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
    975 			return(0);
    976 		}
    977 
    978 		/* borrow the thread of the requesting process */
    979 
    980 		s = splbio();
    981 		retcode = rf_RewriteParity(raidPtrs[unit]);
    982 		splx(s);
    983 		/* return I/O Error if the parity rewrite fails */
    984 
    985 		if (retcode) {
    986 			retcode = EIO;
    987 		} else {
    988 			/* set the clean bit!  If we shutdown correctly,
    989 			 the clean bit on each component label will get
    990 			 set */
    991 			raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
    992 		}
    993 		return (retcode);
    994 
    995 
    996 	case RAIDFRAME_ADD_HOT_SPARE:
    997 		sparePtr = (RF_SingleComponent_t *) data;
    998 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
    999 		printf("Adding spare\n");
   1000 		retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
   1001 		return(retcode);
   1002 
   1003 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1004 		return(retcode);
   1005 
   1006 	case RAIDFRAME_REBUILD_IN_PLACE:
   1007 
   1008 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1009 			/* Can't do this on a RAID 0!! */
   1010 			return(EINVAL);
   1011 		}
   1012 
   1013 		componentPtr = (RF_SingleComponent_t *) data;
   1014 		memcpy( &component, componentPtr,
   1015 			sizeof(RF_SingleComponent_t));
   1016 		row = component.row;
   1017 		column = component.column;
   1018 		printf("Rebuild: %d %d\n",row, column);
   1019 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
   1020 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
   1021 			return(EINVAL);
   1022 		}
   1023 		printf("Attempting a rebuild in place\n");
   1024 		s = splbio();
   1025 		retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
   1026 		splx(s);
   1027 		return(retcode);
   1028 
   1029 	case RAIDFRAME_GET_INFO:
   1030 		{
   1031 			RF_Raid_t *raid = raidPtrs[unit];
   1032 			RF_DeviceConfig_t *cfg, **ucfgp;
   1033 			int     i, j, d;
   1034 
   1035 			if (!raid->valid)
   1036 				return (ENODEV);
   1037 			ucfgp = (RF_DeviceConfig_t **) data;
   1038 			RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
   1039 				  (RF_DeviceConfig_t *));
   1040 			if (cfg == NULL)
   1041 				return (ENOMEM);
   1042 			bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
   1043 			cfg->rows = raid->numRow;
   1044 			cfg->cols = raid->numCol;
   1045 			cfg->ndevs = raid->numRow * raid->numCol;
   1046 			if (cfg->ndevs >= RF_MAX_DISKS) {
   1047 				cfg->ndevs = 0;
   1048 				return (ENOMEM);
   1049 			}
   1050 			cfg->nspares = raid->numSpare;
   1051 			if (cfg->nspares >= RF_MAX_DISKS) {
   1052 				cfg->nspares = 0;
   1053 				return (ENOMEM);
   1054 			}
   1055 			cfg->maxqdepth = raid->maxQueueDepth;
   1056 			d = 0;
   1057 			for (i = 0; i < cfg->rows; i++) {
   1058 				for (j = 0; j < cfg->cols; j++) {
   1059 					cfg->devs[d] = raid->Disks[i][j];
   1060 					d++;
   1061 				}
   1062 			}
   1063 			for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
   1064 				cfg->spares[i] = raid->Disks[0][j];
   1065 			}
   1066 			retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
   1067 					  sizeof(RF_DeviceConfig_t));
   1068 			RF_Free(cfg, sizeof(RF_DeviceConfig_t));
   1069 
   1070 			return (retcode);
   1071 		}
   1072 		break;
   1073 	case RAIDFRAME_CHECK_PARITY:
   1074 		*(int *) data = raidPtrs[unit]->parity_good;
   1075 		return (0);
   1076 	case RAIDFRAME_RESET_ACCTOTALS:
   1077 		{
   1078 			RF_Raid_t *raid = raidPtrs[unit];
   1079 
   1080 			bzero(&raid->acc_totals, sizeof(raid->acc_totals));
   1081 			return (0);
   1082 		}
   1083 		break;
   1084 
   1085 	case RAIDFRAME_GET_ACCTOTALS:
   1086 		{
   1087 			RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
   1088 			RF_Raid_t *raid = raidPtrs[unit];
   1089 
   1090 			*totals = raid->acc_totals;
   1091 			return (0);
   1092 		}
   1093 		break;
   1094 
   1095 	case RAIDFRAME_KEEP_ACCTOTALS:
   1096 		{
   1097 			RF_Raid_t *raid = raidPtrs[unit];
   1098 			int    *keep = (int *) data;
   1099 
   1100 			raid->keep_acc_totals = *keep;
   1101 			return (0);
   1102 		}
   1103 		break;
   1104 
   1105 	case RAIDFRAME_GET_SIZE:
   1106 		*(int *) data = raidPtrs[unit]->totalSectors;
   1107 		return (0);
   1108 
   1109 #define RAIDFRAME_RECON 1
   1110 		/* XXX The above should probably be set somewhere else!! GO */
   1111 #if RAIDFRAME_RECON > 0
   1112 
   1113 		/* fail a disk & optionally start reconstruction */
   1114 	case RAIDFRAME_FAIL_DISK:
   1115 
   1116 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1117 			/* Can't do this on a RAID 0!! */
   1118 			return(EINVAL);
   1119 		}
   1120 
   1121 		rr = (struct rf_recon_req *) data;
   1122 
   1123 		if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
   1124 		    || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
   1125 			return (EINVAL);
   1126 
   1127 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1128 		       unit, rr->row, rr->col);
   1129 
   1130 		/* make a copy of the recon request so that we don't rely on
   1131 		 * the user's buffer */
   1132 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1133 		bcopy(rr, rrcopy, sizeof(*rr));
   1134 		rrcopy->raidPtr = (void *) raidPtrs[unit];
   1135 
   1136 		LOCK_RECON_Q_MUTEX();
   1137 		rrcopy->next = recon_queue;
   1138 		recon_queue = rrcopy;
   1139 		wakeup(&recon_queue);
   1140 		UNLOCK_RECON_Q_MUTEX();
   1141 
   1142 		return (0);
   1143 
   1144 		/* invoke a copyback operation after recon on whatever disk
   1145 		 * needs it, if any */
   1146 	case RAIDFRAME_COPYBACK:
   1147 
   1148 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1149 			/* This makes no sense on a RAID 0!! */
   1150 			return(EINVAL);
   1151 		}
   1152 
   1153 		/* borrow the current thread to get this done */
   1154 
   1155 		s = splbio();
   1156 		rf_CopybackReconstructedData(raidPtrs[unit]);
   1157 		splx(s);
   1158 		return (0);
   1159 
   1160 		/* return the percentage completion of reconstruction */
   1161 	case RAIDFRAME_CHECKRECON:
   1162 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1163 			/* This makes no sense on a RAID 0 */
   1164 			return(EINVAL);
   1165 		}
   1166 
   1167 		row = *(int *) data;
   1168 		if (row < 0 || row >= raidPtrs[unit]->numRow)
   1169 			return (EINVAL);
   1170 		if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
   1171 			*(int *) data = 100;
   1172 		else
   1173 			*(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
   1174 		return (0);
   1175 
   1176 		/* the sparetable daemon calls this to wait for the kernel to
   1177 		 * need a spare table. this ioctl does not return until a
   1178 		 * spare table is needed. XXX -- calling mpsleep here in the
   1179 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1180 		 * -- I should either compute the spare table in the kernel,
   1181 		 * or have a different -- XXX XXX -- interface (a different
   1182 		 * character device) for delivering the table          -- XXX */
   1183 #if 0
   1184 	case RAIDFRAME_SPARET_WAIT:
   1185 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1186 		while (!rf_sparet_wait_queue)
   1187 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1188 		waitreq = rf_sparet_wait_queue;
   1189 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1190 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1191 
   1192 		*((RF_SparetWait_t *) data) = *waitreq;	/* structure assignment */
   1193 
   1194 		RF_Free(waitreq, sizeof(*waitreq));
   1195 		return (0);
   1196 
   1197 
   1198 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1199 		 * code in it that will cause the dameon to exit */
   1200 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1201 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1202 		waitreq->fcol = -1;
   1203 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1204 		waitreq->next = rf_sparet_wait_queue;
   1205 		rf_sparet_wait_queue = waitreq;
   1206 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1207 		wakeup(&rf_sparet_wait_queue);
   1208 		return (0);
   1209 
   1210 		/* used by the spare table daemon to deliver a spare table
   1211 		 * into the kernel */
   1212 	case RAIDFRAME_SEND_SPARET:
   1213 
   1214 		/* install the spare table */
   1215 		retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
   1216 
   1217 		/* respond to the requestor.  the return status of the spare
   1218 		 * table installation is passed in the "fcol" field */
   1219 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1220 		waitreq->fcol = retcode;
   1221 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1222 		waitreq->next = rf_sparet_resp_queue;
   1223 		rf_sparet_resp_queue = waitreq;
   1224 		wakeup(&rf_sparet_resp_queue);
   1225 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1226 
   1227 		return (retcode);
   1228 #endif
   1229 
   1230 
   1231 #endif				/* RAIDFRAME_RECON > 0 */
   1232 
   1233 	default:
   1234 		break;		/* fall through to the os-specific code below */
   1235 
   1236 	}
   1237 
   1238 	if (!raidPtrs[unit]->valid)
   1239 		return (EINVAL);
   1240 
   1241 	/*
   1242 	 * Add support for "regular" device ioctls here.
   1243 	 */
   1244 
   1245 	switch (cmd) {
   1246 	case DIOCGDINFO:
   1247 		db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
   1248 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1249 		break;
   1250 
   1251 	case DIOCGPART:
   1252 		db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
   1253 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1254 		((struct partinfo *) data)->part =
   1255 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1256 		break;
   1257 
   1258 	case DIOCWDINFO:
   1259 		db1_printf(("DIOCWDINFO\n"));
   1260 	case DIOCSDINFO:
   1261 		db1_printf(("DIOCSDINFO\n"));
   1262 		if ((error = raidlock(rs)) != 0)
   1263 			return (error);
   1264 
   1265 		rs->sc_flags |= RAIDF_LABELLING;
   1266 
   1267 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1268 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1269 		if (error == 0) {
   1270 			if (cmd == DIOCWDINFO)
   1271 				error = writedisklabel(RAIDLABELDEV(dev),
   1272 				    raidstrategy, rs->sc_dkdev.dk_label,
   1273 				    rs->sc_dkdev.dk_cpulabel);
   1274 		}
   1275 		rs->sc_flags &= ~RAIDF_LABELLING;
   1276 
   1277 		raidunlock(rs);
   1278 
   1279 		if (error)
   1280 			return (error);
   1281 		break;
   1282 
   1283 	case DIOCWLABEL:
   1284 		db1_printf(("DIOCWLABEL\n"));
   1285 		if (*(int *) data != 0)
   1286 			rs->sc_flags |= RAIDF_WLABEL;
   1287 		else
   1288 			rs->sc_flags &= ~RAIDF_WLABEL;
   1289 		break;
   1290 
   1291 	case DIOCGDEFLABEL:
   1292 		db1_printf(("DIOCGDEFLABEL\n"));
   1293 		raidgetdefaultlabel(raidPtrs[unit], rs,
   1294 		    (struct disklabel *) data);
   1295 		break;
   1296 
   1297 	default:
   1298 		retcode = ENOTTY;	/* XXXX ?? OR EINVAL ? */
   1299 	}
   1300 	return (retcode);
   1301 
   1302 }
   1303 
   1304 
   1305 /* raidinit -- complete the rest of the initialization for the
   1306    RAIDframe device.  */
   1307 
   1308 
   1309 static int
   1310 raidinit(dev, raidPtr, unit)
   1311 	dev_t   dev;
   1312 	RF_Raid_t *raidPtr;
   1313 	int     unit;
   1314 {
   1315 	int     retcode;
   1316 	/* int ix; */
   1317 	/* struct raidbuf *raidbp; */
   1318 	struct raid_softc *rs;
   1319 
   1320 	retcode = 0;
   1321 
   1322 	rs = &raid_softc[unit];
   1323 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1324 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1325 
   1326 
   1327 	/* XXX should check return code first... */
   1328 	rs->sc_flags |= RAIDF_INITED;
   1329 
   1330 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1331 
   1332 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1333 
   1334 	/* disk_attach actually creates space for the CPU disklabel, among
   1335 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1336 	 * with disklabels. */
   1337 
   1338 	disk_attach(&rs->sc_dkdev);
   1339 
   1340 	/* XXX There may be a weird interaction here between this, and
   1341 	 * protectedSectors, as used in RAIDframe.  */
   1342 
   1343 	rs->sc_size = raidPtr->totalSectors;
   1344 	rs->sc_dev = dev;
   1345 
   1346 	return (retcode);
   1347 }
   1348 
   1349 /*
   1350  * This kernel thread never exits.  It is created once, and persists
   1351  * until the system reboots.
   1352  */
   1353 
void
rf_ReconKernelThread()
{
	struct rf_recon_req *req;	/* request popped off recon_queue */
	int     s;			/* saved spl; never restored -- this
					 * thread loops forever */

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	s = splbio();

	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		while (!recon_queue) {
			/* Queue empty: drop the lock so the enqueuer
			 * (RAIDFRAME_FAIL_DISK in raidioctl) can add work,
			 * sleep until woken, then re-take the lock and
			 * re-check -- guards against a lost wakeup. */
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO,
			       "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		/* Pop the head of the singly-linked request list. */
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
	         * If flags specifies that we should start recon, this call
	         * will not return until reconstruction completes, fails,
		 * or is aborted.
	         */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* The request was RF_Malloc'd by the enqueuer; ownership
		 * passed to us, so we free it. */
		RF_Free(req, sizeof(*req));
	}
}
   1388 /* wake up the daemon & tell it to get us a spare table
   1389  * XXX
   1390  * the entries in the queues should be tagged with the raidPtr
   1391  * so that in the extremely rare case that two recons happen at once,
   1392  * we know for which device were requesting a spare table
   1393  * XXX
   1394  */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode;

	/* Post our request on the wait queue and wake the sparetable
	 * daemon (which sleeps in RAIDFRAME_SPARET_WAIT). */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* NOTE(review): this comment used to say "mpsleep unlocks the
	 * mutex", but the code now uses tsleep(), which knows nothing of
	 * rf_sparet_wait_mutex -- the mutex is formally held across the
	 * sleep.  Confirm this is safe with the current RF_LOCK_MUTEX
	 * implementation. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* Pop the daemon's response off the response queue. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The daemon passes its status back in the "fcol" field. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
   1420 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1421  * bp & passes it down.
   1422  * any calls originating in the kernel must use non-blocking I/O
   1423  * do some extra sanity checking to return "appropriate" error values for
   1424  * certain conditions (to make some standard utilities work)
   1425  */
   1426 int
   1427 rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
   1428 	RF_Raid_t *raidPtr;
   1429 	struct buf *bp;
   1430 	RF_RaidAccessFlags_t flags;
   1431 	void    (*cbFunc) (struct buf *);
   1432 	void   *cbArg;
   1433 {
   1434 	RF_SectorCount_t num_blocks, pb, sum;
   1435 	RF_RaidAddr_t raid_addr;
   1436 	int     retcode;
   1437 	struct partition *pp;
   1438 	daddr_t blocknum;
   1439 	int     unit;
   1440 	struct raid_softc *rs;
   1441 	int     do_async;
   1442 
   1443 	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */
   1444 
   1445 	unit = raidPtr->raidid;
   1446 	rs = &raid_softc[unit];
   1447 
   1448 	/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1449 	 * partition.. Need to make it absolute to the underlying device.. */
   1450 
   1451 	blocknum = bp->b_blkno;
   1452 	if (DISKPART(bp->b_dev) != RAW_PART) {
   1453 		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1454 		blocknum += pp->p_offset;
   1455 		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
   1456 			pp->p_offset));
   1457 	} else {
   1458 		db1_printf(("Is raw..\n"));
   1459 	}
   1460 	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));
   1461 
   1462 	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1463 	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1464 
   1465 	/* *THIS* is where we adjust what block we're going to... but DO NOT
   1466 	 * TOUCH bp->b_blkno!!! */
   1467 	raid_addr = blocknum;
   1468 
   1469 	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1470 	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1471 	sum = raid_addr + num_blocks + pb;
   1472 	if (1 || rf_debugKernelAccess) {
   1473 		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1474 			(int) raid_addr, (int) sum, (int) num_blocks,
   1475 			(int) pb, (int) bp->b_resid));
   1476 	}
   1477 	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1478 	    || (sum < num_blocks) || (sum < pb)) {
   1479 		bp->b_error = ENOSPC;
   1480 		bp->b_flags |= B_ERROR;
   1481 		bp->b_resid = bp->b_bcount;
   1482 		biodone(bp);
   1483 		return (bp->b_error);
   1484 	}
   1485 	/*
   1486 	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1487 	 */
   1488 
   1489 	if (bp->b_bcount & raidPtr->sectorMask) {
   1490 		bp->b_error = EINVAL;
   1491 		bp->b_flags |= B_ERROR;
   1492 		bp->b_resid = bp->b_bcount;
   1493 		biodone(bp);
   1494 		return (bp->b_error);
   1495 	}
   1496 	db1_printf(("Calling DoAccess..\n"));
   1497 
   1498 
   1499 	/* Put a throttle on the number of requests we handle simultanously */
   1500 
   1501 	RF_LOCK_MUTEX(raidPtr->mutex);
   1502 
   1503 	while(raidPtr->openings <= 0) {
   1504 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1505 		(void)tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);
   1506 		RF_LOCK_MUTEX(raidPtr->mutex);
   1507 	}
   1508 	raidPtr->openings--;
   1509 
   1510 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1511 
   1512 	/*
   1513 	 * Everything is async.
   1514 	 */
   1515 	do_async = 1;
   1516 
   1517 	/* don't ever condition on bp->b_flags & B_WRITE.  always condition on
   1518 	 * B_READ instead */
   1519 	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1520 	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1521 	    do_async, raid_addr, num_blocks,
   1522 	    bp->b_un.b_addr,
   1523 	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
   1524 	    NULL, cbFunc, cbArg);
   1525 #if 0
   1526 	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
   1527 		bp->b_data, (int) bp->b_resid));
   1528 #endif
   1529 
   1530 	return (retcode);
   1531 }
   1532 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1533 
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int     unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	/* KernelWakeupFunc() finds the queue through req->queue at
	 * completion time. */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	/* Sanity check the unit before indexing raid_softc[]. */
	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* Get a shadow buffer for the component I/O -- presumably from
	 * the sc_cbufpool created in raidinit(); verify. */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this??  */

		/* Complete the NOP immediately via the normal callback. */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* Fill in the shadow buf; KernelWakeupFunc is the b_iodone
		 * callback, with req as its context. */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* Writes must bump the vnode's output counter before
		 * being handed to the strategy routine. */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
   1648 /* this is the callback function associated with a I/O invoked from
   1649    kernel code.
   1650  */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* vbp is really the shadow raidbuf allocated in
	 * rf_DispatchKernelIO(); recover it by cast. */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int     unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	/* bp is the original buffer this component I/O was shadowing. */
	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* Propagate any component I/O error to the original buffer. */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* Account the physical I/O time if tracing is enabled for this
	 * request. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* Return the shadow buffer to the per-unit pool. */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* Hand the completion (and error status) back to RAIDframe. */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
   1749 
   1750 
   1751 
   1752 /*
   1753  * initialize a buf structure for doing an I/O in the kernel.
   1754  */
   1755 static void
   1756 InitBP(
   1757     struct buf * bp,
   1758     struct vnode * b_vp,
   1759     unsigned rw_flag,
   1760     dev_t dev,
   1761     RF_SectorNum_t startSect,
   1762     RF_SectorCount_t numSect,
   1763     caddr_t buf,
   1764     void (*cbFunc) (struct buf *),
   1765     void *cbArg,
   1766     int logBytesPerSector,
   1767     struct proc * b_proc)
   1768 {
   1769 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1770 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1771 	bp->b_bcount = numSect << logBytesPerSector;
   1772 	bp->b_bufsize = bp->b_bcount;
   1773 	bp->b_error = 0;
   1774 	bp->b_dev = dev;
   1775 	db1_printf(("bp->b_dev is %d\n", dev));
   1776 	bp->b_un.b_addr = buf;
   1777 #if 0
   1778 	db1_printf(("bp->b_data=0x%x\n", bp->b_data));
   1779 #endif
   1780 	bp->b_blkno = startSect;
   1781 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1782 	db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
   1783 	if (bp->b_bcount == 0) {
   1784 		panic("bp->b_bcount is zero in InitBP!!\n");
   1785 	}
   1786 	bp->b_proc = b_proc;
   1787 	bp->b_iodone = cbFunc;
   1788 	bp->b_vp = b_vp;
   1789 
   1790 }
   1791 
   1792 static void
   1793 raidgetdefaultlabel(raidPtr, rs, lp)
   1794 	RF_Raid_t *raidPtr;
   1795 	struct raid_softc *rs;
   1796 	struct disklabel *lp;
   1797 {
   1798 	db1_printf(("Building a default label...\n"));
   1799 	bzero(lp, sizeof(*lp));
   1800 
   1801 	/* fabricate a label... */
   1802 	lp->d_secperunit = raidPtr->totalSectors;
   1803 	lp->d_secsize = raidPtr->bytesPerSector;
   1804 	lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
   1805 	lp->d_ntracks = 1;
   1806 	lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
   1807 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1808 
   1809 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1810 	lp->d_type = DTYPE_RAID;
   1811 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1812 	lp->d_rpm = 3600;
   1813 	lp->d_interleave = 1;
   1814 	lp->d_flags = 0;
   1815 
   1816 	lp->d_partitions[RAW_PART].p_offset = 0;
   1817 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1818 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1819 	lp->d_npartitions = RAW_PART + 1;
   1820 
   1821 	lp->d_magic = DISKMAGIC;
   1822 	lp->d_magic2 = DISKMAGIC;
   1823 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1824 
   1825 }
   1826 /*
   1827  * Read the disklabel from the raid device.  If one is not present, fake one
   1828  * up.
   1829  */
   1830 static void
   1831 raidgetdisklabel(dev)
   1832 	dev_t   dev;
   1833 {
   1834 	int     unit = raidunit(dev);
   1835 	struct raid_softc *rs = &raid_softc[unit];
   1836 	char   *errstring;
   1837 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1838 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1839 	RF_Raid_t *raidPtr;
   1840 
   1841 	db1_printf(("Getting the disklabel...\n"));
   1842 
   1843 	bzero(clp, sizeof(*clp));
   1844 
   1845 	raidPtr = raidPtrs[unit];
   1846 
   1847 	raidgetdefaultlabel(raidPtr, rs, lp);
   1848 
   1849 	/*
   1850 	 * Call the generic disklabel extraction routine.
   1851 	 */
   1852 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1853 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1854 	if (errstring)
   1855 		raidmakedisklabel(rs);
   1856 	else {
   1857 		int     i;
   1858 		struct partition *pp;
   1859 
   1860 		/*
   1861 		 * Sanity check whether the found disklabel is valid.
   1862 		 *
   1863 		 * This is necessary since total size of the raid device
   1864 		 * may vary when an interleave is changed even though exactly
   1865 		 * same componets are used, and old disklabel may used
   1866 		 * if that is found.
   1867 		 */
   1868 		if (lp->d_secperunit != rs->sc_size)
   1869 			printf("WARNING: %s: "
   1870 			    "total sector size in disklabel (%d) != "
   1871 			    "the size of raid (%ld)\n", rs->sc_xname,
   1872 			    lp->d_secperunit, (long) rs->sc_size);
   1873 		for (i = 0; i < lp->d_npartitions; i++) {
   1874 			pp = &lp->d_partitions[i];
   1875 			if (pp->p_offset + pp->p_size > rs->sc_size)
   1876 				printf("WARNING: %s: end of partition `%c' "
   1877 				    "exceeds the size of raid (%ld)\n",
   1878 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   1879 		}
   1880 	}
   1881 
   1882 }
   1883 /*
   1884  * Take care of things one might want to take care of in the event
   1885  * that a disklabel isn't present.
   1886  */
   1887 static void
   1888 raidmakedisklabel(rs)
   1889 	struct raid_softc *rs;
   1890 {
   1891 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1892 	db1_printf(("Making a label..\n"));
   1893 
   1894 	/*
   1895 	 * For historical reasons, if there's no disklabel present
   1896 	 * the raw partition must be marked FS_BSDFFS.
   1897 	 */
   1898 
   1899 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1900 
   1901 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1902 
   1903 	lp->d_checksum = dkcksum(lp);
   1904 }
   1905 /*
   1906  * Lookup the provided name in the filesystem.  If the file exists,
   1907  * is a valid block device, and isn't being used by anyone else,
   1908  * set *vpp to the file's vnode.
   1909  * You'll find the original of this in ccd.c
   1910  */
   1911 int
   1912 raidlookup(path, p, vpp)
   1913 	char   *path;
   1914 	struct proc *p;
   1915 	struct vnode **vpp;	/* result */
   1916 {
   1917 	struct nameidata nd;
   1918 	struct vnode *vp;
   1919 	struct vattr va;
   1920 	int     error;
   1921 
   1922 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   1923 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   1924 #ifdef DEBUG
   1925 		printf("RAIDframe: vn_open returned %d\n", error);
   1926 #endif
   1927 		return (error);
   1928 	}
   1929 	vp = nd.ni_vp;
   1930 	if (vp->v_usecount > 1) {
   1931 		VOP_UNLOCK(vp, 0);
   1932 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1933 		return (EBUSY);
   1934 	}
   1935 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   1936 		VOP_UNLOCK(vp, 0);
   1937 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1938 		return (error);
   1939 	}
   1940 	/* XXX: eventually we should handle VREG, too. */
   1941 	if (va.va_type != VBLK) {
   1942 		VOP_UNLOCK(vp, 0);
   1943 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1944 		return (ENOTBLK);
   1945 	}
   1946 	VOP_UNLOCK(vp, 0);
   1947 	*vpp = vp;
   1948 	return (0);
   1949 }
   1950 /*
   1951  * Wait interruptibly for an exclusive lock.
   1952  *
   1953  * XXX
   1954  * Several drivers do this; it should be abstracted and made MP-safe.
   1955  * (Hmm... where have we seen this warning before :->  GO )
   1956  */
   1957 static int
   1958 raidlock(rs)
   1959 	struct raid_softc *rs;
   1960 {
   1961 	int     error;
   1962 
   1963 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   1964 		rs->sc_flags |= RAIDF_WANTED;
   1965 		if ((error =
   1966 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   1967 			return (error);
   1968 	}
   1969 	rs->sc_flags |= RAIDF_LOCKED;
   1970 	return (0);
   1971 }
   1972 /*
   1973  * Unlock and wake up any waiters.
   1974  */
   1975 static void
   1976 raidunlock(rs)
   1977 	struct raid_softc *rs;
   1978 {
   1979 
   1980 	rs->sc_flags &= ~RAIDF_LOCKED;
   1981 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   1982 		rs->sc_flags &= ~RAIDF_WANTED;
   1983 		wakeup(rs);
   1984 	}
   1985 }
   1986 
   1987 
   1988 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   1989 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   1990 
   1991 int
   1992 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   1993 {
   1994 	RF_ComponentLabel_t component_label;
   1995 	raidread_component_label(dev, b_vp, &component_label);
   1996 	component_label.mod_counter = mod_counter;
   1997 	component_label.clean = RF_RAID_CLEAN;
   1998 	raidwrite_component_label(dev, b_vp, &component_label);
   1999 	return(0);
   2000 }
   2001 
   2002 
   2003 int
   2004 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2005 {
   2006 	RF_ComponentLabel_t component_label;
   2007 	raidread_component_label(dev, b_vp, &component_label);
   2008 	component_label.mod_counter = mod_counter;
   2009 	component_label.clean = RF_RAID_DIRTY;
   2010 	raidwrite_component_label(dev, b_vp, &component_label);
   2011 	return(0);
   2012 }
   2013 
/* ARGSUSED */
/*
 * Read the RAIDframe component label from `dev' into *component_label.
 * The label lives RF_COMPONENT_INFO_OFFSET bytes into the component.
 * Goes straight through the block device's strategy routine with a
 * borrowed buffer; returns 0 on success or the error from biowait().
 * The b_vp argument is unused (hence ARGSUSED).
 */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is normally byte units, but this sets
	   it to a DEV_BSIZE block count — verify intended. */
 	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* Hand the buffer directly to the underlying block driver. */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	/* Sleep until the I/O completes. */
	error = biowait(bp);

	if (!error) {
		/* Only the leading sizeof(RF_ComponentLabel_t) bytes of
		 * the RF_COMPONENT_INFO_SIZE region hold the label. */
		memcpy(component_label, bp->b_un.b_addr,
		       sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
        } else {
		printf("Failed to read RAID component label!\n");
	}

	/* Mark the borrowed buffer stale so it isn't re-used as cache. */
        bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
/* ARGSUSED */
/*
 * Write *component_label to the component label area of `dev'
 * (RF_COMPONENT_INFO_OFFSET bytes into the component), zero-padding
 * the rest of the RF_COMPONENT_INFO_SIZE region.  Returns 0 on
 * success or the error from biowait().  b_vp is unused (ARGSUSED).
 */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid is normally byte units, but this sets
	   it to a DEV_BSIZE block count — verify intended. */
 	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* Zero the whole region, then drop the label at the front. */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* Hand the buffer to the underlying driver and wait for it. */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* Mark the borrowed buffer stale so it isn't re-used as cache. */
        bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
   2098 
/*
 * Bump the array's modification counter and mark the component label
 * of every non-failed, non-spared component dirty.  Called so that an
 * unclean shutdown can later be detected from the on-disk labels.
 *
 * NOTE(review): the return value of raidread_component_label() is not
 * checked, so c_label.status may be examined from an uninitialized
 * stack label if the read fails — confirm whether that is acceptable
 * here.
 */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				/* Peek at the current label only to see
				 * whether this slot has been spared. */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				/* Stamp the label dirty with the new
				 * mod counter. */
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
	/* The disabled spare-handling code below references variables
	 * (sparecol, srow, scol, i, j) that are not declared in this
	 * function, and uses `r' after the loop above ends; it would
	 * need rework before being enabled. */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
   2193 
   2194 
/*
 * Rewrite the component labels of every optimal component (and every
 * in-use spare) with current state, bumping the array's mod counter.
 * If parity is known good (parity_good == RF_RAID_CLEAN) the labels
 * are additionally stamped clean via raidmarkclean().
 *
 * NOTE(review): errors from raidread/raidwrite_component_label() are
 * ignored throughout — a failed read means stale/uninitialized label
 * contents are rewritten.  Confirm whether that matters here.
 */
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	/* srow/scol stay -1 (and get written into the spare's label)
	 * if the search below finds no mapping for a spare. */
	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				/* Round-trip the label so only status
				 * (and, via raidmarkclean, the clean
				 * bit and mod counter) change. */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			else if (raidPtr->Disks[r][c].status !=
				   rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	/* Spares live in row 0, in columns past numCol. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which (row,col) this spare stands in for,
			 * so its label records the replaced position. */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
					       raidPtr->mod_counter);
			}
		}
	}
	/* 	printf("Component labels updated\n"); */
}
   2311