Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.33
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.33 1999/12/03 03:06:44 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "rf_raid.h"
    139 #include "rf_raidframe.h"
    140 #include "rf_dag.h"
    141 #include "rf_dagflags.h"
    142 #include "rf_diskqueue.h"
    143 #include "rf_acctrace.h"
    144 #include "rf_etimer.h"
    145 #include "rf_general.h"
    146 #include "rf_debugMem.h"
    147 #include "rf_kintf.h"
    148 #include "rf_options.h"
    149 #include "rf_driver.h"
    150 #include "rf_parityscan.h"
    151 #include "rf_debugprint.h"
    152 #include "rf_threadstuff.h"
    153 
    154 int     rf_kdebug_level = 0;
    155 
    156 #ifdef DEBUG
    157 #define db0_printf(a) printf a
    158 #define db_printf(a) if (rf_kdebug_level > 0) printf a
    159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    160 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
    161 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
    162 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
    163 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
    164 #else				/* DEBUG */
    165 #define db0_printf(a) printf a
    166 #define db1_printf(a) { }
    167 #define db2_printf(a) { }
    168 #define db3_printf(a) { }
    169 #define db4_printf(a) { }
    170 #define db5_printf(a) { }
    171 #endif				/* DEBUG */
    172 
    173 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    174 
    175 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    176 
    177 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    178 						 * spare table */
    179 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    180 						 * installation process */
    181 
    182 static struct rf_recon_req *recon_queue = NULL;	/* used to communicate
    183 						 * reconstruction
    184 						 * requests */
    185 
    186 
    187 decl_simple_lock_data(, recon_queue_mutex)
    188 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
    189 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
    190 
    191 /* prototypes */
    192 static void KernelWakeupFunc(struct buf * bp);
    193 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    194 		   dev_t dev, RF_SectorNum_t startSect,
    195 		   RF_SectorCount_t numSect, caddr_t buf,
    196 		   void (*cbFunc) (struct buf *), void *cbArg,
    197 		   int logBytesPerSector, struct proc * b_proc);
    198 
    199 #define Dprintf0(s)       if (rf_queueDebug) \
    200      rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
    201 #define Dprintf1(s,a)     if (rf_queueDebug) \
    202      rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
    203 #define Dprintf2(s,a,b)   if (rf_queueDebug) \
    204      rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
    205 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
    206      rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
    207 
    208 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
    209 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
    210 
    211 void raidattach __P((int));
    212 int raidsize __P((dev_t));
    213 
    214 void    rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
    215 void    rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
    216 static int raidinit __P((dev_t, RF_Raid_t *, int));
    217 
    218 int raidopen __P((dev_t, int, int, struct proc *));
    219 int raidclose __P((dev_t, int, int, struct proc *));
    220 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
    221 int raidwrite __P((dev_t, struct uio *, int));
    222 int raidread __P((dev_t, struct uio *, int));
    223 void raidstrategy __P((struct buf *));
    224 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    225 
    226 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
    227 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
    228 void rf_update_component_labels( RF_Raid_t *);
    229 /*
    230  * Pilfered from ccd.c
    231  */
    232 
/* Wrapper around the component buf handed to the underlying disk, so
   the I/O completion handler can recover the originating request
   (same trick ccd.c uses -- see "Pilfered from ccd.c" above). */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
    239 
    240 
    241 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
    242 #define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    243 
    244 /* XXX Not sure if the following should be replacing the raidPtrs above,
    245    or if it should be used in conjunction with that... */
    246 
/* Per-unit driver state; one entry per unit in the raid_softc[] array
   allocated by raidattach(). */
struct raid_softc {
	int     sc_flags;	/* flags (RAIDF_* below) */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;        /* size of the raid device */
	dev_t   sc_dev;	        /* our device.. */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
};
    256 /* sc_flags */
    257 #define RAIDF_INITED	0x01	/* unit has been initialized */
    258 #define RAIDF_WLABEL	0x02	/* label area is writable */
    259 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    260 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    261 #define RAIDF_LOCKED	0x80	/* unit is locked */
    262 
    263 #define	raidunit(x)	DISKUNIT(x)
    264 static int numraid = 0;
    265 
    266 /*
    267  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    268  * Be aware that large numbers can allow the driver to consume a lot of
    269  * kernel memory, especially on writes, and in degraded mode reads.
    270  *
    271  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    272  * a single 64K write will typically require 64K for the old data,
    273  * 64K for the old parity, and 64K for the new parity, for a total
    274  * of 192K (if the parity buffer is not re-used immediately).
    275  * Even it if is used immedately, that's still 128K, which when multiplied
    276  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    277  *
    278  * Now in degraded mode, for example, a 64K read on the above setup may
    279  * require data reconstruction, which will require *all* of the 4 remaining
    280  * disks to participate -- 4 * 32K/disk == 128K again.
    281  */
    282 
    283 #ifndef RAIDOUTSTANDING
    284 #define RAIDOUTSTANDING   6
    285 #endif
    286 
    287 #define RAIDLABELDEV(dev)	\
    288 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    289 
    290 /* declared here, and made public, for the benefit of KVM stuff.. */
    291 struct raid_softc *raid_softc;
    292 
    293 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
    294 				     struct disklabel *));
    295 static void raidgetdisklabel __P((dev_t));
    296 static void raidmakedisklabel __P((struct raid_softc *));
    297 
    298 static int raidlock __P((struct raid_softc *));
    299 static void raidunlock __P((struct raid_softc *));
    300 int raidlookup __P((char *, struct proc * p, struct vnode **));
    301 
    302 static void rf_markalldirty __P((RF_Raid_t *));
    303 
    304 void
    305 raidattach(num)
    306 	int     num;
    307 {
    308 	int raidID;
    309 	int i, rc;
    310 
    311 #ifdef DEBUG
    312 	printf("raidattach: Asked for %d units\n", num);
    313 #endif
    314 
    315 	if (num <= 0) {
    316 #ifdef DIAGNOSTIC
    317 		panic("raidattach: count <= 0");
    318 #endif
    319 		return;
    320 	}
    321 	/* This is where all the initialization stuff gets done. */
    322 
    323 	/* Make some space for requested number of units... */
    324 
    325 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    326 	if (raidPtrs == NULL) {
    327 		panic("raidPtrs is NULL!!\n");
    328 	}
    329 
    330 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    331 	if (rc) {
    332 		RF_PANIC();
    333 	}
    334 
    335 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    336 	recon_queue = NULL;
    337 
    338 	for (i = 0; i < numraid; i++)
    339 		raidPtrs[i] = NULL;
    340 	rc = rf_BootRaidframe();
    341 	if (rc == 0)
    342 		printf("Kernelized RAIDframe activated\n");
    343 	else
    344 		panic("Serious error booting RAID!!\n");
    345 
    346 	/* put together some datastructures like the CCD device does.. This
    347 	 * lets us lock the device and what-not when it gets opened. */
    348 
    349 	raid_softc = (struct raid_softc *)
    350 	    malloc(num * sizeof(struct raid_softc),
    351 	    M_RAIDFRAME, M_NOWAIT);
    352 	if (raid_softc == NULL) {
    353 		printf("WARNING: no memory for RAIDframe driver\n");
    354 		return;
    355 	}
    356 	numraid = num;
    357 	bzero(raid_softc, num * sizeof(struct raid_softc));
    358 
    359 	for (raidID = 0; raidID < num; raidID++) {
    360 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    361 			  (RF_Raid_t *));
    362 		if (raidPtrs[raidID] == NULL) {
    363 			printf("raidPtrs[%d] is NULL\n", raidID);
    364 		}
    365 	}
    366 }
    367 
    368 
    369 int
    370 raidsize(dev)
    371 	dev_t   dev;
    372 {
    373 	struct raid_softc *rs;
    374 	struct disklabel *lp;
    375 	int     part, unit, omask, size;
    376 
    377 	unit = raidunit(dev);
    378 	if (unit >= numraid)
    379 		return (-1);
    380 	rs = &raid_softc[unit];
    381 
    382 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    383 		return (-1);
    384 
    385 	part = DISKPART(dev);
    386 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    387 	lp = rs->sc_dkdev.dk_label;
    388 
    389 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    390 		return (-1);
    391 
    392 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    393 		size = -1;
    394 	else
    395 		size = lp->d_partitions[part].p_size *
    396 		    (lp->d_secsize / DEV_BSIZE);
    397 
    398 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    399 		return (-1);
    400 
    401 	return (size);
    402 
    403 }
    404 
    405 int
    406 raiddump(dev, blkno, va, size)
    407 	dev_t   dev;
    408 	daddr_t blkno;
    409 	caddr_t va;
    410 	size_t  size;
    411 {
    412 	/* Not implemented. */
    413 	return ENXIO;
    414 }
    415 /* ARGSUSED */
/*
 * raidopen: open entry point for the block and character devices.
 *
 * Validates the unit and partition, takes the per-unit lock for the
 * duration, reads the disklabel on the first open of a configured
 * unit, records the open in the appropriate openmask, and marks all
 * components dirty on the transition from fully-closed to open.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against concurrent open/close/unconfigure. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	/* First open of a configured unit: refresh the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	/* NOTE(review): dk_openmask is intentionally still the
	   pre-open value here, so this fires only on the transition
	   from "no partitions open" to "some partition open". */
	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}

	/* Recompute the combined openmask now that we're recorded. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
    494 /* ARGSUSED */
    495 int
    496 raidclose(dev, flags, fmt, p)
    497 	dev_t   dev;
    498 	int     flags, fmt;
    499 	struct proc *p;
    500 {
    501 	int     unit = raidunit(dev);
    502 	struct raid_softc *rs;
    503 	int     error = 0;
    504 	int     part;
    505 
    506 	if (unit >= numraid)
    507 		return (ENXIO);
    508 	rs = &raid_softc[unit];
    509 
    510 	if ((error = raidlock(rs)) != 0)
    511 		return (error);
    512 
    513 	part = DISKPART(dev);
    514 
    515 	/* ...that much closer to allowing unconfiguration... */
    516 	switch (fmt) {
    517 	case S_IFCHR:
    518 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    519 		break;
    520 
    521 	case S_IFBLK:
    522 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    523 		break;
    524 	}
    525 	rs->sc_dkdev.dk_openmask =
    526 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    527 
    528 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    529 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    530 		/* Last one... device is not unconfigured yet.
    531 		   Device shutdown has taken care of setting the
    532 		   clean bits if RAIDF_INITED is not set
    533 		   mark things as clean... */
    534 		rf_update_component_labels( raidPtrs[unit] );
    535 	}
    536 
    537 	raidunlock(rs);
    538 	return (0);
    539 
    540 }
    541 
    542 void
    543 raidstrategy(bp)
    544 	register struct buf *bp;
    545 {
    546 	register int s;
    547 
    548 	unsigned int raidID = raidunit(bp->b_dev);
    549 	RF_Raid_t *raidPtr;
    550 	struct raid_softc *rs = &raid_softc[raidID];
    551 	struct disklabel *lp;
    552 	int     wlabel;
    553 
    554 #if 0
    555 	db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
    556 	db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
    557 	db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
    558 	db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
    559 	db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
    560 
    561 	if (bp->b_flags & B_READ)
    562 		db1_printf(("READ\n"));
    563 	else
    564 		db1_printf(("WRITE\n"));
    565 #endif
    566 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    567 		bp->b_error = ENXIO;
    568 		bp->b_flags = B_ERROR;
    569 		bp->b_resid = bp->b_bcount;
    570 		biodone(bp);
    571 		return;
    572 	}
    573 	if (raidID >= numraid || !raidPtrs[raidID]) {
    574 		bp->b_error = ENODEV;
    575 		bp->b_flags |= B_ERROR;
    576 		bp->b_resid = bp->b_bcount;
    577 		biodone(bp);
    578 		return;
    579 	}
    580 	raidPtr = raidPtrs[raidID];
    581 	if (!raidPtr->valid) {
    582 		bp->b_error = ENODEV;
    583 		bp->b_flags |= B_ERROR;
    584 		bp->b_resid = bp->b_bcount;
    585 		biodone(bp);
    586 		return;
    587 	}
    588 	if (bp->b_bcount == 0) {
    589 		db1_printf(("b_bcount is zero..\n"));
    590 		biodone(bp);
    591 		return;
    592 	}
    593 	lp = rs->sc_dkdev.dk_label;
    594 
    595 	/*
    596 	 * Do bounds checking and adjust transfer.  If there's an
    597 	 * error, the bounds check will flag that for us.
    598 	 */
    599 
    600 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    601 	if (DISKPART(bp->b_dev) != RAW_PART)
    602 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    603 			db1_printf(("Bounds check failed!!:%d %d\n",
    604 				(int) bp->b_blkno, (int) wlabel));
    605 			biodone(bp);
    606 			return;
    607 		}
    608 	s = splbio();		/* XXX Needed? */
    609 	db1_printf(("Beginning strategy...\n"));
    610 
    611 	bp->b_resid = 0;
    612 	bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
    613 	    NULL, NULL, NULL);
    614 	if (bp->b_error) {
    615 		bp->b_flags |= B_ERROR;
    616 		db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
    617 			bp->b_error));
    618 	}
    619 	splx(s);
    620 #if 0
    621 	db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
    622 		bp, bp->b_data,
    623 		(int) bp->b_bcount, (int) bp->b_resid));
    624 #endif
    625 }
    626 /* ARGSUSED */
    627 int
    628 raidread(dev, uio, flags)
    629 	dev_t   dev;
    630 	struct uio *uio;
    631 	int     flags;
    632 {
    633 	int     unit = raidunit(dev);
    634 	struct raid_softc *rs;
    635 	int     part;
    636 
    637 	if (unit >= numraid)
    638 		return (ENXIO);
    639 	rs = &raid_softc[unit];
    640 
    641 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    642 		return (ENXIO);
    643 	part = DISKPART(dev);
    644 
    645 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    646 
    647 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    648 
    649 }
    650 /* ARGSUSED */
    651 int
    652 raidwrite(dev, uio, flags)
    653 	dev_t   dev;
    654 	struct uio *uio;
    655 	int     flags;
    656 {
    657 	int     unit = raidunit(dev);
    658 	struct raid_softc *rs;
    659 
    660 	if (unit >= numraid)
    661 		return (ENXIO);
    662 	rs = &raid_softc[unit];
    663 
    664 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    665 		return (ENXIO);
    666 	db1_printf(("raidwrite\n"));
    667 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    668 
    669 }
    670 
    671 int
    672 raidioctl(dev, cmd, data, flag, p)
    673 	dev_t   dev;
    674 	u_long  cmd;
    675 	caddr_t data;
    676 	int     flag;
    677 	struct proc *p;
    678 {
    679 	int     unit = raidunit(dev);
    680 	int     error = 0;
    681 	int     part, pmask;
    682 	struct raid_softc *rs;
    683 	RF_Config_t *k_cfg, *u_cfg;
    684 	u_char *specific_buf;
    685 	int retcode = 0;
    686 	int row;
    687 	int column;
    688 	int s;
    689 	struct rf_recon_req *rrcopy, *rr;
    690 	RF_ComponentLabel_t *component_label;
    691 	RF_ComponentLabel_t ci_label;
    692 	RF_ComponentLabel_t **c_label_ptr;
    693 	RF_SingleComponent_t *sparePtr,*componentPtr;
    694 	RF_SingleComponent_t hot_spare;
    695 	RF_SingleComponent_t component;
    696 
    697 	if (unit >= numraid)
    698 		return (ENXIO);
    699 	rs = &raid_softc[unit];
    700 
    701 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    702 		(int) DISKPART(dev), (int) unit, (int) cmd));
    703 
    704 	/* Must be open for writes for these commands... */
    705 	switch (cmd) {
    706 	case DIOCSDINFO:
    707 	case DIOCWDINFO:
    708 	case DIOCWLABEL:
    709 		if ((flag & FWRITE) == 0)
    710 			return (EBADF);
    711 	}
    712 
    713 	/* Must be initialized for these... */
    714 	switch (cmd) {
    715 	case DIOCGDINFO:
    716 	case DIOCSDINFO:
    717 	case DIOCWDINFO:
    718 	case DIOCGPART:
    719 	case DIOCWLABEL:
    720 	case DIOCGDEFLABEL:
    721 	case RAIDFRAME_SHUTDOWN:
    722 	case RAIDFRAME_REWRITEPARITY:
    723 	case RAIDFRAME_GET_INFO:
    724 	case RAIDFRAME_RESET_ACCTOTALS:
    725 	case RAIDFRAME_GET_ACCTOTALS:
    726 	case RAIDFRAME_KEEP_ACCTOTALS:
    727 	case RAIDFRAME_GET_SIZE:
    728 	case RAIDFRAME_FAIL_DISK:
    729 	case RAIDFRAME_COPYBACK:
    730 	case RAIDFRAME_CHECKRECON:
    731 	case RAIDFRAME_GET_COMPONENT_LABEL:
    732 	case RAIDFRAME_SET_COMPONENT_LABEL:
    733 	case RAIDFRAME_ADD_HOT_SPARE:
    734 	case RAIDFRAME_REMOVE_HOT_SPARE:
    735 	case RAIDFRAME_INIT_LABELS:
    736 	case RAIDFRAME_REBUILD_IN_PLACE:
    737 	case RAIDFRAME_CHECK_PARITY:
    738 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    739 			return (ENXIO);
    740 	}
    741 
    742 	switch (cmd) {
    743 
    744 
    745 		/* configure the system */
    746 	case RAIDFRAME_CONFIGURE:
    747 
    748 		db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
    749 		/* copy-in the configuration information */
    750 		/* data points to a pointer to the configuration structure */
    751 		u_cfg = *((RF_Config_t **) data);
    752 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    753 		if (k_cfg == NULL) {
    754 			db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
    755 			return (ENOMEM);
    756 		}
    757 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    758 		    sizeof(RF_Config_t));
    759 		if (retcode) {
    760 			RF_Free(k_cfg, sizeof(RF_Config_t));
    761 			db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
    762 				retcode));
    763 			return (retcode);
    764 		}
    765 		/* allocate a buffer for the layout-specific data, and copy it
    766 		 * in */
    767 		if (k_cfg->layoutSpecificSize) {
    768 			if (k_cfg->layoutSpecificSize > 10000) {
    769 				/* sanity check */
    770 				RF_Free(k_cfg, sizeof(RF_Config_t));
    771 				db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
    772 				return (EINVAL);
    773 			}
    774 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    775 			    (u_char *));
    776 			if (specific_buf == NULL) {
    777 				RF_Free(k_cfg, sizeof(RF_Config_t));
    778 				db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
    779 				return (ENOMEM);
    780 			}
    781 			retcode = copyin(k_cfg->layoutSpecific,
    782 			    (caddr_t) specific_buf,
    783 			    k_cfg->layoutSpecificSize);
    784 			if (retcode) {
    785 				RF_Free(k_cfg, sizeof(RF_Config_t));
    786 				RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    787 				db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
    788 					retcode));
    789 				return (retcode);
    790 			}
    791 		} else
    792 			specific_buf = NULL;
    793 		k_cfg->layoutSpecific = specific_buf;
    794 
    795 		/* should do some kind of sanity check on the configuration.
    796 		 * Store the sum of all the bytes in the last byte? */
    797 
    798 		/* configure the system */
    799 
    800 		raidPtrs[unit]->raidid = unit;
    801 
    802 		retcode = rf_Configure(raidPtrs[unit], k_cfg);
    803 
    804 		/* allow this many simultaneous IO's to this RAID device */
    805 		raidPtrs[unit]->openings = RAIDOUTSTANDING;
    806 
    807 		if (retcode == 0) {
    808 			retcode = raidinit(dev, raidPtrs[unit], unit);
    809 			rf_markalldirty( raidPtrs[unit] );
    810 		}
    811 		/* free the buffers.  No return code here. */
    812 		if (k_cfg->layoutSpecificSize) {
    813 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    814 		}
    815 		RF_Free(k_cfg, sizeof(RF_Config_t));
    816 
    817 		db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
    818 			retcode));
    819 
    820 		return (retcode);
    821 
    822 		/* shutdown the system */
    823 	case RAIDFRAME_SHUTDOWN:
    824 
    825 		if ((error = raidlock(rs)) != 0)
    826 			return (error);
    827 
    828 		/*
    829 		 * If somebody has a partition mounted, we shouldn't
    830 		 * shutdown.
    831 		 */
    832 
    833 		part = DISKPART(dev);
    834 		pmask = (1 << part);
    835 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    836 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    837 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    838 			raidunlock(rs);
    839 			return (EBUSY);
    840 		}
    841 
    842 		if (rf_debugKernelAccess) {
    843 			printf("call shutdown\n");
    844 		}
    845 
    846 		retcode = rf_Shutdown(raidPtrs[unit]);
    847 
    848 		db1_printf(("Done main shutdown\n"));
    849 
    850 		pool_destroy(&rs->sc_cbufpool);
    851 		db1_printf(("Done freeing component buffer freelist\n"));
    852 
    853 		/* It's no longer initialized... */
    854 		rs->sc_flags &= ~RAIDF_INITED;
    855 
    856 		/* Detach the disk. */
    857 		disk_detach(&rs->sc_dkdev);
    858 
    859 		raidunlock(rs);
    860 
    861 		return (retcode);
    862 	case RAIDFRAME_GET_COMPONENT_LABEL:
    863 		c_label_ptr = (RF_ComponentLabel_t **) data;
    864 		/* need to read the component label for the disk indicated
    865 		   by row,column in component_label
    866 		   XXX need to sanity check these values!!!
    867 		   */
    868 
    869 		/* For practice, let's get it directly fromdisk, rather
    870 		   than from the in-core copy */
    871 		RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
    872 			   (RF_ComponentLabel_t *));
    873 		if (component_label == NULL)
    874 			return (ENOMEM);
    875 
    876 		bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
    877 
    878 		retcode = copyin( *c_label_ptr, component_label,
    879 				  sizeof(RF_ComponentLabel_t));
    880 
    881 		if (retcode) {
    882 			RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    883 			return(retcode);
    884 		}
    885 
    886 		row = component_label->row;
    887 		column = component_label->column;
    888 
    889 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
    890 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
    891 			RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    892 			return(EINVAL);
    893 		}
    894 
    895 		raidread_component_label(
    896                               raidPtrs[unit]->Disks[row][column].dev,
    897 			      raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    898 			      component_label );
    899 
    900 		retcode = copyout((caddr_t) component_label,
    901 				  (caddr_t) *c_label_ptr,
    902 				  sizeof(RF_ComponentLabel_t));
    903 		RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    904 		return (retcode);
    905 
    906 	case RAIDFRAME_SET_COMPONENT_LABEL:
    907 		component_label = (RF_ComponentLabel_t *) data;
    908 
    909 		/* XXX check the label for valid stuff... */
    910 		/* Note that some things *should not* get modified --
    911 		   the user should be re-initing the labels instead of
    912 		   trying to patch things.
    913 		   */
    914 
    915 		printf("Got component label:\n");
    916 		printf("Version: %d\n",component_label->version);
    917 		printf("Serial Number: %d\n",component_label->serial_number);
    918 		printf("Mod counter: %d\n",component_label->mod_counter);
    919 		printf("Row: %d\n", component_label->row);
    920 		printf("Column: %d\n", component_label->column);
    921 		printf("Num Rows: %d\n", component_label->num_rows);
    922 		printf("Num Columns: %d\n", component_label->num_columns);
    923 		printf("Clean: %d\n", component_label->clean);
    924 		printf("Status: %d\n", component_label->status);
    925 
    926 		row = component_label->row;
    927 		column = component_label->column;
    928 
    929 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
    930 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
    931 			return(EINVAL);
    932 		}
    933 
    934 		/* XXX this isn't allowed to do anything for now :-) */
    935 #if 0
    936 		raidwrite_component_label(
    937                             raidPtrs[unit]->Disks[row][column].dev,
    938 			    raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    939 			    component_label );
    940 #endif
    941 		return (0);
    942 
    943 	case RAIDFRAME_INIT_LABELS:
    944 		component_label = (RF_ComponentLabel_t *) data;
    945 		/*
    946 		   we only want the serial number from
    947 		   the above.  We get all the rest of the information
    948 		   from the config that was used to create this RAID
    949 		   set.
    950 		   */
    951 
    952 		raidPtrs[unit]->serial_number = component_label->serial_number;
    953 		/* current version number */
    954 		ci_label.version = RF_COMPONENT_LABEL_VERSION;
    955 		ci_label.serial_number = component_label->serial_number;
    956 		ci_label.mod_counter = raidPtrs[unit]->mod_counter;
    957 		ci_label.num_rows = raidPtrs[unit]->numRow;
    958 		ci_label.num_columns = raidPtrs[unit]->numCol;
    959 		ci_label.clean = RF_RAID_DIRTY; /* not clean */
    960 		ci_label.status = rf_ds_optimal; /* "It's good!" */
    961 
    962 		for(row=0;row<raidPtrs[unit]->numRow;row++) {
    963 			ci_label.row = row;
    964 			for(column=0;column<raidPtrs[unit]->numCol;column++) {
    965 				ci_label.column = column;
    966 				raidwrite_component_label(
    967 				  raidPtrs[unit]->Disks[row][column].dev,
    968 				  raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    969 				  &ci_label );
    970 			}
    971 		}
    972 
    973 		return (retcode);
    974 
    975 		/* initialize all parity */
    976 	case RAIDFRAME_REWRITEPARITY:
    977 
    978 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
    979 			/* Parity for RAID 0 is trivially correct */
    980 			raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
    981 			return(0);
    982 		}
    983 
    984 		/* borrow the thread of the requesting process */
    985 
    986 		s = splbio();
    987 		retcode = rf_RewriteParity(raidPtrs[unit]);
    988 		splx(s);
    989 		/* return I/O Error if the parity rewrite fails */
    990 
    991 		if (retcode) {
    992 			retcode = EIO;
    993 		} else {
    994 			/* set the clean bit!  If we shutdown correctly,
    995 			 the clean bit on each component label will get
    996 			 set */
    997 			raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
    998 		}
    999 		return (retcode);
   1000 
   1001 
   1002 	case RAIDFRAME_ADD_HOT_SPARE:
   1003 		sparePtr = (RF_SingleComponent_t *) data;
   1004 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1005 		printf("Adding spare\n");
   1006 		retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
   1007 		return(retcode);
   1008 
   1009 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1010 		return(retcode);
   1011 
   1012 	case RAIDFRAME_REBUILD_IN_PLACE:
   1013 
   1014 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1015 			/* Can't do this on a RAID 0!! */
   1016 			return(EINVAL);
   1017 		}
   1018 
   1019 		componentPtr = (RF_SingleComponent_t *) data;
   1020 		memcpy( &component, componentPtr,
   1021 			sizeof(RF_SingleComponent_t));
   1022 		row = component.row;
   1023 		column = component.column;
   1024 		printf("Rebuild: %d %d\n",row, column);
   1025 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
   1026 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
   1027 			return(EINVAL);
   1028 		}
   1029 		printf("Attempting a rebuild in place\n");
   1030 		s = splbio();
   1031 		retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
   1032 		splx(s);
   1033 		return(retcode);
   1034 
   1035 	case RAIDFRAME_GET_INFO:
   1036 		{
   1037 			RF_Raid_t *raid = raidPtrs[unit];
   1038 			RF_DeviceConfig_t *cfg, **ucfgp;
   1039 			int     i, j, d;
   1040 
   1041 			if (!raid->valid)
   1042 				return (ENODEV);
   1043 			ucfgp = (RF_DeviceConfig_t **) data;
   1044 			RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
   1045 				  (RF_DeviceConfig_t *));
   1046 			if (cfg == NULL)
   1047 				return (ENOMEM);
   1048 			bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
   1049 			cfg->rows = raid->numRow;
   1050 			cfg->cols = raid->numCol;
   1051 			cfg->ndevs = raid->numRow * raid->numCol;
   1052 			if (cfg->ndevs >= RF_MAX_DISKS) {
   1053 				RF_Free(cfg, sizeof(RF_DeviceConfig_t));
   1054 				return (ENOMEM);
   1055 			}
   1056 			cfg->nspares = raid->numSpare;
   1057 			if (cfg->nspares >= RF_MAX_DISKS) {
   1058 				RF_Free(cfg, sizeof(RF_DeviceConfig_t));
   1059 				return (ENOMEM);
   1060 			}
   1061 			cfg->maxqdepth = raid->maxQueueDepth;
   1062 			d = 0;
   1063 			for (i = 0; i < cfg->rows; i++) {
   1064 				for (j = 0; j < cfg->cols; j++) {
   1065 					cfg->devs[d] = raid->Disks[i][j];
   1066 					d++;
   1067 				}
   1068 			}
   1069 			for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
   1070 				cfg->spares[i] = raid->Disks[0][j];
   1071 			}
   1072 			retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
   1073 					  sizeof(RF_DeviceConfig_t));
   1074 			RF_Free(cfg, sizeof(RF_DeviceConfig_t));
   1075 
   1076 			return (retcode);
   1077 		}
   1078 		break;
   1079 	case RAIDFRAME_CHECK_PARITY:
   1080 		*(int *) data = raidPtrs[unit]->parity_good;
   1081 		return (0);
   1082 	case RAIDFRAME_RESET_ACCTOTALS:
   1083 		{
   1084 			RF_Raid_t *raid = raidPtrs[unit];
   1085 
   1086 			bzero(&raid->acc_totals, sizeof(raid->acc_totals));
   1087 			return (0);
   1088 		}
   1089 		break;
   1090 
   1091 	case RAIDFRAME_GET_ACCTOTALS:
   1092 		{
   1093 			RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
   1094 			RF_Raid_t *raid = raidPtrs[unit];
   1095 
   1096 			*totals = raid->acc_totals;
   1097 			return (0);
   1098 		}
   1099 		break;
   1100 
   1101 	case RAIDFRAME_KEEP_ACCTOTALS:
   1102 		{
   1103 			RF_Raid_t *raid = raidPtrs[unit];
   1104 			int    *keep = (int *) data;
   1105 
   1106 			raid->keep_acc_totals = *keep;
   1107 			return (0);
   1108 		}
   1109 		break;
   1110 
   1111 	case RAIDFRAME_GET_SIZE:
   1112 		*(int *) data = raidPtrs[unit]->totalSectors;
   1113 		return (0);
   1114 
   1115 #define RAIDFRAME_RECON 1
   1116 		/* XXX The above should probably be set somewhere else!! GO */
   1117 #if RAIDFRAME_RECON > 0
   1118 
   1119 		/* fail a disk & optionally start reconstruction */
   1120 	case RAIDFRAME_FAIL_DISK:
   1121 
   1122 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1123 			/* Can't do this on a RAID 0!! */
   1124 			return(EINVAL);
   1125 		}
   1126 
   1127 		rr = (struct rf_recon_req *) data;
   1128 
   1129 		if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
   1130 		    || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
   1131 			return (EINVAL);
   1132 
   1133 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1134 		       unit, rr->row, rr->col);
   1135 
   1136 		/* make a copy of the recon request so that we don't rely on
   1137 		 * the user's buffer */
   1138 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1139 		bcopy(rr, rrcopy, sizeof(*rr));
   1140 		rrcopy->raidPtr = (void *) raidPtrs[unit];
   1141 
   1142 		LOCK_RECON_Q_MUTEX();
   1143 		rrcopy->next = recon_queue;
   1144 		recon_queue = rrcopy;
   1145 		wakeup(&recon_queue);
   1146 		UNLOCK_RECON_Q_MUTEX();
   1147 
   1148 		return (0);
   1149 
   1150 		/* invoke a copyback operation after recon on whatever disk
   1151 		 * needs it, if any */
   1152 	case RAIDFRAME_COPYBACK:
   1153 
   1154 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1155 			/* This makes no sense on a RAID 0!! */
   1156 			return(EINVAL);
   1157 		}
   1158 
   1159 		/* borrow the current thread to get this done */
   1160 
   1161 		s = splbio();
   1162 		rf_CopybackReconstructedData(raidPtrs[unit]);
   1163 		splx(s);
   1164 		return (0);
   1165 
   1166 		/* return the percentage completion of reconstruction */
   1167 	case RAIDFRAME_CHECKRECON:
   1168 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1169 			/* This makes no sense on a RAID 0 */
   1170 			return(EINVAL);
   1171 		}
   1172 
   1173 		row = *(int *) data;
   1174 		if (row < 0 || row >= raidPtrs[unit]->numRow)
   1175 			return (EINVAL);
   1176 		if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
   1177 			*(int *) data = 100;
   1178 		else
   1179 			*(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
   1180 		return (0);
   1181 
   1182 		/* the sparetable daemon calls this to wait for the kernel to
   1183 		 * need a spare table. this ioctl does not return until a
   1184 		 * spare table is needed. XXX -- calling mpsleep here in the
   1185 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1186 		 * -- I should either compute the spare table in the kernel,
   1187 		 * or have a different -- XXX XXX -- interface (a different
   1188 		 * character device) for delivering the table          -- XXX */
   1189 #if 0
   1190 	case RAIDFRAME_SPARET_WAIT:
   1191 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1192 		while (!rf_sparet_wait_queue)
   1193 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1194 		waitreq = rf_sparet_wait_queue;
   1195 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1196 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1197 
   1198 		*((RF_SparetWait_t *) data) = *waitreq;	/* structure assignment */
   1199 
   1200 		RF_Free(waitreq, sizeof(*waitreq));
   1201 		return (0);
   1202 
   1203 
   1204 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1205 		 * code in it that will cause the dameon to exit */
   1206 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1207 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1208 		waitreq->fcol = -1;
   1209 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1210 		waitreq->next = rf_sparet_wait_queue;
   1211 		rf_sparet_wait_queue = waitreq;
   1212 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1213 		wakeup(&rf_sparet_wait_queue);
   1214 		return (0);
   1215 
   1216 		/* used by the spare table daemon to deliver a spare table
   1217 		 * into the kernel */
   1218 	case RAIDFRAME_SEND_SPARET:
   1219 
   1220 		/* install the spare table */
   1221 		retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
   1222 
   1223 		/* respond to the requestor.  the return status of the spare
   1224 		 * table installation is passed in the "fcol" field */
   1225 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1226 		waitreq->fcol = retcode;
   1227 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1228 		waitreq->next = rf_sparet_resp_queue;
   1229 		rf_sparet_resp_queue = waitreq;
   1230 		wakeup(&rf_sparet_resp_queue);
   1231 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1232 
   1233 		return (retcode);
   1234 #endif
   1235 
   1236 
   1237 #endif				/* RAIDFRAME_RECON > 0 */
   1238 
   1239 	default:
   1240 		break;		/* fall through to the os-specific code below */
   1241 
   1242 	}
   1243 
   1244 	if (!raidPtrs[unit]->valid)
   1245 		return (EINVAL);
   1246 
   1247 	/*
   1248 	 * Add support for "regular" device ioctls here.
   1249 	 */
   1250 
   1251 	switch (cmd) {
   1252 	case DIOCGDINFO:
   1253 		db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
   1254 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1255 		break;
   1256 
   1257 	case DIOCGPART:
   1258 		db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
   1259 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1260 		((struct partinfo *) data)->part =
   1261 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1262 		break;
   1263 
   1264 	case DIOCWDINFO:
   1265 		db1_printf(("DIOCWDINFO\n"));
   1266 	case DIOCSDINFO:
   1267 		db1_printf(("DIOCSDINFO\n"));
   1268 		if ((error = raidlock(rs)) != 0)
   1269 			return (error);
   1270 
   1271 		rs->sc_flags |= RAIDF_LABELLING;
   1272 
   1273 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1274 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1275 		if (error == 0) {
   1276 			if (cmd == DIOCWDINFO)
   1277 				error = writedisklabel(RAIDLABELDEV(dev),
   1278 				    raidstrategy, rs->sc_dkdev.dk_label,
   1279 				    rs->sc_dkdev.dk_cpulabel);
   1280 		}
   1281 		rs->sc_flags &= ~RAIDF_LABELLING;
   1282 
   1283 		raidunlock(rs);
   1284 
   1285 		if (error)
   1286 			return (error);
   1287 		break;
   1288 
   1289 	case DIOCWLABEL:
   1290 		db1_printf(("DIOCWLABEL\n"));
   1291 		if (*(int *) data != 0)
   1292 			rs->sc_flags |= RAIDF_WLABEL;
   1293 		else
   1294 			rs->sc_flags &= ~RAIDF_WLABEL;
   1295 		break;
   1296 
   1297 	case DIOCGDEFLABEL:
   1298 		db1_printf(("DIOCGDEFLABEL\n"));
   1299 		raidgetdefaultlabel(raidPtrs[unit], rs,
   1300 		    (struct disklabel *) data);
   1301 		break;
   1302 
   1303 	default:
   1304 		retcode = ENOTTY;	/* XXXX ?? OR EINVAL ? */
   1305 	}
   1306 	return (retcode);
   1307 
   1308 }
   1309 
   1310 
   1311 /* raidinit -- complete the rest of the initialization for the
   1312    RAIDframe device.  */
   1313 
   1314 
   1315 static int
   1316 raidinit(dev, raidPtr, unit)
   1317 	dev_t   dev;
   1318 	RF_Raid_t *raidPtr;
   1319 	int     unit;
   1320 {
   1321 	int     retcode;
   1322 	/* int ix; */
   1323 	/* struct raidbuf *raidbp; */
   1324 	struct raid_softc *rs;
   1325 
   1326 	retcode = 0;
   1327 
   1328 	rs = &raid_softc[unit];
   1329 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1330 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1331 
   1332 
   1333 	/* XXX should check return code first... */
   1334 	rs->sc_flags |= RAIDF_INITED;
   1335 
   1336 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1337 
   1338 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1339 
   1340 	/* disk_attach actually creates space for the CPU disklabel, among
   1341 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1342 	 * with disklabels. */
   1343 
   1344 	disk_attach(&rs->sc_dkdev);
   1345 
   1346 	/* XXX There may be a weird interaction here between this, and
   1347 	 * protectedSectors, as used in RAIDframe.  */
   1348 
   1349 	rs->sc_size = raidPtr->totalSectors;
   1350 	rs->sc_dev = dev;
   1351 
   1352 	return (retcode);
   1353 }
   1354 
   1355 /*
   1356  * This kernel thread never exits.  It is created once, and persists
   1357  * until the system reboots.
   1358  */
   1359 
void
rf_ReconKernelThread()
{
	struct rf_recon_req *req;
	int     s;

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	/* Note: 's' is never splx()'d -- this thread loops forever below
	 * and never leaves splbio(). */
	s = splbio();

	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		while (!recon_queue) {
			/* Queue is empty: drop the mutex while sleeping so
			 * the enqueuer (the FAIL_DISK ioctl path) can add
			 * work and wakeup(&recon_queue); re-take the mutex
			 * before re-testing the condition. */
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO,
			       "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		/* Pop the head of the singly-linked request list. */
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
	         * If flags specifies that we should start recon, this call
	         * will not return until reconstruction completes, fails,
		 * or is aborted.
	         */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* The request was RF_Malloc'd by the enqueuer; ownership
		 * passed to us with the queue entry, so we free it. */
		RF_Free(req, sizeof(*req));
	}
}
   1394 /* wake up the daemon & tell it to get us a spare table
   1395  * XXX
   1396  * the entries in the queues should be tagged with the raidPtr
   1397  * so that in the extremely rare case that two recons happen at once,
   1398  * we know for which device were requesting a spare table
   1399  * XXX
   1400  */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode;

	/* Enqueue our request for the spare-table daemon and wake any
	 * process waiting in RAIDFRAME_SPARET_WAIT. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* NOTE(review): an older comment here claimed "mpsleep unlocks the
	 * mutex", but tsleep() does NOT release rf_sparet_wait_mutex -- we
	 * sleep while still holding it.  Presumably this lock is benign to
	 * hold across a sleep in this port; confirm. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* Pop the daemon's response.  'req' is reused: from here on it
	 * points at the response structure, not the request we enqueued. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The daemon passes the installation status back in fcol. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
   1426 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1427  * bp & passes it down.
   1428  * any calls originating in the kernel must use non-blocking I/O
   1429  * do some extra sanity checking to return "appropriate" error values for
   1430  * certain conditions (to make some standard utilities work)
   1431  */
   1432 int
   1433 rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
   1434 	RF_Raid_t *raidPtr;
   1435 	struct buf *bp;
   1436 	RF_RaidAccessFlags_t flags;
   1437 	void    (*cbFunc) (struct buf *);
   1438 	void   *cbArg;
   1439 {
   1440 	RF_SectorCount_t num_blocks, pb, sum;
   1441 	RF_RaidAddr_t raid_addr;
   1442 	int     retcode;
   1443 	struct partition *pp;
   1444 	daddr_t blocknum;
   1445 	int     unit;
   1446 	struct raid_softc *rs;
   1447 	int     do_async;
   1448 
   1449 	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */
   1450 
   1451 	unit = raidPtr->raidid;
   1452 	rs = &raid_softc[unit];
   1453 
   1454 	/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1455 	 * partition.. Need to make it absolute to the underlying device.. */
   1456 
   1457 	blocknum = bp->b_blkno;
   1458 	if (DISKPART(bp->b_dev) != RAW_PART) {
   1459 		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1460 		blocknum += pp->p_offset;
   1461 		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
   1462 			pp->p_offset));
   1463 	} else {
   1464 		db1_printf(("Is raw..\n"));
   1465 	}
   1466 	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));
   1467 
   1468 	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1469 	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1470 
   1471 	/* *THIS* is where we adjust what block we're going to... but DO NOT
   1472 	 * TOUCH bp->b_blkno!!! */
   1473 	raid_addr = blocknum;
   1474 
   1475 	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1476 	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1477 	sum = raid_addr + num_blocks + pb;
   1478 	if (1 || rf_debugKernelAccess) {
   1479 		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1480 			(int) raid_addr, (int) sum, (int) num_blocks,
   1481 			(int) pb, (int) bp->b_resid));
   1482 	}
   1483 	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1484 	    || (sum < num_blocks) || (sum < pb)) {
   1485 		bp->b_error = ENOSPC;
   1486 		bp->b_flags |= B_ERROR;
   1487 		bp->b_resid = bp->b_bcount;
   1488 		biodone(bp);
   1489 		return (bp->b_error);
   1490 	}
   1491 	/*
   1492 	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1493 	 */
   1494 
   1495 	if (bp->b_bcount & raidPtr->sectorMask) {
   1496 		bp->b_error = EINVAL;
   1497 		bp->b_flags |= B_ERROR;
   1498 		bp->b_resid = bp->b_bcount;
   1499 		biodone(bp);
   1500 		return (bp->b_error);
   1501 	}
   1502 	db1_printf(("Calling DoAccess..\n"));
   1503 
   1504 
   1505 	/* Put a throttle on the number of requests we handle simultanously */
   1506 
   1507 	RF_LOCK_MUTEX(raidPtr->mutex);
   1508 
   1509 	while(raidPtr->openings <= 0) {
   1510 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1511 		(void)tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);
   1512 		RF_LOCK_MUTEX(raidPtr->mutex);
   1513 	}
   1514 	raidPtr->openings--;
   1515 
   1516 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1517 
   1518 	/*
   1519 	 * Everything is async.
   1520 	 */
   1521 	do_async = 1;
   1522 
   1523 	/* don't ever condition on bp->b_flags & B_WRITE.  always condition on
   1524 	 * B_READ instead */
   1525 	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1526 	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1527 	    do_async, raid_addr, num_blocks,
   1528 	    bp->b_un.b_addr,
   1529 	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
   1530 	    NULL, cbFunc, cbArg);
   1531 #if 0
   1532 	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
   1533 		bp->b_data, (int) bp->b_resid));
   1534 #endif
   1535 
   1536 	return (retcode);
   1537 }
   1538 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1539 
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int     unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	/* Remember which queue this request came from; KernelWakeupFunc
	 * reads it back via req->queue on completion. */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	/* Clear any stale error state left in the incoming buffer so it
	 * doesn't masquerade as a failure of this I/O. */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* Grab a component buffer from the per-unit pool; released by
	 * RAIDPUTBUF() in KernelWakeupFunc. */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this??  */

		/* No real I/O: invoke the completion handler directly. */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* Set up the component buf; KernelWakeupFunc is installed
		 * as the b_iodone callback with 'req' as its context. */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* Writes must be counted in the vnode's output tally. */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	/* Always reports success; real I/O status arrives asynchronously
	 * through KernelWakeupFunc. */
	return (0);
}
   1654 /* this is the callback function associated with a I/O invoked from
   1655    kernel code.
   1656  */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int     unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	/* Recover the dispatch context stashed by rf_DispatchKernelIO:
	 * the queue request and the original (upper-level) buf. */
	req = raidbp->req;

	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* Propagate a component-level error into the original buf,
	 * substituting EIO when the driver set B_ERROR without a code. */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* Account the physical I/O in the trace record, if tracing. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* Return the component buffer to the per-unit pool. */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* Tell RAIDframe this queue entry is done, then run the upper
	 * layer's completion function with the final error status. */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
   1755 
   1756 
   1757 
   1758 /*
   1759  * initialize a buf structure for doing an I/O in the kernel.
   1760  */
   1761 static void
   1762 InitBP(
   1763     struct buf * bp,
   1764     struct vnode * b_vp,
   1765     unsigned rw_flag,
   1766     dev_t dev,
   1767     RF_SectorNum_t startSect,
   1768     RF_SectorCount_t numSect,
   1769     caddr_t buf,
   1770     void (*cbFunc) (struct buf *),
   1771     void *cbArg,
   1772     int logBytesPerSector,
   1773     struct proc * b_proc)
   1774 {
   1775 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1776 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1777 	bp->b_bcount = numSect << logBytesPerSector;
   1778 	bp->b_bufsize = bp->b_bcount;
   1779 	bp->b_error = 0;
   1780 	bp->b_dev = dev;
   1781 	db1_printf(("bp->b_dev is %d\n", dev));
   1782 	bp->b_un.b_addr = buf;
   1783 #if 0
   1784 	db1_printf(("bp->b_data=0x%x\n", bp->b_data));
   1785 #endif
   1786 	bp->b_blkno = startSect;
   1787 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1788 	db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
   1789 	if (bp->b_bcount == 0) {
   1790 		panic("bp->b_bcount is zero in InitBP!!\n");
   1791 	}
   1792 	bp->b_proc = b_proc;
   1793 	bp->b_iodone = cbFunc;
   1794 	bp->b_vp = b_vp;
   1795 
   1796 }
   1797 
   1798 static void
   1799 raidgetdefaultlabel(raidPtr, rs, lp)
   1800 	RF_Raid_t *raidPtr;
   1801 	struct raid_softc *rs;
   1802 	struct disklabel *lp;
   1803 {
   1804 	db1_printf(("Building a default label...\n"));
   1805 	bzero(lp, sizeof(*lp));
   1806 
   1807 	/* fabricate a label... */
   1808 	lp->d_secperunit = raidPtr->totalSectors;
   1809 	lp->d_secsize = raidPtr->bytesPerSector;
   1810 	lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
   1811 	lp->d_ntracks = 1;
   1812 	lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
   1813 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1814 
   1815 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1816 	lp->d_type = DTYPE_RAID;
   1817 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1818 	lp->d_rpm = 3600;
   1819 	lp->d_interleave = 1;
   1820 	lp->d_flags = 0;
   1821 
   1822 	lp->d_partitions[RAW_PART].p_offset = 0;
   1823 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1824 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1825 	lp->d_npartitions = RAW_PART + 1;
   1826 
   1827 	lp->d_magic = DISKMAGIC;
   1828 	lp->d_magic2 = DISKMAGIC;
   1829 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1830 
   1831 }
   1832 /*
   1833  * Read the disklabel from the raid device.  If one is not present, fake one
   1834  * up.
   1835  */
   1836 static void
   1837 raidgetdisklabel(dev)
   1838 	dev_t   dev;
   1839 {
   1840 	int     unit = raidunit(dev);
   1841 	struct raid_softc *rs = &raid_softc[unit];
   1842 	char   *errstring;
   1843 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1844 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1845 	RF_Raid_t *raidPtr;
   1846 
   1847 	db1_printf(("Getting the disklabel...\n"));
   1848 
   1849 	bzero(clp, sizeof(*clp));
   1850 
   1851 	raidPtr = raidPtrs[unit];
   1852 
   1853 	raidgetdefaultlabel(raidPtr, rs, lp);
   1854 
   1855 	/*
   1856 	 * Call the generic disklabel extraction routine.
   1857 	 */
   1858 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1859 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1860 	if (errstring)
   1861 		raidmakedisklabel(rs);
   1862 	else {
   1863 		int     i;
   1864 		struct partition *pp;
   1865 
   1866 		/*
   1867 		 * Sanity check whether the found disklabel is valid.
   1868 		 *
   1869 		 * This is necessary since total size of the raid device
   1870 		 * may vary when an interleave is changed even though exactly
   1871 		 * same componets are used, and old disklabel may used
   1872 		 * if that is found.
   1873 		 */
   1874 		if (lp->d_secperunit != rs->sc_size)
   1875 			printf("WARNING: %s: "
   1876 			    "total sector size in disklabel (%d) != "
   1877 			    "the size of raid (%ld)\n", rs->sc_xname,
   1878 			    lp->d_secperunit, (long) rs->sc_size);
   1879 		for (i = 0; i < lp->d_npartitions; i++) {
   1880 			pp = &lp->d_partitions[i];
   1881 			if (pp->p_offset + pp->p_size > rs->sc_size)
   1882 				printf("WARNING: %s: end of partition `%c' "
   1883 				    "exceeds the size of raid (%ld)\n",
   1884 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   1885 		}
   1886 	}
   1887 
   1888 }
   1889 /*
   1890  * Take care of things one might want to take care of in the event
   1891  * that a disklabel isn't present.
   1892  */
   1893 static void
   1894 raidmakedisklabel(rs)
   1895 	struct raid_softc *rs;
   1896 {
   1897 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1898 	db1_printf(("Making a label..\n"));
   1899 
   1900 	/*
   1901 	 * For historical reasons, if there's no disklabel present
   1902 	 * the raw partition must be marked FS_BSDFFS.
   1903 	 */
   1904 
   1905 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1906 
   1907 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1908 
   1909 	lp->d_checksum = dkcksum(lp);
   1910 }
   1911 /*
   1912  * Lookup the provided name in the filesystem.  If the file exists,
   1913  * is a valid block device, and isn't being used by anyone else,
   1914  * set *vpp to the file's vnode.
   1915  * You'll find the original of this in ccd.c
   1916  */
   1917 int
   1918 raidlookup(path, p, vpp)
   1919 	char   *path;
   1920 	struct proc *p;
   1921 	struct vnode **vpp;	/* result */
   1922 {
   1923 	struct nameidata nd;
   1924 	struct vnode *vp;
   1925 	struct vattr va;
   1926 	int     error;
   1927 
   1928 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   1929 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   1930 #ifdef DEBUG
   1931 		printf("RAIDframe: vn_open returned %d\n", error);
   1932 #endif
   1933 		return (error);
   1934 	}
   1935 	vp = nd.ni_vp;
   1936 	if (vp->v_usecount > 1) {
   1937 		VOP_UNLOCK(vp, 0);
   1938 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1939 		return (EBUSY);
   1940 	}
   1941 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   1942 		VOP_UNLOCK(vp, 0);
   1943 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1944 		return (error);
   1945 	}
   1946 	/* XXX: eventually we should handle VREG, too. */
   1947 	if (va.va_type != VBLK) {
   1948 		VOP_UNLOCK(vp, 0);
   1949 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1950 		return (ENOTBLK);
   1951 	}
   1952 	VOP_UNLOCK(vp, 0);
   1953 	*vpp = vp;
   1954 	return (0);
   1955 }
   1956 /*
   1957  * Wait interruptibly for an exclusive lock.
   1958  *
   1959  * XXX
   1960  * Several drivers do this; it should be abstracted and made MP-safe.
   1961  * (Hmm... where have we seen this warning before :->  GO )
   1962  */
   1963 static int
   1964 raidlock(rs)
   1965 	struct raid_softc *rs;
   1966 {
   1967 	int     error;
   1968 
   1969 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   1970 		rs->sc_flags |= RAIDF_WANTED;
   1971 		if ((error =
   1972 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   1973 			return (error);
   1974 	}
   1975 	rs->sc_flags |= RAIDF_LOCKED;
   1976 	return (0);
   1977 }
   1978 /*
   1979  * Unlock and wake up any waiters.
   1980  */
   1981 static void
   1982 raidunlock(rs)
   1983 	struct raid_softc *rs;
   1984 {
   1985 
   1986 	rs->sc_flags &= ~RAIDF_LOCKED;
   1987 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   1988 		rs->sc_flags &= ~RAIDF_WANTED;
   1989 		wakeup(rs);
   1990 	}
   1991 }
   1992 
   1993 
   1994 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   1995 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   1996 
   1997 int
   1998 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   1999 {
   2000 	RF_ComponentLabel_t component_label;
   2001 	raidread_component_label(dev, b_vp, &component_label);
   2002 	component_label.mod_counter = mod_counter;
   2003 	component_label.clean = RF_RAID_CLEAN;
   2004 	raidwrite_component_label(dev, b_vp, &component_label);
   2005 	return(0);
   2006 }
   2007 
   2008 
   2009 int
   2010 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2011 {
   2012 	RF_ComponentLabel_t component_label;
   2013 	raidread_component_label(dev, b_vp, &component_label);
   2014 	component_label.mod_counter = mod_counter;
   2015 	component_label.clean = RF_RAID_DIRTY;
   2016 	raidwrite_component_label(dev, b_vp, &component_label);
   2017 	return(0);
   2018 }
   2019 
   2020 /* ARGSUSED */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/*
	 * Read the RAIDframe component label from dev into
	 * *component_label.  The label lives in the component's reserved
	 * area at byte offset RF_COMPONENT_INFO_OFFSET.  Returns 0 on
	 * success or the errno from biowait().  b_vp is unused here
	 * (hence the ARGSUSED above).
	 */

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
 	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* hand the request straight to the component's block driver
	   and wait for the I/O to complete */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		       sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
        } else {
		printf("Failed to read RAID component label!\n");
	}

	/* mark the buffer stale so this copy is never reused from cache */
        bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
   2070 /* ARGSUSED */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/*
	 * Write *component_label to the component's reserved label area
	 * at byte offset RF_COMPONENT_INFO_OFFSET on dev.  The label is
	 * padded with zeroes to RF_COMPONENT_INFO_SIZE.  Returns 0 on
	 * success or the errno from biowait().  b_vp is unused here
	 * (hence the ARGSUSED above).
	 */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
 	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero-fill the block, then copy the label into its start */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* hand the request straight to the component's block driver
	   and wait for the I/O to complete */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* mark the buffer stale so this copy is never reused from cache */
        bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
   2104 
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	/*
	 * Stamp every non-failed component of the array as dirty with a
	 * freshly-bumped mod_counter.  Called (presumably) when the array
	 * goes active, so that a crash before the next clean-mark is
	 * detectable -- TODO confirm against callers.
	 */
	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				/* read the label only to inspect its status */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				/* rewrite the label with the dirty flag set */
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
#if 0
	/* NOTE(review): this disabled spare-handling block references
	   variables (sparecol, i, j, srow, scol) that are not declared
	   in this function, and calls raidmarkclean() with too few
	   arguments -- it would need rework before being enabled. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
   2199 
   2200 
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	/* -1 flags "no replaced position found" for the spare search below */
	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	/*
	 * Pass 1: rewrite the label of every optimal component with an
	 * up-to-date status, and mark it clean if parity is known good.
	 */
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			else if (raidPtr->Disks[r][c].status !=
				   rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	/*
	 * Pass 2: for each in-use spare, find which array position it
	 * replaced and write it a full label claiming that position.
	 * NOTE(review): spares are addressed as Disks[0][sparecol] and
	 * matched against spareRow == 0 -- this assumes all spares live
	 * in row 0; confirm against the configuration code.
	 */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* search for the (row,col) this spare stands in for;
			   XXX the break only leaves the inner loop, so the
			   outer scan continues after a match (harmless but
			   wasteful) */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
					       raidPtr->mod_counter);
			}
		}
	}
	/* 	printf("Component labels updated\n"); */
}
   2317