Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.30
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.30 1999/11/17 01:16:37 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "rf_raid.h"
    139 #include "rf_raidframe.h"
    140 #include "rf_dag.h"
    141 #include "rf_dagflags.h"
    142 #include "rf_diskqueue.h"
    143 #include "rf_acctrace.h"
    144 #include "rf_etimer.h"
    145 #include "rf_general.h"
    146 #include "rf_debugMem.h"
    147 #include "rf_kintf.h"
    148 #include "rf_options.h"
    149 #include "rf_driver.h"
    150 #include "rf_parityscan.h"
    151 #include "rf_debugprint.h"
    152 #include "rf_threadstuff.h"
    153 
int     rf_kdebug_level = 0;	/* runtime debug verbosity; gates dbN_printf below */

#ifdef DEBUG
/* dbN_printf((fmt, args...)): emit only when rf_kdebug_level exceeds N-1. */
#define db0_printf(a) printf a
#define db_printf(a) if (rf_kdebug_level > 0) printf a
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#define db2_printf(a) if (rf_kdebug_level > 1) printf a
#define db3_printf(a) if (rf_kdebug_level > 2) printf a
#define db4_printf(a) if (rf_kdebug_level > 3) printf a
#define db5_printf(a) if (rf_kdebug_level > 4) printf a
#else				/* DEBUG */
#define db0_printf(a) printf a
#define db_printf(a) { }	/* was missing here; keep in sync with DEBUG case */
#define db1_printf(a) { }
#define db2_printf(a) { }
#define db3_printf(a) { }
#define db4_printf(a) { }
#define db5_printf(a) { }
#endif				/* DEBUG */
    172 
    173 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    174 
    175 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    176 
    177 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    178 						 * spare table */
    179 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    180 						 * installation process */
    181 
    182 static struct rf_recon_req *recon_queue = NULL;	/* used to communicate
    183 						 * reconstruction
    184 						 * requests */
    185 
    186 
    187 decl_simple_lock_data(, recon_queue_mutex)
    188 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
    189 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
    190 
    191 /* prototypes */
    192 static void KernelWakeupFunc(struct buf * bp);
    193 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    194 		   dev_t dev, RF_SectorNum_t startSect,
    195 		   RF_SectorCount_t numSect, caddr_t buf,
    196 		   void (*cbFunc) (struct buf *), void *cbArg,
    197 		   int logBytesPerSector, struct proc * b_proc);
    198 
    199 #define Dprintf0(s)       if (rf_queueDebug) \
    200      rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
    201 #define Dprintf1(s,a)     if (rf_queueDebug) \
    202      rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
    203 #define Dprintf2(s,a,b)   if (rf_queueDebug) \
    204      rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
    205 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
    206      rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
    207 
    208 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
    209 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
    210 
    211 void raidattach __P((int));
    212 int raidsize __P((dev_t));
    213 
    214 void    rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
    215 void    rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
    216 static int raidinit __P((dev_t, RF_Raid_t *, int));
    217 
    218 int raidopen __P((dev_t, int, int, struct proc *));
    219 int raidclose __P((dev_t, int, int, struct proc *));
    220 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
    221 int raidwrite __P((dev_t, struct uio *, int));
    222 int raidread __P((dev_t, struct uio *, int));
    223 void raidstrategy __P((struct buf *));
    224 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    225 
    226 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
    227 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
    228 void rf_update_component_labels( RF_Raid_t *);
    229 /*
    230  * Pilfered from ccd.c
    231  */
    232 
/*
 * Per-component I/O wrapper: one of these is allocated (from the
 * per-unit pool, below) for each struct buf sent to a component disk,
 * so the completion handler can find the original request.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


/* Allocate/release a struct raidbuf from the unit's component buffer pool. */
#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    243 
    244 /* XXX Not sure if the following should be replacing the raidPtrs above,
    245    or if it should be used in conjunction with that... */
    246 
/* Per-unit software state for one RAID pseudo-device. */
struct raid_softc {
	int     sc_flags;	/* flags (RAIDF_*, below) */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;        /* size of the raid device */
	dev_t   sc_dev;	        /* our device.. */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
    262 
    263 #define	raidunit(x)	DISKUNIT(x)
    264 static int numraid = 0;
    265 
    266 /*
    267  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    268  * Be aware that large numbers can allow the driver to consume a lot of
    269  * kernel memory, especially on writes, and in degraded mode reads.
    270  *
    271  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    272  * a single 64K write will typically require 64K for the old data,
    273  * 64K for the old parity, and 64K for the new parity, for a total
    274  * of 192K (if the parity buffer is not re-used immediately).
    275  * Even it if is used immedately, that's still 128K, which when multiplied
    276  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    277  *
    278  * Now in degraded mode, for example, a 64K read on the above setup may
    279  * require data reconstruction, which will require *all* of the 4 remaining
    280  * disks to participate -- 4 * 32K/disk == 128K again.
    281  */
    282 
    283 #ifndef RAIDOUTSTANDING
    284 #define RAIDOUTSTANDING   6
    285 #endif
    286 
    287 #define RAIDLABELDEV(dev)	\
    288 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    289 
    290 /* declared here, and made public, for the benefit of KVM stuff.. */
    291 struct raid_softc *raid_softc;
    292 
    293 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
    294 				     struct disklabel *));
    295 static void raidgetdisklabel __P((dev_t));
    296 static void raidmakedisklabel __P((struct raid_softc *));
    297 
    298 static int raidlock __P((struct raid_softc *));
    299 static void raidunlock __P((struct raid_softc *));
    300 int raidlookup __P((char *, struct proc * p, struct vnode **));
    301 
    302 static void rf_markalldirty __P((RF_Raid_t *));
    303 
    304 void
    305 raidattach(num)
    306 	int     num;
    307 {
    308 	int raidID;
    309 	int i, rc;
    310 
    311 #ifdef DEBUG
    312 	printf("raidattach: Asked for %d units\n", num);
    313 #endif
    314 
    315 	if (num <= 0) {
    316 #ifdef DIAGNOSTIC
    317 		panic("raidattach: count <= 0");
    318 #endif
    319 		return;
    320 	}
    321 	/* This is where all the initialization stuff gets done. */
    322 
    323 	/* Make some space for requested number of units... */
    324 
    325 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    326 	if (raidPtrs == NULL) {
    327 		panic("raidPtrs is NULL!!\n");
    328 	}
    329 
    330 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    331 	if (rc) {
    332 		RF_PANIC();
    333 	}
    334 
    335 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    336 	recon_queue = NULL;
    337 
    338 	for (i = 0; i < numraid; i++)
    339 		raidPtrs[i] = NULL;
    340 	rc = rf_BootRaidframe();
    341 	if (rc == 0)
    342 		printf("Kernelized RAIDframe activated\n");
    343 	else
    344 		panic("Serious error booting RAID!!\n");
    345 
    346 	/* put together some datastructures like the CCD device does.. This
    347 	 * lets us lock the device and what-not when it gets opened. */
    348 
    349 	raid_softc = (struct raid_softc *)
    350 	    malloc(num * sizeof(struct raid_softc),
    351 	    M_RAIDFRAME, M_NOWAIT);
    352 	if (raid_softc == NULL) {
    353 		printf("WARNING: no memory for RAIDframe driver\n");
    354 		return;
    355 	}
    356 	numraid = num;
    357 	bzero(raid_softc, num * sizeof(struct raid_softc));
    358 
    359 	for (raidID = 0; raidID < num; raidID++) {
    360 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    361 			  (RF_Raid_t *));
    362 		if (raidPtrs[raidID] == NULL) {
    363 			printf("raidPtrs[%d] is NULL\n", raidID);
    364 		}
    365 	}
    366 }
    367 
    368 
    369 int
    370 raidsize(dev)
    371 	dev_t   dev;
    372 {
    373 	struct raid_softc *rs;
    374 	struct disklabel *lp;
    375 	int     part, unit, omask, size;
    376 
    377 	unit = raidunit(dev);
    378 	if (unit >= numraid)
    379 		return (-1);
    380 	rs = &raid_softc[unit];
    381 
    382 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    383 		return (-1);
    384 
    385 	part = DISKPART(dev);
    386 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    387 	lp = rs->sc_dkdev.dk_label;
    388 
    389 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    390 		return (-1);
    391 
    392 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    393 		size = -1;
    394 	else
    395 		size = lp->d_partitions[part].p_size *
    396 		    (lp->d_secsize / DEV_BSIZE);
    397 
    398 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    399 		return (-1);
    400 
    401 	return (size);
    402 
    403 }
    404 
    405 int
    406 raiddump(dev, blkno, va, size)
    407 	dev_t   dev;
    408 	daddr_t blkno;
    409 	caddr_t va;
    410 	size_t  size;
    411 {
    412 	/* Not implemented. */
    413 	return ENXIO;
    414 }
/* ARGSUSED */
/*
 * raidopen: open a partition of a RAID unit.
 *
 * Validates the unit and partition, (re)reads the disklabel on the
 * first open of a configured unit, records the open in the per-format
 * open masks (which also pins the unit against unconfiguration), and
 * marks all components dirty on the very first open.
 * Returns 0 on success or an errno value.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against concurrent open/close/unconfigure. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	/* First open of a configured unit: refresh the in-core label.
	   (lp still points at dk_label, which raidgetdisklabel updates
	   in place.) */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
    494 /* ARGSUSED */
    495 int
    496 raidclose(dev, flags, fmt, p)
    497 	dev_t   dev;
    498 	int     flags, fmt;
    499 	struct proc *p;
    500 {
    501 	int     unit = raidunit(dev);
    502 	struct raid_softc *rs;
    503 	int     error = 0;
    504 	int     part;
    505 
    506 	if (unit >= numraid)
    507 		return (ENXIO);
    508 	rs = &raid_softc[unit];
    509 
    510 	if ((error = raidlock(rs)) != 0)
    511 		return (error);
    512 
    513 	part = DISKPART(dev);
    514 
    515 	/* ...that much closer to allowing unconfiguration... */
    516 	switch (fmt) {
    517 	case S_IFCHR:
    518 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    519 		break;
    520 
    521 	case S_IFBLK:
    522 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    523 		break;
    524 	}
    525 	rs->sc_dkdev.dk_openmask =
    526 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    527 
    528 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    529 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    530 		/* Last one... device is not unconfigured yet.
    531 		   Device shutdown has taken care of setting the
    532 		   clean bits if RAIDF_INITED is not set
    533 		   mark things as clean... */
    534 		rf_update_component_labels( raidPtrs[unit] );
    535 	}
    536 
    537 	raidunlock(rs);
    538 	return (0);
    539 
    540 }
    541 
    542 void
    543 raidstrategy(bp)
    544 	register struct buf *bp;
    545 {
    546 	register int s;
    547 
    548 	unsigned int raidID = raidunit(bp->b_dev);
    549 	RF_Raid_t *raidPtr;
    550 	struct raid_softc *rs = &raid_softc[raidID];
    551 	struct disklabel *lp;
    552 	int     wlabel;
    553 
    554 #if 0
    555 	db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
    556 	db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
    557 	db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
    558 	db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
    559 	db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
    560 
    561 	if (bp->b_flags & B_READ)
    562 		db1_printf(("READ\n"));
    563 	else
    564 		db1_printf(("WRITE\n"));
    565 #endif
    566 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    567 		bp->b_error = ENXIO;
    568 		bp->b_flags = B_ERROR;
    569 		bp->b_resid = bp->b_bcount;
    570 		biodone(bp);
    571 		return;
    572 	}
    573 	if (raidID >= numraid || !raidPtrs[raidID]) {
    574 		bp->b_error = ENODEV;
    575 		bp->b_flags |= B_ERROR;
    576 		bp->b_resid = bp->b_bcount;
    577 		biodone(bp);
    578 		return;
    579 	}
    580 	raidPtr = raidPtrs[raidID];
    581 	if (!raidPtr->valid) {
    582 		bp->b_error = ENODEV;
    583 		bp->b_flags |= B_ERROR;
    584 		bp->b_resid = bp->b_bcount;
    585 		biodone(bp);
    586 		return;
    587 	}
    588 	if (bp->b_bcount == 0) {
    589 		db1_printf(("b_bcount is zero..\n"));
    590 		biodone(bp);
    591 		return;
    592 	}
    593 	lp = rs->sc_dkdev.dk_label;
    594 
    595 	/*
    596 	 * Do bounds checking and adjust transfer.  If there's an
    597 	 * error, the bounds check will flag that for us.
    598 	 */
    599 
    600 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    601 	if (DISKPART(bp->b_dev) != RAW_PART)
    602 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    603 			db1_printf(("Bounds check failed!!:%d %d\n",
    604 				(int) bp->b_blkno, (int) wlabel));
    605 			biodone(bp);
    606 			return;
    607 		}
    608 	s = splbio();		/* XXX Needed? */
    609 	db1_printf(("Beginning strategy...\n"));
    610 
    611 	bp->b_resid = 0;
    612 	bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
    613 	    NULL, NULL, NULL);
    614 	if (bp->b_error) {
    615 		bp->b_flags |= B_ERROR;
    616 		db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
    617 			bp->b_error));
    618 	}
    619 	splx(s);
    620 #if 0
    621 	db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
    622 		bp, bp->b_data,
    623 		(int) bp->b_bcount, (int) bp->b_resid));
    624 #endif
    625 }
    626 /* ARGSUSED */
    627 int
    628 raidread(dev, uio, flags)
    629 	dev_t   dev;
    630 	struct uio *uio;
    631 	int     flags;
    632 {
    633 	int     unit = raidunit(dev);
    634 	struct raid_softc *rs;
    635 	int     part;
    636 
    637 	if (unit >= numraid)
    638 		return (ENXIO);
    639 	rs = &raid_softc[unit];
    640 
    641 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    642 		return (ENXIO);
    643 	part = DISKPART(dev);
    644 
    645 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    646 
    647 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    648 
    649 }
    650 /* ARGSUSED */
    651 int
    652 raidwrite(dev, uio, flags)
    653 	dev_t   dev;
    654 	struct uio *uio;
    655 	int     flags;
    656 {
    657 	int     unit = raidunit(dev);
    658 	struct raid_softc *rs;
    659 
    660 	if (unit >= numraid)
    661 		return (ENXIO);
    662 	rs = &raid_softc[unit];
    663 
    664 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    665 		return (ENXIO);
    666 	db1_printf(("raidwrite\n"));
    667 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    668 
    669 }
    670 
    671 int
    672 raidioctl(dev, cmd, data, flag, p)
    673 	dev_t   dev;
    674 	u_long  cmd;
    675 	caddr_t data;
    676 	int     flag;
    677 	struct proc *p;
    678 {
    679 	int     unit = raidunit(dev);
    680 	int     error = 0;
    681 	int     part, pmask;
    682 	struct raid_softc *rs;
    683 	RF_Config_t *k_cfg, *u_cfg;
    684 	u_char *specific_buf;
    685 	int retcode = 0;
    686 	int row;
    687 	int column;
    688 	int s;
    689 	struct rf_recon_req *rrcopy, *rr;
    690 	RF_ComponentLabel_t *component_label;
    691 	RF_ComponentLabel_t ci_label;
    692 	RF_ComponentLabel_t **c_label_ptr;
    693 	RF_SingleComponent_t *sparePtr,*componentPtr;
    694 	RF_SingleComponent_t hot_spare;
    695 	RF_SingleComponent_t component;
    696 
    697 	if (unit >= numraid)
    698 		return (ENXIO);
    699 	rs = &raid_softc[unit];
    700 
    701 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    702 		(int) DISKPART(dev), (int) unit, (int) cmd));
    703 
    704 	/* Must be open for writes for these commands... */
    705 	switch (cmd) {
    706 	case DIOCSDINFO:
    707 	case DIOCWDINFO:
    708 	case DIOCWLABEL:
    709 		if ((flag & FWRITE) == 0)
    710 			return (EBADF);
    711 	}
    712 
    713 	/* Must be initialized for these... */
    714 	switch (cmd) {
    715 	case DIOCGDINFO:
    716 	case DIOCSDINFO:
    717 	case DIOCWDINFO:
    718 	case DIOCGPART:
    719 	case DIOCWLABEL:
    720 	case DIOCGDEFLABEL:
    721 	case RAIDFRAME_SHUTDOWN:
    722 	case RAIDFRAME_REWRITEPARITY:
    723 	case RAIDFRAME_GET_INFO:
    724 	case RAIDFRAME_RESET_ACCTOTALS:
    725 	case RAIDFRAME_GET_ACCTOTALS:
    726 	case RAIDFRAME_KEEP_ACCTOTALS:
    727 	case RAIDFRAME_GET_SIZE:
    728 	case RAIDFRAME_FAIL_DISK:
    729 	case RAIDFRAME_COPYBACK:
    730 	case RAIDFRAME_CHECKRECON:
    731 	case RAIDFRAME_GET_COMPONENT_LABEL:
    732 	case RAIDFRAME_SET_COMPONENT_LABEL:
    733 	case RAIDFRAME_ADD_HOT_SPARE:
    734 	case RAIDFRAME_REMOVE_HOT_SPARE:
    735 	case RAIDFRAME_INIT_LABELS:
    736 	case RAIDFRAME_REBUILD_IN_PLACE:
    737 	case RAIDFRAME_CHECK_PARITY:
    738 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    739 			return (ENXIO);
    740 	}
    741 
    742 	switch (cmd) {
    743 
    744 
    745 		/* configure the system */
    746 	case RAIDFRAME_CONFIGURE:
    747 
    748 		db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
    749 		/* copy-in the configuration information */
    750 		/* data points to a pointer to the configuration structure */
    751 		u_cfg = *((RF_Config_t **) data);
    752 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    753 		if (k_cfg == NULL) {
    754 			db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
    755 			return (ENOMEM);
    756 		}
    757 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    758 		    sizeof(RF_Config_t));
    759 		if (retcode) {
    760 			db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
    761 				retcode));
    762 			return (retcode);
    763 		}
    764 		/* allocate a buffer for the layout-specific data, and copy it
    765 		 * in */
    766 		if (k_cfg->layoutSpecificSize) {
    767 			if (k_cfg->layoutSpecificSize > 10000) {
    768 				/* sanity check */
    769 				db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
    770 				return (EINVAL);
    771 			}
    772 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    773 			    (u_char *));
    774 			if (specific_buf == NULL) {
    775 				RF_Free(k_cfg, sizeof(RF_Config_t));
    776 				db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
    777 				return (ENOMEM);
    778 			}
    779 			retcode = copyin(k_cfg->layoutSpecific,
    780 			    (caddr_t) specific_buf,
    781 			    k_cfg->layoutSpecificSize);
    782 			if (retcode) {
    783 				db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
    784 					retcode));
    785 				return (retcode);
    786 			}
    787 		} else
    788 			specific_buf = NULL;
    789 		k_cfg->layoutSpecific = specific_buf;
    790 
    791 		/* should do some kind of sanity check on the configuration.
    792 		 * Store the sum of all the bytes in the last byte? */
    793 
    794 		/* configure the system */
    795 
    796 		raidPtrs[unit]->raidid = unit;
    797 
    798 		retcode = rf_Configure(raidPtrs[unit], k_cfg);
    799 
    800 		/* allow this many simultaneous IO's to this RAID device */
    801 		raidPtrs[unit]->openings = RAIDOUTSTANDING;
    802 
    803 		if (retcode == 0) {
    804 			retcode = raidinit(dev, raidPtrs[unit], unit);
    805 			rf_markalldirty( raidPtrs[unit] );
    806 		}
    807 		/* free the buffers.  No return code here. */
    808 		if (k_cfg->layoutSpecificSize) {
    809 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    810 		}
    811 		RF_Free(k_cfg, sizeof(RF_Config_t));
    812 
    813 		db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
    814 			retcode));
    815 
    816 		return (retcode);
    817 
    818 		/* shutdown the system */
    819 	case RAIDFRAME_SHUTDOWN:
    820 
    821 		if ((error = raidlock(rs)) != 0)
    822 			return (error);
    823 
    824 		/*
    825 		 * If somebody has a partition mounted, we shouldn't
    826 		 * shutdown.
    827 		 */
    828 
    829 		part = DISKPART(dev);
    830 		pmask = (1 << part);
    831 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    832 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    833 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    834 			raidunlock(rs);
    835 			return (EBUSY);
    836 		}
    837 
    838 		if (rf_debugKernelAccess) {
    839 			printf("call shutdown\n");
    840 		}
    841 
    842 		retcode = rf_Shutdown(raidPtrs[unit]);
    843 
    844 		db1_printf(("Done main shutdown\n"));
    845 
    846 		pool_destroy(&rs->sc_cbufpool);
    847 		db1_printf(("Done freeing component buffer freelist\n"));
    848 
    849 		/* It's no longer initialized... */
    850 		rs->sc_flags &= ~RAIDF_INITED;
    851 
    852 		/* Detach the disk. */
    853 		disk_detach(&rs->sc_dkdev);
    854 
    855 		raidunlock(rs);
    856 
    857 		return (retcode);
    858 	case RAIDFRAME_GET_COMPONENT_LABEL:
    859 		c_label_ptr = (RF_ComponentLabel_t **) data;
    860 		/* need to read the component label for the disk indicated
    861 		   by row,column in component_label
    862 		   XXX need to sanity check these values!!!
    863 		   */
    864 
		/* For practice, let's get it directly from disk, rather
    866 		   than from the in-core copy */
    867 		RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
    868 			   (RF_ComponentLabel_t *));
    869 		if (component_label == NULL)
    870 			return (ENOMEM);
    871 
    872 		bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
    873 
    874 		retcode = copyin( *c_label_ptr, component_label,
    875 				  sizeof(RF_ComponentLabel_t));
    876 
    877 		if (retcode) {
    878 			return(retcode);
    879 		}
    880 
    881 		row = component_label->row;
    882 		column = component_label->column;
    883 
    884 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
    885 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
    886 			return(EINVAL);
    887 		}
    888 
    889 		raidread_component_label(
    890                               raidPtrs[unit]->Disks[row][column].dev,
    891 			      raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    892 			      component_label );
    893 
    894 		retcode = copyout((caddr_t) component_label,
    895 				  (caddr_t) *c_label_ptr,
    896 				  sizeof(RF_ComponentLabel_t));
    897 		RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    898 		return (retcode);
    899 
    900 	case RAIDFRAME_SET_COMPONENT_LABEL:
    901 		component_label = (RF_ComponentLabel_t *) data;
    902 
    903 		/* XXX check the label for valid stuff... */
    904 		/* Note that some things *should not* get modified --
    905 		   the user should be re-initing the labels instead of
    906 		   trying to patch things.
    907 		   */
    908 
    909 		printf("Got component label:\n");
    910 		printf("Version: %d\n",component_label->version);
    911 		printf("Serial Number: %d\n",component_label->serial_number);
    912 		printf("Mod counter: %d\n",component_label->mod_counter);
    913 		printf("Row: %d\n", component_label->row);
    914 		printf("Column: %d\n", component_label->column);
    915 		printf("Num Rows: %d\n", component_label->num_rows);
    916 		printf("Num Columns: %d\n", component_label->num_columns);
    917 		printf("Clean: %d\n", component_label->clean);
    918 		printf("Status: %d\n", component_label->status);
    919 
    920 		row = component_label->row;
    921 		column = component_label->column;
    922 
    923 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
    924 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
    925 			return(EINVAL);
    926 		}
    927 
    928 		/* XXX this isn't allowed to do anything for now :-) */
    929 #if 0
    930 		raidwrite_component_label(
    931                             raidPtrs[unit]->Disks[row][column].dev,
    932 			    raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    933 			    component_label );
    934 #endif
    935 		return (0);
    936 
    937 	case RAIDFRAME_INIT_LABELS:
    938 		component_label = (RF_ComponentLabel_t *) data;
    939 		/*
    940 		   we only want the serial number from
    941 		   the above.  We get all the rest of the information
    942 		   from the config that was used to create this RAID
    943 		   set.
    944 		   */
    945 
    946 		raidPtrs[unit]->serial_number = component_label->serial_number;
    947 		/* current version number */
    948 		ci_label.version = RF_COMPONENT_LABEL_VERSION;
    949 		ci_label.serial_number = component_label->serial_number;
    950 		ci_label.mod_counter = raidPtrs[unit]->mod_counter;
    951 		ci_label.num_rows = raidPtrs[unit]->numRow;
    952 		ci_label.num_columns = raidPtrs[unit]->numCol;
    953 		ci_label.clean = RF_RAID_DIRTY; /* not clean */
    954 		ci_label.status = rf_ds_optimal; /* "It's good!" */
    955 
    956 		for(row=0;row<raidPtrs[unit]->numRow;row++) {
    957 			ci_label.row = row;
    958 			for(column=0;column<raidPtrs[unit]->numCol;column++) {
    959 				ci_label.column = column;
    960 				raidwrite_component_label(
    961 				  raidPtrs[unit]->Disks[row][column].dev,
    962 				  raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    963 				  &ci_label );
    964 			}
    965 		}
    966 
    967 		return (retcode);
    968 
    969 		/* initialize all parity */
    970 	case RAIDFRAME_REWRITEPARITY:
    971 
    972 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
    973 			/* Parity for RAID 0 is trivially correct */
    974 			raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
    975 			return(0);
    976 		}
    977 
    978 		/* borrow the thread of the requesting process */
    979 
    980 		s = splbio();
    981 		retcode = rf_RewriteParity(raidPtrs[unit]);
    982 		splx(s);
    983 		/* return I/O Error if the parity rewrite fails */
    984 
    985 		if (retcode) {
    986 			retcode = EIO;
    987 		} else {
    988 			/* set the clean bit!  If we shutdown correctly,
    989 			 the clean bit on each component label will get
    990 			 set */
    991 			raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
    992 		}
    993 		return (retcode);
    994 
    995 
    996 	case RAIDFRAME_ADD_HOT_SPARE:
    997 		sparePtr = (RF_SingleComponent_t *) data;
    998 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
    999 		printf("Adding spare\n");
   1000 		retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
   1001 		return(retcode);
   1002 
   1003 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1004 		return(retcode);
   1005 
   1006 	case RAIDFRAME_REBUILD_IN_PLACE:
   1007 
   1008 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1009 			/* Can't do this on a RAID 0!! */
   1010 			return(EINVAL);
   1011 		}
   1012 
   1013 		componentPtr = (RF_SingleComponent_t *) data;
   1014 		memcpy( &component, componentPtr,
   1015 			sizeof(RF_SingleComponent_t));
   1016 		row = component.row;
   1017 		column = component.column;
   1018 		printf("Rebuild: %d %d\n",row, column);
   1019 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
   1020 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
   1021 			return(EINVAL);
   1022 		}
   1023 		printf("Attempting a rebuild in place\n");
   1024 		s = splbio();
   1025 		retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
   1026 		splx(s);
   1027 		return(retcode);
   1028 
   1029 	case RAIDFRAME_GET_INFO:
   1030 		{
   1031 			RF_Raid_t *raid = raidPtrs[unit];
   1032 			RF_DeviceConfig_t *cfg, **ucfgp;
   1033 			int     i, j, d;
   1034 
   1035 			if (!raid->valid)
   1036 				return (ENODEV);
   1037 			ucfgp = (RF_DeviceConfig_t **) data;
   1038 			RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
   1039 				  (RF_DeviceConfig_t *));
   1040 			if (cfg == NULL)
   1041 				return (ENOMEM);
   1042 			bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
   1043 			cfg->rows = raid->numRow;
   1044 			cfg->cols = raid->numCol;
   1045 			cfg->ndevs = raid->numRow * raid->numCol;
   1046 			if (cfg->ndevs >= RF_MAX_DISKS) {
   1047 				cfg->ndevs = 0;
   1048 				return (ENOMEM);
   1049 			}
   1050 			cfg->nspares = raid->numSpare;
   1051 			if (cfg->nspares >= RF_MAX_DISKS) {
   1052 				cfg->nspares = 0;
   1053 				return (ENOMEM);
   1054 			}
   1055 			cfg->maxqdepth = raid->maxQueueDepth;
   1056 			d = 0;
   1057 			for (i = 0; i < cfg->rows; i++) {
   1058 				for (j = 0; j < cfg->cols; j++) {
   1059 					cfg->devs[d] = raid->Disks[i][j];
   1060 					d++;
   1061 				}
   1062 			}
   1063 			for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
   1064 				cfg->spares[i] = raid->Disks[0][j];
   1065 			}
   1066 			retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
   1067 					  sizeof(RF_DeviceConfig_t));
   1068 			RF_Free(cfg, sizeof(RF_DeviceConfig_t));
   1069 
   1070 			return (retcode);
   1071 		}
   1072 		break;
   1073 	case RAIDFRAME_CHECK_PARITY:
   1074 		*(int *) data = raidPtrs[unit]->parity_good;
   1075 		return (0);
   1076 	case RAIDFRAME_RESET_ACCTOTALS:
   1077 		{
   1078 			RF_Raid_t *raid = raidPtrs[unit];
   1079 
   1080 			bzero(&raid->acc_totals, sizeof(raid->acc_totals));
   1081 			return (0);
   1082 		}
   1083 		break;
   1084 
   1085 	case RAIDFRAME_GET_ACCTOTALS:
   1086 		{
   1087 			RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
   1088 			RF_Raid_t *raid = raidPtrs[unit];
   1089 
   1090 			*totals = raid->acc_totals;
   1091 			return (0);
   1092 		}
   1093 		break;
   1094 
   1095 	case RAIDFRAME_KEEP_ACCTOTALS:
   1096 		{
   1097 			RF_Raid_t *raid = raidPtrs[unit];
   1098 			int    *keep = (int *) data;
   1099 
   1100 			raid->keep_acc_totals = *keep;
   1101 			return (0);
   1102 		}
   1103 		break;
   1104 
   1105 	case RAIDFRAME_GET_SIZE:
   1106 		*(int *) data = raidPtrs[unit]->totalSectors;
   1107 		return (0);
   1108 
   1109 #define RAIDFRAME_RECON 1
   1110 		/* XXX The above should probably be set somewhere else!! GO */
   1111 #if RAIDFRAME_RECON > 0
   1112 
   1113 		/* fail a disk & optionally start reconstruction */
   1114 	case RAIDFRAME_FAIL_DISK:
   1115 
   1116 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1117 			/* Can't do this on a RAID 0!! */
   1118 			return(EINVAL);
   1119 		}
   1120 
   1121 		rr = (struct rf_recon_req *) data;
   1122 
   1123 		if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
   1124 		    || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
   1125 			return (EINVAL);
   1126 
   1127 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1128 		       unit, rr->row, rr->col);
   1129 
   1130 		/* make a copy of the recon request so that we don't rely on
   1131 		 * the user's buffer */
   1132 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1133 		bcopy(rr, rrcopy, sizeof(*rr));
   1134 		rrcopy->raidPtr = (void *) raidPtrs[unit];
   1135 
   1136 		LOCK_RECON_Q_MUTEX();
   1137 		rrcopy->next = recon_queue;
   1138 		recon_queue = rrcopy;
   1139 		wakeup(&recon_queue);
   1140 		UNLOCK_RECON_Q_MUTEX();
   1141 
   1142 		return (0);
   1143 
   1144 		/* invoke a copyback operation after recon on whatever disk
   1145 		 * needs it, if any */
   1146 	case RAIDFRAME_COPYBACK:
   1147 
   1148 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1149 			/* This makes no sense on a RAID 0!! */
   1150 			return(EINVAL);
   1151 		}
   1152 
   1153 		/* borrow the current thread to get this done */
   1154 
   1155 		s = splbio();
   1156 		rf_CopybackReconstructedData(raidPtrs[unit]);
   1157 		splx(s);
   1158 		return (0);
   1159 
   1160 		/* return the percentage completion of reconstruction */
   1161 	case RAIDFRAME_CHECKRECON:
   1162 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1163 			/* This makes no sense on a RAID 0 */
   1164 			return(EINVAL);
   1165 		}
   1166 
   1167 		row = *(int *) data;
   1168 		if (row < 0 || row >= raidPtrs[unit]->numRow)
   1169 			return (EINVAL);
   1170 		if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
   1171 			*(int *) data = 100;
   1172 		else
   1173 			*(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
   1174 		return (0);
   1175 
   1176 		/* the sparetable daemon calls this to wait for the kernel to
   1177 		 * need a spare table. this ioctl does not return until a
   1178 		 * spare table is needed. XXX -- calling mpsleep here in the
   1179 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1180 		 * -- I should either compute the spare table in the kernel,
   1181 		 * or have a different -- XXX XXX -- interface (a different
   1182 		 * character device) for delivering the table          -- XXX */
   1183 #if 0
   1184 	case RAIDFRAME_SPARET_WAIT:
   1185 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1186 		while (!rf_sparet_wait_queue)
   1187 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1188 		waitreq = rf_sparet_wait_queue;
   1189 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1190 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1191 
   1192 		*((RF_SparetWait_t *) data) = *waitreq;	/* structure assignment */
   1193 
   1194 		RF_Free(waitreq, sizeof(*waitreq));
   1195 		return (0);
   1196 
   1197 
   1198 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1199 		 * code in it that will cause the dameon to exit */
   1200 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1201 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1202 		waitreq->fcol = -1;
   1203 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1204 		waitreq->next = rf_sparet_wait_queue;
   1205 		rf_sparet_wait_queue = waitreq;
   1206 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1207 		wakeup(&rf_sparet_wait_queue);
   1208 		return (0);
   1209 
   1210 		/* used by the spare table daemon to deliver a spare table
   1211 		 * into the kernel */
   1212 	case RAIDFRAME_SEND_SPARET:
   1213 
   1214 		/* install the spare table */
   1215 		retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
   1216 
   1217 		/* respond to the requestor.  the return status of the spare
   1218 		 * table installation is passed in the "fcol" field */
   1219 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1220 		waitreq->fcol = retcode;
   1221 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1222 		waitreq->next = rf_sparet_resp_queue;
   1223 		rf_sparet_resp_queue = waitreq;
   1224 		wakeup(&rf_sparet_resp_queue);
   1225 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1226 
   1227 		return (retcode);
   1228 #endif
   1229 
   1230 
   1231 #endif				/* RAIDFRAME_RECON > 0 */
   1232 
   1233 	default:
   1234 		break;		/* fall through to the os-specific code below */
   1235 
   1236 	}
   1237 
   1238 	if (!raidPtrs[unit]->valid)
   1239 		return (EINVAL);
   1240 
   1241 	/*
   1242 	 * Add support for "regular" device ioctls here.
   1243 	 */
   1244 
   1245 	switch (cmd) {
   1246 	case DIOCGDINFO:
   1247 		db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
   1248 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1249 		break;
   1250 
   1251 	case DIOCGPART:
   1252 		db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
   1253 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1254 		((struct partinfo *) data)->part =
   1255 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1256 		break;
   1257 
   1258 	case DIOCWDINFO:
   1259 		db1_printf(("DIOCWDINFO\n"));
   1260 	case DIOCSDINFO:
   1261 		db1_printf(("DIOCSDINFO\n"));
   1262 		if ((error = raidlock(rs)) != 0)
   1263 			return (error);
   1264 
   1265 		rs->sc_flags |= RAIDF_LABELLING;
   1266 
   1267 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1268 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1269 		if (error == 0) {
   1270 			if (cmd == DIOCWDINFO)
   1271 				error = writedisklabel(RAIDLABELDEV(dev),
   1272 				    raidstrategy, rs->sc_dkdev.dk_label,
   1273 				    rs->sc_dkdev.dk_cpulabel);
   1274 		}
   1275 		rs->sc_flags &= ~RAIDF_LABELLING;
   1276 
   1277 		raidunlock(rs);
   1278 
   1279 		if (error)
   1280 			return (error);
   1281 		break;
   1282 
   1283 	case DIOCWLABEL:
   1284 		db1_printf(("DIOCWLABEL\n"));
   1285 		if (*(int *) data != 0)
   1286 			rs->sc_flags |= RAIDF_WLABEL;
   1287 		else
   1288 			rs->sc_flags &= ~RAIDF_WLABEL;
   1289 		break;
   1290 
   1291 	case DIOCGDEFLABEL:
   1292 		db1_printf(("DIOCGDEFLABEL\n"));
   1293 		raidgetdefaultlabel(raidPtrs[unit], rs,
   1294 		    (struct disklabel *) data);
   1295 		break;
   1296 
   1297 	default:
   1298 		retcode = ENOTTY;	/* XXXX ?? OR EINVAL ? */
   1299 	}
   1300 	return (retcode);
   1301 
   1302 }
   1303 
   1304 
   1305 /* raidinit -- complete the rest of the initialization for the
   1306    RAIDframe device.  */
   1307 
   1308 
   1309 static int
   1310 raidinit(dev, raidPtr, unit)
   1311 	dev_t   dev;
   1312 	RF_Raid_t *raidPtr;
   1313 	int     unit;
   1314 {
   1315 	int     retcode;
   1316 	/* int ix; */
   1317 	/* struct raidbuf *raidbp; */
   1318 	struct raid_softc *rs;
   1319 
   1320 	retcode = 0;
   1321 
   1322 	rs = &raid_softc[unit];
   1323 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1324 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1325 
   1326 
   1327 	/* XXX should check return code first... */
   1328 	rs->sc_flags |= RAIDF_INITED;
   1329 
   1330 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1331 
   1332 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1333 
   1334 	/* disk_attach actually creates space for the CPU disklabel, among
   1335 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1336 	 * with disklabels. */
   1337 
   1338 	disk_attach(&rs->sc_dkdev);
   1339 
   1340 	/* XXX There may be a weird interaction here between this, and
   1341 	 * protectedSectors, as used in RAIDframe.  */
   1342 
   1343 	rs->sc_size = raidPtr->totalSectors;
   1344 	rs->sc_dev = dev;
   1345 
   1346 	return (retcode);
   1347 }
   1348 
   1349 /*
   1350  * This kernel thread never exits.  It is created once, and persists
   1351  * until the system reboots.
   1352  */
   1353 
void
rf_ReconKernelThread()
{
	/*
	 * Reconstruction worker thread.  Created once at attach time and
	 * never exits: it loops forever pulling rf_recon_req structures
	 * off the global recon_queue (queued by RAIDFRAME_FAIL_DISK) and
	 * running rf_FailDisk() for each.  Note that `s' is set here and
	 * never passed to splx() -- the thread never returns.
	 */
	struct rf_recon_req *req;
	int     s;

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	s = splbio();

	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		while (!recon_queue) {
			/* Queue empty: release the lock and sleep until
			 * the ioctl path queues a request and wakeup()s
			 * &recon_queue.
			 * NOTE(review): the UNLOCK and the tsleep() are
			 * not atomic; whether a wakeup can be lost in the
			 * window depends on what LOCK_RECON_Q_MUTEX
			 * expands to in this port -- verify. */
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO,
			       "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		/* Pop the head of the singly-linked request list. */
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
	         * If flags specifies that we should start recon, this call
	         * will not return until reconstruction completes, fails,
		 * or is aborted.
	         */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* The request was RF_Malloc'd by the ioctl handler; we
		 * own it now and must free it. */
		RF_Free(req, sizeof(*req));
	}
}
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	/*
	 * Hand a spare-table request to the user-level sparetable daemon
	 * (which is blocked in RAIDFRAME_SPARET_WAIT) and sleep until the
	 * daemon posts a response via RAIDFRAME_SEND_SPARET.  Returns the
	 * installation status carried back in the response's fcol field.
	 */
	int     retcode;

	/* Queue the request and wake the daemon. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* NOTE(review): an older comment here said "mpsleep unlocks the
	 * mutex", left over from the OSF port.  tsleep() does NOT release
	 * rf_sparet_wait_mutex, so we sleep with it held; whether that can
	 * deadlock against the daemon's ioctl path depends on what
	 * RF_LOCK_MUTEX expands to in this port -- verify. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* Pop the response.  `req' is re-used: from here on it points at
	 * the daemon's response entry, not the request we were passed. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
   1420 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1421  * bp & passes it down.
   1422  * any calls originating in the kernel must use non-blocking I/O
   1423  * do some extra sanity checking to return "appropriate" error values for
   1424  * certain conditions (to make some standard utilities work)
   1425  */
   1426 int
   1427 rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
   1428 	RF_Raid_t *raidPtr;
   1429 	struct buf *bp;
   1430 	RF_RaidAccessFlags_t flags;
   1431 	void    (*cbFunc) (struct buf *);
   1432 	void   *cbArg;
   1433 {
   1434 	RF_SectorCount_t num_blocks, pb, sum;
   1435 	RF_RaidAddr_t raid_addr;
   1436 	int     retcode;
   1437 	struct partition *pp;
   1438 	daddr_t blocknum;
   1439 	int     unit;
   1440 	struct raid_softc *rs;
   1441 	int     do_async;
   1442 
   1443 	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */
   1444 
   1445 	unit = raidPtr->raidid;
   1446 	rs = &raid_softc[unit];
   1447 
   1448 	/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1449 	 * partition.. Need to make it absolute to the underlying device.. */
   1450 
   1451 	blocknum = bp->b_blkno;
   1452 	if (DISKPART(bp->b_dev) != RAW_PART) {
   1453 		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1454 		blocknum += pp->p_offset;
   1455 		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
   1456 			pp->p_offset));
   1457 	} else {
   1458 		db1_printf(("Is raw..\n"));
   1459 	}
   1460 	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));
   1461 
   1462 	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1463 	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1464 
   1465 	/* *THIS* is where we adjust what block we're going to... but DO NOT
   1466 	 * TOUCH bp->b_blkno!!! */
   1467 	raid_addr = blocknum;
   1468 
   1469 	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1470 	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1471 	sum = raid_addr + num_blocks + pb;
   1472 	if (1 || rf_debugKernelAccess) {
   1473 		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1474 			(int) raid_addr, (int) sum, (int) num_blocks,
   1475 			(int) pb, (int) bp->b_resid));
   1476 	}
   1477 	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1478 	    || (sum < num_blocks) || (sum < pb)) {
   1479 		bp->b_error = ENOSPC;
   1480 		bp->b_flags |= B_ERROR;
   1481 		bp->b_resid = bp->b_bcount;
   1482 		biodone(bp);
   1483 		return (bp->b_error);
   1484 	}
   1485 	/*
   1486 	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1487 	 */
   1488 
   1489 	if (bp->b_bcount & raidPtr->sectorMask) {
   1490 		bp->b_error = EINVAL;
   1491 		bp->b_flags |= B_ERROR;
   1492 		bp->b_resid = bp->b_bcount;
   1493 		biodone(bp);
   1494 		return (bp->b_error);
   1495 	}
   1496 	db1_printf(("Calling DoAccess..\n"));
   1497 
   1498 
   1499 	/* Put a throttle on the number of requests we handle simultanously */
   1500 
   1501 	RF_LOCK_MUTEX(raidPtr->mutex);
   1502 
   1503 	while(raidPtr->openings <= 0) {
   1504 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1505 		(void)tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);
   1506 		RF_LOCK_MUTEX(raidPtr->mutex);
   1507 	}
   1508 	raidPtr->openings--;
   1509 
   1510 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1511 
   1512 	/*
   1513 	 * Everything is async.
   1514 	 */
   1515 	do_async = 1;
   1516 
   1517 	/* don't ever condition on bp->b_flags & B_WRITE.  always condition on
   1518 	 * B_READ instead */
   1519 	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1520 	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1521 	    do_async, raid_addr, num_blocks,
   1522 	    bp->b_un.b_addr,
   1523 	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
   1524 	    NULL, cbFunc, cbArg);
   1525 #if 0
   1526 	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
   1527 		bp->b_data, (int) bp->b_resid));
   1528 #endif
   1529 
   1530 	return (retcode);
   1531 }
   1532 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1533 
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	/*
	 * Dispatch one RAIDframe disk-queue request to the underlying
	 * component: wrap it in a struct raidbuf from the unit's pool and
	 * hand it to the component's vnode via VOP_STRATEGY().  NOP
	 * requests short-circuit straight to KernelWakeupFunc().  The
	 * disk queue should be locked on entry (per the comment above the
	 * function in the file).  Always returns 0; completion status is
	 * delivered later through KernelWakeupFunc().
	 */
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int     unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	/* Remember the owning queue so KernelWakeupFunc() can find it. */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	/* Matched by disk_unbusy() in KernelWakeupFunc(). */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* Per-unit pool buffer; returned via RAIDPUTBUF() on completion. */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this??  */

		/* Complete immediately; no physical I/O for a NOP. */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* Set up the nested buf for the component I/O; completion
		 * comes back through KernelWakeupFunc(). */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* Writes must bump the vnode's output counter. */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
   1646 /* this is the callback function associated with a I/O invoked from
   1647    kernel code.
   1648  */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	/*
	 * b_iodone callback for component I/Os issued by
	 * rf_DispatchKernelIO().  Recovers the RF_DiskQueueData_t and the
	 * original buf from the struct raidbuf wrapper, propagates error
	 * state and residual counts, marks a component failed on its
	 * first I/O error, releases the raidbuf, and notifies RAIDframe
	 * via rf_DiskIOComplete()/CompleteFunc.  Runs at splbio().
	 */
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* vbp is really the raidbuf wrapper allocated in DispatchKernelIO;
	 * rf_buf is its first member, hence the cast. */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int     unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* Propagate an error on the component I/O to the original buf. */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* Account the physical I/O time in the access trace record. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* Return the wrapper to the per-unit pool. */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		/* Pairs with the disk_busy() in rf_DispatchKernelIO(). */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* Tell RAIDframe the I/O is done (1 = failed, 0 = ok). */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
   1747 
   1748 
   1749 
   1750 /*
   1751  * initialize a buf structure for doing an I/O in the kernel.
   1752  */
   1753 static void
   1754 InitBP(
   1755     struct buf * bp,
   1756     struct vnode * b_vp,
   1757     unsigned rw_flag,
   1758     dev_t dev,
   1759     RF_SectorNum_t startSect,
   1760     RF_SectorCount_t numSect,
   1761     caddr_t buf,
   1762     void (*cbFunc) (struct buf *),
   1763     void *cbArg,
   1764     int logBytesPerSector,
   1765     struct proc * b_proc)
   1766 {
   1767 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1768 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1769 	bp->b_bcount = numSect << logBytesPerSector;
   1770 	bp->b_bufsize = bp->b_bcount;
   1771 	bp->b_error = 0;
   1772 	bp->b_dev = dev;
   1773 	db1_printf(("bp->b_dev is %d\n", dev));
   1774 	bp->b_un.b_addr = buf;
   1775 #if 0
   1776 	db1_printf(("bp->b_data=0x%x\n", bp->b_data));
   1777 #endif
   1778 
   1779 	bp->b_blkno = startSect;
   1780 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1781 	db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
   1782 	if (bp->b_bcount == 0) {
   1783 		panic("bp->b_bcount is zero in InitBP!!\n");
   1784 	}
   1785 	bp->b_proc = b_proc;
   1786 	bp->b_iodone = cbFunc;
   1787 	bp->b_vp = b_vp;
   1788 
   1789 }
   1790 
   1791 static void
   1792 raidgetdefaultlabel(raidPtr, rs, lp)
   1793 	RF_Raid_t *raidPtr;
   1794 	struct raid_softc *rs;
   1795 	struct disklabel *lp;
   1796 {
   1797 	db1_printf(("Building a default label...\n"));
   1798 	bzero(lp, sizeof(*lp));
   1799 
   1800 	/* fabricate a label... */
   1801 	lp->d_secperunit = raidPtr->totalSectors;
   1802 	lp->d_secsize = raidPtr->bytesPerSector;
   1803 	lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
   1804 	lp->d_ntracks = 1;
   1805 	lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
   1806 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1807 
   1808 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1809 	lp->d_type = DTYPE_RAID;
   1810 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1811 	lp->d_rpm = 3600;
   1812 	lp->d_interleave = 1;
   1813 	lp->d_flags = 0;
   1814 
   1815 	lp->d_partitions[RAW_PART].p_offset = 0;
   1816 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1817 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1818 	lp->d_npartitions = RAW_PART + 1;
   1819 
   1820 	lp->d_magic = DISKMAGIC;
   1821 	lp->d_magic2 = DISKMAGIC;
   1822 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1823 
   1824 }
   1825 /*
   1826  * Read the disklabel from the raid device.  If one is not present, fake one
   1827  * up.
   1828  */
   1829 static void
   1830 raidgetdisklabel(dev)
   1831 	dev_t   dev;
   1832 {
   1833 	int     unit = raidunit(dev);
   1834 	struct raid_softc *rs = &raid_softc[unit];
   1835 	char   *errstring;
   1836 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1837 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1838 	RF_Raid_t *raidPtr;
   1839 
   1840 	db1_printf(("Getting the disklabel...\n"));
   1841 
   1842 	bzero(clp, sizeof(*clp));
   1843 
   1844 	raidPtr = raidPtrs[unit];
   1845 
   1846 	raidgetdefaultlabel(raidPtr, rs, lp);
   1847 
   1848 	/*
   1849 	 * Call the generic disklabel extraction routine.
   1850 	 */
   1851 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1852 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1853 	if (errstring)
   1854 		raidmakedisklabel(rs);
   1855 	else {
   1856 		int     i;
   1857 		struct partition *pp;
   1858 
   1859 		/*
   1860 		 * Sanity check whether the found disklabel is valid.
   1861 		 *
   1862 		 * This is necessary since total size of the raid device
   1863 		 * may vary when an interleave is changed even though exactly
   1864 		 * same componets are used, and old disklabel may used
   1865 		 * if that is found.
   1866 		 */
   1867 		if (lp->d_secperunit != rs->sc_size)
   1868 			printf("WARNING: %s: "
   1869 			    "total sector size in disklabel (%d) != "
   1870 			    "the size of raid (%ld)\n", rs->sc_xname,
   1871 			    lp->d_secperunit, (long) rs->sc_size);
   1872 		for (i = 0; i < lp->d_npartitions; i++) {
   1873 			pp = &lp->d_partitions[i];
   1874 			if (pp->p_offset + pp->p_size > rs->sc_size)
   1875 				printf("WARNING: %s: end of partition `%c' "
   1876 				    "exceeds the size of raid (%ld)\n",
   1877 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   1878 		}
   1879 	}
   1880 
   1881 }
   1882 /*
   1883  * Take care of things one might want to take care of in the event
   1884  * that a disklabel isn't present.
   1885  */
   1886 static void
   1887 raidmakedisklabel(rs)
   1888 	struct raid_softc *rs;
   1889 {
   1890 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1891 	db1_printf(("Making a label..\n"));
   1892 
   1893 	/*
   1894 	 * For historical reasons, if there's no disklabel present
   1895 	 * the raw partition must be marked FS_BSDFFS.
   1896 	 */
   1897 
   1898 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1899 
   1900 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1901 
   1902 	lp->d_checksum = dkcksum(lp);
   1903 }
   1904 /*
   1905  * Lookup the provided name in the filesystem.  If the file exists,
   1906  * is a valid block device, and isn't being used by anyone else,
   1907  * set *vpp to the file's vnode.
   1908  * You'll find the original of this in ccd.c
   1909  */
   1910 int
   1911 raidlookup(path, p, vpp)
   1912 	char   *path;
   1913 	struct proc *p;
   1914 	struct vnode **vpp;	/* result */
   1915 {
   1916 	struct nameidata nd;
   1917 	struct vnode *vp;
   1918 	struct vattr va;
   1919 	int     error;
   1920 
   1921 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   1922 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   1923 #ifdef DEBUG
   1924 		printf("RAIDframe: vn_open returned %d\n", error);
   1925 #endif
   1926 		return (error);
   1927 	}
   1928 	vp = nd.ni_vp;
   1929 	if (vp->v_usecount > 1) {
   1930 		VOP_UNLOCK(vp, 0);
   1931 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1932 		return (EBUSY);
   1933 	}
   1934 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   1935 		VOP_UNLOCK(vp, 0);
   1936 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1937 		return (error);
   1938 	}
   1939 	/* XXX: eventually we should handle VREG, too. */
   1940 	if (va.va_type != VBLK) {
   1941 		VOP_UNLOCK(vp, 0);
   1942 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1943 		return (ENOTBLK);
   1944 	}
   1945 	VOP_UNLOCK(vp, 0);
   1946 	*vpp = vp;
   1947 	return (0);
   1948 }
   1949 /*
   1950  * Wait interruptibly for an exclusive lock.
   1951  *
   1952  * XXX
   1953  * Several drivers do this; it should be abstracted and made MP-safe.
   1954  * (Hmm... where have we seen this warning before :->  GO )
   1955  */
   1956 static int
   1957 raidlock(rs)
   1958 	struct raid_softc *rs;
   1959 {
   1960 	int     error;
   1961 
   1962 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   1963 		rs->sc_flags |= RAIDF_WANTED;
   1964 		if ((error =
   1965 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   1966 			return (error);
   1967 	}
   1968 	rs->sc_flags |= RAIDF_LOCKED;
   1969 	return (0);
   1970 }
   1971 /*
   1972  * Unlock and wake up any waiters.
   1973  */
   1974 static void
   1975 raidunlock(rs)
   1976 	struct raid_softc *rs;
   1977 {
   1978 
   1979 	rs->sc_flags &= ~RAIDF_LOCKED;
   1980 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   1981 		rs->sc_flags &= ~RAIDF_WANTED;
   1982 		wakeup(rs);
   1983 	}
   1984 }
   1985 
   1986 
   1987 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   1988 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   1989 
   1990 int
   1991 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   1992 {
   1993 	RF_ComponentLabel_t component_label;
   1994 	raidread_component_label(dev, b_vp, &component_label);
   1995 	component_label.mod_counter = mod_counter;
   1996 	component_label.clean = RF_RAID_CLEAN;
   1997 	raidwrite_component_label(dev, b_vp, &component_label);
   1998 	return(0);
   1999 }
   2000 
   2001 
   2002 int
   2003 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2004 {
   2005 	RF_ComponentLabel_t component_label;
   2006 	raidread_component_label(dev, b_vp, &component_label);
   2007 	component_label.mod_counter = mod_counter;
   2008 	component_label.clean = RF_RAID_DIRTY;
   2009 	raidwrite_component_label(dev, b_vp, &component_label);
   2010 	return(0);
   2011 }
   2012 
/* ARGSUSED */
/*
 * Read the RAIDframe component label from the given component device
 * into *component_label.
 *
 * The label lives RF_COMPONENT_INFO_OFFSET bytes into the component
 * and is RF_COMPONENT_INFO_SIZE bytes long.  The read bypasses the
 * filesystem: a scratch buffer is obtained with geteblk() and handed
 * directly to the device's strategy routine.  b_vp is unused here.
 *
 * Returns 0 on success or the error from biowait().
 */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is normally in bytes, but this sets a
	   sector count (SIZE / DEV_BSIZE) -- verify intent. */
 	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* Submit directly to the component's block-device driver. */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		/* Only the label portion of the buffer is meaningful. */
		memcpy(component_label, bp->b_un.b_addr,
		       sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
        } else {
		printf("Failed to read RAID component label!\n");
	}

	/* Don't let this scratch data linger in the buffer cache. */
        bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
/* ARGSUSED */
/*
 * Write *component_label to the component label area of the given
 * component device (RF_COMPONENT_INFO_OFFSET bytes in, padded with
 * zeros out to RF_COMPONENT_INFO_SIZE bytes).
 *
 * Like raidread_component_label(), this goes straight to the device's
 * strategy routine via a geteblk() scratch buffer; b_vp is unused.
 *
 * Returns 0 on success or the error from biowait().
 */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid is normally in bytes, but this sets a
	   sector count (SIZE / DEV_BSIZE) -- verify intent. */
 	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* Zero the whole block so the pad beyond the label is clean. */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* Submit directly to the component's block-device driver. */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* Don't let this scratch data linger in the buffer cache. */
        bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
   2097 
/*
 * Bump the array's modification counter and mark the component label
 * of every non-failed component as dirty.
 *
 * Components whose on-disk label says they are being used as a spare
 * (rf_ds_spared) are deliberately left untouched.  The large #if 0
 * section below is unfinished spare-handling code and is not compiled.
 */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				/* Record the new mod_counter and the
				   dirty state on this component. */
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
   2192 
   2193 
/*
 * Bump the modification counter and rewrite the component label of
 * every optimal component with its current status; when the array's
 * parity is known good (parity_good == RF_RAID_CLEAN) also mark each
 * label clean.
 *
 * In-use spares (rf_ds_used_spare) get a freshly populated label
 * recording the (row, column) of the disk they replaced, found by
 * scanning the spareRow/spareCol back-pointers.
 *
 * NOTE(review): the spare handling only looks at row 0
 * (Disks[0][sparecol], spareRow == 0) -- verify for multi-row sets.
 * If no matching disk is found, srow/scol stay -1 and are written to
 * the label as-is.
 */
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			else if (raidPtr->Disks[r][c].status !=
				   rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which (row, col) this spare stands in for. */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
					       raidPtr->mod_counter);
			}
		}
	}
	/* 	printf("Component labels updated\n"); */
}
   2310