Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.28
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.28 1999/08/14 03:47:07 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "rf_raid.h"
    139 #include "rf_raidframe.h"
    140 #include "rf_dag.h"
    141 #include "rf_dagflags.h"
    142 #include "rf_diskqueue.h"
    143 #include "rf_acctrace.h"
    144 #include "rf_etimer.h"
    145 #include "rf_general.h"
    146 #include "rf_debugMem.h"
    147 #include "rf_kintf.h"
    148 #include "rf_options.h"
    149 #include "rf_driver.h"
    150 #include "rf_parityscan.h"
    151 #include "rf_debugprint.h"
    152 #include "rf_threadstuff.h"
    153 
    154 int     rf_kdebug_level = 0;
    155 
    156 #define RFK_BOOT_NONE 0
    157 #define RFK_BOOT_GOOD 1
    158 #define RFK_BOOT_BAD  2
    159 static int rf_kbooted = RFK_BOOT_NONE;
    160 
    161 #ifdef DEBUG
    162 #define db0_printf(a) printf a
    163 #define db_printf(a) if (rf_kdebug_level > 0) printf a
    164 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    165 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
    166 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
    167 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
    168 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
    169 #else				/* DEBUG */
    170 #define db0_printf(a) printf a
    171 #define db1_printf(a) { }
    172 #define db2_printf(a) { }
    173 #define db3_printf(a) { }
    174 #define db4_printf(a) { }
    175 #define db5_printf(a) { }
    176 #endif				/* DEBUG */
    177 
    178 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    179 
    180 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    181 
    182 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    183 						 * spare table */
    184 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    185 						 * installation process */
    186 
    187 static struct rf_recon_req *recon_queue = NULL;	/* used to communicate
    188 						 * reconstruction
    189 						 * requests */
    190 
    191 
    192 decl_simple_lock_data(, recon_queue_mutex)
    193 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
    194 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
    195 
    196 /* prototypes */
    197 static void KernelWakeupFunc(struct buf * bp);
    198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    199 		   dev_t dev, RF_SectorNum_t startSect,
    200 		   RF_SectorCount_t numSect, caddr_t buf,
    201 		   void (*cbFunc) (struct buf *), void *cbArg,
    202 		   int logBytesPerSector, struct proc * b_proc);
    203 
    204 #define Dprintf0(s)       if (rf_queueDebug) \
    205      rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
    206 #define Dprintf1(s,a)     if (rf_queueDebug) \
    207      rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
    208 #define Dprintf2(s,a,b)   if (rf_queueDebug) \
    209      rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
    210 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
    211      rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
    212 
    213 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
    214 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
    215 
    216 void raidattach __P((int));
    217 int raidsize __P((dev_t));
    218 
    219 void    rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
    220 void    rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
    221 static int raidinit __P((dev_t, RF_Raid_t *, int));
    222 
    223 int raidopen __P((dev_t, int, int, struct proc *));
    224 int raidclose __P((dev_t, int, int, struct proc *));
    225 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
    226 int raidwrite __P((dev_t, struct uio *, int));
    227 int raidread __P((dev_t, struct uio *, int));
    228 void raidstrategy __P((struct buf *));
    229 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    230 
    231 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
    232 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
    233 void rf_update_component_labels( RF_Raid_t *);
    234 /*
    235  * Pilfered from ccd.c
    236  */
    237 
/*
 * Per-component I/O descriptor, modeled on ccd.c's ccdbuf.  Instances
 * come from the per-unit sc_cbufpool via RAIDGETBUF()/RAIDPUTBUF().
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
    244 
    245 
    246 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
    247 #define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    248 
    249 /* XXX Not sure if the following should be replacing the raidPtrs above,
    250    or if it should be used in conjunction with that... */
    251 
/*
 * Per-unit software state for one RAID pseudo-device.  One entry per
 * configured unit lives in the global raid_softc[] array (indexed by
 * raidunit(dev)).
 */
struct raid_softc {
	int     sc_flags;	/* flags (RAIDF_* constants below) */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;        /* size of the raid device */
	dev_t   sc_dev;	        /* our device.. */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
};
    261 /* sc_flags */
    262 #define RAIDF_INITED	0x01	/* unit has been initialized */
    263 #define RAIDF_WLABEL	0x02	/* label area is writable */
    264 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    265 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    266 #define RAIDF_LOCKED	0x80	/* unit is locked */
    267 
    268 #define	raidunit(x)	DISKUNIT(x)
    269 static int numraid = 0;
    270 
    271 /*
    272  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    273  * Be aware that large numbers can allow the driver to consume a lot of
    274  * kernel memory, especially on writes, and in degraded mode reads.
    275  *
    276  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    277  * a single 64K write will typically require 64K for the old data,
    278  * 64K for the old parity, and 64K for the new parity, for a total
    279  * of 192K (if the parity buffer is not re-used immediately).
    280  * Even it if is used immedately, that's still 128K, which when multiplied
    281  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    282  *
    283  * Now in degraded mode, for example, a 64K read on the above setup may
    284  * require data reconstruction, which will require *all* of the 4 remaining
    285  * disks to participate -- 4 * 32K/disk == 128K again.
    286  */
    287 
    288 #ifndef RAIDOUTSTANDING
    289 #define RAIDOUTSTANDING   6
    290 #endif
    291 
    292 #define RAIDLABELDEV(dev)	\
    293 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    294 
    295 /* declared here, and made public, for the benefit of KVM stuff.. */
    296 struct raid_softc *raid_softc;
    297 
    298 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
    299 				     struct disklabel *));
    300 static void raidgetdisklabel __P((dev_t));
    301 static void raidmakedisklabel __P((struct raid_softc *));
    302 
    303 static int raidlock __P((struct raid_softc *));
    304 static void raidunlock __P((struct raid_softc *));
    305 int raidlookup __P((char *, struct proc * p, struct vnode **));
    306 
    307 static void rf_markalldirty __P((RF_Raid_t *));
    308 
    309 void
    310 raidattach(num)
    311 	int     num;
    312 {
    313 	int raidID;
    314 	int i, rc;
    315 
    316 #ifdef DEBUG
    317 	printf("raidattach: Asked for %d units\n", num);
    318 #endif
    319 
    320 	if (num <= 0) {
    321 #ifdef DIAGNOSTIC
    322 		panic("raidattach: count <= 0");
    323 #endif
    324 		return;
    325 	}
    326 	/* This is where all the initialization stuff gets done. */
    327 
    328 	/* Make some space for requested number of units... */
    329 
    330 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    331 	if (raidPtrs == NULL) {
    332 		panic("raidPtrs is NULL!!\n");
    333 	}
    334 
    335 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    336 	if (rc) {
    337 		RF_PANIC();
    338 	}
    339 
    340 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    341 	recon_queue = NULL;
    342 
    343 	for (i = 0; i < numraid; i++)
    344 		raidPtrs[i] = NULL;
    345 	rc = rf_BootRaidframe();
    346 	if (rc == 0)
    347 		printf("Kernelized RAIDframe activated\n");
    348 	else
    349 		panic("Serious error booting RAID!!\n");
    350 
    351 	rf_kbooted = RFK_BOOT_GOOD;
    352 
    353 	/* put together some datastructures like the CCD device does.. This
    354 	 * lets us lock the device and what-not when it gets opened. */
    355 
    356 	raid_softc = (struct raid_softc *)
    357 	    malloc(num * sizeof(struct raid_softc),
    358 	    M_RAIDFRAME, M_NOWAIT);
    359 	if (raid_softc == NULL) {
    360 		printf("WARNING: no memory for RAIDframe driver\n");
    361 		return;
    362 	}
    363 	numraid = num;
    364 	bzero(raid_softc, num * sizeof(struct raid_softc));
    365 
    366 	for (raidID = 0; raidID < num; raidID++) {
    367 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    368 			  (RF_Raid_t *));
    369 		if (raidPtrs[raidID] == NULL) {
    370 			printf("raidPtrs[%d] is NULL\n", raidID);
    371 		}
    372 	}
    373 }
    374 
    375 
    376 int
    377 raidsize(dev)
    378 	dev_t   dev;
    379 {
    380 	struct raid_softc *rs;
    381 	struct disklabel *lp;
    382 	int     part, unit, omask, size;
    383 
    384 	unit = raidunit(dev);
    385 	if (unit >= numraid)
    386 		return (-1);
    387 	rs = &raid_softc[unit];
    388 
    389 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    390 		return (-1);
    391 
    392 	part = DISKPART(dev);
    393 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    394 	lp = rs->sc_dkdev.dk_label;
    395 
    396 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    397 		return (-1);
    398 
    399 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    400 		size = -1;
    401 	else
    402 		size = lp->d_partitions[part].p_size *
    403 		    (lp->d_secsize / DEV_BSIZE);
    404 
    405 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    406 		return (-1);
    407 
    408 	return (size);
    409 
    410 }
    411 
    412 int
    413 raiddump(dev, blkno, va, size)
    414 	dev_t   dev;
    415 	daddr_t blkno;
    416 	caddr_t va;
    417 	size_t  size;
    418 {
    419 	/* Not implemented. */
    420 	return ENXIO;
    421 }
    422 /* ARGSUSED */
/*
 * raidopen: open() entry point for both the block and character
 * (fmt == S_IFBLK / S_IFCHR) raid devices.  Takes the unit lock,
 * (re)reads the disklabel on first open of a configured unit,
 * validates the requested partition, records the open in the disk
 * open masks, and marks all components dirty on the first open so a
 * crash while open is detectable.  Returns 0 or an errno.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	/* Reject units beyond what raidattach() allocated. */
	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against concurrent open/close/unconfigure. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	/* First open of a configured unit: refresh the in-core label. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		/* Short-circuit keeps us from reading lp if unconfigured. */
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	/* Combined open mask must be rebuilt after the per-mode update. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
    501 /* ARGSUSED */
    502 int
    503 raidclose(dev, flags, fmt, p)
    504 	dev_t   dev;
    505 	int     flags, fmt;
    506 	struct proc *p;
    507 {
    508 	int     unit = raidunit(dev);
    509 	struct raid_softc *rs;
    510 	int     error = 0;
    511 	int     part;
    512 
    513 	if (unit >= numraid)
    514 		return (ENXIO);
    515 	rs = &raid_softc[unit];
    516 
    517 	if ((error = raidlock(rs)) != 0)
    518 		return (error);
    519 
    520 	part = DISKPART(dev);
    521 
    522 	/* ...that much closer to allowing unconfiguration... */
    523 	switch (fmt) {
    524 	case S_IFCHR:
    525 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    526 		break;
    527 
    528 	case S_IFBLK:
    529 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    530 		break;
    531 	}
    532 	rs->sc_dkdev.dk_openmask =
    533 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    534 
    535 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    536 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    537 		/* Last one... device is not unconfigured yet.
    538 		   Device shutdown has taken care of setting the
    539 		   clean bits if RAIDF_INITED is not set
    540 		   mark things as clean... */
    541 		rf_update_component_labels( raidPtrs[unit] );
    542 	}
    543 
    544 	raidunlock(rs);
    545 	return (0);
    546 
    547 }
    548 
    549 void
    550 raidstrategy(bp)
    551 	register struct buf *bp;
    552 {
    553 	register int s;
    554 
    555 	unsigned int raidID = raidunit(bp->b_dev);
    556 	RF_Raid_t *raidPtr;
    557 	struct raid_softc *rs = &raid_softc[raidID];
    558 	struct disklabel *lp;
    559 	int     wlabel;
    560 
    561 #if 0
    562 	db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
    563 	db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
    564 	db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
    565 	db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
    566 	db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
    567 
    568 	if (bp->b_flags & B_READ)
    569 		db1_printf(("READ\n"));
    570 	else
    571 		db1_printf(("WRITE\n"));
    572 #endif
    573 	if (rf_kbooted != RFK_BOOT_GOOD)
    574 		return;
    575 	if (raidID >= numraid || !raidPtrs[raidID]) {
    576 		bp->b_error = ENODEV;
    577 		bp->b_flags |= B_ERROR;
    578 		bp->b_resid = bp->b_bcount;
    579 		biodone(bp);
    580 		return;
    581 	}
    582 	raidPtr = raidPtrs[raidID];
    583 	if (!raidPtr->valid) {
    584 		bp->b_error = ENODEV;
    585 		bp->b_flags |= B_ERROR;
    586 		bp->b_resid = bp->b_bcount;
    587 		biodone(bp);
    588 		return;
    589 	}
    590 	if (bp->b_bcount == 0) {
    591 		db1_printf(("b_bcount is zero..\n"));
    592 		biodone(bp);
    593 		return;
    594 	}
    595 	lp = rs->sc_dkdev.dk_label;
    596 
    597 	/*
    598 	 * Do bounds checking and adjust transfer.  If there's an
    599 	 * error, the bounds check will flag that for us.
    600 	 */
    601 
    602 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    603 	if (DISKPART(bp->b_dev) != RAW_PART)
    604 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    605 			db1_printf(("Bounds check failed!!:%d %d\n",
    606 				(int) bp->b_blkno, (int) wlabel));
    607 			biodone(bp);
    608 			return;
    609 		}
    610 	s = splbio();		/* XXX Needed? */
    611 	db1_printf(("Beginning strategy...\n"));
    612 
    613 	bp->b_resid = 0;
    614 	bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
    615 	    NULL, NULL, NULL);
    616 	if (bp->b_error) {
    617 		bp->b_flags |= B_ERROR;
    618 		db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
    619 			bp->b_error));
    620 	}
    621 	splx(s);
    622 #if 0
    623 	db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
    624 		bp, bp->b_data,
    625 		(int) bp->b_bcount, (int) bp->b_resid));
    626 #endif
    627 }
    628 /* ARGSUSED */
    629 int
    630 raidread(dev, uio, flags)
    631 	dev_t   dev;
    632 	struct uio *uio;
    633 	int     flags;
    634 {
    635 	int     unit = raidunit(dev);
    636 	struct raid_softc *rs;
    637 	int     part;
    638 
    639 	if (unit >= numraid)
    640 		return (ENXIO);
    641 	rs = &raid_softc[unit];
    642 
    643 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    644 		return (ENXIO);
    645 	part = DISKPART(dev);
    646 
    647 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    648 
    649 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    650 
    651 }
    652 /* ARGSUSED */
    653 int
    654 raidwrite(dev, uio, flags)
    655 	dev_t   dev;
    656 	struct uio *uio;
    657 	int     flags;
    658 {
    659 	int     unit = raidunit(dev);
    660 	struct raid_softc *rs;
    661 
    662 	if (unit >= numraid)
    663 		return (ENXIO);
    664 	rs = &raid_softc[unit];
    665 
    666 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    667 		return (ENXIO);
    668 	db1_printf(("raidwrite\n"));
    669 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    670 
    671 }
    672 
    673 int
    674 raidioctl(dev, cmd, data, flag, p)
    675 	dev_t   dev;
    676 	u_long  cmd;
    677 	caddr_t data;
    678 	int     flag;
    679 	struct proc *p;
    680 {
    681 	int     unit = raidunit(dev);
    682 	int     error = 0;
    683 	int     part, pmask;
    684 	struct raid_softc *rs;
    685 #if 0
    686 	int     r, c;
    687 #endif
    688 	/* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
    689 
    690 	/* struct ccdbuf *cbp; */
    691 	/* struct raidbuf *raidbp; */
    692 	RF_Config_t *k_cfg, *u_cfg;
    693 	u_char *specific_buf;
    694 	int retcode = 0;
    695 	int row;
    696 	int column;
    697 	int s;
    698 	struct rf_recon_req *rrcopy, *rr;
    699 	RF_ComponentLabel_t *component_label;
    700 	RF_ComponentLabel_t ci_label;
    701 	RF_ComponentLabel_t **c_label_ptr;
    702 	RF_SingleComponent_t *sparePtr,*componentPtr;
    703 	RF_SingleComponent_t hot_spare;
    704 	RF_SingleComponent_t component;
    705 
    706 	if (unit >= numraid)
    707 		return (ENXIO);
    708 	rs = &raid_softc[unit];
    709 
    710 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    711 		(int) DISKPART(dev), (int) unit, (int) cmd));
    712 
    713 	/* Must be open for writes for these commands... */
    714 	switch (cmd) {
    715 	case DIOCSDINFO:
    716 	case DIOCWDINFO:
    717 	case DIOCWLABEL:
    718 		if ((flag & FWRITE) == 0)
    719 			return (EBADF);
    720 	}
    721 
    722 	/* Must be initialized for these... */
    723 	switch (cmd) {
    724 	case DIOCGDINFO:
    725 	case DIOCSDINFO:
    726 	case DIOCWDINFO:
    727 	case DIOCGPART:
    728 	case DIOCWLABEL:
    729 	case DIOCGDEFLABEL:
    730 	case RAIDFRAME_SHUTDOWN:
    731 	case RAIDFRAME_REWRITEPARITY:
    732 	case RAIDFRAME_GET_INFO:
    733 	case RAIDFRAME_RESET_ACCTOTALS:
    734 	case RAIDFRAME_GET_ACCTOTALS:
    735 	case RAIDFRAME_KEEP_ACCTOTALS:
    736 	case RAIDFRAME_GET_SIZE:
    737 	case RAIDFRAME_FAIL_DISK:
    738 	case RAIDFRAME_COPYBACK:
    739 	case RAIDFRAME_CHECKRECON:
    740 	case RAIDFRAME_GET_COMPONENT_LABEL:
    741 	case RAIDFRAME_SET_COMPONENT_LABEL:
    742 	case RAIDFRAME_ADD_HOT_SPARE:
    743 	case RAIDFRAME_REMOVE_HOT_SPARE:
    744 	case RAIDFRAME_INIT_LABELS:
    745 	case RAIDFRAME_REBUILD_IN_PLACE:
    746 	case RAIDFRAME_CHECK_PARITY:
    747 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    748 			return (ENXIO);
    749 	}
    750 
    751 	switch (cmd) {
    752 
    753 
    754 		/* configure the system */
    755 	case RAIDFRAME_CONFIGURE:
    756 
    757 		db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
    758 		/* copy-in the configuration information */
    759 		/* data points to a pointer to the configuration structure */
    760 		u_cfg = *((RF_Config_t **) data);
    761 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    762 		if (k_cfg == NULL) {
    763 			db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
    764 			return (ENOMEM);
    765 		}
    766 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    767 		    sizeof(RF_Config_t));
    768 		if (retcode) {
    769 			db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
    770 				retcode));
    771 			return (retcode);
    772 		}
    773 		/* allocate a buffer for the layout-specific data, and copy it
    774 		 * in */
    775 		if (k_cfg->layoutSpecificSize) {
    776 			if (k_cfg->layoutSpecificSize > 10000) {
    777 				/* sanity check */
    778 				db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
    779 				return (EINVAL);
    780 			}
    781 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    782 			    (u_char *));
    783 			if (specific_buf == NULL) {
    784 				RF_Free(k_cfg, sizeof(RF_Config_t));
    785 				db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
    786 				return (ENOMEM);
    787 			}
    788 			retcode = copyin(k_cfg->layoutSpecific,
    789 			    (caddr_t) specific_buf,
    790 			    k_cfg->layoutSpecificSize);
    791 			if (retcode) {
    792 				db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
    793 					retcode));
    794 				return (retcode);
    795 			}
    796 		} else
    797 			specific_buf = NULL;
    798 		k_cfg->layoutSpecific = specific_buf;
    799 
    800 		/* should do some kind of sanity check on the configuration.
    801 		 * Store the sum of all the bytes in the last byte? */
    802 
    803 		/* configure the system */
    804 
    805 		raidPtrs[unit]->raidid = unit;
    806 
    807 		retcode = rf_Configure(raidPtrs[unit], k_cfg);
    808 
    809 		/* allow this many simultaneous IO's to this RAID device */
    810 		raidPtrs[unit]->openings = RAIDOUTSTANDING;
    811 
    812 		if (retcode == 0) {
    813 			retcode = raidinit(dev, raidPtrs[unit], unit);
    814 			rf_markalldirty( raidPtrs[unit] );
    815 		}
    816 		/* free the buffers.  No return code here. */
    817 		if (k_cfg->layoutSpecificSize) {
    818 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    819 		}
    820 		RF_Free(k_cfg, sizeof(RF_Config_t));
    821 
    822 		db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
    823 			retcode));
    824 
    825 		return (retcode);
    826 
    827 		/* shutdown the system */
    828 	case RAIDFRAME_SHUTDOWN:
    829 
    830 		if ((error = raidlock(rs)) != 0)
    831 			return (error);
    832 
    833 		/*
    834 		 * If somebody has a partition mounted, we shouldn't
    835 		 * shutdown.
    836 		 */
    837 
    838 		part = DISKPART(dev);
    839 		pmask = (1 << part);
    840 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    841 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    842 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    843 			raidunlock(rs);
    844 			return (EBUSY);
    845 		}
    846 
    847 		if (rf_debugKernelAccess) {
    848 			printf("call shutdown\n");
    849 		}
    850 
    851 		retcode = rf_Shutdown(raidPtrs[unit]);
    852 
    853 		db1_printf(("Done main shutdown\n"));
    854 
    855 		pool_destroy(&rs->sc_cbufpool);
    856 		db1_printf(("Done freeing component buffer freelist\n"));
    857 
    858 		/* It's no longer initialized... */
    859 		rs->sc_flags &= ~RAIDF_INITED;
    860 
    861 		/* Detach the disk. */
    862 		disk_detach(&rs->sc_dkdev);
    863 
    864 		raidunlock(rs);
    865 
    866 		return (retcode);
    867 	case RAIDFRAME_GET_COMPONENT_LABEL:
    868 		c_label_ptr = (RF_ComponentLabel_t **) data;
    869 		/* need to read the component label for the disk indicated
    870 		   by row,column in component_label
    871 		   XXX need to sanity check these values!!!
    872 		   */
    873 
    874 		/* For practice, let's get it directly fromdisk, rather
    875 		   than from the in-core copy */
    876 		RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
    877 			   (RF_ComponentLabel_t *));
    878 		if (component_label == NULL)
    879 			return (ENOMEM);
    880 
    881 		bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
    882 
    883 		retcode = copyin( *c_label_ptr, component_label,
    884 				  sizeof(RF_ComponentLabel_t));
    885 
    886 		if (retcode) {
    887 			return(retcode);
    888 		}
    889 
    890 		row = component_label->row;
    891 		column = component_label->column;
    892 
    893 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
    894 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
    895 			return(EINVAL);
    896 		}
    897 
    898 		raidread_component_label(
    899                               raidPtrs[unit]->Disks[row][column].dev,
    900 			      raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    901 			      component_label );
    902 
    903 		retcode = copyout((caddr_t) component_label,
    904 				  (caddr_t) *c_label_ptr,
    905 				  sizeof(RF_ComponentLabel_t));
    906 		RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    907 		return (retcode);
    908 
    909 	case RAIDFRAME_SET_COMPONENT_LABEL:
    910 		component_label = (RF_ComponentLabel_t *) data;
    911 
    912 		/* XXX check the label for valid stuff... */
    913 		/* Note that some things *should not* get modified --
    914 		   the user should be re-initing the labels instead of
    915 		   trying to patch things.
    916 		   */
    917 
    918 		printf("Got component label:\n");
    919 		printf("Version: %d\n",component_label->version);
    920 		printf("Serial Number: %d\n",component_label->serial_number);
    921 		printf("Mod counter: %d\n",component_label->mod_counter);
    922 		printf("Row: %d\n", component_label->row);
    923 		printf("Column: %d\n", component_label->column);
    924 		printf("Num Rows: %d\n", component_label->num_rows);
    925 		printf("Num Columns: %d\n", component_label->num_columns);
    926 		printf("Clean: %d\n", component_label->clean);
    927 		printf("Status: %d\n", component_label->status);
    928 
    929 		row = component_label->row;
    930 		column = component_label->column;
    931 
    932 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
    933 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
    934 			return(EINVAL);
    935 		}
    936 
    937 		/* XXX this isn't allowed to do anything for now :-) */
    938 #if 0
    939 		raidwrite_component_label(
    940                             raidPtrs[unit]->Disks[row][column].dev,
    941 			    raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    942 			    component_label );
    943 #endif
    944 		return (0);
    945 
    946 	case RAIDFRAME_INIT_LABELS:
    947 		component_label = (RF_ComponentLabel_t *) data;
    948 		/*
    949 		   we only want the serial number from
    950 		   the above.  We get all the rest of the information
    951 		   from the config that was used to create this RAID
    952 		   set.
    953 		   */
    954 
    955 		raidPtrs[unit]->serial_number = component_label->serial_number;
    956 		/* current version number */
    957 		ci_label.version = RF_COMPONENT_LABEL_VERSION;
    958 		ci_label.serial_number = component_label->serial_number;
    959 		ci_label.mod_counter = raidPtrs[unit]->mod_counter;
    960 		ci_label.num_rows = raidPtrs[unit]->numRow;
    961 		ci_label.num_columns = raidPtrs[unit]->numCol;
    962 		ci_label.clean = RF_RAID_DIRTY; /* not clean */
    963 		ci_label.status = rf_ds_optimal; /* "It's good!" */
    964 
    965 		for(row=0;row<raidPtrs[unit]->numRow;row++) {
    966 			ci_label.row = row;
    967 			for(column=0;column<raidPtrs[unit]->numCol;column++) {
    968 				ci_label.column = column;
    969 				raidwrite_component_label(
    970 				  raidPtrs[unit]->Disks[row][column].dev,
    971 				  raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    972 				  &ci_label );
    973 			}
    974 		}
    975 
    976 		return (retcode);
    977 
    978 		/* initialize all parity */
    979 	case RAIDFRAME_REWRITEPARITY:
    980 
    981 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
    982 			/* Parity for RAID 0 is trivially correct */
    983 			raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
    984 			return(0);
    985 		}
    986 
    987 		/* borrow the thread of the requesting process */
    988 
    989 		s = splbio();
    990 		retcode = rf_RewriteParity(raidPtrs[unit]);
    991 		splx(s);
    992 		/* return I/O Error if the parity rewrite fails */
    993 
    994 		if (retcode) {
    995 			retcode = EIO;
    996 		} else {
    997 			/* set the clean bit!  If we shutdown correctly,
    998 			 the clean bit on each component label will get
    999 			 set */
   1000 			raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
   1001 		}
   1002 		return (retcode);
   1003 
   1004 
   1005 	case RAIDFRAME_ADD_HOT_SPARE:
   1006 		sparePtr = (RF_SingleComponent_t *) data;
   1007 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1008 		printf("Adding spare\n");
   1009 		retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
   1010 		return(retcode);
   1011 
   1012 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1013 		return(retcode);
   1014 
   1015 	case RAIDFRAME_REBUILD_IN_PLACE:
   1016 
   1017 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1018 			/* Can't do this on a RAID 0!! */
   1019 			return(EINVAL);
   1020 		}
   1021 
   1022 		componentPtr = (RF_SingleComponent_t *) data;
   1023 		memcpy( &component, componentPtr,
   1024 			sizeof(RF_SingleComponent_t));
   1025 		row = component.row;
   1026 		column = component.column;
   1027 		printf("Rebuild: %d %d\n",row, column);
   1028 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
   1029 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
   1030 			return(EINVAL);
   1031 		}
   1032 		printf("Attempting a rebuild in place\n");
   1033 		s = splbio();
   1034 		retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
   1035 		splx(s);
   1036 		return(retcode);
   1037 
   1038 		/* issue a test-unit-ready through raidframe to the indicated
   1039 		 * device */
   1040 #if 0				/* XXX not supported yet (ever?) */
   1041 	case RAIDFRAME_TUR:
   1042 		/* debug only */
   1043 		retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
   1044 		return (retcode);
   1045 #endif
   1046 	case RAIDFRAME_GET_INFO:
   1047 		{
   1048 			RF_Raid_t *raid = raidPtrs[unit];
   1049 			RF_DeviceConfig_t *cfg, **ucfgp;
   1050 			int     i, j, d;
   1051 
   1052 			if (!raid->valid)
   1053 				return (ENODEV);
   1054 			ucfgp = (RF_DeviceConfig_t **) data;
   1055 			RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
   1056 				  (RF_DeviceConfig_t *));
   1057 			if (cfg == NULL)
   1058 				return (ENOMEM);
   1059 			bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
   1060 			cfg->rows = raid->numRow;
   1061 			cfg->cols = raid->numCol;
   1062 			cfg->ndevs = raid->numRow * raid->numCol;
   1063 			if (cfg->ndevs >= RF_MAX_DISKS) {
   1064 				cfg->ndevs = 0;
   1065 				return (ENOMEM);
   1066 			}
   1067 			cfg->nspares = raid->numSpare;
   1068 			if (cfg->nspares >= RF_MAX_DISKS) {
   1069 				cfg->nspares = 0;
   1070 				return (ENOMEM);
   1071 			}
   1072 			cfg->maxqdepth = raid->maxQueueDepth;
   1073 			d = 0;
   1074 			for (i = 0; i < cfg->rows; i++) {
   1075 				for (j = 0; j < cfg->cols; j++) {
   1076 					cfg->devs[d] = raid->Disks[i][j];
   1077 					d++;
   1078 				}
   1079 			}
   1080 			for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
   1081 				cfg->spares[i] = raid->Disks[0][j];
   1082 			}
   1083 			retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
   1084 					  sizeof(RF_DeviceConfig_t));
   1085 			RF_Free(cfg, sizeof(RF_DeviceConfig_t));
   1086 
   1087 			return (retcode);
   1088 		}
   1089 		break;
   1090 	case RAIDFRAME_CHECK_PARITY:
   1091 		*(int *) data = raidPtrs[unit]->parity_good;
   1092 		return (0);
   1093 	case RAIDFRAME_RESET_ACCTOTALS:
   1094 		{
   1095 			RF_Raid_t *raid = raidPtrs[unit];
   1096 
   1097 			bzero(&raid->acc_totals, sizeof(raid->acc_totals));
   1098 			return (0);
   1099 		}
   1100 		break;
   1101 
   1102 	case RAIDFRAME_GET_ACCTOTALS:
   1103 		{
   1104 			RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
   1105 			RF_Raid_t *raid = raidPtrs[unit];
   1106 
   1107 			*totals = raid->acc_totals;
   1108 			return (0);
   1109 		}
   1110 		break;
   1111 
   1112 	case RAIDFRAME_KEEP_ACCTOTALS:
   1113 		{
   1114 			RF_Raid_t *raid = raidPtrs[unit];
   1115 			int    *keep = (int *) data;
   1116 
   1117 			raid->keep_acc_totals = *keep;
   1118 			return (0);
   1119 		}
   1120 		break;
   1121 
   1122 	case RAIDFRAME_GET_SIZE:
   1123 		*(int *) data = raidPtrs[unit]->totalSectors;
   1124 		return (0);
   1125 
   1126 #define RAIDFRAME_RECON 1
   1127 		/* XXX The above should probably be set somewhere else!! GO */
   1128 #if RAIDFRAME_RECON > 0
   1129 
   1130 		/* fail a disk & optionally start reconstruction */
   1131 	case RAIDFRAME_FAIL_DISK:
   1132 
   1133 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1134 			/* Can't do this on a RAID 0!! */
   1135 			return(EINVAL);
   1136 		}
   1137 
   1138 		rr = (struct rf_recon_req *) data;
   1139 
   1140 		if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
   1141 		    || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
   1142 			return (EINVAL);
   1143 
   1144 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1145 		       unit, rr->row, rr->col);
   1146 
   1147 		/* make a copy of the recon request so that we don't rely on
   1148 		 * the user's buffer */
   1149 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1150 		bcopy(rr, rrcopy, sizeof(*rr));
   1151 		rrcopy->raidPtr = (void *) raidPtrs[unit];
   1152 
   1153 		LOCK_RECON_Q_MUTEX();
   1154 		rrcopy->next = recon_queue;
   1155 		recon_queue = rrcopy;
   1156 		wakeup(&recon_queue);
   1157 		UNLOCK_RECON_Q_MUTEX();
   1158 
   1159 		return (0);
   1160 
   1161 		/* invoke a copyback operation after recon on whatever disk
   1162 		 * needs it, if any */
   1163 	case RAIDFRAME_COPYBACK:
   1164 
   1165 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1166 			/* This makes no sense on a RAID 0!! */
   1167 			return(EINVAL);
   1168 		}
   1169 
   1170 		/* borrow the current thread to get this done */
   1171 
   1172 		s = splbio();
   1173 		rf_CopybackReconstructedData(raidPtrs[unit]);
   1174 		splx(s);
   1175 		return (0);
   1176 
   1177 		/* return the percentage completion of reconstruction */
   1178 	case RAIDFRAME_CHECKRECON:
   1179 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1180 			/* This makes no sense on a RAID 0 */
   1181 			return(EINVAL);
   1182 		}
   1183 
   1184 		row = *(int *) data;
   1185 		if (row < 0 || row >= raidPtrs[unit]->numRow)
   1186 			return (EINVAL);
   1187 		if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
   1188 			*(int *) data = 100;
   1189 		else
   1190 			*(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
   1191 		return (0);
   1192 
   1193 		/* the sparetable daemon calls this to wait for the kernel to
   1194 		 * need a spare table. this ioctl does not return until a
   1195 		 * spare table is needed. XXX -- calling mpsleep here in the
   1196 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1197 		 * -- I should either compute the spare table in the kernel,
   1198 		 * or have a different -- XXX XXX -- interface (a different
   1199 		 * character device) for delivering the table          -- XXX */
   1200 #if 0
   1201 	case RAIDFRAME_SPARET_WAIT:
   1202 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1203 		while (!rf_sparet_wait_queue)
   1204 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1205 		waitreq = rf_sparet_wait_queue;
   1206 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1207 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1208 
   1209 		*((RF_SparetWait_t *) data) = *waitreq;	/* structure assignment */
   1210 
   1211 		RF_Free(waitreq, sizeof(*waitreq));
   1212 		return (0);
   1213 
   1214 
   1215 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1216 		 * code in it that will cause the dameon to exit */
   1217 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1218 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1219 		waitreq->fcol = -1;
   1220 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1221 		waitreq->next = rf_sparet_wait_queue;
   1222 		rf_sparet_wait_queue = waitreq;
   1223 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1224 		wakeup(&rf_sparet_wait_queue);
   1225 		return (0);
   1226 
   1227 		/* used by the spare table daemon to deliver a spare table
   1228 		 * into the kernel */
   1229 	case RAIDFRAME_SEND_SPARET:
   1230 
   1231 		/* install the spare table */
   1232 		retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
   1233 
   1234 		/* respond to the requestor.  the return status of the spare
   1235 		 * table installation is passed in the "fcol" field */
   1236 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1237 		waitreq->fcol = retcode;
   1238 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1239 		waitreq->next = rf_sparet_resp_queue;
   1240 		rf_sparet_resp_queue = waitreq;
   1241 		wakeup(&rf_sparet_resp_queue);
   1242 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1243 
   1244 		return (retcode);
   1245 #endif
   1246 
   1247 
   1248 #endif				/* RAIDFRAME_RECON > 0 */
   1249 
   1250 	default:
   1251 		break;		/* fall through to the os-specific code below */
   1252 
   1253 	}
   1254 
   1255 	if (!raidPtrs[unit]->valid)
   1256 		return (EINVAL);
   1257 
   1258 	/*
   1259 	 * Add support for "regular" device ioctls here.
   1260 	 */
   1261 
   1262 	switch (cmd) {
   1263 	case DIOCGDINFO:
   1264 		db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
   1265 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1266 		break;
   1267 
   1268 	case DIOCGPART:
   1269 		db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
   1270 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1271 		((struct partinfo *) data)->part =
   1272 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1273 		break;
   1274 
   1275 	case DIOCWDINFO:
   1276 		db1_printf(("DIOCWDINFO\n"));
   1277 	case DIOCSDINFO:
   1278 		db1_printf(("DIOCSDINFO\n"));
   1279 		if ((error = raidlock(rs)) != 0)
   1280 			return (error);
   1281 
   1282 		rs->sc_flags |= RAIDF_LABELLING;
   1283 
   1284 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1285 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1286 		if (error == 0) {
   1287 			if (cmd == DIOCWDINFO)
   1288 				error = writedisklabel(RAIDLABELDEV(dev),
   1289 				    raidstrategy, rs->sc_dkdev.dk_label,
   1290 				    rs->sc_dkdev.dk_cpulabel);
   1291 		}
   1292 		rs->sc_flags &= ~RAIDF_LABELLING;
   1293 
   1294 		raidunlock(rs);
   1295 
   1296 		if (error)
   1297 			return (error);
   1298 		break;
   1299 
   1300 	case DIOCWLABEL:
   1301 		db1_printf(("DIOCWLABEL\n"));
   1302 		if (*(int *) data != 0)
   1303 			rs->sc_flags |= RAIDF_WLABEL;
   1304 		else
   1305 			rs->sc_flags &= ~RAIDF_WLABEL;
   1306 		break;
   1307 
   1308 	case DIOCGDEFLABEL:
   1309 		db1_printf(("DIOCGDEFLABEL\n"));
   1310 		raidgetdefaultlabel(raidPtrs[unit], rs,
   1311 		    (struct disklabel *) data);
   1312 		break;
   1313 
   1314 	default:
   1315 		retcode = ENOTTY;	/* XXXX ?? OR EINVAL ? */
   1316 	}
   1317 	return (retcode);
   1318 
   1319 }
   1320 
   1321 
/* raidinit -- complete the rest of the initialization for the
   RAIDframe device: create the component-buffer pool, mark the unit
   as initialized, and attach the disk so that disklabel operations
   work on it.

   dev     - device number for this raid unit
   raidPtr - the (already configured) RAIDframe descriptor
   unit    - raid unit number; indexes raid_softc[]

   Currently always returns 0.  */


static int
raidinit(dev, raidPtr, unit)
	dev_t   dev;
	RF_Raid_t *raidPtr;
	int     unit;
{
	int     retcode;
	/* int ix; */
	/* struct raidbuf *raidbp; */
	struct raid_softc *rs;

	retcode = 0;

	rs = &raid_softc[unit];
	/* pool of struct raidbuf shadow buffers used by
	 * rf_DispatchKernelIO(); destroyed again at RAIDFRAME_SHUTDOWN
	 * time via pool_destroy() */
	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */

	rs->sc_dkdev.dk_name = rs->sc_xname;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;
	rs->sc_dev = dev;

	return (retcode);
}
   1365 
   1366 /*
   1367  * This kernel thread never exits.  It is created once, and persists
   1368  * until the system reboots.
   1369  */
   1370 
   1371 void
   1372 rf_ReconKernelThread()
   1373 {
   1374 	struct rf_recon_req *req;
   1375 	int     s;
   1376 
   1377 	/* XXX not sure what spl() level we should be at here... probably
   1378 	 * splbio() */
   1379 	s = splbio();
   1380 
   1381 	while (1) {
   1382 		/* grab the next reconstruction request from the queue */
   1383 		LOCK_RECON_Q_MUTEX();
   1384 		while (!recon_queue) {
   1385 			UNLOCK_RECON_Q_MUTEX();
   1386 			tsleep(&recon_queue, PRIBIO,
   1387 			       "raidframe recon", 0);
   1388 			LOCK_RECON_Q_MUTEX();
   1389 		}
   1390 		req = recon_queue;
   1391 		recon_queue = recon_queue->next;
   1392 		UNLOCK_RECON_Q_MUTEX();
   1393 
   1394 		/*
   1395 	         * If flags specifies that we should start recon, this call
   1396 	         * will not return until reconstruction completes, fails,
   1397 		 * or is aborted.
   1398 	         */
   1399 		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   1400 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   1401 
   1402 		RF_Free(req, sizeof(*req));
   1403 	}
   1404 }
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 *
 * req is placed on rf_sparet_wait_queue for the userland sparetable
 * daemon to pick up (RAIDFRAME_SPARET_WAIT); we then sleep until the
 * daemon delivers a response on rf_sparet_resp_queue
 * (RAIDFRAME_SEND_SPARET).  Returns the "fcol" status field of the
 * daemon's response.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): the mpsleep() that would drop the mutex is #if 0'd
	 * out below, so as written we tsleep() while still holding
	 * rf_sparet_wait_mutex -- presumably harmless with the current
	 * mutex implementation, but worth confirming. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
#if 0
		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
			(void *) simple_lock_addr(rf_sparet_wait_mutex),
			MS_LOCK_SIMPLE);
#endif
	}
	/* dequeue the response; note that req now points at the daemon's
	 * reply, not the request we were handed */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
   1442 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1443  * bp & passes it down.
   1444  * any calls originating in the kernel must use non-blocking I/O
   1445  * do some extra sanity checking to return "appropriate" error values for
   1446  * certain conditions (to make some standard utilities work)
   1447  */
   1448 int
   1449 rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
   1450 	RF_Raid_t *raidPtr;
   1451 	struct buf *bp;
   1452 	RF_RaidAccessFlags_t flags;
   1453 	void    (*cbFunc) (struct buf *);
   1454 	void   *cbArg;
   1455 {
   1456 	RF_SectorCount_t num_blocks, pb, sum;
   1457 	RF_RaidAddr_t raid_addr;
   1458 	int     retcode;
   1459 	struct partition *pp;
   1460 	daddr_t blocknum;
   1461 	int     unit;
   1462 	struct raid_softc *rs;
   1463 	int     do_async;
   1464 
   1465 	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */
   1466 
   1467 	unit = raidPtr->raidid;
   1468 	rs = &raid_softc[unit];
   1469 
   1470 	/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1471 	 * partition.. Need to make it absolute to the underlying device.. */
   1472 
   1473 	blocknum = bp->b_blkno;
   1474 	if (DISKPART(bp->b_dev) != RAW_PART) {
   1475 		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1476 		blocknum += pp->p_offset;
   1477 		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
   1478 			pp->p_offset));
   1479 	} else {
   1480 		db1_printf(("Is raw..\n"));
   1481 	}
   1482 	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));
   1483 
   1484 	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1485 	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1486 
   1487 	/* *THIS* is where we adjust what block we're going to... but DO NOT
   1488 	 * TOUCH bp->b_blkno!!! */
   1489 	raid_addr = blocknum;
   1490 
   1491 	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1492 	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1493 	sum = raid_addr + num_blocks + pb;
   1494 	if (1 || rf_debugKernelAccess) {
   1495 		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1496 			(int) raid_addr, (int) sum, (int) num_blocks,
   1497 			(int) pb, (int) bp->b_resid));
   1498 	}
   1499 	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1500 	    || (sum < num_blocks) || (sum < pb)) {
   1501 		bp->b_error = ENOSPC;
   1502 		bp->b_flags |= B_ERROR;
   1503 		bp->b_resid = bp->b_bcount;
   1504 		biodone(bp);
   1505 		return (bp->b_error);
   1506 	}
   1507 	/*
   1508 	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1509 	 */
   1510 
   1511 	if (bp->b_bcount & raidPtr->sectorMask) {
   1512 		bp->b_error = EINVAL;
   1513 		bp->b_flags |= B_ERROR;
   1514 		bp->b_resid = bp->b_bcount;
   1515 		biodone(bp);
   1516 		return (bp->b_error);
   1517 	}
   1518 	db1_printf(("Calling DoAccess..\n"));
   1519 
   1520 
   1521 	/* Put a throttle on the number of requests we handle simultanously */
   1522 
   1523 	RF_LOCK_MUTEX(raidPtr->mutex);
   1524 
   1525 	while(raidPtr->openings <= 0) {
   1526 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1527 		(void)tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);
   1528 		RF_LOCK_MUTEX(raidPtr->mutex);
   1529 	}
   1530 	raidPtr->openings--;
   1531 
   1532 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1533 
   1534 	/*
   1535 	 * Everything is async.
   1536 	 */
   1537 	do_async = 1;
   1538 
   1539 	/* don't ever condition on bp->b_flags & B_WRITE.  always condition on
   1540 	 * B_READ instead */
   1541 	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1542 	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1543 	    do_async, raid_addr, num_blocks,
   1544 	    bp->b_un.b_addr,
   1545 	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
   1546 	    NULL, cbFunc, cbArg);
   1547 #if 0
   1548 	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
   1549 		bp->b_data, (int) bp->b_resid));
   1550 #endif
   1551 
   1552 	return (retcode);
   1553 }
/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

/*
 * rf_DispatchKernelIO: issue one low-level component I/O described by
 * req.  READ/WRITE requests get a shadow struct raidbuf from the
 * per-unit pool, are set up via InitBP(), and are sent down with
 * VOP_STRATEGY(); KernelWakeupFunc() is the biodone callback that
 * completes the request back into RAIDframe.  NOP requests (used
 * primarily to unlock a locked queue) just bump the outstanding count
 * and invoke the wakeup function directly.  Always returns 0.
 */
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int     unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	/* scrub any stale error state out of the incoming buffer */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this??  */

		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* set up the shadow buffer for the component I/O */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes account for themselves on the component vnode */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.

   Runs (at splbio) as the biodone handler for the shadow buffer issued
   by rf_DispatchKernelIO(): it propagates any error into the original
   buffer, updates timing statistics, marks the component as failed on
   the first error seen, returns the shadow raidbuf to the pool, and
   finally completes the request back into RAIDframe.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int     unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate any component I/O error into the original buffer,
	 * defaulting to EIO if no specific error code was recorded */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* account the physical I/O time in the access trace record */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* return the shadow buffer to the per-unit pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* complete the request back into RAIDframe proper */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
   1769 
   1770 
   1771 
   1772 /*
   1773  * initialize a buf structure for doing an I/O in the kernel.
   1774  */
   1775 static void
   1776 InitBP(
   1777     struct buf * bp,
   1778     struct vnode * b_vp,
   1779     unsigned rw_flag,
   1780     dev_t dev,
   1781     RF_SectorNum_t startSect,
   1782     RF_SectorCount_t numSect,
   1783     caddr_t buf,
   1784     void (*cbFunc) (struct buf *),
   1785     void *cbArg,
   1786     int logBytesPerSector,
   1787     struct proc * b_proc)
   1788 {
   1789 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1790 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1791 	bp->b_bcount = numSect << logBytesPerSector;
   1792 	bp->b_bufsize = bp->b_bcount;
   1793 	bp->b_error = 0;
   1794 	bp->b_dev = dev;
   1795 	db1_printf(("bp->b_dev is %d\n", dev));
   1796 	bp->b_un.b_addr = buf;
   1797 #if 0
   1798 	db1_printf(("bp->b_data=0x%x\n", bp->b_data));
   1799 #endif
   1800 
   1801 	bp->b_blkno = startSect;
   1802 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1803 	db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
   1804 	if (bp->b_bcount == 0) {
   1805 		panic("bp->b_bcount is zero in InitBP!!\n");
   1806 	}
   1807 	bp->b_proc = b_proc;
   1808 	bp->b_iodone = cbFunc;
   1809 	bp->b_vp = b_vp;
   1810 
   1811 }
   1812 /* Extras... */
   1813 
#if 0
/*
 * Disabled stub: always returns failure (1) after logging a complaint.
 * Kept for reference only -- nothing in this revision compiles it.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode = 1;
	printf("This is supposed to do something useful!!\n");	/* XXX */

	return (retcode);

}
#endif
   1826 
   1827 static void
   1828 raidgetdefaultlabel(raidPtr, rs, lp)
   1829 	RF_Raid_t *raidPtr;
   1830 	struct raid_softc *rs;
   1831 	struct disklabel *lp;
   1832 {
   1833 	db1_printf(("Building a default label...\n"));
   1834 	bzero(lp, sizeof(*lp));
   1835 
   1836 	/* fabricate a label... */
   1837 	lp->d_secperunit = raidPtr->totalSectors;
   1838 	lp->d_secsize = raidPtr->bytesPerSector;
   1839 	lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
   1840 	lp->d_ntracks = 1;
   1841 	lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
   1842 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1843 
   1844 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1845 	lp->d_type = DTYPE_RAID;
   1846 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1847 	lp->d_rpm = 3600;
   1848 	lp->d_interleave = 1;
   1849 	lp->d_flags = 0;
   1850 
   1851 	lp->d_partitions[RAW_PART].p_offset = 0;
   1852 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1853 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1854 	lp->d_npartitions = RAW_PART + 1;
   1855 
   1856 	lp->d_magic = DISKMAGIC;
   1857 	lp->d_magic2 = DISKMAGIC;
   1858 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1859 
   1860 }
   1861 /*
   1862  * Read the disklabel from the raid device.  If one is not present, fake one
   1863  * up.
   1864  */
static void
raidgetdisklabel(dev)
	dev_t   dev;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char   *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	/* Start from a clean machine-dependent label. */
	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* Seed *lp with a fabricated label so there is sane geometry to
	   work with even if nothing valid is found on disk. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* No (valid) label on disk -- synthesize one. */
		raidmakedisklabel(rs);
	else {
		int     i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same componets are used, and old disklabel may used
		 * if that is found.
		 */
		/* NOTE(review): d_secperunit is printed with %d -- confirm
		   it is an int-sized field on all platforms. */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		/* Warn about any partition extending past the array. */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
   1918 /*
   1919  * Take care of things one might want to take care of in the event
   1920  * that a disklabel isn't present.
   1921  */
/*
 * Called from raidgetdisklabel() when readdisklabel() fails: patch the
 * fabricated default label into its "no label present" form.
 */
static void
raidmakedisklabel(rs)
	struct raid_softc *rs;
{
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	db1_printf(("Making a label..\n"));

	/*
	 * For historical reasons, if there's no disklabel present
	 * the raw partition must be marked FS_BSDFFS.
	 */

	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	/* Re-checksum: the label contents were just changed above. */
	lp->d_checksum = dkcksum(lp);
}
   1940 /*
   1941  * Lookup the provided name in the filesystem.  If the file exists,
   1942  * is a valid block device, and isn't being used by anyone else,
   1943  * set *vpp to the file's vnode.
   1944  * You'll find the original of this in ccd.c
   1945  */
   1946 int
   1947 raidlookup(path, p, vpp)
   1948 	char   *path;
   1949 	struct proc *p;
   1950 	struct vnode **vpp;	/* result */
   1951 {
   1952 	struct nameidata nd;
   1953 	struct vnode *vp;
   1954 	struct vattr va;
   1955 	int     error;
   1956 
   1957 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   1958 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   1959 #ifdef DEBUG
   1960 		printf("RAIDframe: vn_open returned %d\n", error);
   1961 #endif
   1962 		return (error);
   1963 	}
   1964 	vp = nd.ni_vp;
   1965 	if (vp->v_usecount > 1) {
   1966 		VOP_UNLOCK(vp, 0);
   1967 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1968 		return (EBUSY);
   1969 	}
   1970 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   1971 		VOP_UNLOCK(vp, 0);
   1972 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1973 		return (error);
   1974 	}
   1975 	/* XXX: eventually we should handle VREG, too. */
   1976 	if (va.va_type != VBLK) {
   1977 		VOP_UNLOCK(vp, 0);
   1978 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1979 		return (ENOTBLK);
   1980 	}
   1981 	VOP_UNLOCK(vp, 0);
   1982 	*vpp = vp;
   1983 	return (0);
   1984 }
   1985 /*
   1986  * Wait interruptibly for an exclusive lock.
   1987  *
   1988  * XXX
   1989  * Several drivers do this; it should be abstracted and made MP-safe.
   1990  * (Hmm... where have we seen this warning before :->  GO )
   1991  */
   1992 static int
   1993 raidlock(rs)
   1994 	struct raid_softc *rs;
   1995 {
   1996 	int     error;
   1997 
   1998 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   1999 		rs->sc_flags |= RAIDF_WANTED;
   2000 		if ((error =
   2001 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2002 			return (error);
   2003 	}
   2004 	rs->sc_flags |= RAIDF_LOCKED;
   2005 	return (0);
   2006 }
   2007 /*
   2008  * Unlock and wake up any waiters.
   2009  */
static void
raidunlock(rs)
	struct raid_softc *rs;
{

	/* Drop the lock bit... */
	rs->sc_flags &= ~RAIDF_LOCKED;
	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
		/* ...and rouse anyone who blocked in raidlock(). */
		rs->sc_flags &= ~RAIDF_WANTED;
		wakeup(rs);
	}
}
   2021 
   2022 
   2023 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2024 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2025 
   2026 int
   2027 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2028 {
   2029 	RF_ComponentLabel_t component_label;
   2030 	raidread_component_label(dev, b_vp, &component_label);
   2031 	component_label.mod_counter = mod_counter;
   2032 	component_label.clean = RF_RAID_CLEAN;
   2033 	raidwrite_component_label(dev, b_vp, &component_label);
   2034 	return(0);
   2035 }
   2036 
   2037 
   2038 int
   2039 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2040 {
   2041 	RF_ComponentLabel_t component_label;
   2042 	raidread_component_label(dev, b_vp, &component_label);
   2043 	component_label.mod_counter = mod_counter;
   2044 	component_label.clean = RF_RAID_DIRTY;
   2045 	raidwrite_component_label(dev, b_vp, &component_label);
   2046 	return(0);
   2047 }
   2048 
/*
 * Read the RAIDframe component label from the reserved area at the
 * head of the component on `dev' into *component_label.  Returns 0 on
 * success or the errno from biowait().  b_vp is unused here (hence
 * ARGSUSED); the I/O goes straight to the device's strategy routine.
 */
/* ARGSUSED */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is set in DEV_BSIZE units here while
	   b_bcount is in bytes -- looks inconsistent; confirm what the
	   strategy routines expect. */
 	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* Hand the buf directly to the raw device's strategy routine. */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	/* Sleep until the I/O completes (or fails). */
	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		       sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
        } else {
		printf("Failed to read RAID component label!\n");
	}

	/* Invalidate/age the buffer so the label data isn't cached. */
        bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
/*
 * Write *component_label into the reserved area at the head of the
 * component on `dev'.  The rest of the label sector is zero-filled.
 * Returns 0 on success or the errno from biowait().  b_vp is unused
 * here (hence ARGSUSED).
 */
/* ARGSUSED */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid in DEV_BSIZE units vs. b_bcount in
	   bytes -- same oddity as in raidread_component_label(). */
 	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* Zero-fill, then drop the label at the front of the block. */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* Invalidate/age the buffer so stale label data isn't cached. */
        bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
   2133 
/*
 * Bump the set's modification counter and mark the component label of
 * every non-failed component dirty on disk.  Presumably called when
 * the set goes into active use, so an unclean shutdown can later be
 * detected -- TODO confirm against callers.
 */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				/* Read the current label first so only the
				   dirty bit and mod counter change. */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
	/* NOTE(review): the disabled spare-handling block below uses
	   variables (sparecol, srow, scol, i, j) that are not declared
	   in this function, and indexes Disks[r][...] with r left over
	   from the loop above -- it would need rework before enabling. */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
   2228 
   2229 
/*
 * Push fresh component labels out to every optimal component and every
 * in-use spare: bump the mod counter, rewrite each label with current
 * status, and additionally mark components clean when parity is known
 * good (raidPtr->parity_good == RF_RAID_CLEAN).
 */
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				/* Read-modify-write so fields other than
				   status are preserved on disk. */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			else if (raidPtr->Disks[r][c].status !=
				   rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	/* Now the in-use spares.
	   NOTE(review): only row 0 is examined (Disks[0][sparecol],
	   spareRow == 0) -- appears to assume a single-row array;
	   confirm before using with numRow > 1. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which (row, column) this spare replaced so
			   the label can record its logical position. */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
					       raidPtr->mod_counter);
			}
		}
	}
	/* 	printf("Component labels updated\n"); */
}
   2346