/* Source listing header (NetBSD cross-reference viewer):
   rf_netbsdkintf.c revision 1.26 */
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.26 1999/08/14 02:41:36 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "rf_raid.h"
    139 #include "rf_raidframe.h"
    140 #include "rf_dag.h"
    141 #include "rf_dagflags.h"
    142 #include "rf_diskqueue.h"
    143 #include "rf_acctrace.h"
    144 #include "rf_etimer.h"
    145 #include "rf_general.h"
    146 #include "rf_debugMem.h"
    147 #include "rf_kintf.h"
    148 #include "rf_options.h"
    149 #include "rf_driver.h"
    150 #include "rf_parityscan.h"
    151 #include "rf_debugprint.h"
    152 #include "rf_threadstuff.h"
    153 
    154 int     rf_kdebug_level = 0;
    155 
    156 #define RFK_BOOT_NONE 0
    157 #define RFK_BOOT_GOOD 1
    158 #define RFK_BOOT_BAD  2
    159 static int rf_kbooted = RFK_BOOT_NONE;
    160 
    161 #ifdef DEBUG
    162 #define db0_printf(a) printf a
    163 #define db_printf(a) if (rf_kdebug_level > 0) printf a
    164 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    165 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
    166 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
    167 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
    168 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
    169 #else				/* DEBUG */
    170 #define db0_printf(a) printf a
    171 #define db1_printf(a) { }
    172 #define db2_printf(a) { }
    173 #define db3_printf(a) { }
    174 #define db4_printf(a) { }
    175 #define db5_printf(a) { }
    176 #endif				/* DEBUG */
    177 
    178 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    179 
    180 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    181 
    182 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    183 						 * spare table */
    184 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    185 						 * installation process */
    186 
    187 static struct rf_recon_req *recon_queue = NULL;	/* used to communicate
    188 						 * reconstruction
    189 						 * requests */
    190 
    191 
    192 decl_simple_lock_data(, recon_queue_mutex)
    193 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
    194 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
    195 
    196 /* prototypes */
    197 static void KernelWakeupFunc(struct buf * bp);
    198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    199 		   dev_t dev, RF_SectorNum_t startSect,
    200 		   RF_SectorCount_t numSect, caddr_t buf,
    201 		   void (*cbFunc) (struct buf *), void *cbArg,
    202 		   int logBytesPerSector, struct proc * b_proc);
    203 
    204 #define Dprintf0(s)       if (rf_queueDebug) \
    205      rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
    206 #define Dprintf1(s,a)     if (rf_queueDebug) \
    207      rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
    208 #define Dprintf2(s,a,b)   if (rf_queueDebug) \
    209      rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
    210 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
    211      rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
    212 
    213 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
    214 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
    215 
    216 void raidattach __P((int));
    217 int raidsize __P((dev_t));
    218 
    219 void    rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
    220 void    rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
    221 static int raidinit __P((dev_t, RF_Raid_t *, int));
    222 
    223 int raidopen __P((dev_t, int, int, struct proc *));
    224 int raidclose __P((dev_t, int, int, struct proc *));
    225 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
    226 int raidwrite __P((dev_t, struct uio *, int));
    227 int raidread __P((dev_t, struct uio *, int));
    228 void raidstrategy __P((struct buf *));
    229 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    230 
    231 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
    232 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
    233 void rf_update_component_labels( RF_Raid_t *);
    234 /*
    235  * Pilfered from ccd.c
    236  */
    237 
/*
 * Wrapper buf for a component-level I/O, allocated from the per-unit
 * sc_cbufpool via RAIDGETBUF/RAIDPUTBUF (pattern pilfered from ccd.c).
 * Links the component I/O back to the original request.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
    244 
    245 
    246 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
    247 #define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    248 
    249 /* XXX Not sure if the following should be replacing the raidPtrs above,
    250    or if it should be used in conjunction with that... */
    251 
/*
 * Per-unit software state for a RAID device.  The global raid_softc[]
 * array (below) holds one of these per configured unit, indexed by
 * unit number; see sc_flags values below for the lock/init state bits.
 */
struct raid_softc {
	int     sc_flags;	/* flags (RAIDF_* bits below) */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;        /* size of the raid device */
	dev_t   sc_dev;	        /* our device.. */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
};
    261 /* sc_flags */
    262 #define RAIDF_INITED	0x01	/* unit has been initialized */
    263 #define RAIDF_WLABEL	0x02	/* label area is writable */
    264 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    265 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    266 #define RAIDF_LOCKED	0x80	/* unit is locked */
    267 
    268 #define	raidunit(x)	DISKUNIT(x)
    269 static int numraid = 0;
    270 
    271 /*
    272  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    273  * Be aware that large numbers can allow the driver to consume a lot of
    274  * kernel memory, especially on writes...
    275  */
    276 
    277 #ifndef RAIDOUTSTANDING
    278 #define RAIDOUTSTANDING   10
    279 #endif
    280 
    281 #define RAIDLABELDEV(dev)	\
    282 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    283 
    284 /* declared here, and made public, for the benefit of KVM stuff.. */
    285 struct raid_softc *raid_softc;
    286 
    287 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
    288 				     struct disklabel *));
    289 static void raidgetdisklabel __P((dev_t));
    290 static void raidmakedisklabel __P((struct raid_softc *));
    291 
    292 static int raidlock __P((struct raid_softc *));
    293 static void raidunlock __P((struct raid_softc *));
    294 int raidlookup __P((char *, struct proc * p, struct vnode **));
    295 
    296 static void rf_markalldirty __P((RF_Raid_t *));
    297 
    298 void
    299 raidattach(num)
    300 	int     num;
    301 {
    302 	int raidID;
    303 	int i, rc;
    304 
    305 #ifdef DEBUG
    306 	printf("raidattach: Asked for %d units\n", num);
    307 #endif
    308 
    309 	if (num <= 0) {
    310 #ifdef DIAGNOSTIC
    311 		panic("raidattach: count <= 0");
    312 #endif
    313 		return;
    314 	}
    315 	/* This is where all the initialization stuff gets done. */
    316 
    317 	/* Make some space for requested number of units... */
    318 
    319 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    320 	if (raidPtrs == NULL) {
    321 		panic("raidPtrs is NULL!!\n");
    322 	}
    323 
    324 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    325 	if (rc) {
    326 		RF_PANIC();
    327 	}
    328 
    329 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    330 	recon_queue = NULL;
    331 
    332 	for (i = 0; i < numraid; i++)
    333 		raidPtrs[i] = NULL;
    334 	rc = rf_BootRaidframe();
    335 	if (rc == 0)
    336 		printf("Kernelized RAIDframe activated\n");
    337 	else
    338 		panic("Serious error booting RAID!!\n");
    339 
    340 	rf_kbooted = RFK_BOOT_GOOD;
    341 
    342 	/* put together some datastructures like the CCD device does.. This
    343 	 * lets us lock the device and what-not when it gets opened. */
    344 
    345 	raid_softc = (struct raid_softc *)
    346 	    malloc(num * sizeof(struct raid_softc),
    347 	    M_RAIDFRAME, M_NOWAIT);
    348 	if (raid_softc == NULL) {
    349 		printf("WARNING: no memory for RAIDframe driver\n");
    350 		return;
    351 	}
    352 	numraid = num;
    353 	bzero(raid_softc, num * sizeof(struct raid_softc));
    354 
    355 	for (raidID = 0; raidID < num; raidID++) {
    356 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    357 			  (RF_Raid_t *));
    358 		if (raidPtrs[raidID] == NULL) {
    359 			printf("raidPtrs[%d] is NULL\n", raidID);
    360 		}
    361 	}
    362 }
    363 
    364 
    365 int
    366 raidsize(dev)
    367 	dev_t   dev;
    368 {
    369 	struct raid_softc *rs;
    370 	struct disklabel *lp;
    371 	int     part, unit, omask, size;
    372 
    373 	unit = raidunit(dev);
    374 	if (unit >= numraid)
    375 		return (-1);
    376 	rs = &raid_softc[unit];
    377 
    378 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    379 		return (-1);
    380 
    381 	part = DISKPART(dev);
    382 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    383 	lp = rs->sc_dkdev.dk_label;
    384 
    385 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    386 		return (-1);
    387 
    388 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    389 		size = -1;
    390 	else
    391 		size = lp->d_partitions[part].p_size *
    392 		    (lp->d_secsize / DEV_BSIZE);
    393 
    394 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    395 		return (-1);
    396 
    397 	return (size);
    398 
    399 }
    400 
/*
 * raiddump: crash-dump entry point.  Dumping a kernel core to a RAID
 * set is not supported, so this always fails with ENXIO.
 */
int
raiddump(dev, blkno, va, size)
	dev_t   dev;
	daddr_t blkno;
	caddr_t va;
	size_t  size;
{
	/* Not implemented. */
	return ENXIO;
}
    411 /* ARGSUSED */
/*
 * raidopen: open entry point.  Validates the unit and partition, reads
 * the disklabel on first open of an initialized unit, records the open
 * in the char/block open masks (which prevents unconfiguration while
 * open), and marks components dirty on the first open.  The whole
 * routine runs under the per-unit lock (raidlock/raidunlock).
 */
int
raidopen(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	/* First open of an initialized unit: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}

	/* Recompute the combined open mask from the char/block masks. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
    490 /* ARGSUSED */
/*
 * raidclose: close entry point.  Clears the partition's bit in the
 * appropriate open mask, and on the last close of an initialized unit
 * updates the component labels (marking things clean).  Runs under the
 * per-unit lock.  Always returns 0 once the lock is obtained.
 */
int
raidclose(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	int     error = 0;
	int     part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */
		rf_update_component_labels( raidPtrs[unit] );
	}

	raidunlock(rs);
	return (0);

}
    537 
    538 void
    539 raidstrategy(bp)
    540 	register struct buf *bp;
    541 {
    542 	register int s;
    543 
    544 	unsigned int raidID = raidunit(bp->b_dev);
    545 	RF_Raid_t *raidPtr;
    546 	struct raid_softc *rs = &raid_softc[raidID];
    547 	struct disklabel *lp;
    548 	int     wlabel;
    549 
    550 #if 0
    551 	db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
    552 	db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
    553 	db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
    554 	db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
    555 	db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
    556 
    557 	if (bp->b_flags & B_READ)
    558 		db1_printf(("READ\n"));
    559 	else
    560 		db1_printf(("WRITE\n"));
    561 #endif
    562 	if (rf_kbooted != RFK_BOOT_GOOD)
    563 		return;
    564 	if (raidID >= numraid || !raidPtrs[raidID]) {
    565 		bp->b_error = ENODEV;
    566 		bp->b_flags |= B_ERROR;
    567 		bp->b_resid = bp->b_bcount;
    568 		biodone(bp);
    569 		return;
    570 	}
    571 	raidPtr = raidPtrs[raidID];
    572 	if (!raidPtr->valid) {
    573 		bp->b_error = ENODEV;
    574 		bp->b_flags |= B_ERROR;
    575 		bp->b_resid = bp->b_bcount;
    576 		biodone(bp);
    577 		return;
    578 	}
    579 	if (bp->b_bcount == 0) {
    580 		db1_printf(("b_bcount is zero..\n"));
    581 		biodone(bp);
    582 		return;
    583 	}
    584 	lp = rs->sc_dkdev.dk_label;
    585 
    586 	/*
    587 	 * Do bounds checking and adjust transfer.  If there's an
    588 	 * error, the bounds check will flag that for us.
    589 	 */
    590 
    591 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    592 	if (DISKPART(bp->b_dev) != RAW_PART)
    593 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    594 			db1_printf(("Bounds check failed!!:%d %d\n",
    595 				(int) bp->b_blkno, (int) wlabel));
    596 			biodone(bp);
    597 			return;
    598 		}
    599 	s = splbio();		/* XXX Needed? */
    600 	db1_printf(("Beginning strategy...\n"));
    601 
    602 	bp->b_resid = 0;
    603 	bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
    604 	    NULL, NULL, NULL);
    605 	if (bp->b_error) {
    606 		bp->b_flags |= B_ERROR;
    607 		db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
    608 			bp->b_error));
    609 	}
    610 	splx(s);
    611 #if 0
    612 	db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
    613 		bp, bp->b_data,
    614 		(int) bp->b_bcount, (int) bp->b_resid));
    615 #endif
    616 }
    617 /* ARGSUSED */
    618 int
    619 raidread(dev, uio, flags)
    620 	dev_t   dev;
    621 	struct uio *uio;
    622 	int     flags;
    623 {
    624 	int     unit = raidunit(dev);
    625 	struct raid_softc *rs;
    626 	int     part;
    627 
    628 	if (unit >= numraid)
    629 		return (ENXIO);
    630 	rs = &raid_softc[unit];
    631 
    632 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    633 		return (ENXIO);
    634 	part = DISKPART(dev);
    635 
    636 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    637 
    638 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    639 
    640 }
    641 /* ARGSUSED */
    642 int
    643 raidwrite(dev, uio, flags)
    644 	dev_t   dev;
    645 	struct uio *uio;
    646 	int     flags;
    647 {
    648 	int     unit = raidunit(dev);
    649 	struct raid_softc *rs;
    650 
    651 	if (unit >= numraid)
    652 		return (ENXIO);
    653 	rs = &raid_softc[unit];
    654 
    655 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    656 		return (ENXIO);
    657 	db1_printf(("raidwrite\n"));
    658 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    659 
    660 }
    661 
    662 int
    663 raidioctl(dev, cmd, data, flag, p)
    664 	dev_t   dev;
    665 	u_long  cmd;
    666 	caddr_t data;
    667 	int     flag;
    668 	struct proc *p;
    669 {
    670 	int     unit = raidunit(dev);
    671 	int     error = 0;
    672 	int     part, pmask;
    673 	struct raid_softc *rs;
    674 #if 0
    675 	int     r, c;
    676 #endif
    677 	/* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
    678 
    679 	/* struct ccdbuf *cbp; */
    680 	/* struct raidbuf *raidbp; */
    681 	RF_Config_t *k_cfg, *u_cfg;
    682 	u_char *specific_buf;
    683 	int retcode = 0;
    684 	int row;
    685 	int column;
    686 	int s;
    687 	struct rf_recon_req *rrcopy, *rr;
    688 	RF_ComponentLabel_t *component_label;
    689 	RF_ComponentLabel_t ci_label;
    690 	RF_ComponentLabel_t **c_label_ptr;
    691 	RF_SingleComponent_t *sparePtr,*componentPtr;
    692 	RF_SingleComponent_t hot_spare;
    693 	RF_SingleComponent_t component;
    694 
    695 	if (unit >= numraid)
    696 		return (ENXIO);
    697 	rs = &raid_softc[unit];
    698 
    699 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    700 		(int) DISKPART(dev), (int) unit, (int) cmd));
    701 
    702 	/* Must be open for writes for these commands... */
    703 	switch (cmd) {
    704 	case DIOCSDINFO:
    705 	case DIOCWDINFO:
    706 	case DIOCWLABEL:
    707 		if ((flag & FWRITE) == 0)
    708 			return (EBADF);
    709 	}
    710 
    711 	/* Must be initialized for these... */
    712 	switch (cmd) {
    713 	case DIOCGDINFO:
    714 	case DIOCSDINFO:
    715 	case DIOCWDINFO:
    716 	case DIOCGPART:
    717 	case DIOCWLABEL:
    718 	case DIOCGDEFLABEL:
    719 	case RAIDFRAME_SHUTDOWN:
    720 	case RAIDFRAME_REWRITEPARITY:
    721 	case RAIDFRAME_GET_INFO:
    722 	case RAIDFRAME_RESET_ACCTOTALS:
    723 	case RAIDFRAME_GET_ACCTOTALS:
    724 	case RAIDFRAME_KEEP_ACCTOTALS:
    725 	case RAIDFRAME_GET_SIZE:
    726 	case RAIDFRAME_FAIL_DISK:
    727 	case RAIDFRAME_COPYBACK:
    728 	case RAIDFRAME_CHECKRECON:
    729 	case RAIDFRAME_GET_COMPONENT_LABEL:
    730 	case RAIDFRAME_SET_COMPONENT_LABEL:
    731 	case RAIDFRAME_ADD_HOT_SPARE:
    732 	case RAIDFRAME_REMOVE_HOT_SPARE:
    733 	case RAIDFRAME_INIT_LABELS:
    734 	case RAIDFRAME_REBUILD_IN_PLACE:
    735 	case RAIDFRAME_CHECK_PARITY:
    736 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    737 			return (ENXIO);
    738 	}
    739 
    740 	switch (cmd) {
    741 
    742 
    743 		/* configure the system */
    744 	case RAIDFRAME_CONFIGURE:
    745 
    746 		db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
    747 		/* copy-in the configuration information */
    748 		/* data points to a pointer to the configuration structure */
    749 		u_cfg = *((RF_Config_t **) data);
    750 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    751 		if (k_cfg == NULL) {
    752 			db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
    753 			return (ENOMEM);
    754 		}
    755 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    756 		    sizeof(RF_Config_t));
    757 		if (retcode) {
    758 			db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
    759 				retcode));
    760 			return (retcode);
    761 		}
    762 		/* allocate a buffer for the layout-specific data, and copy it
    763 		 * in */
    764 		if (k_cfg->layoutSpecificSize) {
    765 			if (k_cfg->layoutSpecificSize > 10000) {
    766 				/* sanity check */
    767 				db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
    768 				return (EINVAL);
    769 			}
    770 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    771 			    (u_char *));
    772 			if (specific_buf == NULL) {
    773 				RF_Free(k_cfg, sizeof(RF_Config_t));
    774 				db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
    775 				return (ENOMEM);
    776 			}
    777 			retcode = copyin(k_cfg->layoutSpecific,
    778 			    (caddr_t) specific_buf,
    779 			    k_cfg->layoutSpecificSize);
    780 			if (retcode) {
    781 				db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
    782 					retcode));
    783 				return (retcode);
    784 			}
    785 		} else
    786 			specific_buf = NULL;
    787 		k_cfg->layoutSpecific = specific_buf;
    788 
    789 		/* should do some kind of sanity check on the configuration.
    790 		 * Store the sum of all the bytes in the last byte? */
    791 
    792 #if 0
    793 		db1_printf(("Considering configuring the system.:%d 0x%x\n",
    794 			unit, p));
    795 #endif
    796 
    797 		/* We need the pointer to this a little deeper, so stash it
    798 		 * here... */
    799 
    800 		raidPtrs[unit]->proc = p;
    801 
    802 		/* configure the system */
    803 
    804 		raidPtrs[unit]->raidid = unit;
    805 
    806 		retcode = rf_Configure(raidPtrs[unit], k_cfg);
    807 
    808 		/* allow this many simultaneous IO's to this RAID device */
    809 		raidPtrs[unit]->openings = RAIDOUTSTANDING;
    810 
    811 		if (retcode == 0) {
    812 			retcode = raidinit(dev, raidPtrs[unit], unit);
    813 			rf_markalldirty( raidPtrs[unit] );
    814 		}
    815 		/* free the buffers.  No return code here. */
    816 		if (k_cfg->layoutSpecificSize) {
    817 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    818 		}
    819 		RF_Free(k_cfg, sizeof(RF_Config_t));
    820 
    821 		db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
    822 			retcode));
    823 
    824 		return (retcode);
    825 
    826 		/* shutdown the system */
    827 	case RAIDFRAME_SHUTDOWN:
    828 
    829 		if ((error = raidlock(rs)) != 0)
    830 			return (error);
    831 
    832 		/*
    833 		 * If somebody has a partition mounted, we shouldn't
    834 		 * shutdown.
    835 		 */
    836 
    837 		part = DISKPART(dev);
    838 		pmask = (1 << part);
    839 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    840 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    841 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    842 			raidunlock(rs);
    843 			return (EBUSY);
    844 		}
    845 
    846 		if (rf_debugKernelAccess) {
    847 			printf("call shutdown\n");
    848 		}
    849 		raidPtrs[unit]->proc = p;	/* XXX  necessary evil */
    850 
    851 		retcode = rf_Shutdown(raidPtrs[unit]);
    852 
    853 		db1_printf(("Done main shutdown\n"));
    854 
    855 		pool_destroy(&rs->sc_cbufpool);
    856 		db1_printf(("Done freeing component buffer freelist\n"));
    857 
    858 		/* It's no longer initialized... */
    859 		rs->sc_flags &= ~RAIDF_INITED;
    860 
    861 		/* Detach the disk. */
    862 		disk_detach(&rs->sc_dkdev);
    863 
    864 		raidunlock(rs);
    865 
    866 		return (retcode);
    867 	case RAIDFRAME_GET_COMPONENT_LABEL:
    868 		c_label_ptr = (RF_ComponentLabel_t **) data;
    869 		/* need to read the component label for the disk indicated
    870 		   by row,column in component_label
    871 		   XXX need to sanity check these values!!!
    872 		   */
    873 
    874 		/* For practice, let's get it directly fromdisk, rather
    875 		   than from the in-core copy */
    876 		RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
    877 			   (RF_ComponentLabel_t *));
    878 		if (component_label == NULL)
    879 			return (ENOMEM);
    880 
    881 		bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
    882 
    883 		retcode = copyin( *c_label_ptr, component_label,
    884 				  sizeof(RF_ComponentLabel_t));
    885 
    886 		if (retcode) {
    887 			return(retcode);
    888 		}
    889 
    890 		row = component_label->row;
    891 		column = component_label->column;
    892 
    893 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
    894 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
    895 			return(EINVAL);
    896 		}
    897 
    898 		raidread_component_label(
    899                               raidPtrs[unit]->Disks[row][column].dev,
    900 			      raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    901 			      component_label );
    902 
    903 		retcode = copyout((caddr_t) component_label,
    904 				  (caddr_t) *c_label_ptr,
    905 				  sizeof(RF_ComponentLabel_t));
    906 		RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    907 		return (retcode);
    908 
    909 	case RAIDFRAME_SET_COMPONENT_LABEL:
    910 		component_label = (RF_ComponentLabel_t *) data;
    911 
    912 		/* XXX check the label for valid stuff... */
    913 		/* Note that some things *should not* get modified --
    914 		   the user should be re-initing the labels instead of
    915 		   trying to patch things.
    916 		   */
    917 
    918 		printf("Got component label:\n");
    919 		printf("Version: %d\n",component_label->version);
    920 		printf("Serial Number: %d\n",component_label->serial_number);
    921 		printf("Mod counter: %d\n",component_label->mod_counter);
    922 		printf("Row: %d\n", component_label->row);
    923 		printf("Column: %d\n", component_label->column);
    924 		printf("Num Rows: %d\n", component_label->num_rows);
    925 		printf("Num Columns: %d\n", component_label->num_columns);
    926 		printf("Clean: %d\n", component_label->clean);
    927 		printf("Status: %d\n", component_label->status);
    928 
    929 		row = component_label->row;
    930 		column = component_label->column;
    931 
    932 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
    933 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
    934 			return(EINVAL);
    935 		}
    936 
    937 		/* XXX this isn't allowed to do anything for now :-) */
    938 #if 0
    939 		raidwrite_component_label(
    940                             raidPtrs[unit]->Disks[row][column].dev,
    941 			    raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    942 			    component_label );
    943 #endif
    944 		return (0);
    945 
    946 	case RAIDFRAME_INIT_LABELS:
    947 		component_label = (RF_ComponentLabel_t *) data;
    948 		/*
    949 		   we only want the serial number from
    950 		   the above.  We get all the rest of the information
    951 		   from the config that was used to create this RAID
    952 		   set.
    953 		   */
    954 
    955 		raidPtrs[unit]->serial_number = component_label->serial_number;
    956 		/* current version number */
    957 		ci_label.version = RF_COMPONENT_LABEL_VERSION;
    958 		ci_label.serial_number = component_label->serial_number;
    959 		ci_label.mod_counter = raidPtrs[unit]->mod_counter;
    960 		ci_label.num_rows = raidPtrs[unit]->numRow;
    961 		ci_label.num_columns = raidPtrs[unit]->numCol;
    962 		ci_label.clean = RF_RAID_DIRTY; /* not clean */
    963 		ci_label.status = rf_ds_optimal; /* "It's good!" */
    964 
    965 		for(row=0;row<raidPtrs[unit]->numRow;row++) {
    966 			ci_label.row = row;
    967 			for(column=0;column<raidPtrs[unit]->numCol;column++) {
    968 				ci_label.column = column;
    969 				raidwrite_component_label(
    970 				  raidPtrs[unit]->Disks[row][column].dev,
    971 				  raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    972 				  &ci_label );
    973 			}
    974 		}
    975 
    976 		return (retcode);
    977 
    978 		/* initialize all parity */
    979 	case RAIDFRAME_REWRITEPARITY:
    980 
    981 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
    982 			/* Parity for RAID 0 is trivially correct */
    983 			raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
    984 			return(0);
    985 		}
    986 
    987 		/* borrow the thread of the requesting process */
    988 		raidPtrs[unit]->proc = p;	/* Blah... :-p GO */
    989 		s = splbio();
    990 		retcode = rf_RewriteParity(raidPtrs[unit]);
    991 		splx(s);
    992 		/* return I/O Error if the parity rewrite fails */
    993 
    994 		if (retcode) {
    995 			retcode = EIO;
    996 		} else {
    997 			/* set the clean bit!  If we shutdown correctly,
    998 			 the clean bit on each component label will get
    999 			 set */
   1000 			raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
   1001 		}
   1002 		return (retcode);
   1003 
   1004 
   1005 	case RAIDFRAME_ADD_HOT_SPARE:
   1006 		sparePtr = (RF_SingleComponent_t *) data;
   1007 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1008 		printf("Adding spare\n");
   1009 		raidPtrs[unit]->proc = p;	/* Blah... :-p GO */
   1010 		retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
   1011 		return(retcode);
   1012 
   1013 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1014 		return(retcode);
   1015 
   1016 	case RAIDFRAME_REBUILD_IN_PLACE:
   1017 
   1018 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1019 			/* Can't do this on a RAID 0!! */
   1020 			return(EINVAL);
   1021 		}
   1022 
   1023 		componentPtr = (RF_SingleComponent_t *) data;
   1024 		memcpy( &component, componentPtr,
   1025 			sizeof(RF_SingleComponent_t));
   1026 		row = component.row;
   1027 		column = component.column;
   1028 		printf("Rebuild: %d %d\n",row, column);
   1029 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
   1030 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
   1031 			return(EINVAL);
   1032 		}
   1033 		printf("Attempting a rebuild in place\n");
   1034 		s = splbio();
   1035 		raidPtrs[unit]->proc = p;	/* Blah... :-p GO */
   1036 		retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
   1037 		splx(s);
   1038 		return(retcode);
   1039 
   1040 		/* issue a test-unit-ready through raidframe to the indicated
   1041 		 * device */
   1042 #if 0				/* XXX not supported yet (ever?) */
   1043 	case RAIDFRAME_TUR:
   1044 		/* debug only */
   1045 		retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
   1046 		return (retcode);
   1047 #endif
   1048 	case RAIDFRAME_GET_INFO:
   1049 		{
   1050 			RF_Raid_t *raid = raidPtrs[unit];
   1051 			RF_DeviceConfig_t *cfg, **ucfgp;
   1052 			int     i, j, d;
   1053 
   1054 			if (!raid->valid)
   1055 				return (ENODEV);
   1056 			ucfgp = (RF_DeviceConfig_t **) data;
   1057 			RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
   1058 				  (RF_DeviceConfig_t *));
   1059 			if (cfg == NULL)
   1060 				return (ENOMEM);
   1061 			bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
   1062 			cfg->rows = raid->numRow;
   1063 			cfg->cols = raid->numCol;
   1064 			cfg->ndevs = raid->numRow * raid->numCol;
   1065 			if (cfg->ndevs >= RF_MAX_DISKS) {
   1066 				cfg->ndevs = 0;
   1067 				return (ENOMEM);
   1068 			}
   1069 			cfg->nspares = raid->numSpare;
   1070 			if (cfg->nspares >= RF_MAX_DISKS) {
   1071 				cfg->nspares = 0;
   1072 				return (ENOMEM);
   1073 			}
   1074 			cfg->maxqdepth = raid->maxQueueDepth;
   1075 			d = 0;
   1076 			for (i = 0; i < cfg->rows; i++) {
   1077 				for (j = 0; j < cfg->cols; j++) {
   1078 					cfg->devs[d] = raid->Disks[i][j];
   1079 					d++;
   1080 				}
   1081 			}
   1082 			for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
   1083 				cfg->spares[i] = raid->Disks[0][j];
   1084 			}
   1085 			retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
   1086 					  sizeof(RF_DeviceConfig_t));
   1087 			RF_Free(cfg, sizeof(RF_DeviceConfig_t));
   1088 
   1089 			return (retcode);
   1090 		}
   1091 		break;
   1092 	case RAIDFRAME_CHECK_PARITY:
   1093 		*(int *) data = raidPtrs[unit]->parity_good;
   1094 		return (0);
   1095 	case RAIDFRAME_RESET_ACCTOTALS:
   1096 		{
   1097 			RF_Raid_t *raid = raidPtrs[unit];
   1098 
   1099 			bzero(&raid->acc_totals, sizeof(raid->acc_totals));
   1100 			return (0);
   1101 		}
   1102 		break;
   1103 
   1104 	case RAIDFRAME_GET_ACCTOTALS:
   1105 		{
   1106 			RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
   1107 			RF_Raid_t *raid = raidPtrs[unit];
   1108 
   1109 			*totals = raid->acc_totals;
   1110 			return (0);
   1111 		}
   1112 		break;
   1113 
   1114 	case RAIDFRAME_KEEP_ACCTOTALS:
   1115 		{
   1116 			RF_Raid_t *raid = raidPtrs[unit];
   1117 			int    *keep = (int *) data;
   1118 
   1119 			raid->keep_acc_totals = *keep;
   1120 			return (0);
   1121 		}
   1122 		break;
   1123 
   1124 	case RAIDFRAME_GET_SIZE:
   1125 		*(int *) data = raidPtrs[unit]->totalSectors;
   1126 		return (0);
   1127 
   1128 #define RAIDFRAME_RECON 1
   1129 		/* XXX The above should probably be set somewhere else!! GO */
   1130 #if RAIDFRAME_RECON > 0
   1131 
   1132 		/* fail a disk & optionally start reconstruction */
   1133 	case RAIDFRAME_FAIL_DISK:
   1134 
   1135 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1136 			/* Can't do this on a RAID 0!! */
   1137 			return(EINVAL);
   1138 		}
   1139 
   1140 		rr = (struct rf_recon_req *) data;
   1141 
   1142 		if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
   1143 		    || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
   1144 			return (EINVAL);
   1145 
   1146 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1147 		       unit, rr->row, rr->col);
   1148 
   1149 		/* make a copy of the recon request so that we don't rely on
   1150 		 * the user's buffer */
   1151 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1152 		bcopy(rr, rrcopy, sizeof(*rr));
   1153 		rrcopy->raidPtr = (void *) raidPtrs[unit];
   1154 
   1155 		LOCK_RECON_Q_MUTEX();
   1156 		rrcopy->next = recon_queue;
   1157 		recon_queue = rrcopy;
   1158 		wakeup(&recon_queue);
   1159 		UNLOCK_RECON_Q_MUTEX();
   1160 
   1161 		return (0);
   1162 
   1163 		/* invoke a copyback operation after recon on whatever disk
   1164 		 * needs it, if any */
   1165 	case RAIDFRAME_COPYBACK:
   1166 
   1167 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1168 			/* This makes no sense on a RAID 0!! */
   1169 			return(EINVAL);
   1170 		}
   1171 
   1172 		/* borrow the current thread to get this done */
   1173 		raidPtrs[unit]->proc = p;	/* ICK.. but needed :-p  GO */
   1174 		s = splbio();
   1175 		rf_CopybackReconstructedData(raidPtrs[unit]);
   1176 		splx(s);
   1177 		return (0);
   1178 
   1179 		/* return the percentage completion of reconstruction */
   1180 	case RAIDFRAME_CHECKRECON:
   1181 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1182 			/* This makes no sense on a RAID 0 */
   1183 			return(EINVAL);
   1184 		}
   1185 
   1186 		row = *(int *) data;
   1187 		if (row < 0 || row >= raidPtrs[unit]->numRow)
   1188 			return (EINVAL);
   1189 		if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
   1190 			*(int *) data = 100;
   1191 		else
   1192 			*(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
   1193 		return (0);
   1194 
   1195 		/* the sparetable daemon calls this to wait for the kernel to
   1196 		 * need a spare table. this ioctl does not return until a
   1197 		 * spare table is needed. XXX -- calling mpsleep here in the
   1198 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1199 		 * -- I should either compute the spare table in the kernel,
   1200 		 * or have a different -- XXX XXX -- interface (a different
   1201 		 * character device) for delivering the table          -- XXX */
   1202 #if 0
   1203 	case RAIDFRAME_SPARET_WAIT:
   1204 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1205 		while (!rf_sparet_wait_queue)
   1206 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1207 		waitreq = rf_sparet_wait_queue;
   1208 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1209 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1210 
   1211 		*((RF_SparetWait_t *) data) = *waitreq;	/* structure assignment */
   1212 
   1213 		RF_Free(waitreq, sizeof(*waitreq));
   1214 		return (0);
   1215 
   1216 
		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
   1219 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1220 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1221 		waitreq->fcol = -1;
   1222 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1223 		waitreq->next = rf_sparet_wait_queue;
   1224 		rf_sparet_wait_queue = waitreq;
   1225 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1226 		wakeup(&rf_sparet_wait_queue);
   1227 		return (0);
   1228 
   1229 		/* used by the spare table daemon to deliver a spare table
   1230 		 * into the kernel */
   1231 	case RAIDFRAME_SEND_SPARET:
   1232 
   1233 		/* install the spare table */
   1234 		retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
   1235 
   1236 		/* respond to the requestor.  the return status of the spare
   1237 		 * table installation is passed in the "fcol" field */
   1238 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1239 		waitreq->fcol = retcode;
   1240 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1241 		waitreq->next = rf_sparet_resp_queue;
   1242 		rf_sparet_resp_queue = waitreq;
   1243 		wakeup(&rf_sparet_resp_queue);
   1244 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1245 
   1246 		return (retcode);
   1247 #endif
   1248 
   1249 
   1250 #endif				/* RAIDFRAME_RECON > 0 */
   1251 
   1252 	default:
   1253 		break;		/* fall through to the os-specific code below */
   1254 
   1255 	}
   1256 
   1257 	if (!raidPtrs[unit]->valid)
   1258 		return (EINVAL);
   1259 
   1260 	/*
   1261 	 * Add support for "regular" device ioctls here.
   1262 	 */
   1263 
   1264 	switch (cmd) {
   1265 	case DIOCGDINFO:
   1266 		db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
   1267 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1268 		break;
   1269 
   1270 	case DIOCGPART:
   1271 		db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
   1272 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1273 		((struct partinfo *) data)->part =
   1274 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1275 		break;
   1276 
   1277 	case DIOCWDINFO:
   1278 		db1_printf(("DIOCWDINFO\n"));
   1279 	case DIOCSDINFO:
   1280 		db1_printf(("DIOCSDINFO\n"));
   1281 		if ((error = raidlock(rs)) != 0)
   1282 			return (error);
   1283 
   1284 		rs->sc_flags |= RAIDF_LABELLING;
   1285 
   1286 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1287 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1288 		if (error == 0) {
   1289 			if (cmd == DIOCWDINFO)
   1290 				error = writedisklabel(RAIDLABELDEV(dev),
   1291 				    raidstrategy, rs->sc_dkdev.dk_label,
   1292 				    rs->sc_dkdev.dk_cpulabel);
   1293 		}
   1294 		rs->sc_flags &= ~RAIDF_LABELLING;
   1295 
   1296 		raidunlock(rs);
   1297 
   1298 		if (error)
   1299 			return (error);
   1300 		break;
   1301 
   1302 	case DIOCWLABEL:
   1303 		db1_printf(("DIOCWLABEL\n"));
   1304 		if (*(int *) data != 0)
   1305 			rs->sc_flags |= RAIDF_WLABEL;
   1306 		else
   1307 			rs->sc_flags &= ~RAIDF_WLABEL;
   1308 		break;
   1309 
   1310 	case DIOCGDEFLABEL:
   1311 		db1_printf(("DIOCGDEFLABEL\n"));
   1312 		raidgetdefaultlabel(raidPtrs[unit], rs,
   1313 		    (struct disklabel *) data);
   1314 		break;
   1315 
   1316 	default:
   1317 		retcode = ENOTTY;	/* XXXX ?? OR EINVAL ? */
   1318 	}
   1319 	return (retcode);
   1320 
   1321 }
   1322 
   1323 
   1324 /* raidinit -- complete the rest of the initialization for the
   1325    RAIDframe device.  */
   1326 
   1327 
   1328 static int
   1329 raidinit(dev, raidPtr, unit)
   1330 	dev_t   dev;
   1331 	RF_Raid_t *raidPtr;
   1332 	int     unit;
   1333 {
   1334 	int     retcode;
   1335 	/* int ix; */
   1336 	/* struct raidbuf *raidbp; */
   1337 	struct raid_softc *rs;
   1338 
   1339 	retcode = 0;
   1340 
   1341 	rs = &raid_softc[unit];
   1342 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1343 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1344 
   1345 
   1346 	/* XXX should check return code first... */
   1347 	rs->sc_flags |= RAIDF_INITED;
   1348 
   1349 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1350 
   1351 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1352 
   1353 	/* disk_attach actually creates space for the CPU disklabel, among
   1354 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1355 	 * with disklabels. */
   1356 
   1357 	disk_attach(&rs->sc_dkdev);
   1358 
   1359 	/* XXX There may be a weird interaction here between this, and
   1360 	 * protectedSectors, as used in RAIDframe.  */
   1361 
   1362 	rs->sc_size = raidPtr->totalSectors;
   1363 	rs->sc_dev = dev;
   1364 
   1365 	return (retcode);
   1366 }
   1367 
   1368 /*
   1369  * This kernel thread never exits.  It is created once, and persists
   1370  * until the system reboots.
   1371  */
   1372 
void
rf_ReconKernelThread()
{
	struct rf_recon_req *req;	/* request popped off recon_queue */
	int     s;			/* saved interrupt priority level */

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	s = splbio();

	/*
	 * Service loop: pull reconstruction requests off the global
	 * recon_queue (enqueued by the RAIDFRAME_FAIL_DISK ioctl, which
	 * also does the wakeup) and process them one at a time.  The
	 * loop never exits, so `s' is never restored with splx().
	 */
	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		while (!recon_queue) {
			/* Queue is empty: release the lock before sleeping
			 * so the enqueuer can add work, then re-acquire and
			 * re-test the condition. */
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO,
			       "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		/* Pop the head of the singly-linked request list. */
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
	         * If flags specifies that we should start recon, this call
	         * will not return until reconstruction completes, fails,
		 * or is aborted.
	         */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* The request was RF_Malloc'd by the ioctl path; it is
		 * ours to free once processed. */
		RF_Free(req, sizeof(*req));
	}
}
   1407 /* wake up the daemon & tell it to get us a spare table
   1408  * XXX
   1409  * the entries in the queues should be tagged with the raidPtr
   1410  * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
   1412  * XXX
   1413  */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode;

	/* Post our request on the wait queue and wake the user-level
	 * daemon, which sleeps in the RAIDFRAME_SPARET_WAIT ioctl. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* Wait for the daemon's response on the response queue.
	 * NOTE(review): the old mpsleep() (see the #if 0 below) released
	 * rf_sparet_wait_mutex while sleeping; tsleep() does not, so the
	 * mutex is held across the sleep here -- presumably harmless with
	 * the current RF_LOCK_MUTEX implementation, but confirm. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
#if 0
		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
			(void *) simple_lock_addr(rf_sparet_wait_mutex),
			MS_LOCK_SIMPLE);
#endif
	}
	/* Pop the response.  From here on `req' points at the daemon's
	 * reply, not at the caller's original request. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The daemon passes the installation status back in fcol
	 * (see RAIDFRAME_SEND_SPARET). */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
   1444 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1445  * bp & passes it down.
   1446  * any calls originating in the kernel must use non-blocking I/O
   1447  * do some extra sanity checking to return "appropriate" error values for
   1448  * certain conditions (to make some standard utilities work)
   1449  */
/*
 * rf_DoAccessKernel: translate a kernel buf into a RAIDframe access.
 *
 * Converts bp's partition-relative block number into an absolute RAID
 * address, sanity-checks the request (bounds, overflow, whole-sector
 * sizes), throttles on raidPtr->openings, and hands the request to
 * rf_DoAccess() as non-blocking I/O.  On a rejected request the buf is
 * completed here (biodone) with b_error set; otherwise cbFunc/cbArg are
 * invoked when the access finishes.  Returns 0 or an errno-style code.
 */
int
rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
	RF_Raid_t *raidPtr;
	struct buf *bp;
	RF_RaidAccessFlags_t flags;
	void    (*cbFunc) (struct buf *);
	void   *cbArg;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;	/* absolute (device-wide) address */
	int     retcode;
	struct partition *pp;
	daddr_t blocknum;		/* b_blkno adjusted by partition offset */
	int     unit;
	struct raid_softc *rs;
	int     do_async;

	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Ok, for the bp we have here, bp->b_blkno is relative to the
	 * partition.. Need to make it absolute to the underlying device.. */

	blocknum = bp->b_blkno;
	if (DISKPART(bp->b_dev) != RAW_PART) {
		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		blocknum += pp->p_offset;
		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
			pp->p_offset));
	} else {
		db1_printf(("Is raw..\n"));
	}
	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to... but DO NOT
	 * TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* num_blocks: whole sectors in the request; pb: 1 if there is a
	 * trailing partial sector.  sum is the first sector past the end
	 * of the request. */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	/* NOTE(review): the "1 ||" forces this debug branch on; looks like
	 * leftover debugging (db1_printf is itself debug-gated) -- confirm
	 * before removing. */
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			(int) raid_addr, (int) sum, (int) num_blocks,
			(int) pb, (int) bp->b_resid));
	}
	/* Reject requests that run past the end of the array; the
	 * "sum < ..." comparisons also catch unsigned wraparound. */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		bp->b_error = ENOSPC;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* Only whole-sector transfers are accepted. */
	if (bp->b_bcount & raidPtr->sectorMask) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	db1_printf(("Calling DoAccess..\n"));


	/* Put a throttle on the number of requests we handle simultaneously:
	 * sleep until an opening is available, dropping the mutex around
	 * the tsleep so completions can release openings. */

	RF_LOCK_MUTEX(raidPtr->mutex);

	while(raidPtr->openings <= 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		(void)tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);
		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	raidPtr->openings--;

	RF_UNLOCK_MUTEX(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE.  always condition on
	 * B_READ instead */
	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    do_async, raid_addr, num_blocks,
	    bp->b_un.b_addr,
	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
	    NULL, cbFunc, cbArg);
#if 0
	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
		bp->b_data, (int) bp->b_resid));
#endif

	return (retcode);
}
   1556 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1557 
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	/* Map the RAIDframe I/O type onto the buf-layer read/write flag. */
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int     unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	/* Remember the owning queue; KernelWakeupFunc recovers it from
	 * req->queue at completion time. */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* Get a component buffer from the per-unit pool (returned by
	 * KernelWakeupFunc via RAIDPUTBUF). */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): this printf is unconditional (not db1_printf)
		 * and the extra parentheses are harmless but unusual --
		 * presumably leftover instrumentation; confirm. */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this??  */

		/* Complete the NOP immediately; no real I/O is issued. */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* Set up the component buf; KernelWakeupFunc will run from
		 * biodone() when the transfer finishes. */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* Writes must bump the vnode's output counter before
		 * being handed to the strategy routine. */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
   1670 /* this is the callback function associated with a I/O invoked from
   1671    kernel code.
   1672  */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* vbp is really the rf_buf embedded at the start of a raidbuf. */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;		/* the original buf this I/O was split from */
	struct raid_softc *rs;
	int     unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	/* Recover the dispatch context stashed by rf_DispatchKernelIO. */
	req = raidbp->req;

	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* Propagate a component-level error up to the original buf. */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* Account the physical I/O time in the trace record, if any. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* Return the component buffer to the per-unit pool. */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	/* NOTE(review): disk_unbusy() is only called on the b_resid == 0
	 * path, yet rf_DispatchKernelIO calls disk_busy() for every
	 * request -- looks like the busy count could drift when b_resid
	 * is nonzero; confirm. */
	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* Tell the queue this request is done, then run the upper-layer
	 * completion callback; both take a 1/0 error indicator. */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
   1771 
   1772 
   1773 
   1774 /*
   1775  * initialize a buf structure for doing an I/O in the kernel.
   1776  */
   1777 static void
   1778 InitBP(
   1779     struct buf * bp,
   1780     struct vnode * b_vp,
   1781     unsigned rw_flag,
   1782     dev_t dev,
   1783     RF_SectorNum_t startSect,
   1784     RF_SectorCount_t numSect,
   1785     caddr_t buf,
   1786     void (*cbFunc) (struct buf *),
   1787     void *cbArg,
   1788     int logBytesPerSector,
   1789     struct proc * b_proc)
   1790 {
   1791 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1792 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1793 	bp->b_bcount = numSect << logBytesPerSector;
   1794 	bp->b_bufsize = bp->b_bcount;
   1795 	bp->b_error = 0;
   1796 	bp->b_dev = dev;
   1797 	db1_printf(("bp->b_dev is %d\n", dev));
   1798 	bp->b_un.b_addr = buf;
   1799 #if 0
   1800 	db1_printf(("bp->b_data=0x%x\n", bp->b_data));
   1801 #endif
   1802 
   1803 	bp->b_blkno = startSect;
   1804 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1805 	db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
   1806 	if (bp->b_bcount == 0) {
   1807 		panic("bp->b_bcount is zero in InitBP!!\n");
   1808 	}
   1809 	bp->b_proc = b_proc;
   1810 	bp->b_iodone = cbFunc;
   1811 	bp->b_vp = b_vp;
   1812 
   1813 }
   1814 /* Extras... */
   1815 
#if 0
/*
 * Disabled stub: was intended to fetch a spare table from a userland
 * daemon.  As written it only prints a message and reports failure
 * (retcode 1).  Dead code -- compiled out by the surrounding #if 0.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode = 1;
	printf("This is supposed to do something useful!!\n");	/* XXX */

	return (retcode);

}
#endif
   1828 
   1829 static void
   1830 raidgetdefaultlabel(raidPtr, rs, lp)
   1831 	RF_Raid_t *raidPtr;
   1832 	struct raid_softc *rs;
   1833 	struct disklabel *lp;
   1834 {
   1835 	db1_printf(("Building a default label...\n"));
   1836 	bzero(lp, sizeof(*lp));
   1837 
   1838 	/* fabricate a label... */
   1839 	lp->d_secperunit = raidPtr->totalSectors;
   1840 	lp->d_secsize = raidPtr->bytesPerSector;
   1841 	lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
   1842 	lp->d_ntracks = 1;
   1843 	lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
   1844 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1845 
   1846 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1847 	lp->d_type = DTYPE_RAID;
   1848 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1849 	lp->d_rpm = 3600;
   1850 	lp->d_interleave = 1;
   1851 	lp->d_flags = 0;
   1852 
   1853 	lp->d_partitions[RAW_PART].p_offset = 0;
   1854 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1855 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1856 	lp->d_npartitions = RAW_PART + 1;
   1857 
   1858 	lp->d_magic = DISKMAGIC;
   1859 	lp->d_magic2 = DISKMAGIC;
   1860 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1861 
   1862 }
   1863 /*
   1864  * Read the disklabel from the raid device.  If one is not present, fake one
   1865  * up.
   1866  */
   1867 static void
   1868 raidgetdisklabel(dev)
   1869 	dev_t   dev;
   1870 {
   1871 	int     unit = raidunit(dev);
   1872 	struct raid_softc *rs = &raid_softc[unit];
   1873 	char   *errstring;
   1874 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1875 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1876 	RF_Raid_t *raidPtr;
   1877 
   1878 	db1_printf(("Getting the disklabel...\n"));
   1879 
   1880 	bzero(clp, sizeof(*clp));
   1881 
   1882 	raidPtr = raidPtrs[unit];
   1883 
   1884 	raidgetdefaultlabel(raidPtr, rs, lp);
   1885 
   1886 	/*
   1887 	 * Call the generic disklabel extraction routine.
   1888 	 */
   1889 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1890 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1891 	if (errstring)
   1892 		raidmakedisklabel(rs);
   1893 	else {
   1894 		int     i;
   1895 		struct partition *pp;
   1896 
   1897 		/*
   1898 		 * Sanity check whether the found disklabel is valid.
   1899 		 *
   1900 		 * This is necessary since total size of the raid device
   1901 		 * may vary when an interleave is changed even though exactly
   1902 		 * same componets are used, and old disklabel may used
   1903 		 * if that is found.
   1904 		 */
   1905 		if (lp->d_secperunit != rs->sc_size)
   1906 			printf("WARNING: %s: "
   1907 			    "total sector size in disklabel (%d) != "
   1908 			    "the size of raid (%ld)\n", rs->sc_xname,
   1909 			    lp->d_secperunit, (long) rs->sc_size);
   1910 		for (i = 0; i < lp->d_npartitions; i++) {
   1911 			pp = &lp->d_partitions[i];
   1912 			if (pp->p_offset + pp->p_size > rs->sc_size)
   1913 				printf("WARNING: %s: end of partition `%c' "
   1914 				    "exceeds the size of raid (%ld)\n",
   1915 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   1916 		}
   1917 	}
   1918 
   1919 }
   1920 /*
   1921  * Take care of things one might want to take care of in the event
   1922  * that a disklabel isn't present.
   1923  */
   1924 static void
   1925 raidmakedisklabel(rs)
   1926 	struct raid_softc *rs;
   1927 {
   1928 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1929 	db1_printf(("Making a label..\n"));
   1930 
   1931 	/*
   1932 	 * For historical reasons, if there's no disklabel present
   1933 	 * the raw partition must be marked FS_BSDFFS.
   1934 	 */
   1935 
   1936 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1937 
   1938 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1939 
   1940 	lp->d_checksum = dkcksum(lp);
   1941 }
   1942 /*
   1943  * Lookup the provided name in the filesystem.  If the file exists,
   1944  * is a valid block device, and isn't being used by anyone else,
   1945  * set *vpp to the file's vnode.
   1946  * You'll find the original of this in ccd.c
   1947  */
   1948 int
   1949 raidlookup(path, p, vpp)
   1950 	char   *path;
   1951 	struct proc *p;
   1952 	struct vnode **vpp;	/* result */
   1953 {
   1954 	struct nameidata nd;
   1955 	struct vnode *vp;
   1956 	struct vattr va;
   1957 	int     error;
   1958 
   1959 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   1960 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   1961 #ifdef DEBUG
   1962 		printf("RAIDframe: vn_open returned %d\n", error);
   1963 #endif
   1964 		return (error);
   1965 	}
   1966 	vp = nd.ni_vp;
   1967 	if (vp->v_usecount > 1) {
   1968 		VOP_UNLOCK(vp, 0);
   1969 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1970 		return (EBUSY);
   1971 	}
   1972 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   1973 		VOP_UNLOCK(vp, 0);
   1974 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1975 		return (error);
   1976 	}
   1977 	/* XXX: eventually we should handle VREG, too. */
   1978 	if (va.va_type != VBLK) {
   1979 		VOP_UNLOCK(vp, 0);
   1980 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1981 		return (ENOTBLK);
   1982 	}
   1983 	VOP_UNLOCK(vp, 0);
   1984 	*vpp = vp;
   1985 	return (0);
   1986 }
   1987 /*
   1988  * Wait interruptibly for an exclusive lock.
   1989  *
   1990  * XXX
   1991  * Several drivers do this; it should be abstracted and made MP-safe.
   1992  * (Hmm... where have we seen this warning before :->  GO )
   1993  */
   1994 static int
   1995 raidlock(rs)
   1996 	struct raid_softc *rs;
   1997 {
   1998 	int     error;
   1999 
   2000 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2001 		rs->sc_flags |= RAIDF_WANTED;
   2002 		if ((error =
   2003 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2004 			return (error);
   2005 	}
   2006 	rs->sc_flags |= RAIDF_LOCKED;
   2007 	return (0);
   2008 }
   2009 /*
   2010  * Unlock and wake up any waiters.
   2011  */
   2012 static void
   2013 raidunlock(rs)
   2014 	struct raid_softc *rs;
   2015 {
   2016 
   2017 	rs->sc_flags &= ~RAIDF_LOCKED;
   2018 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2019 		rs->sc_flags &= ~RAIDF_WANTED;
   2020 		wakeup(rs);
   2021 	}
   2022 }
   2023 
   2024 
   2025 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2026 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2027 
   2028 int
   2029 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2030 {
   2031 	RF_ComponentLabel_t component_label;
   2032 	raidread_component_label(dev, b_vp, &component_label);
   2033 	component_label.mod_counter = mod_counter;
   2034 	component_label.clean = RF_RAID_CLEAN;
   2035 	raidwrite_component_label(dev, b_vp, &component_label);
   2036 	return(0);
   2037 }
   2038 
   2039 
   2040 int
   2041 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2042 {
   2043 	RF_ComponentLabel_t component_label;
   2044 	raidread_component_label(dev, b_vp, &component_label);
   2045 	component_label.mod_counter = mod_counter;
   2046 	component_label.clean = RF_RAID_DIRTY;
   2047 	raidwrite_component_label(dev, b_vp, &component_label);
   2048 	return(0);
   2049 }
   2050 
/*
 * Read the RAIDframe component label of the component at device dev
 * into *component_label.  The label lives RF_COMPONENT_INFO_OFFSET
 * bytes into the component and is RF_COMPONENT_INFO_SIZE bytes long.
 * b_vp is unused (ARGSUSED).  Returns 0 on success or the error from
 * biowait().
 */
/* ARGSUSED */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
 	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* issue the read directly through the device's strategy routine */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		       sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
        } else {
		printf("Failed to read RAID component label!\n");
	}

	/* done with the scratch buffer; don't let it be cached */
        bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
/*
 * Write *component_label to the reserved label area of the component at
 * device dev (RF_COMPONENT_INFO_OFFSET bytes in, RF_COMPONENT_INFO_SIZE
 * bytes; the area beyond sizeof(RF_ComponentLabel_t) is zero-filled).
 * b_vp is unused (ARGSUSED).  Returns 0 on success or the error from
 * biowait().
 */
/* ARGSUSED */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
 	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero-fill, then copy the label into the front of the buffer */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* issue the write directly through the device's strategy routine */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
        bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
   2135 
/*
 * Mark the component labels of every non-failed component in the array
 * dirty (typically done when the array goes into use), bumping the
 * array-wide mod_counter first.  Components whose label says they are
 * spared are skipped entirely -- their label must not be touched.
 */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				/* read the label only to check for
				   rf_ds_spared status */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
#if 0
	/*
	 * NOTE(review): this disabled spare-handling code references
	 * variables (sparecol, srow, scol, i, j) that are not declared
	 * in this function, uses `r' after the loops above, and calls
	 * raidmarkclean() with only two arguments -- it would not
	 * compile if enabled.  A working version of this logic lives in
	 * rf_update_component_labels().
	 */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
   2230 
   2231 
/*
 * Rewrite the component labels of all optimal components and all
 * in-use spares after bumping the array-wide mod_counter.  Optimal
 * components simply get their status re-noted; in-use spares get a
 * fully rebuilt label carrying the row/column of the failed disk they
 * stand in for.  When parity is known good (RF_RAID_CLEAN) each label
 * is additionally marked clean via raidmarkclean().
 */
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	/* -1 means "no mapping found"; overwritten by the search below */
	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			else if (raidPtr->Disks[r][c].status !=
				   rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	/* spares live in row 0, in columns past numCol */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which failed disk this spare replaces, so
			   its label can claim that position.  NOTE(review):
			   the break only exits the inner loop, so the outer
			   loop keeps scanning; srow/scol stay -1 if no
			   mapping is found. */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
					       raidPtr->mod_counter);
			}
		}
	}
	/* 	printf("Component labels updated\n"); */
}
   2348