/* rf_netbsdkintf.c, revision 1.35 — NetBSD RAIDframe kernel interface */
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.35 1999/12/14 15:27:00 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "rf_raid.h"
    139 #include "rf_raidframe.h"
    140 #include "rf_dag.h"
    141 #include "rf_dagflags.h"
    142 #include "rf_diskqueue.h"
    143 #include "rf_acctrace.h"
    144 #include "rf_etimer.h"
    145 #include "rf_general.h"
    146 #include "rf_debugMem.h"
    147 #include "rf_kintf.h"
    148 #include "rf_options.h"
    149 #include "rf_driver.h"
    150 #include "rf_parityscan.h"
    151 #include "rf_debugprint.h"
    152 #include "rf_threadstuff.h"
    153 
int     rf_kdebug_level = 0;	/* runtime verbosity threshold for dbN_printf() */

#ifdef DEBUG
/*
 * Leveled debug output: dbN_printf(("fmt", args)) emits only when
 * rf_kdebug_level exceeds N-1; db0_printf() always prints.
 */
#define db0_printf(a) printf a
#define db_printf(a) if (rf_kdebug_level > 0) printf a
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#define db2_printf(a) if (rf_kdebug_level > 1) printf a
#define db3_printf(a) if (rf_kdebug_level > 2) printf a
#define db4_printf(a) if (rf_kdebug_level > 3) printf a
#define db5_printf(a) if (rf_kdebug_level > 4) printf a
#else				/* DEBUG */
/* Non-DEBUG kernels: only db0_printf() messages survive. */
#define db0_printf(a) printf a
#define db1_printf(a) { }
#define db2_printf(a) { }
#define db3_printf(a) { }
#define db4_printf(a) { }
#define db5_printf(a) { }
#endif				/* DEBUG */

static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

/* NOTE(review): these two queues appear to be guarded by
 * rf_sparet_wait_mutex above — confirm against the spare-table code. */
static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

static struct rf_recon_req *recon_queue = NULL;	/* used to communicate
						 * reconstruction
						 * requests */


/* Simple lock guarding recon_queue; use the macros below to take it. */
decl_simple_lock_data(, recon_queue_mutex)
#define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
#define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)

/* prototypes */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);

/* Conditional debug printfs for the disk-queue code (rf_queueDebug). */
#define Dprintf0(s)       if (rf_queueDebug) \
     rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf1(s,a)     if (rf_queueDebug) \
     rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf2(s,a,b)   if (rf_queueDebug) \
     rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf3(s,a,b,c) if (rf_queueDebug) \
     rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)

/* Mark component labels clean/dirty on the given component device. */
int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
    210 
/* Autoconfiguration entry points (definitions below). */
void raidattach __P((int));
int raidsize __P((dev_t));

void    rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
void    rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
static int raidinit __P((dev_t, RF_Raid_t *, int));

/* Block/character device switch entry points for the raid pseudo-device. */
int raidopen __P((dev_t, int, int, struct proc *));
int raidclose __P((dev_t, int, int, struct proc *));
int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int raidwrite __P((dev_t, struct uio *, int));
int raidread __P((dev_t, struct uio *, int));
void raidstrategy __P((struct buf *));
int raiddump __P((dev_t, daddr_t, caddr_t, size_t));

/* On-disk component label read/write/update helpers. */
int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
void rf_update_component_labels( RF_Raid_t *);
/*
 * Pilfered from ccd.c
 */

/* Per-component I/O wrapper: pairs a fresh buf with the original request. */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


/* Allocate/release a raidbuf from the unit's component buffer pool. */
#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)

/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that... */

/* Per-unit software state for one raid pseudo-device. */
struct raid_softc {
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;        /* size of the raid device */
	dev_t   sc_dev;	        /* our device.. */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf buf_queue;   /* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
    263 
/* Extract the unit number from a minor device number. */
#define	raidunit(x)	DISKUNIT(x)
static int numraid = 0;		/* number of units set up by raidattach() */

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even it if is used immedately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* Device number of the raw partition of the same unit (for label I/O). */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

/* Disklabel construction/fetch helpers (defined later in this file). */
static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
				     struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

/* Per-unit configuration lock (RAIDF_LOCKED/RAIDF_WANTED protocol). */
static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));
int raidlookup __P((char *, struct proc * p, struct vnode **));

static void rf_markalldirty __P((RF_Raid_t *));
    304 
    305 void
    306 raidattach(num)
    307 	int     num;
    308 {
    309 	int raidID;
    310 	int i, rc;
    311 
    312 #ifdef DEBUG
    313 	printf("raidattach: Asked for %d units\n", num);
    314 #endif
    315 
    316 	if (num <= 0) {
    317 #ifdef DIAGNOSTIC
    318 		panic("raidattach: count <= 0");
    319 #endif
    320 		return;
    321 	}
    322 	/* This is where all the initialization stuff gets done. */
    323 
    324 	/* Make some space for requested number of units... */
    325 
    326 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    327 	if (raidPtrs == NULL) {
    328 		panic("raidPtrs is NULL!!\n");
    329 	}
    330 
    331 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    332 	if (rc) {
    333 		RF_PANIC();
    334 	}
    335 
    336 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    337 	recon_queue = NULL;
    338 
    339 	for (i = 0; i < numraid; i++)
    340 		raidPtrs[i] = NULL;
    341 	rc = rf_BootRaidframe();
    342 	if (rc == 0)
    343 		printf("Kernelized RAIDframe activated\n");
    344 	else
    345 		panic("Serious error booting RAID!!\n");
    346 
    347 	/* put together some datastructures like the CCD device does.. This
    348 	 * lets us lock the device and what-not when it gets opened. */
    349 
    350 	raid_softc = (struct raid_softc *)
    351 	    malloc(num * sizeof(struct raid_softc),
    352 	    M_RAIDFRAME, M_NOWAIT);
    353 	if (raid_softc == NULL) {
    354 		printf("WARNING: no memory for RAIDframe driver\n");
    355 		return;
    356 	}
    357 	numraid = num;
    358 	bzero(raid_softc, num * sizeof(struct raid_softc));
    359 
    360 	for (raidID = 0; raidID < num; raidID++) {
    361 		raid_softc[raidID].buf_queue.b_actf = NULL;
    362 		raid_softc[raidID].buf_queue.b_actb =
    363 			&raid_softc[raidID].buf_queue.b_actf;
    364 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    365 			  (RF_Raid_t *));
    366 		if (raidPtrs[raidID] == NULL) {
    367 			printf("raidPtrs[%d] is NULL\n", raidID);
    368 		}
    369 	}
    370 }
    371 
    372 
    373 int
    374 raidsize(dev)
    375 	dev_t   dev;
    376 {
    377 	struct raid_softc *rs;
    378 	struct disklabel *lp;
    379 	int     part, unit, omask, size;
    380 
    381 	unit = raidunit(dev);
    382 	if (unit >= numraid)
    383 		return (-1);
    384 	rs = &raid_softc[unit];
    385 
    386 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    387 		return (-1);
    388 
    389 	part = DISKPART(dev);
    390 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    391 	lp = rs->sc_dkdev.dk_label;
    392 
    393 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    394 		return (-1);
    395 
    396 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    397 		size = -1;
    398 	else
    399 		size = lp->d_partitions[part].p_size *
    400 		    (lp->d_secsize / DEV_BSIZE);
    401 
    402 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    403 		return (-1);
    404 
    405 	return (size);
    406 
    407 }
    408 
/*
 * raiddump(dev, blkno, va, size):
 *	Crash-dump entry point.  Dumping to a RAID set is not supported;
 *	always fails with ENXIO.
 */
int
raiddump(dev, blkno, va, size)
	dev_t   dev;
	daddr_t blkno;
	caddr_t va;
	size_t  size;
{
	/* Not implemented. */
	return ENXIO;
}
    419 /* ARGSUSED */
/*
 * raidopen(dev, flags, fmt, p):
 *	Open routine for the raid pseudo-device.  Validates the unit and
 *	partition, records the open in the char/block open masks, and on
 *	the first open of a configured unit re-reads the disklabel and
 *	marks all component labels dirty (so an unclean shutdown can be
 *	detected later).  Returns 0 or an errno.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against configuration/unconfiguration. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	/* First open of a configured unit: refresh the in-core label. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	/* Combined open mask: union of char and block opens. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
    498 /* ARGSUSED */
    499 int
    500 raidclose(dev, flags, fmt, p)
    501 	dev_t   dev;
    502 	int     flags, fmt;
    503 	struct proc *p;
    504 {
    505 	int     unit = raidunit(dev);
    506 	struct raid_softc *rs;
    507 	int     error = 0;
    508 	int     part;
    509 
    510 	if (unit >= numraid)
    511 		return (ENXIO);
    512 	rs = &raid_softc[unit];
    513 
    514 	if ((error = raidlock(rs)) != 0)
    515 		return (error);
    516 
    517 	part = DISKPART(dev);
    518 
    519 	/* ...that much closer to allowing unconfiguration... */
    520 	switch (fmt) {
    521 	case S_IFCHR:
    522 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    523 		break;
    524 
    525 	case S_IFBLK:
    526 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    527 		break;
    528 	}
    529 	rs->sc_dkdev.dk_openmask =
    530 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    531 
    532 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    533 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    534 		/* Last one... device is not unconfigured yet.
    535 		   Device shutdown has taken care of setting the
    536 		   clean bits if RAIDF_INITED is not set
    537 		   mark things as clean... */
    538 		rf_update_component_labels( raidPtrs[unit] );
    539 	}
    540 
    541 	raidunlock(rs);
    542 	return (0);
    543 
    544 }
    545 
    546 void
    547 raidstrategy(bp)
    548 	register struct buf *bp;
    549 {
    550 	register int s;
    551 
    552 	unsigned int raidID = raidunit(bp->b_dev);
    553 	RF_Raid_t *raidPtr;
    554 	struct raid_softc *rs = &raid_softc[raidID];
    555 	struct disklabel *lp;
    556 	struct buf *dp;
    557 	int     wlabel;
    558 
    559 #if 0
    560 	db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
    561 	db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
    562 	db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
    563 	db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
    564 	db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
    565 
    566 	if (bp->b_flags & B_READ)
    567 		db1_printf(("READ\n"));
    568 	else
    569 		db1_printf(("WRITE\n"));
    570 #endif
    571 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    572 		bp->b_error = ENXIO;
    573 		bp->b_flags = B_ERROR;
    574 		bp->b_resid = bp->b_bcount;
    575 		biodone(bp);
    576 		return;
    577 	}
    578 	if (raidID >= numraid || !raidPtrs[raidID]) {
    579 		bp->b_error = ENODEV;
    580 		bp->b_flags |= B_ERROR;
    581 		bp->b_resid = bp->b_bcount;
    582 		biodone(bp);
    583 		return;
    584 	}
    585 	raidPtr = raidPtrs[raidID];
    586 	if (!raidPtr->valid) {
    587 		bp->b_error = ENODEV;
    588 		bp->b_flags |= B_ERROR;
    589 		bp->b_resid = bp->b_bcount;
    590 		biodone(bp);
    591 		return;
    592 	}
    593 	if (bp->b_bcount == 0) {
    594 		db1_printf(("b_bcount is zero..\n"));
    595 		biodone(bp);
    596 		return;
    597 	}
    598 	lp = rs->sc_dkdev.dk_label;
    599 
    600 	/*
    601 	 * Do bounds checking and adjust transfer.  If there's an
    602 	 * error, the bounds check will flag that for us.
    603 	 */
    604 
    605 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    606 	if (DISKPART(bp->b_dev) != RAW_PART)
    607 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    608 			db1_printf(("Bounds check failed!!:%d %d\n",
    609 				(int) bp->b_blkno, (int) wlabel));
    610 			biodone(bp);
    611 			return;
    612 		}
    613 	s = splbio();
    614 
    615 	bp->b_resid = 0;
    616 
    617 	/* stuff it onto our queue */
    618 
    619 	dp = &rs->buf_queue;
    620 	bp->b_actf = NULL;
    621 	bp->b_actb = dp->b_actb;
    622 	*dp->b_actb = bp;
    623 	dp->b_actb = &bp->b_actf;
    624 
    625 	raidstart(raidPtrs[raidID]);
    626 
    627 	splx(s);
    628 }
    629 /* ARGSUSED */
    630 int
    631 raidread(dev, uio, flags)
    632 	dev_t   dev;
    633 	struct uio *uio;
    634 	int     flags;
    635 {
    636 	int     unit = raidunit(dev);
    637 	struct raid_softc *rs;
    638 	int     part;
    639 
    640 	if (unit >= numraid)
    641 		return (ENXIO);
    642 	rs = &raid_softc[unit];
    643 
    644 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    645 		return (ENXIO);
    646 	part = DISKPART(dev);
    647 
    648 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    649 
    650 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    651 
    652 }
    653 /* ARGSUSED */
    654 int
    655 raidwrite(dev, uio, flags)
    656 	dev_t   dev;
    657 	struct uio *uio;
    658 	int     flags;
    659 {
    660 	int     unit = raidunit(dev);
    661 	struct raid_softc *rs;
    662 
    663 	if (unit >= numraid)
    664 		return (ENXIO);
    665 	rs = &raid_softc[unit];
    666 
    667 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    668 		return (ENXIO);
    669 	db1_printf(("raidwrite\n"));
    670 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    671 
    672 }
    673 
    674 int
    675 raidioctl(dev, cmd, data, flag, p)
    676 	dev_t   dev;
    677 	u_long  cmd;
    678 	caddr_t data;
    679 	int     flag;
    680 	struct proc *p;
    681 {
    682 	int     unit = raidunit(dev);
    683 	int     error = 0;
    684 	int     part, pmask;
    685 	struct raid_softc *rs;
    686 	RF_Config_t *k_cfg, *u_cfg;
    687 	u_char *specific_buf;
    688 	int retcode = 0;
    689 	int row;
    690 	int column;
    691 	int s;
    692 	struct rf_recon_req *rrcopy, *rr;
    693 	RF_ComponentLabel_t *component_label;
    694 	RF_ComponentLabel_t ci_label;
    695 	RF_ComponentLabel_t **c_label_ptr;
    696 	RF_SingleComponent_t *sparePtr,*componentPtr;
    697 	RF_SingleComponent_t hot_spare;
    698 	RF_SingleComponent_t component;
    699 
    700 	if (unit >= numraid)
    701 		return (ENXIO);
    702 	rs = &raid_softc[unit];
    703 
    704 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    705 		(int) DISKPART(dev), (int) unit, (int) cmd));
    706 
    707 	/* Must be open for writes for these commands... */
    708 	switch (cmd) {
    709 	case DIOCSDINFO:
    710 	case DIOCWDINFO:
    711 	case DIOCWLABEL:
    712 		if ((flag & FWRITE) == 0)
    713 			return (EBADF);
    714 	}
    715 
    716 	/* Must be initialized for these... */
    717 	switch (cmd) {
    718 	case DIOCGDINFO:
    719 	case DIOCSDINFO:
    720 	case DIOCWDINFO:
    721 	case DIOCGPART:
    722 	case DIOCWLABEL:
    723 	case DIOCGDEFLABEL:
    724 	case RAIDFRAME_SHUTDOWN:
    725 	case RAIDFRAME_REWRITEPARITY:
    726 	case RAIDFRAME_GET_INFO:
    727 	case RAIDFRAME_RESET_ACCTOTALS:
    728 	case RAIDFRAME_GET_ACCTOTALS:
    729 	case RAIDFRAME_KEEP_ACCTOTALS:
    730 	case RAIDFRAME_GET_SIZE:
    731 	case RAIDFRAME_FAIL_DISK:
    732 	case RAIDFRAME_COPYBACK:
    733 	case RAIDFRAME_CHECKRECON:
    734 	case RAIDFRAME_GET_COMPONENT_LABEL:
    735 	case RAIDFRAME_SET_COMPONENT_LABEL:
    736 	case RAIDFRAME_ADD_HOT_SPARE:
    737 	case RAIDFRAME_REMOVE_HOT_SPARE:
    738 	case RAIDFRAME_INIT_LABELS:
    739 	case RAIDFRAME_REBUILD_IN_PLACE:
    740 	case RAIDFRAME_CHECK_PARITY:
    741 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    742 			return (ENXIO);
    743 	}
    744 
    745 	switch (cmd) {
    746 
    747 
    748 		/* configure the system */
    749 	case RAIDFRAME_CONFIGURE:
    750 
    751 		db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
    752 		/* copy-in the configuration information */
    753 		/* data points to a pointer to the configuration structure */
    754 		u_cfg = *((RF_Config_t **) data);
    755 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    756 		if (k_cfg == NULL) {
    757 			db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
    758 			return (ENOMEM);
    759 		}
    760 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    761 		    sizeof(RF_Config_t));
    762 		if (retcode) {
    763 			RF_Free(k_cfg, sizeof(RF_Config_t));
    764 			db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
    765 				retcode));
    766 			return (retcode);
    767 		}
    768 		/* allocate a buffer for the layout-specific data, and copy it
    769 		 * in */
    770 		if (k_cfg->layoutSpecificSize) {
    771 			if (k_cfg->layoutSpecificSize > 10000) {
    772 				/* sanity check */
    773 				RF_Free(k_cfg, sizeof(RF_Config_t));
    774 				db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
    775 				return (EINVAL);
    776 			}
    777 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    778 			    (u_char *));
    779 			if (specific_buf == NULL) {
    780 				RF_Free(k_cfg, sizeof(RF_Config_t));
    781 				db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
    782 				return (ENOMEM);
    783 			}
    784 			retcode = copyin(k_cfg->layoutSpecific,
    785 			    (caddr_t) specific_buf,
    786 			    k_cfg->layoutSpecificSize);
    787 			if (retcode) {
    788 				RF_Free(k_cfg, sizeof(RF_Config_t));
    789 				RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    790 				db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
    791 					retcode));
    792 				return (retcode);
    793 			}
    794 		} else
    795 			specific_buf = NULL;
    796 		k_cfg->layoutSpecific = specific_buf;
    797 
    798 		/* should do some kind of sanity check on the configuration.
    799 		 * Store the sum of all the bytes in the last byte? */
    800 
    801 		/* configure the system */
    802 
    803 		raidPtrs[unit]->raidid = unit;
    804 
    805 		retcode = rf_Configure(raidPtrs[unit], k_cfg);
    806 
    807 		/* allow this many simultaneous IO's to this RAID device */
    808 		raidPtrs[unit]->openings = RAIDOUTSTANDING;
    809 
    810 		if (retcode == 0) {
    811 			retcode = raidinit(dev, raidPtrs[unit], unit);
    812 			rf_markalldirty( raidPtrs[unit] );
    813 		}
    814 		/* free the buffers.  No return code here. */
    815 		if (k_cfg->layoutSpecificSize) {
    816 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    817 		}
    818 		RF_Free(k_cfg, sizeof(RF_Config_t));
    819 
    820 		db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
    821 			retcode));
    822 
    823 		return (retcode);
    824 
    825 		/* shutdown the system */
    826 	case RAIDFRAME_SHUTDOWN:
    827 
    828 		if ((error = raidlock(rs)) != 0)
    829 			return (error);
    830 
    831 		/*
    832 		 * If somebody has a partition mounted, we shouldn't
    833 		 * shutdown.
    834 		 */
    835 
    836 		part = DISKPART(dev);
    837 		pmask = (1 << part);
    838 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    839 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    840 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    841 			raidunlock(rs);
    842 			return (EBUSY);
    843 		}
    844 
    845 		if (rf_debugKernelAccess) {
    846 			printf("call shutdown\n");
    847 		}
    848 
    849 		retcode = rf_Shutdown(raidPtrs[unit]);
    850 
    851 		db1_printf(("Done main shutdown\n"));
    852 
    853 		pool_destroy(&rs->sc_cbufpool);
    854 		db1_printf(("Done freeing component buffer freelist\n"));
    855 
    856 		/* It's no longer initialized... */
    857 		rs->sc_flags &= ~RAIDF_INITED;
    858 
    859 		/* Detach the disk. */
    860 		disk_detach(&rs->sc_dkdev);
    861 
    862 		raidunlock(rs);
    863 
    864 		return (retcode);
    865 	case RAIDFRAME_GET_COMPONENT_LABEL:
    866 		c_label_ptr = (RF_ComponentLabel_t **) data;
    867 		/* need to read the component label for the disk indicated
    868 		   by row,column in component_label
    869 		   XXX need to sanity check these values!!!
    870 		   */
    871 
    872 		/* For practice, let's get it directly fromdisk, rather
    873 		   than from the in-core copy */
    874 		RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
    875 			   (RF_ComponentLabel_t *));
    876 		if (component_label == NULL)
    877 			return (ENOMEM);
    878 
    879 		bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
    880 
    881 		retcode = copyin( *c_label_ptr, component_label,
    882 				  sizeof(RF_ComponentLabel_t));
    883 
    884 		if (retcode) {
    885 			RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    886 			return(retcode);
    887 		}
    888 
    889 		row = component_label->row;
    890 		column = component_label->column;
    891 
    892 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
    893 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
    894 			RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    895 			return(EINVAL);
    896 		}
    897 
    898 		raidread_component_label(
    899                               raidPtrs[unit]->Disks[row][column].dev,
    900 			      raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    901 			      component_label );
    902 
    903 		retcode = copyout((caddr_t) component_label,
    904 				  (caddr_t) *c_label_ptr,
    905 				  sizeof(RF_ComponentLabel_t));
    906 		RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    907 		return (retcode);
    908 
    909 	case RAIDFRAME_SET_COMPONENT_LABEL:
    910 		component_label = (RF_ComponentLabel_t *) data;
    911 
    912 		/* XXX check the label for valid stuff... */
    913 		/* Note that some things *should not* get modified --
    914 		   the user should be re-initing the labels instead of
    915 		   trying to patch things.
    916 		   */
    917 
    918 		printf("Got component label:\n");
    919 		printf("Version: %d\n",component_label->version);
    920 		printf("Serial Number: %d\n",component_label->serial_number);
    921 		printf("Mod counter: %d\n",component_label->mod_counter);
    922 		printf("Row: %d\n", component_label->row);
    923 		printf("Column: %d\n", component_label->column);
    924 		printf("Num Rows: %d\n", component_label->num_rows);
    925 		printf("Num Columns: %d\n", component_label->num_columns);
    926 		printf("Clean: %d\n", component_label->clean);
    927 		printf("Status: %d\n", component_label->status);
    928 
    929 		row = component_label->row;
    930 		column = component_label->column;
    931 
    932 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
    933 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
    934 			return(EINVAL);
    935 		}
    936 
    937 		/* XXX this isn't allowed to do anything for now :-) */
    938 #if 0
    939 		raidwrite_component_label(
    940                             raidPtrs[unit]->Disks[row][column].dev,
    941 			    raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    942 			    component_label );
    943 #endif
    944 		return (0);
    945 
    946 	case RAIDFRAME_INIT_LABELS:
    947 		component_label = (RF_ComponentLabel_t *) data;
    948 		/*
    949 		   we only want the serial number from
    950 		   the above.  We get all the rest of the information
    951 		   from the config that was used to create this RAID
    952 		   set.
    953 		   */
    954 
    955 		raidPtrs[unit]->serial_number = component_label->serial_number;
    956 		/* current version number */
    957 		ci_label.version = RF_COMPONENT_LABEL_VERSION;
    958 		ci_label.serial_number = component_label->serial_number;
    959 		ci_label.mod_counter = raidPtrs[unit]->mod_counter;
    960 		ci_label.num_rows = raidPtrs[unit]->numRow;
    961 		ci_label.num_columns = raidPtrs[unit]->numCol;
    962 		ci_label.clean = RF_RAID_DIRTY; /* not clean */
    963 		ci_label.status = rf_ds_optimal; /* "It's good!" */
    964 
    965 		for(row=0;row<raidPtrs[unit]->numRow;row++) {
    966 			ci_label.row = row;
    967 			for(column=0;column<raidPtrs[unit]->numCol;column++) {
    968 				ci_label.column = column;
    969 				raidwrite_component_label(
    970 				  raidPtrs[unit]->Disks[row][column].dev,
    971 				  raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    972 				  &ci_label );
    973 			}
    974 		}
    975 
    976 		return (retcode);
    977 
    978 		/* initialize all parity */
    979 	case RAIDFRAME_REWRITEPARITY:
    980 
    981 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
    982 			/* Parity for RAID 0 is trivially correct */
    983 			raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
    984 			return(0);
    985 		}
    986 
    987 		/* borrow the thread of the requesting process */
    988 
    989 		s = splbio();
    990 		retcode = rf_RewriteParity(raidPtrs[unit]);
    991 		splx(s);
    992 		/* return I/O Error if the parity rewrite fails */
    993 
    994 		if (retcode) {
    995 			retcode = EIO;
    996 		} else {
    997 			/* set the clean bit!  If we shutdown correctly,
    998 			 the clean bit on each component label will get
    999 			 set */
   1000 			raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
   1001 		}
   1002 		return (retcode);
   1003 
   1004 
   1005 	case RAIDFRAME_ADD_HOT_SPARE:
   1006 		sparePtr = (RF_SingleComponent_t *) data;
   1007 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1008 		printf("Adding spare\n");
   1009 		retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
   1010 		return(retcode);
   1011 
   1012 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1013 		return(retcode);
   1014 
   1015 	case RAIDFRAME_REBUILD_IN_PLACE:
   1016 
   1017 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1018 			/* Can't do this on a RAID 0!! */
   1019 			return(EINVAL);
   1020 		}
   1021 
   1022 		componentPtr = (RF_SingleComponent_t *) data;
   1023 		memcpy( &component, componentPtr,
   1024 			sizeof(RF_SingleComponent_t));
   1025 		row = component.row;
   1026 		column = component.column;
   1027 		printf("Rebuild: %d %d\n",row, column);
   1028 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
   1029 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
   1030 			return(EINVAL);
   1031 		}
   1032 		printf("Attempting a rebuild in place\n");
   1033 		s = splbio();
   1034 		retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
   1035 		splx(s);
   1036 		return(retcode);
   1037 
   1038 	case RAIDFRAME_GET_INFO:
   1039 		{
   1040 			RF_Raid_t *raid = raidPtrs[unit];
   1041 			RF_DeviceConfig_t *cfg, **ucfgp;
   1042 			int     i, j, d;
   1043 
   1044 			if (!raid->valid)
   1045 				return (ENODEV);
   1046 			ucfgp = (RF_DeviceConfig_t **) data;
   1047 			RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
   1048 				  (RF_DeviceConfig_t *));
   1049 			if (cfg == NULL)
   1050 				return (ENOMEM);
   1051 			bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
   1052 			cfg->rows = raid->numRow;
   1053 			cfg->cols = raid->numCol;
   1054 			cfg->ndevs = raid->numRow * raid->numCol;
   1055 			if (cfg->ndevs >= RF_MAX_DISKS) {
   1056 				RF_Free(cfg, sizeof(RF_DeviceConfig_t));
   1057 				return (ENOMEM);
   1058 			}
   1059 			cfg->nspares = raid->numSpare;
   1060 			if (cfg->nspares >= RF_MAX_DISKS) {
   1061 				RF_Free(cfg, sizeof(RF_DeviceConfig_t));
   1062 				return (ENOMEM);
   1063 			}
   1064 			cfg->maxqdepth = raid->maxQueueDepth;
   1065 			d = 0;
   1066 			for (i = 0; i < cfg->rows; i++) {
   1067 				for (j = 0; j < cfg->cols; j++) {
   1068 					cfg->devs[d] = raid->Disks[i][j];
   1069 					d++;
   1070 				}
   1071 			}
   1072 			for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
   1073 				cfg->spares[i] = raid->Disks[0][j];
   1074 			}
   1075 			retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
   1076 					  sizeof(RF_DeviceConfig_t));
   1077 			RF_Free(cfg, sizeof(RF_DeviceConfig_t));
   1078 
   1079 			return (retcode);
   1080 		}
   1081 		break;
   1082 	case RAIDFRAME_CHECK_PARITY:
   1083 		*(int *) data = raidPtrs[unit]->parity_good;
   1084 		return (0);
   1085 	case RAIDFRAME_RESET_ACCTOTALS:
   1086 		{
   1087 			RF_Raid_t *raid = raidPtrs[unit];
   1088 
   1089 			bzero(&raid->acc_totals, sizeof(raid->acc_totals));
   1090 			return (0);
   1091 		}
   1092 		break;
   1093 
   1094 	case RAIDFRAME_GET_ACCTOTALS:
   1095 		{
   1096 			RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
   1097 			RF_Raid_t *raid = raidPtrs[unit];
   1098 
   1099 			*totals = raid->acc_totals;
   1100 			return (0);
   1101 		}
   1102 		break;
   1103 
   1104 	case RAIDFRAME_KEEP_ACCTOTALS:
   1105 		{
   1106 			RF_Raid_t *raid = raidPtrs[unit];
   1107 			int    *keep = (int *) data;
   1108 
   1109 			raid->keep_acc_totals = *keep;
   1110 			return (0);
   1111 		}
   1112 		break;
   1113 
   1114 	case RAIDFRAME_GET_SIZE:
   1115 		*(int *) data = raidPtrs[unit]->totalSectors;
   1116 		return (0);
   1117 
   1118 #define RAIDFRAME_RECON 1
   1119 		/* XXX The above should probably be set somewhere else!! GO */
   1120 #if RAIDFRAME_RECON > 0
   1121 
   1122 		/* fail a disk & optionally start reconstruction */
   1123 	case RAIDFRAME_FAIL_DISK:
   1124 
   1125 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1126 			/* Can't do this on a RAID 0!! */
   1127 			return(EINVAL);
   1128 		}
   1129 
   1130 		rr = (struct rf_recon_req *) data;
   1131 
   1132 		if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
   1133 		    || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
   1134 			return (EINVAL);
   1135 
   1136 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1137 		       unit, rr->row, rr->col);
   1138 
   1139 		/* make a copy of the recon request so that we don't rely on
   1140 		 * the user's buffer */
   1141 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1142 		bcopy(rr, rrcopy, sizeof(*rr));
   1143 		rrcopy->raidPtr = (void *) raidPtrs[unit];
   1144 
   1145 		LOCK_RECON_Q_MUTEX();
   1146 		rrcopy->next = recon_queue;
   1147 		recon_queue = rrcopy;
   1148 		wakeup(&recon_queue);
   1149 		UNLOCK_RECON_Q_MUTEX();
   1150 
   1151 		return (0);
   1152 
   1153 		/* invoke a copyback operation after recon on whatever disk
   1154 		 * needs it, if any */
   1155 	case RAIDFRAME_COPYBACK:
   1156 
   1157 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1158 			/* This makes no sense on a RAID 0!! */
   1159 			return(EINVAL);
   1160 		}
   1161 
   1162 		/* borrow the current thread to get this done */
   1163 
   1164 		s = splbio();
   1165 		rf_CopybackReconstructedData(raidPtrs[unit]);
   1166 		splx(s);
   1167 		return (0);
   1168 
   1169 		/* return the percentage completion of reconstruction */
   1170 	case RAIDFRAME_CHECKRECON:
   1171 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1172 			/* This makes no sense on a RAID 0 */
   1173 			return(EINVAL);
   1174 		}
   1175 
   1176 		row = *(int *) data;
   1177 		if (row < 0 || row >= raidPtrs[unit]->numRow)
   1178 			return (EINVAL);
   1179 		if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
   1180 			*(int *) data = 100;
   1181 		else
   1182 			*(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
   1183 		return (0);
   1184 
   1185 		/* the sparetable daemon calls this to wait for the kernel to
   1186 		 * need a spare table. this ioctl does not return until a
   1187 		 * spare table is needed. XXX -- calling mpsleep here in the
   1188 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1189 		 * -- I should either compute the spare table in the kernel,
   1190 		 * or have a different -- XXX XXX -- interface (a different
   1191 		 * character device) for delivering the table          -- XXX */
   1192 #if 0
   1193 	case RAIDFRAME_SPARET_WAIT:
   1194 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1195 		while (!rf_sparet_wait_queue)
   1196 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1197 		waitreq = rf_sparet_wait_queue;
   1198 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1199 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1200 
   1201 		*((RF_SparetWait_t *) data) = *waitreq;	/* structure assignment */
   1202 
   1203 		RF_Free(waitreq, sizeof(*waitreq));
   1204 		return (0);
   1205 
   1206 
   1207 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1208 		 * code in it that will cause the dameon to exit */
   1209 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1210 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1211 		waitreq->fcol = -1;
   1212 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1213 		waitreq->next = rf_sparet_wait_queue;
   1214 		rf_sparet_wait_queue = waitreq;
   1215 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1216 		wakeup(&rf_sparet_wait_queue);
   1217 		return (0);
   1218 
   1219 		/* used by the spare table daemon to deliver a spare table
   1220 		 * into the kernel */
   1221 	case RAIDFRAME_SEND_SPARET:
   1222 
   1223 		/* install the spare table */
   1224 		retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
   1225 
   1226 		/* respond to the requestor.  the return status of the spare
   1227 		 * table installation is passed in the "fcol" field */
   1228 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1229 		waitreq->fcol = retcode;
   1230 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1231 		waitreq->next = rf_sparet_resp_queue;
   1232 		rf_sparet_resp_queue = waitreq;
   1233 		wakeup(&rf_sparet_resp_queue);
   1234 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1235 
   1236 		return (retcode);
   1237 #endif
   1238 
   1239 
   1240 #endif				/* RAIDFRAME_RECON > 0 */
   1241 
   1242 	default:
   1243 		break;		/* fall through to the os-specific code below */
   1244 
   1245 	}
   1246 
   1247 	if (!raidPtrs[unit]->valid)
   1248 		return (EINVAL);
   1249 
   1250 	/*
   1251 	 * Add support for "regular" device ioctls here.
   1252 	 */
   1253 
   1254 	switch (cmd) {
   1255 	case DIOCGDINFO:
   1256 		db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
   1257 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1258 		break;
   1259 
   1260 	case DIOCGPART:
   1261 		db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
   1262 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1263 		((struct partinfo *) data)->part =
   1264 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1265 		break;
   1266 
   1267 	case DIOCWDINFO:
   1268 		db1_printf(("DIOCWDINFO\n"));
   1269 	case DIOCSDINFO:
   1270 		db1_printf(("DIOCSDINFO\n"));
   1271 		if ((error = raidlock(rs)) != 0)
   1272 			return (error);
   1273 
   1274 		rs->sc_flags |= RAIDF_LABELLING;
   1275 
   1276 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1277 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1278 		if (error == 0) {
   1279 			if (cmd == DIOCWDINFO)
   1280 				error = writedisklabel(RAIDLABELDEV(dev),
   1281 				    raidstrategy, rs->sc_dkdev.dk_label,
   1282 				    rs->sc_dkdev.dk_cpulabel);
   1283 		}
   1284 		rs->sc_flags &= ~RAIDF_LABELLING;
   1285 
   1286 		raidunlock(rs);
   1287 
   1288 		if (error)
   1289 			return (error);
   1290 		break;
   1291 
   1292 	case DIOCWLABEL:
   1293 		db1_printf(("DIOCWLABEL\n"));
   1294 		if (*(int *) data != 0)
   1295 			rs->sc_flags |= RAIDF_WLABEL;
   1296 		else
   1297 			rs->sc_flags &= ~RAIDF_WLABEL;
   1298 		break;
   1299 
   1300 	case DIOCGDEFLABEL:
   1301 		db1_printf(("DIOCGDEFLABEL\n"));
   1302 		raidgetdefaultlabel(raidPtrs[unit], rs,
   1303 		    (struct disklabel *) data);
   1304 		break;
   1305 
   1306 	default:
   1307 		retcode = ENOTTY;	/* XXXX ?? OR EINVAL ? */
   1308 	}
   1309 	return (retcode);
   1310 
   1311 }
   1312 
   1313 
   1314 /* raidinit -- complete the rest of the initialization for the
   1315    RAIDframe device.  */
   1316 
   1317 
   1318 static int
   1319 raidinit(dev, raidPtr, unit)
   1320 	dev_t   dev;
   1321 	RF_Raid_t *raidPtr;
   1322 	int     unit;
   1323 {
   1324 	int     retcode;
   1325 	/* int ix; */
   1326 	/* struct raidbuf *raidbp; */
   1327 	struct raid_softc *rs;
   1328 
   1329 	retcode = 0;
   1330 
   1331 	rs = &raid_softc[unit];
   1332 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1333 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1334 
   1335 
   1336 	/* XXX should check return code first... */
   1337 	rs->sc_flags |= RAIDF_INITED;
   1338 
   1339 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1340 
   1341 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1342 
   1343 	/* disk_attach actually creates space for the CPU disklabel, among
   1344 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1345 	 * with disklabels. */
   1346 
   1347 	disk_attach(&rs->sc_dkdev);
   1348 
   1349 	/* XXX There may be a weird interaction here between this, and
   1350 	 * protectedSectors, as used in RAIDframe.  */
   1351 
   1352 	rs->sc_size = raidPtr->totalSectors;
   1353 	rs->sc_dev = dev;
   1354 
   1355 	return (retcode);
   1356 }
   1357 
   1358 /*
   1359  * This kernel thread never exits.  It is created once, and persists
   1360  * until the system reboots.
   1361  */
   1362 
void
rf_ReconKernelThread()
{
	struct rf_recon_req *req;
	int     s;

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	s = splbio();
	/* NOTE(review): `s` is never restored with splx() anywhere in this
	 * function -- harmless only because this thread loops forever and
	 * never returns. */

	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		/* Sleep until a request is queued; the mutex is dropped
		 * around tsleep() and re-taken before re-testing. */
		while (!recon_queue) {
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO,
			       "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		/* Dequeue the head request while holding the mutex. */
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
	         * If flags specifies that we should start recon, this call
	         * will not return until reconstruction completes, fails,
		 * or is aborted.
	         */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* The request was copied for us by the enqueuer; we own it
		 * and free it here. */
		RF_Free(req, sizeof(*req));
	}
}
   1397 /* wake up the daemon & tell it to get us a spare table
   1398  * XXX
   1399  * the entries in the queues should be tagged with the raidPtr
   1400  * so that in the extremely rare case that two recons happen at once,
   1401  * we know for which device were requesting a spare table
   1402  * XXX
   1403  */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode;

	/* Queue our request for the daemon and wake whoever is waiting
	 * on the request queue (RAIDFRAME_SPARET_WAIT). */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* NOTE(review): the old comment here said "mpsleep unlocks the
	 * mutex", but this code now uses tsleep(), which does NOT release
	 * rf_sparet_wait_mutex -- so the mutex is held across the sleep.
	 * Verify this cannot deadlock against the daemon's enqueue path. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* Dequeue the daemon's response. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The install status is passed back in the fcol field. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
   1429 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1430  * bp & passes it down.
   1431  * any calls originating in the kernel must use non-blocking I/O
   1432  * do some extra sanity checking to return "appropriate" error values for
   1433  * certain conditions (to make some standard utilities work)
   1434  *
   1435  * Formerly known as: rf_DoAccessKernel
   1436  */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int     retcode;
	struct partition *pp;
	daddr_t blocknum;
	int     unit;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;
	struct buf *dp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Check to see if we're at the limit... */
	/* Drain the softc's buf queue as long as openings remain.  The
	 * mutex is held only while testing/updating openings; it is
	 * dropped for the dequeue and dispatch work below. */
	RF_LOCK_MUTEX(raidPtr->mutex);
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		dp = &rs->buf_queue;
		bp = dp->b_actf;
		if (bp == NULL) {
			/* nothing more to do */
			return;
		}

		/* update structures */
		/* Unlink bp from the doubly-linked b_actf/b_actb queue,
		 * fixing up the tail pointer when bp was the last entry. */
		dp = bp->b_actf;
		if (dp != NULL) {
			dp->b_actb = bp->b_actb;
		} else {
			rs->buf_queue.b_actb = bp->b_actb;
		}
		*bp->b_actb = dp;

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* Convert the byte count to sectors; pb is 1 iff the count
		 * is not an exact multiple of the sector size. */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* NOTE(review): the "1 ||" forces this debug branch on
		 * unconditionally; db1_printf presumably compiles away,
		 * but confirm this was intentional. */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Range check: fail the buffer with ENOSPC if it runs past
		 * the end of the array or the arithmetic wrapped. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that are not sector-multiples. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one opening before dispatching. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* NOTE(review): retcode is assigned here but never examined;
		 * completion/error delivery happens via the async callback. */
		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				      do_async, raid_addr, num_blocks,
				      bp->b_un.b_addr, bp, NULL, NULL,
				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);


		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
   1556 
   1557 
   1558 
   1559 
   1560 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1561 
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int     unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	/* Remember which queue this request came from, for completion. */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	/* Clear any stale error state left in the incoming buffer. */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* Get a component buffer from the per-unit pool to carry this I/O. */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this??  */

		/* Complete the NOP immediately via the normal callback. */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* Set up the component buf; KernelWakeupFunc will be
		 * called from biodone() when the I/O finishes. */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* Writes must bump the vnode's pending-output count. */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
   1676 /* this is the callback function associated with a I/O invoked from
   1677    kernel code.
   1678  */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int     unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	/* The component buf carries the original request and the
	 * originating buf as context (set up in rf_DispatchKernelIO). */
	req = raidbp->req;

	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* Propagate a component I/O error to the original buffer,
	 * defaulting to EIO if no specific error code was recorded. */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* Account the physical I/O time in the trace record, if any. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* Return the component buffer to the per-unit pool. */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* Tell the disk-queue layer this request finished, then invoke the
	 * requester's completion function with the error status. */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
   1777 
   1778 
   1779 
   1780 /*
   1781  * initialize a buf structure for doing an I/O in the kernel.
   1782  */
   1783 static void
   1784 InitBP(
   1785     struct buf * bp,
   1786     struct vnode * b_vp,
   1787     unsigned rw_flag,
   1788     dev_t dev,
   1789     RF_SectorNum_t startSect,
   1790     RF_SectorCount_t numSect,
   1791     caddr_t buf,
   1792     void (*cbFunc) (struct buf *),
   1793     void *cbArg,
   1794     int logBytesPerSector,
   1795     struct proc * b_proc)
   1796 {
   1797 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1798 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1799 	bp->b_bcount = numSect << logBytesPerSector;
   1800 	bp->b_bufsize = bp->b_bcount;
   1801 	bp->b_error = 0;
   1802 	bp->b_dev = dev;
   1803 	db1_printf(("bp->b_dev is %d\n", dev));
   1804 	bp->b_un.b_addr = buf;
   1805 #if 0
   1806 	db1_printf(("bp->b_data=0x%x\n", bp->b_data));
   1807 #endif
   1808 	bp->b_blkno = startSect;
   1809 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1810 	db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
   1811 	if (bp->b_bcount == 0) {
   1812 		panic("bp->b_bcount is zero in InitBP!!\n");
   1813 	}
   1814 	bp->b_proc = b_proc;
   1815 	bp->b_iodone = cbFunc;
   1816 	bp->b_vp = b_vp;
   1817 
   1818 }
   1819 
   1820 static void
   1821 raidgetdefaultlabel(raidPtr, rs, lp)
   1822 	RF_Raid_t *raidPtr;
   1823 	struct raid_softc *rs;
   1824 	struct disklabel *lp;
   1825 {
   1826 	db1_printf(("Building a default label...\n"));
   1827 	bzero(lp, sizeof(*lp));
   1828 
   1829 	/* fabricate a label... */
   1830 	lp->d_secperunit = raidPtr->totalSectors;
   1831 	lp->d_secsize = raidPtr->bytesPerSector;
   1832 	lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
   1833 	lp->d_ntracks = 1;
   1834 	lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
   1835 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1836 
   1837 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1838 	lp->d_type = DTYPE_RAID;
   1839 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1840 	lp->d_rpm = 3600;
   1841 	lp->d_interleave = 1;
   1842 	lp->d_flags = 0;
   1843 
   1844 	lp->d_partitions[RAW_PART].p_offset = 0;
   1845 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1846 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1847 	lp->d_npartitions = RAW_PART + 1;
   1848 
   1849 	lp->d_magic = DISKMAGIC;
   1850 	lp->d_magic2 = DISKMAGIC;
   1851 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1852 
   1853 }
   1854 /*
   1855  * Read the disklabel from the raid device.  If one is not present, fake one
   1856  * up.
   1857  */
   1858 static void
   1859 raidgetdisklabel(dev)
   1860 	dev_t   dev;
   1861 {
   1862 	int     unit = raidunit(dev);
   1863 	struct raid_softc *rs = &raid_softc[unit];
   1864 	char   *errstring;
   1865 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1866 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1867 	RF_Raid_t *raidPtr;
   1868 
   1869 	db1_printf(("Getting the disklabel...\n"));
   1870 
   1871 	bzero(clp, sizeof(*clp));
   1872 
   1873 	raidPtr = raidPtrs[unit];
   1874 
   1875 	raidgetdefaultlabel(raidPtr, rs, lp);
   1876 
   1877 	/*
   1878 	 * Call the generic disklabel extraction routine.
   1879 	 */
   1880 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1881 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1882 	if (errstring)
   1883 		raidmakedisklabel(rs);
   1884 	else {
   1885 		int     i;
   1886 		struct partition *pp;
   1887 
   1888 		/*
   1889 		 * Sanity check whether the found disklabel is valid.
   1890 		 *
   1891 		 * This is necessary since total size of the raid device
   1892 		 * may vary when an interleave is changed even though exactly
   1893 		 * same componets are used, and old disklabel may used
   1894 		 * if that is found.
   1895 		 */
   1896 		if (lp->d_secperunit != rs->sc_size)
   1897 			printf("WARNING: %s: "
   1898 			    "total sector size in disklabel (%d) != "
   1899 			    "the size of raid (%ld)\n", rs->sc_xname,
   1900 			    lp->d_secperunit, (long) rs->sc_size);
   1901 		for (i = 0; i < lp->d_npartitions; i++) {
   1902 			pp = &lp->d_partitions[i];
   1903 			if (pp->p_offset + pp->p_size > rs->sc_size)
   1904 				printf("WARNING: %s: end of partition `%c' "
   1905 				    "exceeds the size of raid (%ld)\n",
   1906 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   1907 		}
   1908 	}
   1909 
   1910 }
   1911 /*
   1912  * Take care of things one might want to take care of in the event
   1913  * that a disklabel isn't present.
   1914  */
   1915 static void
   1916 raidmakedisklabel(rs)
   1917 	struct raid_softc *rs;
   1918 {
   1919 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1920 	db1_printf(("Making a label..\n"));
   1921 
   1922 	/*
   1923 	 * For historical reasons, if there's no disklabel present
   1924 	 * the raw partition must be marked FS_BSDFFS.
   1925 	 */
   1926 
   1927 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1928 
   1929 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1930 
   1931 	lp->d_checksum = dkcksum(lp);
   1932 }
   1933 /*
   1934  * Lookup the provided name in the filesystem.  If the file exists,
   1935  * is a valid block device, and isn't being used by anyone else,
   1936  * set *vpp to the file's vnode.
   1937  * You'll find the original of this in ccd.c
   1938  */
   1939 int
   1940 raidlookup(path, p, vpp)
   1941 	char   *path;
   1942 	struct proc *p;
   1943 	struct vnode **vpp;	/* result */
   1944 {
   1945 	struct nameidata nd;
   1946 	struct vnode *vp;
   1947 	struct vattr va;
   1948 	int     error;
   1949 
   1950 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   1951 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   1952 #ifdef DEBUG
   1953 		printf("RAIDframe: vn_open returned %d\n", error);
   1954 #endif
   1955 		return (error);
   1956 	}
   1957 	vp = nd.ni_vp;
   1958 	if (vp->v_usecount > 1) {
   1959 		VOP_UNLOCK(vp, 0);
   1960 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1961 		return (EBUSY);
   1962 	}
   1963 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   1964 		VOP_UNLOCK(vp, 0);
   1965 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1966 		return (error);
   1967 	}
   1968 	/* XXX: eventually we should handle VREG, too. */
   1969 	if (va.va_type != VBLK) {
   1970 		VOP_UNLOCK(vp, 0);
   1971 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1972 		return (ENOTBLK);
   1973 	}
   1974 	VOP_UNLOCK(vp, 0);
   1975 	*vpp = vp;
   1976 	return (0);
   1977 }
   1978 /*
   1979  * Wait interruptibly for an exclusive lock.
   1980  *
   1981  * XXX
   1982  * Several drivers do this; it should be abstracted and made MP-safe.
   1983  * (Hmm... where have we seen this warning before :->  GO )
   1984  */
   1985 static int
   1986 raidlock(rs)
   1987 	struct raid_softc *rs;
   1988 {
   1989 	int     error;
   1990 
   1991 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   1992 		rs->sc_flags |= RAIDF_WANTED;
   1993 		if ((error =
   1994 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   1995 			return (error);
   1996 	}
   1997 	rs->sc_flags |= RAIDF_LOCKED;
   1998 	return (0);
   1999 }
   2000 /*
   2001  * Unlock and wake up any waiters.
   2002  */
   2003 static void
   2004 raidunlock(rs)
   2005 	struct raid_softc *rs;
   2006 {
   2007 
   2008 	rs->sc_flags &= ~RAIDF_LOCKED;
   2009 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2010 		rs->sc_flags &= ~RAIDF_WANTED;
   2011 		wakeup(rs);
   2012 	}
   2013 }
   2014 
   2015 
   2016 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2017 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2018 
   2019 int
   2020 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2021 {
   2022 	RF_ComponentLabel_t component_label;
   2023 	raidread_component_label(dev, b_vp, &component_label);
   2024 	component_label.mod_counter = mod_counter;
   2025 	component_label.clean = RF_RAID_CLEAN;
   2026 	raidwrite_component_label(dev, b_vp, &component_label);
   2027 	return(0);
   2028 }
   2029 
   2030 
   2031 int
   2032 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2033 {
   2034 	RF_ComponentLabel_t component_label;
   2035 	raidread_component_label(dev, b_vp, &component_label);
   2036 	component_label.mod_counter = mod_counter;
   2037 	component_label.clean = RF_RAID_DIRTY;
   2038 	raidwrite_component_label(dev, b_vp, &component_label);
   2039 	return(0);
   2040 }
   2041 
/*
 * Read the RAIDframe component label from the given component device.
 *
 * Issues a raw read of RF_COMPONENT_INFO_SIZE bytes at byte offset
 * RF_COMPONENT_INFO_OFFSET via the device's strategy routine and
 * copies the label into *component_label.  Returns 0 on success or
 * the error from biowait().  The b_vp argument is unused (ARGSUSED).
 */
/* ARGSUSED */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
 	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* hand the buf straight to the driver's strategy entry point */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	/* sleep until the I/O completes */
	error = biowait(bp);

	if (!error) {
		/* only the label portion of the block is meaningful */
		memcpy(component_label, bp->b_un.b_addr,
		       sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
        } else {
		printf("Failed to read RAID component label!\n");
	}

	/* discard the scratch buffer rather than caching stale data */
        bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
/*
 * Write the RAIDframe component label to the given component device.
 *
 * Zero-fills a scratch block of RF_COMPONENT_INFO_SIZE bytes, copies
 * *component_label into it, and writes it at byte offset
 * RF_COMPONENT_INFO_OFFSET via the device's strategy routine.
 * Returns 0 on success or the error from biowait().  The b_vp
 * argument is unused (ARGSUSED).
 */
/* ARGSUSED */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
 	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* clear the whole block so the bytes past the label are zero */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* discard the scratch buffer rather than caching it */
        bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
   2126 
   2127 void
   2128 rf_markalldirty( raidPtr )
   2129 	RF_Raid_t *raidPtr;
   2130 {
   2131 	RF_ComponentLabel_t c_label;
   2132 	int r,c;
   2133 
   2134 	raidPtr->mod_counter++;
   2135 	for (r = 0; r < raidPtr->numRow; r++) {
   2136 		for (c = 0; c < raidPtr->numCol; c++) {
   2137 			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
   2138 				raidread_component_label(
   2139 					raidPtr->Disks[r][c].dev,
   2140 					raidPtr->raid_cinfo[r][c].ci_vp,
   2141 					&c_label);
   2142 				if (c_label.status == rf_ds_spared) {
   2143 					/* XXX do something special...
   2144 					 but whatever you do, don't
   2145 					 try to access it!! */
   2146 				} else {
   2147 #if 0
   2148 				c_label.status =
   2149 					raidPtr->Disks[r][c].status;
   2150 				raidwrite_component_label(
   2151 					raidPtr->Disks[r][c].dev,
   2152 					raidPtr->raid_cinfo[r][c].ci_vp,
   2153 					&c_label);
   2154 #endif
   2155 				raidmarkdirty(
   2156 				       raidPtr->Disks[r][c].dev,
   2157 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2158 				       raidPtr->mod_counter);
   2159 				}
   2160 			}
   2161 		}
   2162 	}
   2163 	/* printf("Component labels marked dirty.\n"); */
   2164 #if 0
   2165 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2166 		sparecol = raidPtr->numCol + c;
   2167 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2168 			/*
   2169 
   2170 			   XXX this is where we get fancy and map this spare
   2171 			   into it's correct spot in the array.
   2172 
   2173 			 */
   2174 			/*
   2175 
   2176 			   we claim this disk is "optimal" if it's
   2177 			   rf_ds_used_spare, as that means it should be
   2178 			   directly substitutable for the disk it replaced.
   2179 			   We note that too...
   2180 
   2181 			 */
   2182 
   2183 			for(i=0;i<raidPtr->numRow;i++) {
   2184 				for(j=0;j<raidPtr->numCol;j++) {
   2185 					if ((raidPtr->Disks[i][j].spareRow ==
   2186 					     r) &&
   2187 					    (raidPtr->Disks[i][j].spareCol ==
   2188 					     sparecol)) {
   2189 						srow = r;
   2190 						scol = sparecol;
   2191 						break;
   2192 					}
   2193 				}
   2194 			}
   2195 
   2196 			raidread_component_label(
   2197 				      raidPtr->Disks[r][sparecol].dev,
   2198 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2199 				      &c_label);
   2200 			/* make sure status is noted */
   2201 			c_label.version = RF_COMPONENT_LABEL_VERSION;
   2202 			c_label.mod_counter = raidPtr->mod_counter;
   2203 			c_label.serial_number = raidPtr->serial_number;
   2204 			c_label.row = srow;
   2205 			c_label.column = scol;
   2206 			c_label.num_rows = raidPtr->numRow;
   2207 			c_label.num_columns = raidPtr->numCol;
   2208 			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2209 			c_label.status = rf_ds_optimal;
   2210 			raidwrite_component_label(
   2211 				      raidPtr->Disks[r][sparecol].dev,
   2212 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2213 				      &c_label);
   2214 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2215 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2216 		}
   2217 	}
   2218 
   2219 #endif
   2220 }
   2221 
   2222 
/*
 * Rewrite the component labels of all optimal components (and any
 * in-use spares) to reflect the current state of the array, bumping
 * the modification counter first.  If parity is known clean
 * (raidPtr->parity_good == RF_RAID_CLEAN), the labels are also marked
 * clean via raidmarkclean().
 *
 * For each in-use spare, the (srow, scol) of the failed disk it
 * replaces is looked up so the spare's label records the position it
 * is standing in for.
 */
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				/* read-modify-write the on-disk label */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			else if (raidPtr->Disks[r][c].status !=
				   rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	/* spares are addressed as row 0, column numCol + c */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which array position this spare stands in for */
			/* NOTE(review): this `break' only exits the inner
			   j loop; the outer i loop keeps scanning, so the
			   last match (if several) wins — confirm intended. */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
					       raidPtr->mod_counter);
			}
		}
	}
	/* 	printf("Component labels updated\n"); */
}
   2339