/* rf_netbsdkintf.c, revision 1.47 */
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.47 2000/01/21 23:39:59 thorpej Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
/***********************************************************
 *
 * rf_netbsdkintf.c -- the NetBSD kernel interface
 * routines for RAIDframe
 *
 ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "rf_raid.h"
    139 #include "rf_raidframe.h"
    140 #include "rf_copyback.h"
    141 #include "rf_dag.h"
    142 #include "rf_dagflags.h"
    143 #include "rf_diskqueue.h"
    144 #include "rf_acctrace.h"
    145 #include "rf_etimer.h"
    146 #include "rf_general.h"
    147 #include "rf_debugMem.h"
    148 #include "rf_kintf.h"
    149 #include "rf_options.h"
    150 #include "rf_driver.h"
    151 #include "rf_parityscan.h"
    152 #include "rf_debugprint.h"
    153 #include "rf_threadstuff.h"
    154 
/* Driver debug verbosity; values > 0 enable db1_printf() output. */
int     rf_kdebug_level = 0;

#ifdef DEBUG
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#else				/* DEBUG */
#define db1_printf(a) { }
#endif				/* DEBUG */

static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/* protects the two spare-table queues below */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
    171 
/* prototypes */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);
static int raidinit __P((dev_t, RF_Raid_t *, int));

/* bdevsw/cdevsw entry points for the raid pseudo-device */
void raidattach __P((int));
int raidsize __P((dev_t));
int raidopen __P((dev_t, int, int, struct proc *));
int raidclose __P((dev_t, int, int, struct proc *));
int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int raidwrite __P((dev_t, struct uio *, int));
int raidread __P((dev_t, struct uio *, int));
void raidstrategy __P((struct buf *));
int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    190 
/*
 * Per-component-I/O bookkeeping, pilfered from ccd.c.  One of these
 * wraps each component buf so the I/O completion path can locate the
 * original buf and the RAIDframe request it belongs to.
 */

struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


/* allocate/free a struct raidbuf from the unit's component buffer pool */
#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    205 
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that... */

/* per-unit software state for the raid pseudo-device */
struct raid_softc {
	int     sc_flags;	/* flags (RAIDF_*, below) */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;        /* size of the raid device */
	dev_t   sc_dev;	        /* our device.. */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf_queue buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* extract the unit number from a dev_t */
#define	raidunit(x)	DISKUNIT(x)
static int numraid = 0;		/* number of configured units; set in
				 * raidattach() */

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even it if is used immedately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* the raw-partition device used for reading/writing the disklabel */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;
    255 
/* disklabel handling helpers */
static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
				     struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

/* per-unit lock/unlock (see RAIDF_LOCKED / RAIDF_WANTED above) */
static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));

/* mark component labels dirty -- presumably for unclean-shutdown
 * detection; see usage in raidopen().  TODO confirm against callee. */
static void rf_markalldirty __P((RF_Raid_t *));

/* kernel-thread entry points for long-running operations */
void rf_ReconThread __P((struct rf_recon_req *));
/* XXX what I want is: */
/*void rf_ReconThread __P((RF_Raid_t *raidPtr));  */
void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
void rf_CopybackThread __P((RF_Raid_t *raidPtr));
void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
    272 
    273 void
    274 raidattach(num)
    275 	int     num;
    276 {
    277 	int raidID;
    278 	int i, rc;
    279 
    280 #ifdef DEBUG
    281 	printf("raidattach: Asked for %d units\n", num);
    282 #endif
    283 
    284 	if (num <= 0) {
    285 #ifdef DIAGNOSTIC
    286 		panic("raidattach: count <= 0");
    287 #endif
    288 		return;
    289 	}
    290 	/* This is where all the initialization stuff gets done. */
    291 
    292 	/* Make some space for requested number of units... */
    293 
    294 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    295 	if (raidPtrs == NULL) {
    296 		panic("raidPtrs is NULL!!\n");
    297 	}
    298 
    299 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    300 	if (rc) {
    301 		RF_PANIC();
    302 	}
    303 
    304 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    305 
    306 	for (i = 0; i < numraid; i++)
    307 		raidPtrs[i] = NULL;
    308 	rc = rf_BootRaidframe();
    309 	if (rc == 0)
    310 		printf("Kernelized RAIDframe activated\n");
    311 	else
    312 		panic("Serious error booting RAID!!\n");
    313 
    314 	/* put together some datastructures like the CCD device does.. This
    315 	 * lets us lock the device and what-not when it gets opened. */
    316 
    317 	raid_softc = (struct raid_softc *)
    318 	    malloc(num * sizeof(struct raid_softc),
    319 	    M_RAIDFRAME, M_NOWAIT);
    320 	if (raid_softc == NULL) {
    321 		printf("WARNING: no memory for RAIDframe driver\n");
    322 		return;
    323 	}
    324 	numraid = num;
    325 	bzero(raid_softc, num * sizeof(struct raid_softc));
    326 
    327 	for (raidID = 0; raidID < num; raidID++) {
    328 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    329 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    330 			  (RF_Raid_t *));
    331 		if (raidPtrs[raidID] == NULL) {
    332 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    333 			numraid = raidID;
    334 			return;
    335 		}
    336 	}
    337 }
    338 
    339 
    340 int
    341 raidsize(dev)
    342 	dev_t   dev;
    343 {
    344 	struct raid_softc *rs;
    345 	struct disklabel *lp;
    346 	int     part, unit, omask, size;
    347 
    348 	unit = raidunit(dev);
    349 	if (unit >= numraid)
    350 		return (-1);
    351 	rs = &raid_softc[unit];
    352 
    353 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    354 		return (-1);
    355 
    356 	part = DISKPART(dev);
    357 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    358 	lp = rs->sc_dkdev.dk_label;
    359 
    360 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    361 		return (-1);
    362 
    363 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    364 		size = -1;
    365 	else
    366 		size = lp->d_partitions[part].p_size *
    367 		    (lp->d_secsize / DEV_BSIZE);
    368 
    369 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    370 		return (-1);
    371 
    372 	return (size);
    373 
    374 }
    375 
    376 int
    377 raiddump(dev, blkno, va, size)
    378 	dev_t   dev;
    379 	daddr_t blkno;
    380 	caddr_t va;
    381 	size_t  size;
    382 {
    383 	/* Not implemented. */
    384 	return ENXIO;
    385 }
    386 /* ARGSUSED */
/*
 * raidopen: open entry point for the raid pseudo-device.
 *
 * Takes the per-unit lock, re-reads the disklabel on the first open of
 * a configured unit, verifies the requested partition exists, and
 * records the open in the disk(9) character/block open masks.  On the
 * very first open of an initialized unit, all component labels are
 * marked dirty so an unclean shutdown can be detected later.
 *
 * Returns 0 on success, ENXIO for a bad unit/partition, or an error
 * from raidlock().
 */
int
raidopen(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;	/* fmt is S_IFCHR or S_IFBLK */
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* serialize against concurrent open/close/unconfigure */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	/* first open of a configured unit: refresh the in-core label */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	/* dk_openmask is the union of the char and block open masks */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
    465 /* ARGSUSED */
/*
 * raidclose: close entry point for the raid pseudo-device.
 *
 * Clears this partition from the appropriate open mask; on the last
 * close of an initialized unit, the component labels are updated
 * (marked clean).  Returns 0, ENXIO for a bad unit, or an error from
 * raidlock().
 */
int
raidclose(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;	/* fmt is S_IFCHR or S_IFBLK */
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	int     error = 0;
	int     part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* serialize against concurrent open/close/unconfigure */
	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */
		rf_update_component_labels( raidPtrs[unit] );
	}

	raidunlock(rs);
	return (0);

}
    512 
    513 void
    514 raidstrategy(bp)
    515 	register struct buf *bp;
    516 {
    517 	register int s;
    518 
    519 	unsigned int raidID = raidunit(bp->b_dev);
    520 	RF_Raid_t *raidPtr;
    521 	struct raid_softc *rs = &raid_softc[raidID];
    522 	struct disklabel *lp;
    523 	int     wlabel;
    524 
    525 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    526 		bp->b_error = ENXIO;
    527 		bp->b_flags = B_ERROR;
    528 		bp->b_resid = bp->b_bcount;
    529 		biodone(bp);
    530 		return;
    531 	}
    532 	if (raidID >= numraid || !raidPtrs[raidID]) {
    533 		bp->b_error = ENODEV;
    534 		bp->b_flags |= B_ERROR;
    535 		bp->b_resid = bp->b_bcount;
    536 		biodone(bp);
    537 		return;
    538 	}
    539 	raidPtr = raidPtrs[raidID];
    540 	if (!raidPtr->valid) {
    541 		bp->b_error = ENODEV;
    542 		bp->b_flags |= B_ERROR;
    543 		bp->b_resid = bp->b_bcount;
    544 		biodone(bp);
    545 		return;
    546 	}
    547 	if (bp->b_bcount == 0) {
    548 		db1_printf(("b_bcount is zero..\n"));
    549 		biodone(bp);
    550 		return;
    551 	}
    552 	lp = rs->sc_dkdev.dk_label;
    553 
    554 	/*
    555 	 * Do bounds checking and adjust transfer.  If there's an
    556 	 * error, the bounds check will flag that for us.
    557 	 */
    558 
    559 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    560 	if (DISKPART(bp->b_dev) != RAW_PART)
    561 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    562 			db1_printf(("Bounds check failed!!:%d %d\n",
    563 				(int) bp->b_blkno, (int) wlabel));
    564 			biodone(bp);
    565 			return;
    566 		}
    567 	s = splbio();
    568 
    569 	bp->b_resid = 0;
    570 
    571 	/* stuff it onto our queue */
    572 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    573 
    574 	raidstart(raidPtrs[raidID]);
    575 
    576 	splx(s);
    577 }
    578 /* ARGSUSED */
    579 int
    580 raidread(dev, uio, flags)
    581 	dev_t   dev;
    582 	struct uio *uio;
    583 	int     flags;
    584 {
    585 	int     unit = raidunit(dev);
    586 	struct raid_softc *rs;
    587 	int     part;
    588 
    589 	if (unit >= numraid)
    590 		return (ENXIO);
    591 	rs = &raid_softc[unit];
    592 
    593 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    594 		return (ENXIO);
    595 	part = DISKPART(dev);
    596 
    597 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    598 
    599 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    600 
    601 }
    602 /* ARGSUSED */
    603 int
    604 raidwrite(dev, uio, flags)
    605 	dev_t   dev;
    606 	struct uio *uio;
    607 	int     flags;
    608 {
    609 	int     unit = raidunit(dev);
    610 	struct raid_softc *rs;
    611 
    612 	if (unit >= numraid)
    613 		return (ENXIO);
    614 	rs = &raid_softc[unit];
    615 
    616 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    617 		return (ENXIO);
    618 	db1_printf(("raidwrite\n"));
    619 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    620 
    621 }
    622 
    623 int
    624 raidioctl(dev, cmd, data, flag, p)
    625 	dev_t   dev;
    626 	u_long  cmd;
    627 	caddr_t data;
    628 	int     flag;
    629 	struct proc *p;
    630 {
    631 	int     unit = raidunit(dev);
    632 	int     error = 0;
    633 	int     part, pmask;
    634 	struct raid_softc *rs;
    635 	RF_Config_t *k_cfg, *u_cfg;
    636 	RF_Raid_t *raidPtr;
    637 	RF_AccTotals_t *totals;
    638 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    639 	u_char *specific_buf;
    640 	int retcode = 0;
    641 	int row;
    642 	int column;
    643 	struct rf_recon_req *rrcopy, *rr;
    644 	RF_ComponentLabel_t *component_label;
    645 	RF_ComponentLabel_t ci_label;
    646 	RF_ComponentLabel_t **c_label_ptr;
    647 	RF_SingleComponent_t *sparePtr,*componentPtr;
    648 	RF_SingleComponent_t hot_spare;
    649 	RF_SingleComponent_t component;
    650 	int i, j, d;
    651 
    652 	if (unit >= numraid)
    653 		return (ENXIO);
    654 	rs = &raid_softc[unit];
    655 	raidPtr = raidPtrs[unit];
    656 
    657 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    658 		(int) DISKPART(dev), (int) unit, (int) cmd));
    659 
    660 	/* Must be open for writes for these commands... */
    661 	switch (cmd) {
    662 	case DIOCSDINFO:
    663 	case DIOCWDINFO:
    664 	case DIOCWLABEL:
    665 		if ((flag & FWRITE) == 0)
    666 			return (EBADF);
    667 	}
    668 
    669 	/* Must be initialized for these... */
    670 	switch (cmd) {
    671 	case DIOCGDINFO:
    672 	case DIOCSDINFO:
    673 	case DIOCWDINFO:
    674 	case DIOCGPART:
    675 	case DIOCWLABEL:
    676 	case DIOCGDEFLABEL:
    677 	case RAIDFRAME_SHUTDOWN:
    678 	case RAIDFRAME_REWRITEPARITY:
    679 	case RAIDFRAME_GET_INFO:
    680 	case RAIDFRAME_RESET_ACCTOTALS:
    681 	case RAIDFRAME_GET_ACCTOTALS:
    682 	case RAIDFRAME_KEEP_ACCTOTALS:
    683 	case RAIDFRAME_GET_SIZE:
    684 	case RAIDFRAME_FAIL_DISK:
    685 	case RAIDFRAME_COPYBACK:
    686 	case RAIDFRAME_CHECK_RECON_STATUS:
    687 	case RAIDFRAME_GET_COMPONENT_LABEL:
    688 	case RAIDFRAME_SET_COMPONENT_LABEL:
    689 	case RAIDFRAME_ADD_HOT_SPARE:
    690 	case RAIDFRAME_REMOVE_HOT_SPARE:
    691 	case RAIDFRAME_INIT_LABELS:
    692 	case RAIDFRAME_REBUILD_IN_PLACE:
    693 	case RAIDFRAME_CHECK_PARITY:
    694 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    695 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    696 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    697 			return (ENXIO);
    698 	}
    699 
    700 	switch (cmd) {
    701 
    702 		/* configure the system */
    703 	case RAIDFRAME_CONFIGURE:
    704 		/* copy-in the configuration information */
    705 		/* data points to a pointer to the configuration structure */
    706 
    707 		u_cfg = *((RF_Config_t **) data);
    708 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    709 		if (k_cfg == NULL) {
    710 			return (ENOMEM);
    711 		}
    712 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    713 		    sizeof(RF_Config_t));
    714 		if (retcode) {
    715 			RF_Free(k_cfg, sizeof(RF_Config_t));
    716 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    717 				retcode));
    718 			return (retcode);
    719 		}
    720 		/* allocate a buffer for the layout-specific data, and copy it
    721 		 * in */
    722 		if (k_cfg->layoutSpecificSize) {
    723 			if (k_cfg->layoutSpecificSize > 10000) {
    724 				/* sanity check */
    725 				RF_Free(k_cfg, sizeof(RF_Config_t));
    726 				return (EINVAL);
    727 			}
    728 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    729 			    (u_char *));
    730 			if (specific_buf == NULL) {
    731 				RF_Free(k_cfg, sizeof(RF_Config_t));
    732 				return (ENOMEM);
    733 			}
    734 			retcode = copyin(k_cfg->layoutSpecific,
    735 			    (caddr_t) specific_buf,
    736 			    k_cfg->layoutSpecificSize);
    737 			if (retcode) {
    738 				RF_Free(k_cfg, sizeof(RF_Config_t));
    739 				RF_Free(specific_buf,
    740 					k_cfg->layoutSpecificSize);
    741 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    742 					retcode));
    743 				return (retcode);
    744 			}
    745 		} else
    746 			specific_buf = NULL;
    747 		k_cfg->layoutSpecific = specific_buf;
    748 
    749 		/* should do some kind of sanity check on the configuration.
    750 		 * Store the sum of all the bytes in the last byte? */
    751 
    752 		/* configure the system */
    753 
    754 		raidPtr->raidid = unit;
    755 
    756 		retcode = rf_Configure(raidPtr, k_cfg);
    757 
    758 		if (retcode == 0) {
    759 
    760 			/* allow this many simultaneous IO's to
    761 			   this RAID device */
    762 			raidPtr->openings = RAIDOUTSTANDING;
    763 
    764 			/* XXX should be moved to rf_Configure() */
    765 
    766 			raidPtr->copyback_in_progress = 0;
    767 			raidPtr->parity_rewrite_in_progress = 0;
    768 			raidPtr->recon_in_progress = 0;
    769 
    770 			retcode = raidinit(dev, raidPtr, unit);
    771 			rf_markalldirty( raidPtr );
    772 		}
    773 		/* free the buffers.  No return code here. */
    774 		if (k_cfg->layoutSpecificSize) {
    775 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    776 		}
    777 		RF_Free(k_cfg, sizeof(RF_Config_t));
    778 
    779 		return (retcode);
    780 
    781 		/* shutdown the system */
    782 	case RAIDFRAME_SHUTDOWN:
    783 
    784 		if ((error = raidlock(rs)) != 0)
    785 			return (error);
    786 
    787 		/*
    788 		 * If somebody has a partition mounted, we shouldn't
    789 		 * shutdown.
    790 		 */
    791 
    792 		part = DISKPART(dev);
    793 		pmask = (1 << part);
    794 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    795 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    796 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    797 			raidunlock(rs);
    798 			return (EBUSY);
    799 		}
    800 
    801 		retcode = rf_Shutdown(raidPtr);
    802 
    803 		pool_destroy(&rs->sc_cbufpool);
    804 
    805 		/* It's no longer initialized... */
    806 		rs->sc_flags &= ~RAIDF_INITED;
    807 
    808 		/* Detach the disk. */
    809 		disk_detach(&rs->sc_dkdev);
    810 
    811 		raidunlock(rs);
    812 
    813 		return (retcode);
    814 	case RAIDFRAME_GET_COMPONENT_LABEL:
    815 		c_label_ptr = (RF_ComponentLabel_t **) data;
    816 		/* need to read the component label for the disk indicated
    817 		   by row,column in component_label */
    818 
    819 		/* For practice, let's get it directly fromdisk, rather
    820 		   than from the in-core copy */
    821 		RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
    822 			   (RF_ComponentLabel_t *));
    823 		if (component_label == NULL)
    824 			return (ENOMEM);
    825 
    826 		bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
    827 
    828 		retcode = copyin( *c_label_ptr, component_label,
    829 				  sizeof(RF_ComponentLabel_t));
    830 
    831 		if (retcode) {
    832 			RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    833 			return(retcode);
    834 		}
    835 
    836 		row = component_label->row;
    837 		column = component_label->column;
    838 
    839 		if ((row < 0) || (row >= raidPtr->numRow) ||
    840 		    (column < 0) || (column >= raidPtr->numCol)) {
    841 			RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    842 			return(EINVAL);
    843 		}
    844 
    845 		raidread_component_label(
    846                               raidPtr->Disks[row][column].dev,
    847 			      raidPtr->raid_cinfo[row][column].ci_vp,
    848 			      component_label );
    849 
    850 		retcode = copyout((caddr_t) component_label,
    851 				  (caddr_t) *c_label_ptr,
    852 				  sizeof(RF_ComponentLabel_t));
    853 		RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    854 		return (retcode);
    855 
    856 	case RAIDFRAME_SET_COMPONENT_LABEL:
    857 		component_label = (RF_ComponentLabel_t *) data;
    858 
    859 		/* XXX check the label for valid stuff... */
    860 		/* Note that some things *should not* get modified --
    861 		   the user should be re-initing the labels instead of
    862 		   trying to patch things.
    863 		   */
    864 
    865 		printf("Got component label:\n");
    866 		printf("Version: %d\n",component_label->version);
    867 		printf("Serial Number: %d\n",component_label->serial_number);
    868 		printf("Mod counter: %d\n",component_label->mod_counter);
    869 		printf("Row: %d\n", component_label->row);
    870 		printf("Column: %d\n", component_label->column);
    871 		printf("Num Rows: %d\n", component_label->num_rows);
    872 		printf("Num Columns: %d\n", component_label->num_columns);
    873 		printf("Clean: %d\n", component_label->clean);
    874 		printf("Status: %d\n", component_label->status);
    875 
    876 		row = component_label->row;
    877 		column = component_label->column;
    878 
    879 		if ((row < 0) || (row >= raidPtr->numRow) ||
    880 		    (column < 0) || (column >= raidPtr->numCol)) {
    881 			return(EINVAL);
    882 		}
    883 
    884 		/* XXX this isn't allowed to do anything for now :-) */
    885 #if 0
    886 		raidwrite_component_label(
    887                             raidPtr->Disks[row][column].dev,
    888 			    raidPtr->raid_cinfo[row][column].ci_vp,
    889 			    component_label );
    890 #endif
    891 		return (0);
    892 
    893 	case RAIDFRAME_INIT_LABELS:
    894 		component_label = (RF_ComponentLabel_t *) data;
    895 		/*
    896 		   we only want the serial number from
    897 		   the above.  We get all the rest of the information
    898 		   from the config that was used to create this RAID
    899 		   set.
    900 		   */
    901 
    902 		raidPtr->serial_number = component_label->serial_number;
    903 		/* current version number */
    904 		ci_label.version = RF_COMPONENT_LABEL_VERSION;
    905 		ci_label.serial_number = component_label->serial_number;
    906 		ci_label.mod_counter = raidPtr->mod_counter;
    907 		ci_label.num_rows = raidPtr->numRow;
    908 		ci_label.num_columns = raidPtr->numCol;
    909 		ci_label.clean = RF_RAID_DIRTY; /* not clean */
    910 		ci_label.status = rf_ds_optimal; /* "It's good!" */
    911 
    912 		for(row=0;row<raidPtr->numRow;row++) {
    913 			ci_label.row = row;
    914 			for(column=0;column<raidPtr->numCol;column++) {
    915 				ci_label.column = column;
    916 				raidwrite_component_label(
    917 				  raidPtr->Disks[row][column].dev,
    918 				  raidPtr->raid_cinfo[row][column].ci_vp,
    919 				  &ci_label );
    920 			}
    921 		}
    922 
    923 		return (retcode);
    924 
    925 		/* initialize all parity */
    926 	case RAIDFRAME_REWRITEPARITY:
    927 
    928 		if (raidPtr->Layout.map->faultsTolerated == 0) {
    929 			/* Parity for RAID 0 is trivially correct */
    930 			raidPtr->parity_good = RF_RAID_CLEAN;
    931 			return(0);
    932 		}
    933 
    934 		if (raidPtr->parity_rewrite_in_progress == 1) {
    935 			/* Re-write is already in progress! */
    936 			return(EINVAL);
    937 		}
    938 
    939 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
    940 					   rf_RewriteParityThread,
    941 					   raidPtr,"raid_parity");
    942 		return (retcode);
    943 
    944 
    945 	case RAIDFRAME_ADD_HOT_SPARE:
    946 		sparePtr = (RF_SingleComponent_t *) data;
    947 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
    948 		printf("Adding spare\n");
    949 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
    950 		return(retcode);
    951 
    952 	case RAIDFRAME_REMOVE_HOT_SPARE:
    953 		return(retcode);
    954 
    955 	case RAIDFRAME_REBUILD_IN_PLACE:
    956 
    957 		if (raidPtr->Layout.map->faultsTolerated == 0) {
    958 			/* Can't do this on a RAID 0!! */
    959 			return(EINVAL);
    960 		}
    961 
    962 		if (raidPtr->recon_in_progress == 1) {
    963 			/* a reconstruct is already in progress! */
    964 			return(EINVAL);
    965 		}
    966 
    967 		componentPtr = (RF_SingleComponent_t *) data;
    968 		memcpy( &component, componentPtr,
    969 			sizeof(RF_SingleComponent_t));
    970 		row = component.row;
    971 		column = component.column;
    972 		printf("Rebuild: %d %d\n",row, column);
    973 		if ((row < 0) || (row >= raidPtr->numRow) ||
    974 		    (column < 0) || (column >= raidPtr->numCol)) {
    975 			return(EINVAL);
    976 		}
    977 
    978 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
    979 		if (rrcopy == NULL)
    980 			return(ENOMEM);
    981 
    982 		rrcopy->raidPtr = (void *) raidPtr;
    983 		rrcopy->row = row;
    984 		rrcopy->col = column;
    985 
    986 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
    987 					   rf_ReconstructInPlaceThread,
    988 					   rrcopy,"raid_reconip");
    989 		return(retcode);
    990 
    991 	case RAIDFRAME_GET_INFO:
    992 		if (!raidPtr->valid)
    993 			return (ENODEV);
    994 		ucfgp = (RF_DeviceConfig_t **) data;
    995 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
    996 			  (RF_DeviceConfig_t *));
    997 		if (d_cfg == NULL)
    998 			return (ENOMEM);
    999 		bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
   1000 		d_cfg->rows = raidPtr->numRow;
   1001 		d_cfg->cols = raidPtr->numCol;
   1002 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1003 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1004 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1005 			return (ENOMEM);
   1006 		}
   1007 		d_cfg->nspares = raidPtr->numSpare;
   1008 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1009 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1010 			return (ENOMEM);
   1011 		}
   1012 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1013 		d = 0;
   1014 		for (i = 0; i < d_cfg->rows; i++) {
   1015 			for (j = 0; j < d_cfg->cols; j++) {
   1016 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1017 				d++;
   1018 			}
   1019 		}
   1020 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1021 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1022 		}
   1023 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1024 				  sizeof(RF_DeviceConfig_t));
   1025 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1026 
   1027 		return (retcode);
   1028 
   1029 	case RAIDFRAME_CHECK_PARITY:
   1030 		*(int *) data = raidPtr->parity_good;
   1031 		return (0);
   1032 
   1033 	case RAIDFRAME_RESET_ACCTOTALS:
   1034 		bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
   1035 		return (0);
   1036 
   1037 	case RAIDFRAME_GET_ACCTOTALS:
   1038 		totals = (RF_AccTotals_t *) data;
   1039 		*totals = raidPtr->acc_totals;
   1040 		return (0);
   1041 
   1042 	case RAIDFRAME_KEEP_ACCTOTALS:
   1043 		raidPtr->keep_acc_totals = *(int *)data;
   1044 		return (0);
   1045 
   1046 	case RAIDFRAME_GET_SIZE:
   1047 		*(int *) data = raidPtr->totalSectors;
   1048 		return (0);
   1049 
   1050 		/* fail a disk & optionally start reconstruction */
   1051 	case RAIDFRAME_FAIL_DISK:
   1052 
   1053 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1054 			/* Can't do this on a RAID 0!! */
   1055 			return(EINVAL);
   1056 		}
   1057 
   1058 		rr = (struct rf_recon_req *) data;
   1059 
   1060 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1061 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1062 			return (EINVAL);
   1063 
   1064 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1065 		       unit, rr->row, rr->col);
   1066 
   1067 		/* make a copy of the recon request so that we don't rely on
   1068 		 * the user's buffer */
   1069 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1070 		if (rrcopy == NULL)
   1071 			return(ENOMEM);
   1072 		bcopy(rr, rrcopy, sizeof(*rr));
   1073 		rrcopy->raidPtr = (void *) raidPtr;
   1074 
   1075 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1076 					   rf_ReconThread,
   1077 					   rrcopy,"raid_recon");
   1078 		return (0);
   1079 
   1080 		/* invoke a copyback operation after recon on whatever disk
   1081 		 * needs it, if any */
   1082 	case RAIDFRAME_COPYBACK:
   1083 
   1084 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1085 			/* This makes no sense on a RAID 0!! */
   1086 			return(EINVAL);
   1087 		}
   1088 
   1089 		if (raidPtr->copyback_in_progress == 1) {
   1090 			/* Copyback is already in progress! */
   1091 			return(EINVAL);
   1092 		}
   1093 
   1094 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1095 					   rf_CopybackThread,
   1096 					   raidPtr,"raid_copyback");
   1097 		return (retcode);
   1098 
   1099 		/* return the percentage completion of reconstruction */
   1100 	case RAIDFRAME_CHECK_RECON_STATUS:
   1101 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1102 			/* This makes no sense on a RAID 0 */
   1103 			return(EINVAL);
   1104 		}
   1105 		row = 0; /* XXX we only consider a single row... */
   1106 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1107 			*(int *) data = 100;
   1108 		else
   1109 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1110 		return (0);
   1111 
   1112 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1113 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1114 			/* This makes no sense on a RAID 0 */
   1115 			return(EINVAL);
   1116 		}
   1117 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1118 			*(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
   1119 		} else {
   1120 			*(int *) data = 100;
   1121 		}
   1122 		return (0);
   1123 
   1124 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1125 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1126 			/* This makes no sense on a RAID 0 */
   1127 			return(EINVAL);
   1128 		}
   1129 		if (raidPtr->copyback_in_progress == 1) {
   1130 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1131 				raidPtr->Layout.numStripe;
   1132 		} else {
   1133 			*(int *) data = 100;
   1134 		}
   1135 		return (0);
   1136 
   1137 
   1138 		/* the sparetable daemon calls this to wait for the kernel to
   1139 		 * need a spare table. this ioctl does not return until a
   1140 		 * spare table is needed. XXX -- calling mpsleep here in the
   1141 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1142 		 * -- I should either compute the spare table in the kernel,
   1143 		 * or have a different -- XXX XXX -- interface (a different
   1144 		 * character device) for delivering the table     -- XXX */
   1145 #if 0
   1146 	case RAIDFRAME_SPARET_WAIT:
   1147 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1148 		while (!rf_sparet_wait_queue)
   1149 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1150 		waitreq = rf_sparet_wait_queue;
   1151 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1152 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1153 
   1154 		/* structure assignment */
   1155 		*((RF_SparetWait_t *) data) = *waitreq;
   1156 
   1157 		RF_Free(waitreq, sizeof(*waitreq));
   1158 		return (0);
   1159 
   1160 		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
   1162 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1163 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1164 		waitreq->fcol = -1;
   1165 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1166 		waitreq->next = rf_sparet_wait_queue;
   1167 		rf_sparet_wait_queue = waitreq;
   1168 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1169 		wakeup(&rf_sparet_wait_queue);
   1170 		return (0);
   1171 
   1172 		/* used by the spare table daemon to deliver a spare table
   1173 		 * into the kernel */
   1174 	case RAIDFRAME_SEND_SPARET:
   1175 
   1176 		/* install the spare table */
   1177 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1178 
   1179 		/* respond to the requestor.  the return status of the spare
   1180 		 * table installation is passed in the "fcol" field */
   1181 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1182 		waitreq->fcol = retcode;
   1183 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1184 		waitreq->next = rf_sparet_resp_queue;
   1185 		rf_sparet_resp_queue = waitreq;
   1186 		wakeup(&rf_sparet_resp_queue);
   1187 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1188 
   1189 		return (retcode);
   1190 #endif
   1191 
   1192 	default:
   1193 		break; /* fall through to the os-specific code below */
   1194 
   1195 	}
   1196 
   1197 	if (!raidPtr->valid)
   1198 		return (EINVAL);
   1199 
   1200 	/*
   1201 	 * Add support for "regular" device ioctls here.
   1202 	 */
   1203 
   1204 	switch (cmd) {
   1205 	case DIOCGDINFO:
   1206 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1207 		break;
   1208 
   1209 	case DIOCGPART:
   1210 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1211 		((struct partinfo *) data)->part =
   1212 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1213 		break;
   1214 
   1215 	case DIOCWDINFO:
   1216 	case DIOCSDINFO:
   1217 		if ((error = raidlock(rs)) != 0)
   1218 			return (error);
   1219 
   1220 		rs->sc_flags |= RAIDF_LABELLING;
   1221 
   1222 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1223 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1224 		if (error == 0) {
   1225 			if (cmd == DIOCWDINFO)
   1226 				error = writedisklabel(RAIDLABELDEV(dev),
   1227 				    raidstrategy, rs->sc_dkdev.dk_label,
   1228 				    rs->sc_dkdev.dk_cpulabel);
   1229 		}
   1230 		rs->sc_flags &= ~RAIDF_LABELLING;
   1231 
   1232 		raidunlock(rs);
   1233 
   1234 		if (error)
   1235 			return (error);
   1236 		break;
   1237 
   1238 	case DIOCWLABEL:
   1239 		if (*(int *) data != 0)
   1240 			rs->sc_flags |= RAIDF_WLABEL;
   1241 		else
   1242 			rs->sc_flags &= ~RAIDF_WLABEL;
   1243 		break;
   1244 
   1245 	case DIOCGDEFLABEL:
   1246 		raidgetdefaultlabel(raidPtr, rs,
   1247 		    (struct disklabel *) data);
   1248 		break;
   1249 
   1250 	default:
   1251 		retcode = ENOTTY;
   1252 	}
   1253 	return (retcode);
   1254 
   1255 }
   1256 
   1257 
   1258 /* raidinit -- complete the rest of the initialization for the
   1259    RAIDframe device.  */
   1260 
   1261 
   1262 static int
   1263 raidinit(dev, raidPtr, unit)
   1264 	dev_t   dev;
   1265 	RF_Raid_t *raidPtr;
   1266 	int     unit;
   1267 {
   1268 	int     retcode;
   1269 	struct raid_softc *rs;
   1270 
   1271 	retcode = 0;
   1272 
   1273 	rs = &raid_softc[unit];
   1274 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1275 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1276 
   1277 
   1278 	/* XXX should check return code first... */
   1279 	rs->sc_flags |= RAIDF_INITED;
   1280 
   1281 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1282 
   1283 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1284 
   1285 	/* disk_attach actually creates space for the CPU disklabel, among
   1286 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1287 	 * with disklabels. */
   1288 
   1289 	disk_attach(&rs->sc_dkdev);
   1290 
   1291 	/* XXX There may be a weird interaction here between this, and
   1292 	 * protectedSectors, as used in RAIDframe.  */
   1293 
   1294 	rs->sc_size = raidPtr->totalSectors;
   1295 	rs->sc_dev = dev;
   1296 
   1297 	return (retcode);
   1298 }
   1299 
   1300 /* wake up the daemon & tell it to get us a spare table
   1301  * XXX
   1302  * the entries in the queues should be tagged with the raidPtr
   1303  * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
   1305  * XXX
   1306  *
   1307  * XXX This code is not currently used. GO
   1308  */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode;

	/* Queue the request for the sparetable daemon and wake it up. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * Wait for the daemon's response.  NOTE(review): this comment
	 * used to read "mpsleep unlocks the mutex", but tsleep() does
	 * *not* release RF_LOCK_MUTEX -- if that macro is a real
	 * blocking lock, we sleep while holding it.  Confirm the
	 * semantics of RF_LOCK_MUTEX before relying on this path.
	 */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* Pop the response; from here on req points at the daemon's
	 * response entry, not the request we were handed. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The daemon passes its installation status back in fcol. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
   1334 
   1335 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1336  * bp & passes it down.
   1337  * any calls originating in the kernel must use non-blocking I/O
   1338  * do some extra sanity checking to return "appropriate" error values for
   1339  * certain conditions (to make some standard utilities work)
   1340  *
   1341  * Formerly known as: rf_DoAccessKernel
   1342  */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int     retcode;
	struct partition *pp;
	daddr_t blocknum;
	int     unit;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Check to see if we're at the limit...  The mutex protects
	 * raidPtr->openings; it is dropped while we manipulate the buf
	 * queue and re-taken at the bottom of each iteration. */
	RF_LOCK_MUTEX(raidPtr->mutex);
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}
		BUFQ_REMOVE(&rs->buf_queue, bp);

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* NOTE(review): "1 ||" forces this branch regardless of
		 * rf_debugKernelAccess -- presumably db1_printf() is itself
		 * gated on a debug setting; verify before removing. */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Bounds check; the "sum <" comparisons catch wraparound
		 * in the raid_addr + num_blocks + pb addition above. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that aren't a whole number of sectors. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));

		/* Consume one opening for this access. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */


		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				      do_async, raid_addr, num_blocks,
				      bp->b_un.b_addr, bp, NULL, NULL,
				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);


		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
   1452 
   1453 
   1454 
   1455 
   1456 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1457 
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int     unit;
	int s;

	s=0;
	/* s = splbio();*/ /* want to test this */
	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	/* Accounting: paired with the disk_unbusy() in KernelWakeupFunc(). */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* Per-request buf from the softc's pool; released via RAIDPUTBUF()
	 * in KernelWakeupFunc() when the I/O completes. */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this??  */

		/* Complete immediately; no physical I/O is issued. */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* Set up the component buf; completion comes back through
		 * KernelWakeupFunc() via b_iodone. */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		/* Hand the buf to the component's driver. */
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	/* splx(s); */ /* want to test this */
	return (0);
}
   1572 /* this is the callback function associated with a I/O invoked from
   1573    kernel code.
   1574  */
   1575 static void
   1576 KernelWakeupFunc(vbp)
   1577 	struct buf *vbp;
   1578 {
   1579 	RF_DiskQueueData_t *req = NULL;
   1580 	RF_DiskQueue_t *queue;
   1581 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1582 	struct buf *bp;
   1583 	struct raid_softc *rs;
   1584 	int     unit;
   1585 	register int s;
   1586 
   1587 	s = splbio();
   1588 	db1_printf(("recovering the request queue:\n"));
   1589 	req = raidbp->req;
   1590 
   1591 	bp = raidbp->rf_obp;
   1592 
   1593 	queue = (RF_DiskQueue_t *) req->queue;
   1594 
   1595 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1596 		bp->b_flags |= B_ERROR;
   1597 		bp->b_error = raidbp->rf_buf.b_error ?
   1598 		    raidbp->rf_buf.b_error : EIO;
   1599 	}
   1600 
   1601 	/* XXX methinks this could be wrong... */
   1602 #if 1
   1603 	bp->b_resid = raidbp->rf_buf.b_resid;
   1604 #endif
   1605 
   1606 	if (req->tracerec) {
   1607 		RF_ETIMER_STOP(req->tracerec->timer);
   1608 		RF_ETIMER_EVAL(req->tracerec->timer);
   1609 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1610 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1611 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1612 		req->tracerec->num_phys_ios++;
   1613 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1614 	}
   1615 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1616 
   1617 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1618 
   1619 
   1620 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1621 	 * ballistic, and mark the component as hosed... */
   1622 
   1623 	if (bp->b_flags & B_ERROR) {
   1624 		/* Mark the disk as dead */
   1625 		/* but only mark it once... */
   1626 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1627 		    rf_ds_optimal) {
   1628 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1629 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1630 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1631 			    rf_ds_failed;
   1632 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1633 			queue->raidPtr->numFailures++;
   1634 			/* XXX here we should bump the version number for each component, and write that data out */
   1635 		} else {	/* Disk is already dead... */
   1636 			/* printf("Disk already marked as dead!\n"); */
   1637 		}
   1638 
   1639 	}
   1640 
   1641 	rs = &raid_softc[unit];
   1642 	RAIDPUTBUF(rs, raidbp);
   1643 
   1644 
   1645 	if (bp->b_resid == 0) {
   1646 		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
   1647 		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
   1648 	}
   1649 
   1650 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1651 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1652 
   1653 	splx(s);
   1654 }
   1655 
   1656 
   1657 
   1658 /*
   1659  * initialize a buf structure for doing an I/O in the kernel.
   1660  */
   1661 static void
   1662 InitBP(
   1663     struct buf * bp,
   1664     struct vnode * b_vp,
   1665     unsigned rw_flag,
   1666     dev_t dev,
   1667     RF_SectorNum_t startSect,
   1668     RF_SectorCount_t numSect,
   1669     caddr_t buf,
   1670     void (*cbFunc) (struct buf *),
   1671     void *cbArg,
   1672     int logBytesPerSector,
   1673     struct proc * b_proc)
   1674 {
   1675 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1676 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1677 	bp->b_bcount = numSect << logBytesPerSector;
   1678 	bp->b_bufsize = bp->b_bcount;
   1679 	bp->b_error = 0;
   1680 	bp->b_dev = dev;
   1681 	bp->b_un.b_addr = buf;
   1682 	bp->b_blkno = startSect;
   1683 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1684 	if (bp->b_bcount == 0) {
   1685 		panic("bp->b_bcount is zero in InitBP!!\n");
   1686 	}
   1687 	bp->b_proc = b_proc;
   1688 	bp->b_iodone = cbFunc;
   1689 	bp->b_vp = b_vp;
   1690 
   1691 }
   1692 
   1693 static void
   1694 raidgetdefaultlabel(raidPtr, rs, lp)
   1695 	RF_Raid_t *raidPtr;
   1696 	struct raid_softc *rs;
   1697 	struct disklabel *lp;
   1698 {
   1699 	db1_printf(("Building a default label...\n"));
   1700 	bzero(lp, sizeof(*lp));
   1701 
   1702 	/* fabricate a label... */
   1703 	lp->d_secperunit = raidPtr->totalSectors;
   1704 	lp->d_secsize = raidPtr->bytesPerSector;
   1705 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1706 	lp->d_ntracks = 1;
   1707 	lp->d_ncylinders = raidPtr->totalSectors /
   1708 		(lp->d_nsectors * lp->d_ntracks);
   1709 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1710 
   1711 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1712 	lp->d_type = DTYPE_RAID;
   1713 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1714 	lp->d_rpm = 3600;
   1715 	lp->d_interleave = 1;
   1716 	lp->d_flags = 0;
   1717 
   1718 	lp->d_partitions[RAW_PART].p_offset = 0;
   1719 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1720 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1721 	lp->d_npartitions = RAW_PART + 1;
   1722 
   1723 	lp->d_magic = DISKMAGIC;
   1724 	lp->d_magic2 = DISKMAGIC;
   1725 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1726 
   1727 }
   1728 /*
   1729  * Read the disklabel from the raid device.  If one is not present, fake one
   1730  * up.
   1731  */
   1732 static void
   1733 raidgetdisklabel(dev)
   1734 	dev_t   dev;
   1735 {
   1736 	int     unit = raidunit(dev);
   1737 	struct raid_softc *rs = &raid_softc[unit];
   1738 	char   *errstring;
   1739 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1740 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1741 	RF_Raid_t *raidPtr;
   1742 
   1743 	db1_printf(("Getting the disklabel...\n"));
   1744 
   1745 	bzero(clp, sizeof(*clp));
   1746 
   1747 	raidPtr = raidPtrs[unit];
   1748 
   1749 	raidgetdefaultlabel(raidPtr, rs, lp);
   1750 
   1751 	/*
   1752 	 * Call the generic disklabel extraction routine.
   1753 	 */
   1754 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1755 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1756 	if (errstring)
   1757 		raidmakedisklabel(rs);
   1758 	else {
   1759 		int     i;
   1760 		struct partition *pp;
   1761 
   1762 		/*
   1763 		 * Sanity check whether the found disklabel is valid.
   1764 		 *
   1765 		 * This is necessary since total size of the raid device
   1766 		 * may vary when an interleave is changed even though exactly
   1767 		 * same componets are used, and old disklabel may used
   1768 		 * if that is found.
   1769 		 */
   1770 		if (lp->d_secperunit != rs->sc_size)
   1771 			printf("WARNING: %s: "
   1772 			    "total sector size in disklabel (%d) != "
   1773 			    "the size of raid (%ld)\n", rs->sc_xname,
   1774 			    lp->d_secperunit, (long) rs->sc_size);
   1775 		for (i = 0; i < lp->d_npartitions; i++) {
   1776 			pp = &lp->d_partitions[i];
   1777 			if (pp->p_offset + pp->p_size > rs->sc_size)
   1778 				printf("WARNING: %s: end of partition `%c' "
   1779 				    "exceeds the size of raid (%ld)\n",
   1780 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   1781 		}
   1782 	}
   1783 
   1784 }
   1785 /*
   1786  * Take care of things one might want to take care of in the event
   1787  * that a disklabel isn't present.
   1788  */
   1789 static void
   1790 raidmakedisklabel(rs)
   1791 	struct raid_softc *rs;
   1792 {
   1793 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1794 	db1_printf(("Making a label..\n"));
   1795 
   1796 	/*
   1797 	 * For historical reasons, if there's no disklabel present
   1798 	 * the raw partition must be marked FS_BSDFFS.
   1799 	 */
   1800 
   1801 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1802 
   1803 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1804 
   1805 	lp->d_checksum = dkcksum(lp);
   1806 }
   1807 /*
   1808  * Lookup the provided name in the filesystem.  If the file exists,
   1809  * is a valid block device, and isn't being used by anyone else,
   1810  * set *vpp to the file's vnode.
   1811  * You'll find the original of this in ccd.c
   1812  */
   1813 int
   1814 raidlookup(path, p, vpp)
   1815 	char   *path;
   1816 	struct proc *p;
   1817 	struct vnode **vpp;	/* result */
   1818 {
   1819 	struct nameidata nd;
   1820 	struct vnode *vp;
   1821 	struct vattr va;
   1822 	int     error;
   1823 
   1824 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   1825 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   1826 #ifdef DEBUG
   1827 		printf("RAIDframe: vn_open returned %d\n", error);
   1828 #endif
   1829 		return (error);
   1830 	}
   1831 	vp = nd.ni_vp;
   1832 	if (vp->v_usecount > 1) {
   1833 		VOP_UNLOCK(vp, 0);
   1834 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1835 		return (EBUSY);
   1836 	}
   1837 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   1838 		VOP_UNLOCK(vp, 0);
   1839 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1840 		return (error);
   1841 	}
   1842 	/* XXX: eventually we should handle VREG, too. */
   1843 	if (va.va_type != VBLK) {
   1844 		VOP_UNLOCK(vp, 0);
   1845 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1846 		return (ENOTBLK);
   1847 	}
   1848 	VOP_UNLOCK(vp, 0);
   1849 	*vpp = vp;
   1850 	return (0);
   1851 }
   1852 /*
   1853  * Wait interruptibly for an exclusive lock.
   1854  *
   1855  * XXX
   1856  * Several drivers do this; it should be abstracted and made MP-safe.
   1857  * (Hmm... where have we seen this warning before :->  GO )
   1858  */
   1859 static int
   1860 raidlock(rs)
   1861 	struct raid_softc *rs;
   1862 {
   1863 	int     error;
   1864 
   1865 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   1866 		rs->sc_flags |= RAIDF_WANTED;
   1867 		if ((error =
   1868 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   1869 			return (error);
   1870 	}
   1871 	rs->sc_flags |= RAIDF_LOCKED;
   1872 	return (0);
   1873 }
   1874 /*
   1875  * Unlock and wake up any waiters.
   1876  */
   1877 static void
   1878 raidunlock(rs)
   1879 	struct raid_softc *rs;
   1880 {
   1881 
   1882 	rs->sc_flags &= ~RAIDF_LOCKED;
   1883 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   1884 		rs->sc_flags &= ~RAIDF_WANTED;
   1885 		wakeup(rs);
   1886 	}
   1887 }
   1888 
   1889 
   1890 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   1891 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   1892 
   1893 int
   1894 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   1895 {
   1896 	RF_ComponentLabel_t component_label;
   1897 	raidread_component_label(dev, b_vp, &component_label);
   1898 	component_label.mod_counter = mod_counter;
   1899 	component_label.clean = RF_RAID_CLEAN;
   1900 	raidwrite_component_label(dev, b_vp, &component_label);
   1901 	return(0);
   1902 }
   1903 
   1904 
   1905 int
   1906 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   1907 {
   1908 	RF_ComponentLabel_t component_label;
   1909 	raidread_component_label(dev, b_vp, &component_label);
   1910 	component_label.mod_counter = mod_counter;
   1911 	component_label.clean = RF_RAID_DIRTY;
   1912 	raidwrite_component_label(dev, b_vp, &component_label);
   1913 	return(0);
   1914 }
   1915 
/* ARGSUSED */
/*
 * Read the RAIDframe component label from `dev' into *component_label.
 * The label is stored at fixed byte offset RF_COMPONENT_INFO_OFFSET on
 * the component.  I/O is issued directly through the block device's
 * strategy routine, bypassing the filesystem (b_vp is unused).
 * Returns 0 on success, else the error from biowait().
 */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* hand the buffer straight to the device's strategy routine
	   and wait for the I/O to complete */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		/* only the leading sizeof(RF_ComponentLabel_t) bytes of
		   the RF_COMPONENT_INFO_SIZE region hold the label */
		memcpy(component_label, bp->b_un.b_addr,
		       sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
        } else {
		printf("Failed to read RAID component label!\n");
	}

	/* invalidate the buffer so stale label data isn't cached */
        bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
/* ARGSUSED */
/*
 * Write *component_label to the RAIDframe label area of `dev', at fixed
 * byte offset RF_COMPONENT_INFO_OFFSET.  The rest of the
 * RF_COMPONENT_INFO_SIZE region is zero-filled.  I/O goes directly
 * through the block device's strategy routine (b_vp is unused).
 * Returns 0 on success, else the error from biowait().
 */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero the whole region, then place the label at its start */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* invalidate the buffer so stale label data isn't cached */
        bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
   2000 
   2001 void
   2002 rf_markalldirty( raidPtr )
   2003 	RF_Raid_t *raidPtr;
   2004 {
   2005 	RF_ComponentLabel_t c_label;
   2006 	int r,c;
   2007 
   2008 	raidPtr->mod_counter++;
   2009 	for (r = 0; r < raidPtr->numRow; r++) {
   2010 		for (c = 0; c < raidPtr->numCol; c++) {
   2011 			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
   2012 				raidread_component_label(
   2013 					raidPtr->Disks[r][c].dev,
   2014 					raidPtr->raid_cinfo[r][c].ci_vp,
   2015 					&c_label);
   2016 				if (c_label.status == rf_ds_spared) {
   2017 					/* XXX do something special...
   2018 					 but whatever you do, don't
   2019 					 try to access it!! */
   2020 				} else {
   2021 #if 0
   2022 				c_label.status =
   2023 					raidPtr->Disks[r][c].status;
   2024 				raidwrite_component_label(
   2025 					raidPtr->Disks[r][c].dev,
   2026 					raidPtr->raid_cinfo[r][c].ci_vp,
   2027 					&c_label);
   2028 #endif
   2029 				raidmarkdirty(
   2030 				       raidPtr->Disks[r][c].dev,
   2031 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2032 				       raidPtr->mod_counter);
   2033 				}
   2034 			}
   2035 		}
   2036 	}
   2037 	/* printf("Component labels marked dirty.\n"); */
   2038 #if 0
   2039 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2040 		sparecol = raidPtr->numCol + c;
   2041 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2042 			/*
   2043 
   2044 			   XXX this is where we get fancy and map this spare
   2045 			   into it's correct spot in the array.
   2046 
   2047 			 */
   2048 			/*
   2049 
   2050 			   we claim this disk is "optimal" if it's
   2051 			   rf_ds_used_spare, as that means it should be
   2052 			   directly substitutable for the disk it replaced.
   2053 			   We note that too...
   2054 
   2055 			 */
   2056 
   2057 			for(i=0;i<raidPtr->numRow;i++) {
   2058 				for(j=0;j<raidPtr->numCol;j++) {
   2059 					if ((raidPtr->Disks[i][j].spareRow ==
   2060 					     r) &&
   2061 					    (raidPtr->Disks[i][j].spareCol ==
   2062 					     sparecol)) {
   2063 						srow = r;
   2064 						scol = sparecol;
   2065 						break;
   2066 					}
   2067 				}
   2068 			}
   2069 
   2070 			raidread_component_label(
   2071 				      raidPtr->Disks[r][sparecol].dev,
   2072 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2073 				      &c_label);
   2074 			/* make sure status is noted */
   2075 			c_label.version = RF_COMPONENT_LABEL_VERSION;
   2076 			c_label.mod_counter = raidPtr->mod_counter;
   2077 			c_label.serial_number = raidPtr->serial_number;
   2078 			c_label.row = srow;
   2079 			c_label.column = scol;
   2080 			c_label.num_rows = raidPtr->numRow;
   2081 			c_label.num_columns = raidPtr->numCol;
   2082 			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2083 			c_label.status = rf_ds_optimal;
   2084 			raidwrite_component_label(
   2085 				      raidPtr->Disks[r][sparecol].dev,
   2086 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2087 				      &c_label);
   2088 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2089 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2090 		}
   2091 	}
   2092 
   2093 #endif
   2094 }
   2095 
   2096 
/*
 * Re-write the component labels of every optimal component and every
 * in-use spare to reflect the array's current state, bumping the
 * modification counter.  If parity is known good, the clean bit is
 * also set on each label.
 */
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	/* (srow,scol) record which failed disk a spare stands in for;
	   -1 means "not found" */
	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	/* Pass 1: refresh the label of every optimal component. */
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			else if (raidPtr->Disks[r][c].status !=
				   rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	/* Pass 2: refresh the labels of in-use spares.  Spares are
	   addressed as row 0, columns numCol..numCol+numSpare-1 here. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* search the array for the disk this spare
			   replaced, so the label can record the spare's
			   logical (row,column) position */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
					       raidPtr->mod_counter);
			}
		}
	}
	/* 	printf("Component labels updated\n"); */
}
   2213 
   2214 void
   2215 rf_ReconThread(req)
   2216 	struct rf_recon_req *req;
   2217 {
   2218 	int     s;
   2219 	RF_Raid_t *raidPtr;
   2220 
   2221 	s = splbio();
   2222 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2223 	raidPtr->recon_in_progress = 1;
   2224 
   2225 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2226 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2227 
   2228 	/* XXX get rid of this! we don't need it at all.. */
   2229 	RF_Free(req, sizeof(*req));
   2230 
   2231 	raidPtr->recon_in_progress = 0;
   2232 	splx(s);
   2233 
   2234 	/* That's all... */
   2235 	kthread_exit(0);        /* does not return */
   2236 }
   2237 
   2238 void
   2239 rf_RewriteParityThread(raidPtr)
   2240 	RF_Raid_t *raidPtr;
   2241 {
   2242 	int retcode;
   2243 	int s;
   2244 
   2245 	raidPtr->parity_rewrite_in_progress = 1;
   2246 	s = splbio();
   2247 	retcode = rf_RewriteParity(raidPtr);
   2248 	splx(s);
   2249 	if (retcode) {
   2250 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2251 	} else {
   2252 		/* set the clean bit!  If we shutdown correctly,
   2253 		   the clean bit on each component label will get
   2254 		   set */
   2255 		raidPtr->parity_good = RF_RAID_CLEAN;
   2256 	}
   2257 	raidPtr->parity_rewrite_in_progress = 0;
   2258 
   2259 	/* That's all... */
   2260 	kthread_exit(0);        /* does not return */
   2261 }
   2262 
   2263 
   2264 void
   2265 rf_CopybackThread(raidPtr)
   2266 	RF_Raid_t *raidPtr;
   2267 {
   2268 	int s;
   2269 
   2270 	raidPtr->copyback_in_progress = 1;
   2271 	s = splbio();
   2272 	rf_CopybackReconstructedData(raidPtr);
   2273 	splx(s);
   2274 	raidPtr->copyback_in_progress = 0;
   2275 
   2276 	/* That's all... */
   2277 	kthread_exit(0);        /* does not return */
   2278 }
   2279 
   2280 
   2281 void
   2282 rf_ReconstructInPlaceThread(req)
   2283 	struct rf_recon_req *req;
   2284 {
   2285 	int retcode;
   2286 	int s;
   2287 	RF_Raid_t *raidPtr;
   2288 
   2289 	s = splbio();
   2290 	raidPtr = req->raidPtr;
   2291 	raidPtr->recon_in_progress = 1;
   2292 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2293 	RF_Free(req, sizeof(*req));
   2294 	raidPtr->recon_in_progress = 0;
   2295 	splx(s);
   2296 
   2297 	/* That's all... */
   2298 	kthread_exit(0);        /* does not return */
   2299 }
   2300