      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.12 1999/03/02 03:18:49 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
     112  * rf_netbsdkintf.c -- the NetBSD kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "rf_raid.h"
    139 #include "rf_raidframe.h"
    140 #include "rf_dag.h"
    141 #include "rf_dagflags.h"
    142 #include "rf_diskqueue.h"
    143 #include "rf_acctrace.h"
    144 #include "rf_etimer.h"
    145 #include "rf_general.h"
    146 #include "rf_debugMem.h"
    147 #include "rf_kintf.h"
    148 #include "rf_options.h"
    149 #include "rf_driver.h"
    150 #include "rf_parityscan.h"
    151 #include "rf_debugprint.h"
    152 #include "rf_threadstuff.h"
    153 
    154 int     rf_kdebug_level = 0;
    155 
    156 #define RFK_BOOT_NONE 0
    157 #define RFK_BOOT_GOOD 1
    158 #define RFK_BOOT_BAD  2
    159 static int rf_kbooted = RFK_BOOT_NONE;
    160 
    161 #ifdef DEBUG
    162 #define db0_printf(a) printf a
    163 #define db_printf(a) if (rf_kdebug_level > 0) printf a
    164 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    165 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
    166 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
    167 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
    168 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
    169 #else				/* DEBUG */
    170 #define db0_printf(a) printf a
    171 #define db1_printf(a) { }
    172 #define db2_printf(a) { }
    173 #define db3_printf(a) { }
    174 #define db4_printf(a) { }
    175 #define db5_printf(a) { }
    176 #endif				/* DEBUG */
    177 
    178 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    179 
    180 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    181 
    182 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    183 						 * spare table */
    184 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    185 						 * installation process */
    186 
    187 static struct rf_recon_req *recon_queue = NULL;	/* used to communicate
    188 						 * reconstruction
    189 						 * requests */
    190 
    191 
    192 decl_simple_lock_data(, recon_queue_mutex)
    193 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
    194 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
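         /*
          * Protocol: raidioctl(RAIDFRAME_FAIL_DISK) pushes a request onto
          * recon_queue under recon_queue_mutex and wakeup()s the queue;
          * rf_ReconKernelThread() pops requests off and services them.
          */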
    195 
    196 /* prototypes */
    197 static void KernelWakeupFunc(struct buf * bp);
    198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    199 		   dev_t dev, RF_SectorNum_t startSect,
    200 		   RF_SectorCount_t numSect, caddr_t buf,
    201 		   void (*cbFunc) (struct buf *), void *cbArg,
    202 		   int logBytesPerSector, struct proc * b_proc);
    203 
    204 #define Dprintf0(s)       if (rf_queueDebug) \
    205      rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
    206 #define Dprintf1(s,a)     if (rf_queueDebug) \
    207      rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
    208 #define Dprintf2(s,a,b)   if (rf_queueDebug) \
    209      rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
    210 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
    211      rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
    212 
    213 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
    214 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
    215 
    216 void  raid_shutdown(void *);
    217 
    218 void raidattach __P((int));
    219 int raidsize __P((dev_t));
    220 
    221 void    rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
    222 void    rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
    223 static int raidinit __P((dev_t, RF_Raid_t *, int));
    224 
    225 int raidopen __P((dev_t, int, int, struct proc *));
    226 int raidclose __P((dev_t, int, int, struct proc *));
    227 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
    228 int raidwrite __P((dev_t, struct uio *, int));
    229 int raidread __P((dev_t, struct uio *, int));
    230 void raidstrategy __P((struct buf *));
    231 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    232 
    233 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
    234 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
    235 
    236 /*
    237  * Pilfered from ccd.c
    238  */
    239 
    240 struct raidbuf {
    241 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
    242 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    243 	int     rf_flags;	/* misc. flags */
    244 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
    245 };
    246 
    247 
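         /*
          * Component buffer headers are carved from the per-unit pool set up
          * in raidinit().  Note that PR_NOWAIT means RAIDGETBUF() can return
          * NULL when the pool is exhausted.
          */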
    248 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
    249 #define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    250 
    251 /* XXX Not sure if the following should be replacing the raidPtrs above,
    252    or if it should be used in conjunction with that... */
    253 
    254 struct raid_softc {
    255 	int     sc_flags;	/* flags */
    256 	int     sc_cflags;	/* configuration flags */
    257 	size_t  sc_size;        /* size of the raid device */
    258 	dev_t   sc_dev;	        /* our device.. */
    259  	void *  sc_sdhook;      /* our shutdown hook */
    260 	char    sc_xname[20];	/* XXX external name */
    261 	struct disk sc_dkdev;	/* generic disk device info */
    262 	struct pool sc_cbufpool;	/* component buffer pool */
    263 };
    264 /* sc_flags */
    265 #define RAIDF_INITED	0x01	/* unit has been initialized */
    266 #define RAIDF_WLABEL	0x02	/* label area is writable */
    267 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    268 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    269 #define RAIDF_LOCKED	0x80	/* unit is locked */
    270 
    271 #define	raidunit(x)	DISKUNIT(x)
    272 static int numraid = 0;
    273 
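         /* The disklabel lives on the raw partition of the same unit. */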
    274 #define RAIDLABELDEV(dev)	\
    275 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    276 
    277 /* declared here, and made public, for the benefit of KVM stuff.. */
    278 struct raid_softc *raid_softc;
    279 
    280 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
    281 				     struct disklabel *));
    282 static void raidgetdisklabel __P((dev_t));
    283 static void raidmakedisklabel __P((struct raid_softc *));
    284 
    285 static int raidlock __P((struct raid_softc *));
    286 static void raidunlock __P((struct raid_softc *));
    287 int raidlookup __P((char *, struct proc * p, struct vnode **));
    288 
    289 static void rf_markalldirty __P((RF_Raid_t *));
    290 
    291 void
    292 raidattach(num)
    293 	int     num;
    294 {
    295 	int     raidID;
    296 
    297 #ifdef DEBUG
    298 	printf("raidattach: Asked for %d units\n", num);
    299 #endif
    300 
    301 	if (num <= 0) {
    302 #ifdef DIAGNOSTIC
    303 		panic("raidattach: count <= 0");
    304 #endif
    305 		return;
    306 	}
    307 	/* This is where all the initialization stuff gets done. */
    308 
    309 	/* Make some space for requested number of units... */
    310 
    311 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    312 	if (raidPtrs == NULL) {
    313 		panic("raidPtrs is NULL!!\n");
    314 	}
    315 	rf_kbooted = rf_boot();
    316 	if (rf_kbooted) {
    317 		panic("Serious error booting RAID!!\n");
    318 	}
    319 	rf_kbooted = RFK_BOOT_GOOD;
    320 
     321 	/* put together some data structures like the CCD device does.  This
     322 	 * lets us lock the device and what-not when it gets opened. */
    323 
    324 	raid_softc = (struct raid_softc *)
    325 	    malloc(num * sizeof(struct raid_softc),
    326 	    M_RAIDFRAME, M_NOWAIT);
    327 	if (raid_softc == NULL) {
    328 		printf("WARNING: no memory for RAIDframe driver\n");
    329 		return;
    330 	}
    331 	numraid = num;
    332 	bzero(raid_softc, num * sizeof(struct raid_softc));
    333 
    334 	for (raidID = 0; raidID < num; raidID++) {
    335 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    336 			  (RF_Raid_t *));
    337 		if (raidPtrs[raidID] == NULL) {
    338 			printf("raidPtrs[%d] is NULL\n", raidID);
    339 		}
    340 	}
    341 }
    342 
    343 
    344 int
    345 raidsize(dev)
    346 	dev_t   dev;
    347 {
    348 	struct raid_softc *rs;
    349 	struct disklabel *lp;
    350 	int     part, unit, omask, size;
    351 
    352 	unit = raidunit(dev);
    353 	if (unit >= numraid)
    354 		return (-1);
    355 	rs = &raid_softc[unit];
    356 
    357 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    358 		return (-1);
    359 
    360 	part = DISKPART(dev);
    361 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    362 	lp = rs->sc_dkdev.dk_label;
    363 
    364 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    365 		return (-1);
    366 
    367 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    368 		size = -1;
    369 	else
    370 		size = lp->d_partitions[part].p_size *
    371 		    (lp->d_secsize / DEV_BSIZE);
    372 
    373 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    374 		return (-1);
    375 
    376 	return (size);
    377 
    378 }
    379 
    380 int
    381 raiddump(dev, blkno, va, size)
    382 	dev_t   dev;
    383 	daddr_t blkno;
    384 	caddr_t va;
    385 	size_t  size;
    386 {
    387 	/* Not implemented. */
    388 	return ENXIO;
    389 }
    390 /* ARGSUSED */
    391 int
    392 raidopen(dev, flags, fmt, p)
    393 	dev_t   dev;
    394 	int     flags, fmt;
    395 	struct proc *p;
    396 {
    397 	int     unit = raidunit(dev);
    398 	struct raid_softc *rs;
    399 	struct disklabel *lp;
    400 	int     part, pmask;
    401 	unsigned int raidID;
    402 	int     rc;
    403 	int     error = 0;
    404 
    405 	/* This whole next chunk of code is somewhat suspect... Not sure it's
    406 	 * needed here at all... XXX */
    407 
    408 	if (rf_kbooted == RFK_BOOT_NONE) {
    409 		printf("Doing restart on raidopen.\n");
    410 		rf_kbooted = RFK_BOOT_GOOD;
    411 		rc = rf_boot();
    412 		if (rc) {
    413 			rf_kbooted = RFK_BOOT_BAD;
    414 			printf("Someone is unhappy...\n");
    415 			return (rc);
    416 		}
    417 	}
    418 	if (unit >= numraid)
    419 		return (ENXIO);
    420 	rs = &raid_softc[unit];
    421 
    422 	if ((error = raidlock(rs)) != 0)
    423 		return (error);
    424 	lp = rs->sc_dkdev.dk_label;
    425 
    426 	raidID = raidunit(dev);
    427 
    428 	part = DISKPART(dev);
    429 	pmask = (1 << part);
    430 
    431 	db1_printf(("Opening raid device number: %d partition: %d\n",
    432 		raidID, part));
    433 
    434 
    435 	if ((rs->sc_flags & RAIDF_INITED) &&
    436 	    (rs->sc_dkdev.dk_openmask == 0))
    437 		raidgetdisklabel(dev);
    438 
    439 	/* make sure that this partition exists */
    440 
    441 	if (part != RAW_PART) {
    442 		db1_printf(("Not a raw partition..\n"));
    443 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    444 		    ((part >= lp->d_npartitions) ||
    445 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    446 			error = ENXIO;
    447 			raidunlock(rs);
    448 			db1_printf(("Bailing out...\n"));
    449 			return (error);
    450 		}
    451 	}
    452 	/* Prevent this unit from being unconfigured while open. */
    453 	switch (fmt) {
    454 	case S_IFCHR:
    455 		rs->sc_dkdev.dk_copenmask |= pmask;
    456 		break;
    457 
    458 	case S_IFBLK:
    459 		rs->sc_dkdev.dk_bopenmask |= pmask;
    460 		break;
    461 	}
    462 	rs->sc_dkdev.dk_openmask =
    463 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    464 
    465 	raidunlock(rs);
    466 
    467 	return (error);
    468 
    469 
    470 }
    471 /* ARGSUSED */
    472 int
    473 raidclose(dev, flags, fmt, p)
    474 	dev_t   dev;
    475 	int     flags, fmt;
    476 	struct proc *p;
    477 {
    478 	int     unit = raidunit(dev);
    479 	struct raid_softc *rs;
    480 	int     error = 0;
    481 	int     part;
    482 
    483 	if (unit >= numraid)
    484 		return (ENXIO);
    485 	rs = &raid_softc[unit];
    486 
    487 	if ((error = raidlock(rs)) != 0)
    488 		return (error);
    489 
    490 	part = DISKPART(dev);
    491 
    492 	/* ...that much closer to allowing unconfiguration... */
    493 	switch (fmt) {
    494 	case S_IFCHR:
    495 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    496 		break;
    497 
    498 	case S_IFBLK:
    499 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    500 		break;
    501 	}
    502 	rs->sc_dkdev.dk_openmask =
    503 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    504 
    505 	raidunlock(rs);
    506 	return (0);
    507 
    508 }
    509 
    510 void
    511 raidstrategy(bp)
    512 	register struct buf *bp;
    513 {
    514 	register int s;
    515 
    516 	unsigned int raidID = raidunit(bp->b_dev);
    517 	RF_Raid_t *raidPtr;
    518 	struct raid_softc *rs = &raid_softc[raidID];
    519 	struct disklabel *lp;
    520 	int     wlabel;
    521 
    522 #if 0
    523 	db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
    524 	db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
    525 	db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
    526 	db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
    527 	db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
    528 
    529 	if (bp->b_flags & B_READ)
    530 		db1_printf(("READ\n"));
    531 	else
    532 		db1_printf(("WRITE\n"));
    533 #endif
    534 	if (rf_kbooted != RFK_BOOT_GOOD)
    535 		return;
    536 	if (raidID >= numraid || !raidPtrs[raidID]) {
    537 		bp->b_error = ENODEV;
    538 		bp->b_flags |= B_ERROR;
    539 		bp->b_resid = bp->b_bcount;
    540 		biodone(bp);
    541 		return;
    542 	}
    543 	raidPtr = raidPtrs[raidID];
    544 	if (!raidPtr->valid) {
    545 		bp->b_error = ENODEV;
    546 		bp->b_flags |= B_ERROR;
    547 		bp->b_resid = bp->b_bcount;
    548 		biodone(bp);
    549 		return;
    550 	}
    551 	if (bp->b_bcount == 0) {
    552 		db1_printf(("b_bcount is zero..\n"));
    553 		biodone(bp);
    554 		return;
    555 	}
    556 	lp = rs->sc_dkdev.dk_label;
    557 
    558 	/*
    559 	 * Do bounds checking and adjust transfer.  If there's an
    560 	 * error, the bounds check will flag that for us.
    561 	 */
    562 
    563 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    564 	if (DISKPART(bp->b_dev) != RAW_PART)
    565 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    566 			db1_printf(("Bounds check failed!!:%d %d\n",
    567 				(int) bp->b_blkno, (int) wlabel));
    568 			biodone(bp);
    569 			return;
    570 		}
    571 	s = splbio();		/* XXX Needed? */
    572 	db1_printf(("Beginning strategy...\n"));
    573 
    574 	bp->b_resid = 0;
    575 	bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
    576 	    NULL, NULL, NULL);
    577 	if (bp->b_error) {
    578 		bp->b_flags |= B_ERROR;
    579 		db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
    580 			bp->b_error));
    581 	}
    582 	splx(s);
    583 #if 0
    584 	db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
    585 		bp, bp->b_data,
    586 		(int) bp->b_bcount, (int) bp->b_resid));
    587 #endif
    588 }
    589 /* ARGSUSED */
    590 int
    591 raidread(dev, uio, flags)
    592 	dev_t   dev;
    593 	struct uio *uio;
    594 	int     flags;
    595 {
    596 	int     unit = raidunit(dev);
    597 	struct raid_softc *rs;
    598 	int     part;
    599 
    600 	if (unit >= numraid)
    601 		return (ENXIO);
    602 	rs = &raid_softc[unit];
    603 
    604 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    605 		return (ENXIO);
    606 	part = DISKPART(dev);
    607 
    608 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    609 
    610 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    611 
    612 }
    613 /* ARGSUSED */
    614 int
    615 raidwrite(dev, uio, flags)
    616 	dev_t   dev;
    617 	struct uio *uio;
    618 	int     flags;
    619 {
    620 	int     unit = raidunit(dev);
    621 	struct raid_softc *rs;
    622 
    623 	if (unit >= numraid)
    624 		return (ENXIO);
    625 	rs = &raid_softc[unit];
    626 
    627 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    628 		return (ENXIO);
    629 	db1_printf(("raidwrite\n"));
    630 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    631 
    632 }
    633 
    634 int
    635 raidioctl(dev, cmd, data, flag, p)
    636 	dev_t   dev;
    637 	u_long  cmd;
    638 	caddr_t data;
    639 	int     flag;
    640 	struct proc *p;
    641 {
    642 	int     unit = raidunit(dev);
    643 	int     error = 0;
    644 	int     part, pmask;
    645 	struct raid_softc *rs;
    646 #if 0
    647 	int     r, c;
    648 #endif
    649 	/* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
    650 
    651 	/* struct ccdbuf *cbp; */
    652 	/* struct raidbuf *raidbp; */
    653 	RF_Config_t *k_cfg, *u_cfg;
    654 	u_char *specific_buf;
    655 	int retcode = 0;
    656 	int row;
    657 	int column;
    658 	struct rf_recon_req *rrcopy, *rr;
    659 	RF_ComponentLabel_t *component_label;
    660 	RF_ComponentLabel_t ci_label;
    661 	RF_ComponentLabel_t **c_label_ptr;
    662 	RF_SingleComponent_t *sparePtr,*componentPtr;
    663 	RF_SingleComponent_t hot_spare;
    664 	RF_SingleComponent_t component;
    665 
    666 	if (unit >= numraid)
    667 		return (ENXIO);
    668 	rs = &raid_softc[unit];
    669 
    670 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    671 		(int) DISKPART(dev), (int) unit, (int) cmd));
    672 
    673 	/* Must be open for writes for these commands... */
    674 	switch (cmd) {
    675 	case DIOCSDINFO:
    676 	case DIOCWDINFO:
    677 	case DIOCWLABEL:
    678 		if ((flag & FWRITE) == 0)
    679 			return (EBADF);
    680 	}
    681 
    682 	/* Must be initialized for these... */
    683 	switch (cmd) {
    684 	case DIOCGDINFO:
    685 	case DIOCSDINFO:
    686 	case DIOCWDINFO:
    687 	case DIOCGPART:
    688 	case DIOCWLABEL:
    689 	case DIOCGDEFLABEL:
    690 	case RAIDFRAME_SHUTDOWN:
    691 	case RAIDFRAME_REWRITEPARITY:
    692 	case RAIDFRAME_GET_INFO:
    693 	case RAIDFRAME_RESET_ACCTOTALS:
    694 	case RAIDFRAME_GET_ACCTOTALS:
    695 	case RAIDFRAME_KEEP_ACCTOTALS:
    696 	case RAIDFRAME_GET_SIZE:
    697 	case RAIDFRAME_FAIL_DISK:
    698 	case RAIDFRAME_COPYBACK:
    699 	case RAIDFRAME_CHECKRECON:
    700 	case RAIDFRAME_GET_COMPONENT_LABEL:
    701 	case RAIDFRAME_SET_COMPONENT_LABEL:
    702 	case RAIDFRAME_ADD_HOT_SPARE:
    703 	case RAIDFRAME_REMOVE_HOT_SPARE:
    704 	case RAIDFRAME_INIT_LABELS:
    705 	case RAIDFRAME_REBUILD_IN_PLACE:
    706 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    707 			return (ENXIO);
    708 	}
    709 
    710 	switch (cmd) {
    711 
    712 
    713 		/* configure the system */
    714 	case RAIDFRAME_CONFIGURE:
    715 
    716 		db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
    717 		/* copy-in the configuration information */
    718 		/* data points to a pointer to the configuration structure */
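         		/*
         		 * A minimal sketch of the expected userland call (assuming a
         		 * descriptor 'fd' for the raw raid device and a filled-in
         		 * RF_Config_t 'cfg'; these names are illustrative only):
         		 *
         		 *	RF_Config_t *cfgp = &cfg;
         		 *	ioctl(fd, RAIDFRAME_CONFIGURE, &cfgp);
         		 *
         		 * which is why "data" is dereferenced once below to get the
         		 * user-space address of the configuration structure.
         		 */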
    719 		u_cfg = *((RF_Config_t **) data);
    720 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    721 		if (k_cfg == NULL) {
    722 			db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
    723 			return (ENOMEM);
    724 		}
    725 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    726 		    sizeof(RF_Config_t));
     727 		if (retcode) {
     728 			db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
     729 				retcode));
         			RF_Free(k_cfg, sizeof(RF_Config_t));
     730 			return (retcode);
     731 		}
    732 		/* allocate a buffer for the layout-specific data, and copy it
    733 		 * in */
    734 		if (k_cfg->layoutSpecificSize) {
     735 			if (k_cfg->layoutSpecificSize > 10000) {
     736 				/* sanity check */
     737 				db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
         				RF_Free(k_cfg, sizeof(RF_Config_t));
     738 				return (EINVAL);
     739 			}
    740 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    741 			    (u_char *));
    742 			if (specific_buf == NULL) {
    743 				RF_Free(k_cfg, sizeof(RF_Config_t));
    744 				db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
    745 				return (ENOMEM);
    746 			}
    747 			retcode = copyin(k_cfg->layoutSpecific,
    748 			    (caddr_t) specific_buf,
    749 			    k_cfg->layoutSpecificSize);
     750 			if (retcode) {
     751 				db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
     752 					retcode));
         				RF_Free(specific_buf,
         				    k_cfg->layoutSpecificSize);
         				RF_Free(k_cfg, sizeof(RF_Config_t));
     753 				return (retcode);
     754 			}
    755 		} else
    756 			specific_buf = NULL;
    757 		k_cfg->layoutSpecific = specific_buf;
    758 
    759 		/* should do some kind of sanity check on the configuration.
    760 		 * Store the sum of all the bytes in the last byte? */
    761 
    762 #if 0
    763 		db1_printf(("Considering configuring the system.:%d 0x%x\n",
    764 			unit, p));
    765 #endif
    766 
    767 		/* We need the pointer to this a little deeper, so stash it
    768 		 * here... */
    769 
    770 		raidPtrs[unit]->proc = p;
    771 
    772 		/* configure the system */
    773 
    774 		raidPtrs[unit]->raidid = unit;
    775 		retcode = rf_Configure(raidPtrs[unit], k_cfg);
    776 
    777 
    778 		if (retcode == 0) {
    779 			retcode = raidinit(dev, raidPtrs[unit], unit);
    780 			rf_markalldirty( raidPtrs[unit] );
    781 			/* register our shutdown hook */
    782 			if ((rs->sc_sdhook =
    783 			     shutdownhook_establish(raid_shutdown,
    784 						raidPtrs[unit])) == NULL) {
    785 				printf("raid%d: WARNING: unable to establish shutdown hook\n",raidPtrs[unit]->raidid);
    786 			}
    787 
    788 
    789 		}
    790 		/* free the buffers.  No return code here. */
    791 		if (k_cfg->layoutSpecificSize) {
    792 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    793 		}
    794 		RF_Free(k_cfg, sizeof(RF_Config_t));
    795 
    796 		db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
    797 			retcode));
    798 
    799 		return (retcode);
    800 
    801 		/* shutdown the system */
    802 	case RAIDFRAME_SHUTDOWN:
    803 
    804 		if ((error = raidlock(rs)) != 0)
    805 			return (error);
    806 
    807 		/*
    808 		 * If somebody has a partition mounted, we shouldn't
    809 		 * shutdown.
    810 		 */
    811 
    812 		part = DISKPART(dev);
    813 		pmask = (1 << part);
    814 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    815 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    816 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    817 			raidunlock(rs);
    818 			return (EBUSY);
    819 		}
    820 
    821 		if (rf_debugKernelAccess) {
    822 			printf("call shutdown\n");
    823 		}
    824 		raidPtrs[unit]->proc = p;	/* XXX  necessary evil */
    825 
    826 		retcode = rf_Shutdown(raidPtrs[unit]);
    827 
    828 		db1_printf(("Done main shutdown\n"));
    829 
    830 		pool_destroy(&rs->sc_cbufpool);
    831 		db1_printf(("Done freeing component buffer freelist\n"));
    832 
    833 		/* It's no longer initialized... */
    834 		rs->sc_flags &= ~RAIDF_INITED;
    835 
    836 		shutdownhook_disestablish( rs->sc_sdhook );
    837 		rs->sc_sdhook = NULL;
    838 
    839 		/* Detach the disk. */
    840 		disk_detach(&rs->sc_dkdev);
    841 
    842 		raidunlock(rs);
    843 
    844 		return (retcode);
    845 	case RAIDFRAME_GET_COMPONENT_LABEL:
    846 		c_label_ptr = (RF_ComponentLabel_t **) data;
    847 		/* need to read the component label for the disk indicated
    848 		   by row,column in component_label
    849 		   XXX need to sanity check these values!!!
    850 		   */
    851 
     852 		/* For practice, let's get it directly from disk, rather
     853 		   than from the in-core copy */
    854 		RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
    855 			   (RF_ComponentLabel_t *));
    856 		if (component_label == NULL)
    857 			return (ENOMEM);
    858 
    859 		bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
    860 
    861 		retcode = copyin( *c_label_ptr, component_label,
    862 				  sizeof(RF_ComponentLabel_t));
    863 
     864 		if (retcode) {
         			RF_Free( component_label, sizeof(RF_ComponentLabel_t));
     865 			return(retcode);
     866 		}
    867 
    868 		row = component_label->row;
    869 		printf("Row: %d\n",row);
     870 		if ((row < 0) || (row >= raidPtrs[unit]->numRow)) {
    871 			row = 0; /* XXX */
    872 		}
    873 		column = component_label->column;
    874 		printf("Column: %d\n",column);
     875 		if ((column < 0) || (column >= raidPtrs[unit]->numCol)) {
    876 			column = 0; /* XXX */
    877 		}
    878 
    879 		raidread_component_label(
    880                               raidPtrs[unit]->Disks[row][column].dev,
    881 			      raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    882 			      component_label );
    883 
    884 		retcode = copyout((caddr_t) component_label,
    885 				  (caddr_t) *c_label_ptr,
    886 				  sizeof(RF_ComponentLabel_t));
    887 		RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    888 		return (retcode);
    889 
    890 	case RAIDFRAME_SET_COMPONENT_LABEL:
    891 		component_label = (RF_ComponentLabel_t *) data;
    892 
    893 		/* XXX check the label for valid stuff... */
    894 		/* Note that some things *should not* get modified --
    895 		   the user should be re-initing the labels instead of
    896 		   trying to patch things.
    897 		   */
    898 
    899 		printf("Got component label:\n");
    900 		printf("Version: %d\n",component_label->version);
    901 		printf("Serial Number: %d\n",component_label->serial_number);
    902 		printf("Mod counter: %d\n",component_label->mod_counter);
    903 		printf("Row: %d\n", component_label->row);
    904 		printf("Column: %d\n", component_label->column);
    905 		printf("Num Rows: %d\n", component_label->num_rows);
    906 		printf("Num Columns: %d\n", component_label->num_columns);
    907 		printf("Clean: %d\n", component_label->clean);
    908 		printf("Status: %d\n", component_label->status);
    909 
    910 		row = component_label->row;
    911 		column = component_label->column;
    912 
     913 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
     914 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
    915 			return(EINVAL);
    916 		}
    917 
    918 		/* XXX this isn't allowed to do anything for now :-) */
    919 #if 0
    920 		raidwrite_component_label(
    921                             raidPtrs[unit]->Disks[row][column].dev,
    922 			    raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    923 			    component_label );
    924 #endif
    925 		return (0);
    926 
    927 	case RAIDFRAME_INIT_LABELS:
    928 		component_label = (RF_ComponentLabel_t *) data;
    929 		/*
    930 		   we only want the serial number from
    931 		   the above.  We get all the rest of the information
    932 		   from the config that was used to create this RAID
    933 		   set.
    934 		   */
    935 
    936 		raidPtrs[unit]->serial_number = component_label->serial_number;
    937 		/* current version number */
    938 		ci_label.version = RF_COMPONENT_LABEL_VERSION;
    939 		ci_label.serial_number = component_label->serial_number;
    940 		ci_label.mod_counter = raidPtrs[unit]->mod_counter;
    941 		ci_label.num_rows = raidPtrs[unit]->numRow;
    942 		ci_label.num_columns = raidPtrs[unit]->numCol;
    943 		ci_label.clean = RF_RAID_DIRTY; /* not clean */
    944 		ci_label.status = rf_ds_optimal; /* "It's good!" */
    945 
    946 		for(row=0;row<raidPtrs[unit]->numRow;row++) {
    947 			ci_label.row = row;
    948 			for(column=0;column<raidPtrs[unit]->numCol;column++) {
    949 				ci_label.column = column;
    950 				raidwrite_component_label(
    951 				  raidPtrs[unit]->Disks[row][column].dev,
    952 				  raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    953 				  &ci_label );
    954 			}
    955 		}
    956 
    957 		return (retcode);
    958 
    959 		/* initialize all parity */
    960 	case RAIDFRAME_REWRITEPARITY:
    961 
    962 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0)
    963 			return (EINVAL);
    964 		/* borrow the thread of the requesting process */
    965 		raidPtrs[unit]->proc = p;	/* Blah... :-p GO */
    966 		retcode = rf_RewriteParity(raidPtrs[unit]);
    967 		/* return I/O Error if the parity rewrite fails */
    968 
    969 		if (retcode) {
    970 			retcode = EIO;
    971 		} else {
    972 			/* set the clean bit!  If we shutdown correctly,
    973 			 the clean bit on each component label will get
    974 			 set */
    975 			raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
    976 		}
    977 		return (retcode);
    978 
    979 
    980 	case RAIDFRAME_ADD_HOT_SPARE:
    981 		sparePtr = (RF_SingleComponent_t *) data;
    982 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
    983 		printf("Adding spare\n");
    984 		retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
    985 		return(retcode);
    986 
    987 	case RAIDFRAME_REMOVE_HOT_SPARE:
    988 		return(retcode);
    989 
    990 	case RAIDFRAME_REBUILD_IN_PLACE:
    991 		componentPtr = (RF_SingleComponent_t *) data;
    992 		memcpy( &component, componentPtr,
    993 			sizeof(RF_SingleComponent_t));
    994 		row = component.row;
    995 		column = component.column;
    996 		printf("Rebuild: %d %d\n",row, column);
     997 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
     998 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
    999 			return(EINVAL);
   1000 		}
   1001 		printf("Attempting a rebuild in place\n");
   1002 		raidPtrs[unit]->proc = p;	/* Blah... :-p GO */
   1003 		retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
   1004 		return(retcode);
   1005 
   1006 		/* issue a test-unit-ready through raidframe to the indicated
   1007 		 * device */
   1008 #if 0				/* XXX not supported yet (ever?) */
   1009 	case RAIDFRAME_TUR:
   1010 		/* debug only */
   1011 		retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
   1012 		return (retcode);
   1013 #endif
   1014 	case RAIDFRAME_GET_INFO:
   1015 		{
   1016 			RF_Raid_t *raid = raidPtrs[unit];
   1017 			RF_DeviceConfig_t *cfg, **ucfgp;
   1018 			int     i, j, d;
   1019 
   1020 			if (!raid->valid)
   1021 				return (ENODEV);
   1022 			ucfgp = (RF_DeviceConfig_t **) data;
   1023 			RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
   1024 				  (RF_DeviceConfig_t *));
   1025 			if (cfg == NULL)
   1026 				return (ENOMEM);
   1027 			bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
   1028 			cfg->rows = raid->numRow;
   1029 			cfg->cols = raid->numCol;
   1030 			cfg->ndevs = raid->numRow * raid->numCol;
    1031 			if (cfg->ndevs >= RF_MAX_DISKS) {
    1032 				cfg->ndevs = 0;
         				RF_Free(cfg, sizeof(RF_DeviceConfig_t));
    1033 				return (ENOMEM);
    1034 			}
   1035 			cfg->nspares = raid->numSpare;
    1036 			if (cfg->nspares >= RF_MAX_DISKS) {
    1037 				cfg->nspares = 0;
         				RF_Free(cfg, sizeof(RF_DeviceConfig_t));
    1038 				return (ENOMEM);
    1039 			}
   1040 			cfg->maxqdepth = raid->maxQueueDepth;
   1041 			d = 0;
   1042 			for (i = 0; i < cfg->rows; i++) {
   1043 				for (j = 0; j < cfg->cols; j++) {
   1044 					cfg->devs[d] = raid->Disks[i][j];
   1045 					d++;
   1046 				}
   1047 			}
   1048 			for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
   1049 				cfg->spares[i] = raid->Disks[0][j];
   1050 			}
   1051 			retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
   1052 					  sizeof(RF_DeviceConfig_t));
   1053 			RF_Free(cfg, sizeof(RF_DeviceConfig_t));
   1054 
   1055 			return (retcode);
   1056 		}
   1057 		break;
   1058 
   1059 	case RAIDFRAME_RESET_ACCTOTALS:
   1060 		{
   1061 			RF_Raid_t *raid = raidPtrs[unit];
   1062 
   1063 			bzero(&raid->acc_totals, sizeof(raid->acc_totals));
   1064 			return (0);
   1065 		}
   1066 		break;
   1067 
   1068 	case RAIDFRAME_GET_ACCTOTALS:
   1069 		{
   1070 			RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
   1071 			RF_Raid_t *raid = raidPtrs[unit];
   1072 
   1073 			*totals = raid->acc_totals;
   1074 			return (0);
   1075 		}
   1076 		break;
   1077 
   1078 	case RAIDFRAME_KEEP_ACCTOTALS:
   1079 		{
   1080 			RF_Raid_t *raid = raidPtrs[unit];
   1081 			int    *keep = (int *) data;
   1082 
   1083 			raid->keep_acc_totals = *keep;
   1084 			return (0);
   1085 		}
   1086 		break;
   1087 
   1088 	case RAIDFRAME_GET_SIZE:
   1089 		*(int *) data = raidPtrs[unit]->totalSectors;
   1090 		return (0);
   1091 
   1092 #define RAIDFRAME_RECON 1
   1093 		/* XXX The above should probably be set somewhere else!! GO */
   1094 #if RAIDFRAME_RECON > 0
   1095 
   1096 		/* fail a disk & optionally start reconstruction */
   1097 	case RAIDFRAME_FAIL_DISK:
   1098 		rr = (struct rf_recon_req *) data;
   1099 
   1100 		if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
   1101 		    || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
   1102 			return (EINVAL);
   1103 
   1104 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1105 		       unit, rr->row, rr->col);
   1106 
   1107 		/* make a copy of the recon request so that we don't rely on
   1108 		 * the user's buffer */
   1109 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1110 		bcopy(rr, rrcopy, sizeof(*rr));
   1111 		rrcopy->raidPtr = (void *) raidPtrs[unit];
   1112 
   1113 		LOCK_RECON_Q_MUTEX();
   1114 		rrcopy->next = recon_queue;
   1115 		recon_queue = rrcopy;
   1116 		wakeup(&recon_queue);
   1117 		UNLOCK_RECON_Q_MUTEX();
   1118 
   1119 		return (0);
   1120 
   1121 		/* invoke a copyback operation after recon on whatever disk
   1122 		 * needs it, if any */
   1123 	case RAIDFRAME_COPYBACK:
   1124 		/* borrow the current thread to get this done */
   1125 		raidPtrs[unit]->proc = p;	/* ICK.. but needed :-p  GO */
   1126 		rf_CopybackReconstructedData(raidPtrs[unit]);
   1127 		return (0);
   1128 
   1129 		/* return the percentage completion of reconstruction */
   1130 	case RAIDFRAME_CHECKRECON:
   1131 		row = *(int *) data;
   1132 		if (row < 0 || row >= raidPtrs[unit]->numRow)
   1133 			return (EINVAL);
   1134 		if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
   1135 			*(int *) data = 100;
   1136 		else
   1137 			*(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
   1138 		return (0);
   1139 
   1140 		/* the sparetable daemon calls this to wait for the kernel to
   1141 		 * need a spare table. this ioctl does not return until a
   1142 		 * spare table is needed. XXX -- calling mpsleep here in the
   1143 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1144 		 * -- I should either compute the spare table in the kernel,
   1145 		 * or have a different -- XXX XXX -- interface (a different
   1146 		 * character device) for delivering the table          -- XXX */
   1147 #if 0
   1148 	case RAIDFRAME_SPARET_WAIT:
   1149 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1150 		while (!rf_sparet_wait_queue)
   1151 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1152 		waitreq = rf_sparet_wait_queue;
   1153 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1154 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1155 
   1156 		*((RF_SparetWait_t *) data) = *waitreq;	/* structure assignment */
   1157 
   1158 		RF_Free(waitreq, sizeof(*waitreq));
   1159 		return (0);
   1160 
   1161 
   1162 		/* wakes up a process waiting on SPARET_WAIT and puts an error
    1163 		 * code in it that will cause the daemon to exit */
   1164 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1165 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1166 		waitreq->fcol = -1;
   1167 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1168 		waitreq->next = rf_sparet_wait_queue;
   1169 		rf_sparet_wait_queue = waitreq;
   1170 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1171 		wakeup(&rf_sparet_wait_queue);
   1172 		return (0);
   1173 
   1174 		/* used by the spare table daemon to deliver a spare table
   1175 		 * into the kernel */
   1176 	case RAIDFRAME_SEND_SPARET:
   1177 
   1178 		/* install the spare table */
   1179 		retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
   1180 
   1181 		/* respond to the requestor.  the return status of the spare
   1182 		 * table installation is passed in the "fcol" field */
   1183 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1184 		waitreq->fcol = retcode;
   1185 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1186 		waitreq->next = rf_sparet_resp_queue;
   1187 		rf_sparet_resp_queue = waitreq;
   1188 		wakeup(&rf_sparet_resp_queue);
   1189 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1190 
   1191 		return (retcode);
   1192 #endif
   1193 
   1194 
   1195 #endif				/* RAIDFRAME_RECON > 0 */
   1196 
   1197 	default:
   1198 		break;		/* fall through to the os-specific code below */
   1199 
   1200 	}
   1201 
   1202 	if (!raidPtrs[unit]->valid)
   1203 		return (EINVAL);
   1204 
   1205 	/*
   1206 	 * Add support for "regular" device ioctls here.
   1207 	 */
   1208 
   1209 	switch (cmd) {
   1210 	case DIOCGDINFO:
   1211 		db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
   1212 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1213 		break;
   1214 
   1215 	case DIOCGPART:
   1216 		db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
   1217 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1218 		((struct partinfo *) data)->part =
   1219 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1220 		break;
   1221 
   1222 	case DIOCWDINFO:
   1223 		db1_printf(("DIOCWDINFO\n"));
   1224 	case DIOCSDINFO:
   1225 		db1_printf(("DIOCSDINFO\n"));
   1226 		if ((error = raidlock(rs)) != 0)
   1227 			return (error);
   1228 
   1229 		rs->sc_flags |= RAIDF_LABELLING;
   1230 
   1231 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1232 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1233 		if (error == 0) {
   1234 			if (cmd == DIOCWDINFO)
   1235 				error = writedisklabel(RAIDLABELDEV(dev),
   1236 				    raidstrategy, rs->sc_dkdev.dk_label,
   1237 				    rs->sc_dkdev.dk_cpulabel);
   1238 		}
   1239 		rs->sc_flags &= ~RAIDF_LABELLING;
   1240 
   1241 		raidunlock(rs);
   1242 
   1243 		if (error)
   1244 			return (error);
   1245 		break;
   1246 
   1247 	case DIOCWLABEL:
   1248 		db1_printf(("DIOCWLABEL\n"));
   1249 		if (*(int *) data != 0)
   1250 			rs->sc_flags |= RAIDF_WLABEL;
   1251 		else
   1252 			rs->sc_flags &= ~RAIDF_WLABEL;
   1253 		break;
   1254 
   1255 	case DIOCGDEFLABEL:
   1256 		db1_printf(("DIOCGDEFLABEL\n"));
   1257 		raidgetdefaultlabel(raidPtrs[unit], rs,
   1258 		    (struct disklabel *) data);
   1259 		break;
   1260 
   1261 	default:
   1262 		retcode = ENOTTY;	/* XXXX ?? OR EINVAL ? */
   1263 	}
   1264 	return (retcode);
   1265 
   1266 }
   1267 
   1268 
   1269 /* raidinit -- complete the rest of the initialization for the
   1270    RAIDframe device.  */
   1271 
   1272 
   1273 static int
   1274 raidinit(dev, raidPtr, unit)
   1275 	dev_t   dev;
   1276 	RF_Raid_t *raidPtr;
   1277 	int     unit;
   1278 {
   1279 	int     retcode;
   1280 	/* int ix; */
   1281 	/* struct raidbuf *raidbp; */
   1282 	struct raid_softc *rs;
   1283 
   1284 	retcode = 0;
   1285 
   1286 	rs = &raid_softc[unit];
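         	/* Create the pool that RAIDGETBUF()/RAIDPUTBUF() draw their
         	 * component buffer headers from. */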
   1287 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1288 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1289 
   1290 
   1291 	/* XXX should check return code first... */
   1292 	rs->sc_flags |= RAIDF_INITED;
   1293 
   1294 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1295 
   1296 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1297 
   1298 	/* disk_attach actually creates space for the CPU disklabel, among
   1299 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1300 	 * with disklabels. */
   1301 
   1302 	disk_attach(&rs->sc_dkdev);
   1303 
   1304 	/* XXX There may be a weird interaction here between this, and
   1305 	 * protectedSectors, as used in RAIDframe.  */
   1306 
   1307 	rs->sc_size = raidPtr->totalSectors;
   1308 	rs->sc_dev = dev;
   1309 
   1310 	return (retcode);
   1311 }
   1312 
   1313 void
   1314 raid_shutdown(arg)
   1315 	void *arg;
   1316 {
   1317 	RF_Raid_t *raidPtr = arg;
   1318 	struct raid_softc *rs;
   1319 
    1320 	/* This is called by our shutdown hook.
    1321 	   The lights are being turned out, so let's shut down as
    1322 	   gracefully as possible. */
   1323 
   1324 	rs = &raid_softc[raidPtr->raidid];
   1325 
   1326 	printf("raid%d: shutdown hooks called\n",raidPtr->raidid);
   1327 	rf_Shutdown(raidPtr);
   1328 
   1329 	/* It's no longer initialized... */
   1330 	rs->sc_flags &= ~RAIDF_INITED;
   1331 
   1332 
   1333 }
   1334 
   1335 
   1336 /*********************************************************
   1337  *
   1338  * initialization code called at boot time (startup.c)
   1339  *
   1340  ********************************************************/
   1341 int
   1342 rf_boot()
   1343 {
   1344 	int     i, rc;
   1345 
   1346 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
   1347 	if (rc) {
   1348 		RF_PANIC();
   1349 	}
   1350 
   1351 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
   1352 	recon_queue = NULL;
   1353 
   1354 	for (i = 0; i < numraid; i++)
   1355 		raidPtrs[i] = NULL;
   1356 	rc = rf_BootRaidframe();
   1357 	if (rc == 0)
   1358 		printf("Kernelized RAIDframe activated\n");
   1359 	else
   1360 		rf_kbooted = RFK_BOOT_BAD;
   1361 	return (rc);
   1362 }
   1363 
   1364 /*
   1365  * This kernel thread never exits.  It is created once, and persists
   1366  * until the system reboots.
   1367  */
   1368 
   1369 void
   1370 rf_ReconKernelThread()
   1371 {
   1372 	struct rf_recon_req *req;
   1373 	int     s;
   1374 
   1375 	/* XXX not sure what spl() level we should be at here... probably
   1376 	 * splbio() */
   1377 	s = splbio();
   1378 
   1379 	while (1) {
   1380 		/* grab the next reconstruction request from the queue */
   1381 		LOCK_RECON_Q_MUTEX();
   1382 		while (!recon_queue) {
   1383 			UNLOCK_RECON_Q_MUTEX();
   1384 			tsleep(&recon_queue, PRIBIO | PCATCH,
   1385 			       "raidframe recon", 0);
   1386 			LOCK_RECON_Q_MUTEX();
   1387 		}
   1388 		req = recon_queue;
   1389 		recon_queue = recon_queue->next;
   1390 		UNLOCK_RECON_Q_MUTEX();
   1391 
   1392 		/*
   1393 	         * If flags specifies that we should start recon, this call
   1394 	         * will not return until reconstruction completes, fails,
   1395 		 * or is aborted.
   1396 	         */
   1397 		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   1398 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   1399 
   1400 		RF_Free(req, sizeof(*req));
   1401 	}
   1402 }
   1403 /* wake up the daemon & tell it to get us a spare table
   1404  * XXX
   1405  * the entries in the queues should be tagged with the raidPtr
   1406  * so that in the extremely rare case that two recons happen at once,
    1407  * we know for which device we're requesting a spare table
   1408  * XXX
   1409  */
   1410 int
   1411 rf_GetSpareTableFromDaemon(req)
   1412 	RF_SparetWait_t *req;
   1413 {
   1414 	int     retcode;
   1415 
   1416 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1417 	req->next = rf_sparet_wait_queue;
   1418 	rf_sparet_wait_queue = req;
   1419 	wakeup(&rf_sparet_wait_queue);
   1420 
   1421 	/* mpsleep unlocks the mutex */
   1422 	while (!rf_sparet_resp_queue) {
   1423 		tsleep(&rf_sparet_resp_queue, PRIBIO | PCATCH,
   1424 		    "raidframe getsparetable", 0);
   1425 #if 0
   1426 		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
   1427 			(void *) simple_lock_addr(rf_sparet_wait_mutex),
   1428 			MS_LOCK_SIMPLE);
   1429 #endif
   1430 	}
   1431 	req = rf_sparet_resp_queue;
   1432 	rf_sparet_resp_queue = req->next;
   1433 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1434 
   1435 	retcode = req->fcol;
   1436 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1437 					 * alloc'd */
   1438 	return (retcode);
   1439 }
    1440 /* A wrapper around rf_DoAccess that extracts the appropriate info from the
    1441  * bp and passes it down.
    1442  * Any calls originating in the kernel must use non-blocking I/O.
    1443  * Do some extra sanity checking to return "appropriate" error values for
    1444  * certain conditions (to make some standard utilities work).
    1445  */
   1446 int
   1447 rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
   1448 	RF_Raid_t *raidPtr;
   1449 	struct buf *bp;
   1450 	RF_RaidAccessFlags_t flags;
   1451 	void    (*cbFunc) (struct buf *);
   1452 	void   *cbArg;
   1453 {
   1454 	RF_SectorCount_t num_blocks, pb, sum;
   1455 	RF_RaidAddr_t raid_addr;
   1456 	int     retcode;
   1457 	struct partition *pp;
   1458 	daddr_t blocknum;
   1459 	int     unit;
   1460 	struct raid_softc *rs;
   1461 	int     do_async;
   1462 
   1463 	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */
   1464 
   1465 	unit = raidPtr->raidid;
   1466 	rs = &raid_softc[unit];
   1467 
   1468 	/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1469 	 * partition.. Need to make it absolute to the underlying device.. */
   1470 
   1471 	blocknum = bp->b_blkno;
   1472 	if (DISKPART(bp->b_dev) != RAW_PART) {
   1473 		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1474 		blocknum += pp->p_offset;
   1475 		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
   1476 			pp->p_offset));
   1477 	} else {
   1478 		db1_printf(("Is raw..\n"));
   1479 	}
   1480 	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));
   1481 
   1482 	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1483 	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1484 
   1485 	/* *THIS* is where we adjust what block we're going to... but DO NOT
   1486 	 * TOUCH bp->b_blkno!!! */
   1487 	raid_addr = blocknum;
   1488 
   1489 	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1490 	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1491 	sum = raid_addr + num_blocks + pb;
    1492 	if (rf_debugKernelAccess) {
   1493 		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1494 			(int) raid_addr, (int) sum, (int) num_blocks,
   1495 			(int) pb, (int) bp->b_resid));
   1496 	}
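         	/* Reject accesses that run past the end of the array, or whose
         	 * sector arithmetic wrapped around (any partial sum exceeding
         	 * 'sum' indicates overflow). */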
   1497 	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1498 	    || (sum < num_blocks) || (sum < pb)) {
   1499 		bp->b_error = ENOSPC;
   1500 		bp->b_flags |= B_ERROR;
   1501 		bp->b_resid = bp->b_bcount;
   1502 		biodone(bp);
   1503 		return (bp->b_error);
   1504 	}
   1505 	/*
   1506 	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1507 	 */
   1508 
   1509 	if (bp->b_bcount & raidPtr->sectorMask) {
   1510 		bp->b_error = EINVAL;
   1511 		bp->b_flags |= B_ERROR;
   1512 		bp->b_resid = bp->b_bcount;
   1513 		biodone(bp);
   1514 		return (bp->b_error);
   1515 	}
   1516 	db1_printf(("Calling DoAccess..\n"));
   1517 
   1518 	/*
   1519 	 * XXX For now, all writes are sync
   1520 	 */
   1521 	do_async = 1;
   1522 	if ((bp->b_flags & B_READ) == 0)
   1523 		do_async = 0;
   1524 
   1525 	/* don't ever condition on bp->b_flags & B_WRITE.  always condition on
   1526 	 * B_READ instead */
   1527 	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1528 	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1529 	    do_async, raid_addr, num_blocks,
   1530 	    bp->b_un.b_addr,
   1531 	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
   1532 	    NULL, cbFunc, cbArg);
   1533 #if 0
   1534 	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
   1535 		bp->b_data, (int) bp->b_resid));
   1536 #endif
   1537 
   1538 	/*
   1539 	 * If we requested sync I/O, sleep here.
   1540 	 */
   1541 	if ((retcode == 0) && (do_async == 0))
   1542 		tsleep(bp, PRIBIO, "raidsyncio", 0);
   1543 
   1544 	return (retcode);
   1545 }
   1546 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1547 
   1548 int
   1549 rf_DispatchKernelIO(queue, req)
   1550 	RF_DiskQueue_t *queue;
   1551 	RF_DiskQueueData_t *req;
   1552 {
   1553 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1554 	struct buf *bp;
   1555 	struct raidbuf *raidbp = NULL;
   1556 	struct raid_softc *rs;
   1557 	int     unit;
   1558 
   1559 	/* XXX along with the vnode, we also need the softc associated with
   1560 	 * this device.. */
   1561 
   1562 	req->queue = queue;
   1563 
   1564 	unit = queue->raidPtr->raidid;
   1565 
   1566 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1567 
   1568 	if (unit >= numraid) {
   1569 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1570 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1571 	}
   1572 	rs = &raid_softc[unit];
   1573 
   1574 	/* XXX is this the right place? */
   1575 	disk_busy(&rs->sc_dkdev);
   1576 
   1577 	bp = req->bp;
   1578 
   1579 	/* XXX when there is a physical disk failure, someone is passing us a
   1580 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1581 	 * without taking a performance hit... (not sure where the real bug
   1582 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1583 
   1584 	if (bp->b_flags & B_ERROR) {
   1585 		bp->b_flags &= ~B_ERROR;
   1586 	}
   1587 	if (bp->b_error != 0) {
   1588 		bp->b_error = 0;
   1589 	}
   1590 	raidbp = RAIDGETBUF(rs);
   1591 
   1592 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1593 
   1594 	/*
   1595 	 * context for raidiodone
   1596 	 */
   1597 	raidbp->rf_obp = bp;
   1598 	raidbp->req = req;
   1599 
   1600 	switch (req->type) {
   1601 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1602 		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
   1603 		 * queue->row, queue->col); */
   1604 		/* XXX need to do something extra here.. */
   1605 		/* I'm leaving this in, as I've never actually seen it used,
   1606 		 * and I'd like folks to report it... GO */
    1607 		printf("WAKEUP CALLED\n");
   1608 		queue->numOutstanding++;
   1609 
   1610 		/* XXX need to glue the original buffer into this??  */
   1611 
   1612 		KernelWakeupFunc(&raidbp->rf_buf);
   1613 		break;
   1614 
   1615 	case RF_IO_TYPE_READ:
   1616 	case RF_IO_TYPE_WRITE:
   1617 
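		/*
		 * Point a fresh buf at the component's vnode/device for this
		 * piece of the access and push it down with VOP_STRATEGY();
		 * KernelWakeupFunc() fires when the component I/O completes.
		 */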
   1618 		if (req->tracerec) {
   1619 			RF_ETIMER_START(req->tracerec->timer);
   1620 		}
   1621 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1622 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1623 		    req->sectorOffset, req->numSector,
   1624 		    req->buf, KernelWakeupFunc, (void *) req,
   1625 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1626 
   1627 		if (rf_debugKernelAccess) {
   1628 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1629 				(long) bp->b_blkno));
   1630 		}
   1631 		queue->numOutstanding++;
   1632 		queue->last_deq_sector = req->sectorOffset;
   1633 		/* acc wouldn't have been let in if there were any pending
   1634 		 * reqs at any other priority */
   1635 		queue->curPriority = req->priority;
   1636 		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
   1637 		 * req->type, queue->row, queue->col); */
   1638 
   1639 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1640 			req->type, unit, queue->row, queue->col));
   1641 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1642 			(int) req->sectorOffset, (int) req->numSector,
   1643 			(int) (req->numSector <<
   1644 			    queue->raidPtr->logBytesPerSector),
   1645 			(int) queue->raidPtr->logBytesPerSector));
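		/* For writes, account for the pending output on the
		 * component's vnode. */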
   1646 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1647 			raidbp->rf_buf.b_vp->v_numoutput++;
   1648 		}
   1649 		VOP_STRATEGY(&raidbp->rf_buf);
   1650 
   1651 		break;
   1652 
   1653 	default:
   1654 		panic("bad req->type in rf_DispatchKernelIO");
   1655 	}
   1656 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1657 	return (0);
   1658 }
    1659 /* this is the callback function associated with an I/O invoked from
    1660    kernel code.
    1661  */
   1662 static void
   1663 KernelWakeupFunc(vbp)
   1664 	struct buf *vbp;
   1665 {
   1666 	RF_DiskQueueData_t *req = NULL;
   1667 	RF_DiskQueue_t *queue;
   1668 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1669 	struct buf *bp;
   1670 	struct raid_softc *rs;
   1671 	int     unit;
   1672 	register int s;
   1673 
   1674 	s = splbio();		/* XXX */
   1675 	db1_printf(("recovering the request queue:\n"));
   1676 	req = raidbp->req;
   1677 
   1678 	bp = raidbp->rf_obp;
   1679 #if 0
   1680 	db1_printf(("bp=0x%x\n", bp));
   1681 #endif
   1682 
   1683 	queue = (RF_DiskQueue_t *) req->queue;
   1684 
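	/* Propagate any error from the component I/O back to the original buf. */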
   1685 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1686 #if 0
   1687 		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
   1688 #endif
   1689 		bp->b_flags |= B_ERROR;
   1690 		bp->b_error = raidbp->rf_buf.b_error ?
   1691 		    raidbp->rf_buf.b_error : EIO;
   1692 	}
   1693 #if 0
   1694 	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
   1695 	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
   1696 	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
   1697 	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
   1698 #endif
   1699 
   1700 	/* XXX methinks this could be wrong... */
   1701 #if 1
   1702 	bp->b_resid = raidbp->rf_buf.b_resid;
   1703 #endif
   1704 
   1705 	if (req->tracerec) {
   1706 		RF_ETIMER_STOP(req->tracerec->timer);
   1707 		RF_ETIMER_EVAL(req->tracerec->timer);
   1708 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1709 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1710 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1711 		req->tracerec->num_phys_ios++;
   1712 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1713 	}
   1714 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1715 
   1716 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1717 
   1718 
   1719 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1720 	 * ballistic, and mark the component as hosed... */
   1721 #if 1
   1722 	if (bp->b_flags & B_ERROR) {
   1723 		/* Mark the disk as dead */
   1724 		/* but only mark it once... */
   1725 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1726 		    rf_ds_optimal) {
   1727 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1728 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1729 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1730 			    rf_ds_failed;
   1731 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1732 			queue->raidPtr->numFailures++;
   1733 			/* XXX here we should bump the version number for each component, and write that data out */
   1734 		} else {	/* Disk is already dead... */
   1735 			/* printf("Disk already marked as dead!\n"); */
   1736 		}
   1737 
   1738 	}
   1739 #endif
   1740 
   1741 	rs = &raid_softc[unit];
   1742 	RAIDPUTBUF(rs, raidbp);
   1743 
   1744 
   1745 	if (bp->b_resid == 0) {
   1746 		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
   1747 			unit, bp->b_resid, bp->b_bcount));
   1748 		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
   1749 		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
   1750 	} else {
   1751 		db1_printf(("b_resid is still %ld\n", bp->b_resid));
   1752 	}
   1753 
   1754 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1755 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1756 	/* printf("Exiting KernelWakeupFunc\n"); */
   1757 
   1758 	splx(s);		/* XXX */
   1759 }
   1760 
   1761 
   1762 
   1763 /*
   1764  * initialize a buf structure for doing an I/O in the kernel.
   1765  */
   1766 static void
   1767 InitBP(
   1768     struct buf * bp,
   1769     struct vnode * b_vp,
   1770     unsigned rw_flag,
   1771     dev_t dev,
   1772     RF_SectorNum_t startSect,
   1773     RF_SectorCount_t numSect,
   1774     caddr_t buf,
   1775     void (*cbFunc) (struct buf *),
   1776     void *cbArg,
   1777     int logBytesPerSector,
   1778     struct proc * b_proc)
   1779 {
   1780 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1781 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1782 	bp->b_bcount = numSect << logBytesPerSector;
   1783 	bp->b_bufsize = bp->b_bcount;
   1784 	bp->b_error = 0;
   1785 	bp->b_dev = dev;
   1786 	db1_printf(("bp->b_dev is %d\n", dev));
   1787 	bp->b_un.b_addr = buf;
   1788 #if 0
   1789 	db1_printf(("bp->b_data=0x%x\n", bp->b_data));
   1790 #endif
   1791 
   1792 	bp->b_blkno = startSect;
   1793 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1794 	db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
   1795 	if (bp->b_bcount == 0) {
   1796 		panic("bp->b_bcount is zero in InitBP!!\n");
   1797 	}
   1798 	bp->b_proc = b_proc;
   1799 	bp->b_iodone = cbFunc;
   1800 	bp->b_vp = b_vp;
   1801 
   1802 }
   1803 /* Extras... */
   1804 
   1805 unsigned int
   1806 rpcc()
   1807 {
   1808 	/* XXX no clue what this is supposed to do.. my guess is that it's
   1809 	 * supposed to read the CPU cycle counter... */
   1810 	/* db1_printf("this is supposed to do something useful too!??\n"); */
   1811 	return (0);
   1812 }
   1813 #if 0
   1814 int
   1815 rf_GetSpareTableFromDaemon(req)
   1816 	RF_SparetWait_t *req;
   1817 {
   1818 	int     retcode = 1;
   1819 	printf("This is supposed to do something useful!!\n");	/* XXX */
   1820 
   1821 	return (retcode);
   1822 
   1823 }
   1824 #endif
   1825 
   1826 static void
   1827 raidgetdefaultlabel(raidPtr, rs, lp)
   1828 	RF_Raid_t *raidPtr;
   1829 	struct raid_softc *rs;
   1830 	struct disklabel *lp;
   1831 {
   1832 	db1_printf(("Building a default label...\n"));
   1833 	bzero(lp, sizeof(*lp));
   1834 
   1835 	/* fabricate a label... */
   1836 	lp->d_secperunit = raidPtr->totalSectors;
   1837 	lp->d_secsize = raidPtr->bytesPerSector;
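	/* Fake geometry: one track per cylinder, 1MB worth of sectors per track. */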
   1838 	lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
   1839 	lp->d_ntracks = 1;
   1840 	lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
   1841 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1842 
   1843 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1844 	lp->d_type = DTYPE_RAID;
   1845 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1846 	lp->d_rpm = 3600;
   1847 	lp->d_interleave = 1;
   1848 	lp->d_flags = 0;
   1849 
   1850 	lp->d_partitions[RAW_PART].p_offset = 0;
   1851 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1852 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1853 	lp->d_npartitions = RAW_PART + 1;
   1854 
   1855 	lp->d_magic = DISKMAGIC;
   1856 	lp->d_magic2 = DISKMAGIC;
    1857 	lp->d_checksum = dkcksum(lp);
   1858 
   1859 }
   1860 /*
   1861  * Read the disklabel from the raid device.  If one is not present, fake one
   1862  * up.
   1863  */
   1864 static void
   1865 raidgetdisklabel(dev)
   1866 	dev_t   dev;
   1867 {
   1868 	int     unit = raidunit(dev);
   1869 	struct raid_softc *rs = &raid_softc[unit];
   1870 	char   *errstring;
   1871 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1872 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1873 	RF_Raid_t *raidPtr;
   1874 
   1875 	db1_printf(("Getting the disklabel...\n"));
   1876 
   1877 	bzero(clp, sizeof(*clp));
   1878 
   1879 	raidPtr = raidPtrs[unit];
   1880 
   1881 	raidgetdefaultlabel(raidPtr, rs, lp);
   1882 
   1883 	/*
   1884 	 * Call the generic disklabel extraction routine.
   1885 	 */
   1886 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1887 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1888 	if (errstring)
   1889 		raidmakedisklabel(rs);
   1890 	else {
   1891 		int     i;
   1892 		struct partition *pp;
   1893 
    1894 		/*
    1895 		 * Sanity check whether the found disklabel is valid.
    1896 		 *
    1897 		 * This is necessary since the total size of the raid device
    1898 		 * may vary when the interleave is changed even though exactly
    1899 		 * the same components are used, and an old disklabel may be
    1900 		 * used if one is found.
    1901 		 */
   1902 		if (lp->d_secperunit != rs->sc_size)
   1903 			printf("WARNING: %s: "
   1904 			    "total sector size in disklabel (%d) != "
   1905 			    "the size of raid (%d)\n", rs->sc_xname,
   1906 			    lp->d_secperunit, rs->sc_size);
   1907 		for (i = 0; i < lp->d_npartitions; i++) {
   1908 			pp = &lp->d_partitions[i];
   1909 			if (pp->p_offset + pp->p_size > rs->sc_size)
   1910 				printf("WARNING: %s: end of partition `%c' "
   1911 				    "exceeds the size of raid (%d)\n",
   1912 				    rs->sc_xname, 'a' + i, rs->sc_size);
   1913 		}
   1914 	}
   1915 
   1916 }
   1917 /*
   1918  * Take care of things one might want to take care of in the event
   1919  * that a disklabel isn't present.
   1920  */
   1921 static void
   1922 raidmakedisklabel(rs)
   1923 	struct raid_softc *rs;
   1924 {
   1925 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1926 	db1_printf(("Making a label..\n"));
   1927 
   1928 	/*
   1929 	 * For historical reasons, if there's no disklabel present
   1930 	 * the raw partition must be marked FS_BSDFFS.
   1931 	 */
   1932 
   1933 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1934 
   1935 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1936 
   1937 	lp->d_checksum = dkcksum(lp);
   1938 }
   1939 /*
   1940  * Lookup the provided name in the filesystem.  If the file exists,
   1941  * is a valid block device, and isn't being used by anyone else,
   1942  * set *vpp to the file's vnode.
   1943  * You'll find the original of this in ccd.c
   1944  */
   1945 int
   1946 raidlookup(path, p, vpp)
   1947 	char   *path;
   1948 	struct proc *p;
   1949 	struct vnode **vpp;	/* result */
   1950 {
   1951 	struct nameidata nd;
   1952 	struct vnode *vp;
   1953 	struct vattr va;
   1954 	int     error;
   1955 
   1956 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   1957 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   1958 #ifdef DEBUG
   1959 		printf("RAIDframe: vn_open returned %d\n", error);
   1960 #endif
   1961 		return (error);
   1962 	}
   1963 	vp = nd.ni_vp;
   1964 	if (vp->v_usecount > 1) {
   1965 		VOP_UNLOCK(vp, 0);
   1966 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1967 		return (EBUSY);
   1968 	}
   1969 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   1970 		VOP_UNLOCK(vp, 0);
   1971 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1972 		return (error);
   1973 	}
   1974 	/* XXX: eventually we should handle VREG, too. */
   1975 	if (va.va_type != VBLK) {
   1976 		VOP_UNLOCK(vp, 0);
   1977 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1978 		return (ENOTBLK);
   1979 	}
   1980 	VOP_UNLOCK(vp, 0);
   1981 	*vpp = vp;
   1982 	return (0);
   1983 }
   1984 /*
   1985  * Wait interruptibly for an exclusive lock.
   1986  *
   1987  * XXX
   1988  * Several drivers do this; it should be abstracted and made MP-safe.
   1989  * (Hmm... where have we seen this warning before :->  GO )
   1990  */
   1991 static int
   1992 raidlock(rs)
   1993 	struct raid_softc *rs;
   1994 {
   1995 	int     error;
   1996 
   1997 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   1998 		rs->sc_flags |= RAIDF_WANTED;
   1999 		if ((error =
   2000 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2001 			return (error);
   2002 	}
   2003 	rs->sc_flags |= RAIDF_LOCKED;
   2004 	return (0);
   2005 }
   2006 /*
   2007  * Unlock and wake up any waiters.
   2008  */
   2009 static void
   2010 raidunlock(rs)
   2011 	struct raid_softc *rs;
   2012 {
   2013 
   2014 	rs->sc_flags &= ~RAIDF_LOCKED;
   2015 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2016 		rs->sc_flags &= ~RAIDF_WANTED;
   2017 		wakeup(rs);
   2018 	}
   2019 }
   2020 
   2021 
   2022 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2023 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
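/*
 * Each component keeps its RAIDframe label at a fixed byte offset from the
 * start of the component; the routines below do raw block I/O on the
 * component's device to read and write it.
 */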
   2024 
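/*
 * raidmarkclean()/raidmarkdirty(): read-modify-write a component's label
 * to update its clean flag and modification counter.
 */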
   2025 int
   2026 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2027 {
   2028 	RF_ComponentLabel_t component_label;
   2029 	raidread_component_label(dev, b_vp, &component_label);
   2030 	component_label.mod_counter = mod_counter;
   2031 	component_label.clean = RF_RAID_CLEAN;
   2032 	raidwrite_component_label(dev, b_vp, &component_label);
   2033 	return(0);
   2034 }
   2035 
   2036 
   2037 int
   2038 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2039 {
   2040 	RF_ComponentLabel_t component_label;
   2041 	raidread_component_label(dev, b_vp, &component_label);
   2042 	component_label.mod_counter = mod_counter;
   2043 	component_label.clean = RF_RAID_DIRTY;
   2044 	raidwrite_component_label(dev, b_vp, &component_label);
   2045 	return(0);
   2046 }
   2047 
   2048 /* ARGSUSED */
   2049 int
   2050 raidread_component_label(dev, b_vp, component_label)
   2051 	dev_t dev;
   2052 	struct vnode *b_vp;
   2053 	RF_ComponentLabel_t *component_label;
   2054 {
   2055 	struct buf *bp;
   2056 	int error;
   2057 
   2058 	/* XXX should probably ensure that we don't try to do this if
   2059 	   someone has changed rf_protected_sectors. */
   2060 
   2061 	/* get a block of the appropriate size... */
   2062 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2063 	bp->b_dev = dev;
   2064 
   2065 	/* get our ducks in a row for the read */
   2066 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2067 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2068 	bp->b_flags = B_BUSY | B_READ;
   2069  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2070 
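	/* Hand the buf straight to the component's block driver and wait. */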
   2071 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2072 
   2073 	error = biowait(bp);
   2074 
   2075 	if (!error) {
   2076 		memcpy(component_label, bp->b_un.b_addr,
   2077 		       sizeof(RF_ComponentLabel_t));
   2078 #if 0
   2079 		printf("raidread_component_label: got component label:\n");
   2080 		printf("Version: %d\n",component_label->version);
   2081 		printf("Serial Number: %d\n",component_label->serial_number);
   2082 		printf("Mod counter: %d\n",component_label->mod_counter);
   2083 		printf("Row: %d\n", component_label->row);
   2084 		printf("Column: %d\n", component_label->column);
   2085 		printf("Num Rows: %d\n", component_label->num_rows);
   2086 		printf("Num Columns: %d\n", component_label->num_columns);
   2087 		printf("Clean: %d\n", component_label->clean);
   2088 		printf("Status: %d\n", component_label->status);
   2089 #endif
    2090 	} else {
    2091 		printf("Failed to read RAID component label!\n");
    2092 	}
    2093 
    2094 	bp->b_flags = B_INVAL | B_AGE;
   2095 	brelse(bp);
   2096 	return(error);
   2097 }
   2098 /* ARGSUSED */
   2099 int
   2100 raidwrite_component_label(dev, b_vp, component_label)
   2101 	dev_t dev;
   2102 	struct vnode *b_vp;
   2103 	RF_ComponentLabel_t *component_label;
   2104 {
   2105 	struct buf *bp;
   2106 	int error;
   2107 
   2108 	/* get a block of the appropriate size... */
   2109 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2110 	bp->b_dev = dev;
   2111 
   2112 	/* get our ducks in a row for the write */
   2113 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2114 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2115 	bp->b_flags = B_BUSY | B_WRITE;
   2116  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2117 
   2118 	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
   2119 
   2120 	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));
   2121 
   2122 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2123 	error = biowait(bp);
    2124 	bp->b_flags = B_INVAL | B_AGE;
   2125 	brelse(bp);
   2126 	if (error) {
   2127 		printf("Failed to write RAID component info!\n");
   2128 	}
   2129 
   2130 	return(error);
   2131 }
   2132 
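/*
 * Bump the modification counter and mark the label on every non-failed
 * component as dirty.  Components whose label says rf_ds_spared are left
 * alone for now.
 */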
   2133 void
   2134 rf_markalldirty( raidPtr )
   2135 	RF_Raid_t *raidPtr;
   2136 {
   2137 	RF_ComponentLabel_t c_label;
   2138 	int r,c;
   2139 
   2140 	raidPtr->mod_counter++;
   2141 	for (r = 0; r < raidPtr->numRow; r++) {
   2142 		for (c = 0; c < raidPtr->numCol; c++) {
   2143 			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
   2144 				raidread_component_label(
   2145 					raidPtr->Disks[r][c].dev,
   2146 					raidPtr->raid_cinfo[r][c].ci_vp,
   2147 					&c_label);
   2148 				if (c_label.status == rf_ds_spared) {
   2149 					/* XXX do something special...
   2150 					 but whatever you do, don't
   2151 					 try to access it!! */
   2152 				} else {
   2153 #if 0
   2154 				c_label.status =
   2155 					raidPtr->Disks[r][c].status;
   2156 				raidwrite_component_label(
   2157 					raidPtr->Disks[r][c].dev,
   2158 					raidPtr->raid_cinfo[r][c].ci_vp,
   2159 					&c_label);
   2160 #endif
   2161 				raidmarkdirty(
   2162 				       raidPtr->Disks[r][c].dev,
   2163 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2164 				       raidPtr->mod_counter);
   2165 				}
   2166 			}
   2167 		}
   2168 	}
   2169 #if 0
   2170 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2171 		sparecol = raidPtr->numCol + c;
   2172 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2173 			/*
   2174 
   2175 			   XXX this is where we get fancy and map this spare
    2176 			   into its correct spot in the array.
   2177 
   2178 			 */
   2179 			/*
   2180 
   2181 			   we claim this disk is "optimal" if it's
   2182 			   rf_ds_used_spare, as that means it should be
   2183 			   directly substitutable for the disk it replaced.
   2184 			   We note that too...
   2185 
   2186 			 */
   2187 
   2188 			for(i=0;i<raidPtr->numRow;i++) {
   2189 				for(j=0;j<raidPtr->numCol;j++) {
   2190 					if ((raidPtr->Disks[i][j].spareRow ==
   2191 					     r) &&
   2192 					    (raidPtr->Disks[i][j].spareCol ==
   2193 					     sparecol)) {
   2194 						srow = r;
   2195 						scol = sparecol;
   2196 						break;
   2197 					}
   2198 				}
   2199 			}
   2200 
   2201 			raidread_component_label(
   2202 				      raidPtr->Disks[r][sparecol].dev,
   2203 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2204 				      &c_label);
   2205 			/* make sure status is noted */
   2206 			c_label.version = RF_COMPONENT_LABEL_VERSION;
   2207 			c_label.mod_counter = raidPtr->mod_counter;
   2208 			c_label.serial_number = raidPtr->serial_number;
   2209 			c_label.row = srow;
   2210 			c_label.column = scol;
   2211 			c_label.num_rows = raidPtr->numRow;
   2212 			c_label.num_columns = raidPtr->numCol;
   2213 			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2214 			c_label.status = rf_ds_optimal;
   2215 			raidwrite_component_label(
   2216 				      raidPtr->Disks[r][sparecol].dev,
   2217 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2218 				      &c_label);
    2219 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
    2220 			              raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
   2221 		}
   2222 	}
   2223 
   2224 #endif
   2225 }
   2226 
   2227