      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.11 1999/02/23 23:57:53 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
     101  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
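         /*
          * Roadmap: raidattach() allocates the raidPtrs and raid_softc arrays
          * and boots the RAIDframe core; raidopen/raidclose, raidread/raidwrite,
          * raidstrategy and raidioctl are the block/character driver entry
          * points, with configuration done through the RAIDFRAME_* ioctls below;
          * rf_DispatchKernelIO() issues component I/O via VOP_STRATEGY() and
          * KernelWakeupFunc() completes it; rf_ReconKernelThread() services
          * queued reconstruction requests.
          */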
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "rf_raid.h"
    139 #include "rf_raidframe.h"
    140 #include "rf_dag.h"
    141 #include "rf_dagflags.h"
    142 #include "rf_diskqueue.h"
    143 #include "rf_acctrace.h"
    144 #include "rf_etimer.h"
    145 #include "rf_general.h"
    146 #include "rf_debugMem.h"
    147 #include "rf_kintf.h"
    148 #include "rf_options.h"
    149 #include "rf_driver.h"
    150 #include "rf_parityscan.h"
    151 #include "rf_debugprint.h"
    152 #include "rf_threadstuff.h"
    153 
    154 int     rf_kdebug_level = 0;
    155 
    156 #define RFK_BOOT_NONE 0
    157 #define RFK_BOOT_GOOD 1
    158 #define RFK_BOOT_BAD  2
    159 static int rf_kbooted = RFK_BOOT_NONE;
    160 
    161 #ifdef DEBUG
    162 #define db0_printf(a) printf a
    163 #define db_printf(a) if (rf_kdebug_level > 0) printf a
    164 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    165 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
    166 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
    167 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
    168 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
    169 #else				/* DEBUG */
    170 #define db0_printf(a) printf a
    171 #define db1_printf(a) { }
    172 #define db2_printf(a) { }
    173 #define db3_printf(a) { }
    174 #define db4_printf(a) { }
    175 #define db5_printf(a) { }
    176 #endif				/* DEBUG */
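         /*
          * The db*_printf() macros expand their single argument as a complete
          * printf() argument list, so call sites use double parentheses, e.g.
          *	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
          * With DEBUG defined, dbN_printf() output appears once rf_kdebug_level
          * is at least N; without DEBUG, only db0_printf() prints anything.
          */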
    177 
    178 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    179 
    180 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    181 
    182 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    183 						 * spare table */
    184 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    185 						 * installation process */
    186 
    187 static struct rf_recon_req *recon_queue = NULL;	/* used to communicate
    188 						 * reconstruction
    189 						 * requests */
    190 
    191 
    192 decl_simple_lock_data(, recon_queue_mutex)
    193 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
    194 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
    195 
    196 /* prototypes */
    197 static void KernelWakeupFunc(struct buf * bp);
    198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    199 		   dev_t dev, RF_SectorNum_t startSect,
    200 		   RF_SectorCount_t numSect, caddr_t buf,
    201 		   void (*cbFunc) (struct buf *), void *cbArg,
    202 		   int logBytesPerSector, struct proc * b_proc);
    203 
    204 #define Dprintf0(s)       if (rf_queueDebug) \
    205      rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
    206 #define Dprintf1(s,a)     if (rf_queueDebug) \
    207      rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
    208 #define Dprintf2(s,a,b)   if (rf_queueDebug) \
    209      rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
    210 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
    211      rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
    212 
    213 static int raidmarkclean(dev_t dev, struct vnode *b_vp);
    214 
    215 
    216 void raidattach __P((int));
    217 int raidsize __P((dev_t));
    218 
    219 void    rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
    220 void    rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
    221 static int raidinit __P((dev_t, RF_Raid_t *, int));
    222 
    223 int raidopen __P((dev_t, int, int, struct proc *));
    224 int raidclose __P((dev_t, int, int, struct proc *));
    225 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
    226 int raidwrite __P((dev_t, struct uio *, int));
    227 int raidread __P((dev_t, struct uio *, int));
    228 void raidstrategy __P((struct buf *));
    229 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    230 
    231 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
    232 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
    233 
    234 /*
    235  * Pilfered from ccd.c
    236  */
    237 
    238 struct raidbuf {
    239 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
    240 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    241 	int     rf_flags;	/* misc. flags */
    242 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
    243 };
    244 
    245 
    246 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
    247 #define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
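         /*
          * The embedded rf_buf must stay the first member: component I/O is
          * issued on &raidbp->rf_buf, and KernelWakeupFunc() casts the struct
          * buf * it is handed back to the enclosing struct raidbuf to recover
          * the original buffer and its RF_DiskQueueData_t request.  raidbufs
          * are allocated from the per-unit sc_cbufpool via RAIDGETBUF() and
          * returned via RAIDPUTBUF().
          */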
    248 
    249 /* XXX Not sure if the following should be replacing the raidPtrs above,
    250    or if it should be used in conjunction with that... */
    251 
    252 struct raid_softc {
    253 	int     sc_flags;	/* flags */
    254 	int     sc_cflags;	/* configuration flags */
    255 	size_t  sc_size;        /* size of the raid device */
    256 	dev_t   sc_dev;	        /* our device.. */
    257 	char    sc_xname[20];	/* XXX external name */
    258 	struct disk sc_dkdev;	/* generic disk device info */
    259 	struct pool sc_cbufpool;	/* component buffer pool */
    260 };
    261 /* sc_flags */
    262 #define RAIDF_INITED	0x01	/* unit has been initialized */
    263 #define RAIDF_WLABEL	0x02	/* label area is writable */
    264 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    265 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    266 #define RAIDF_LOCKED	0x80	/* unit is locked */
    267 
    268 #define	raidunit(x)	DISKUNIT(x)
    269 static int numraid = 0;
    270 
    271 #define RAIDLABELDEV(dev)	\
    272 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    273 
    274 /* declared here, and made public, for the benefit of KVM stuff.. */
    275 struct raid_softc *raid_softc;
    276 
    277 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
    278 				     struct disklabel *));
    279 static void raidgetdisklabel __P((dev_t));
    280 static void raidmakedisklabel __P((struct raid_softc *));
    281 
    282 static int raidlock __P((struct raid_softc *));
    283 static void raidunlock __P((struct raid_softc *));
    284 int raidlookup __P((char *, struct proc * p, struct vnode **));
    285 
    286 
    287 void
    288 raidattach(num)
    289 	int     num;
    290 {
    291 	int     raidID;
    292 
    293 #ifdef DEBUG
    294 	printf("raidattach: Asked for %d units\n", num);
    295 #endif
    296 
    297 	if (num <= 0) {
    298 #ifdef DIAGNOSTIC
    299 		panic("raidattach: count <= 0");
    300 #endif
    301 		return;
    302 	}
    303 	/* This is where all the initialization stuff gets done. */
    304 
    305 	/* Make some space for requested number of units... */
    306 
    307 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    308 	if (raidPtrs == NULL) {
    309 		panic("raidPtrs is NULL!!\n");
    310 	}
    311 	rf_kbooted = rf_boot();
    312 	if (rf_kbooted) {
    313 		panic("Serious error booting RAID!!\n");
    314 	}
    315 	rf_kbooted = RFK_BOOT_GOOD;
    316 
     317 	/* put together some data structures like the CCD device does.. This
     318 	 * lets us lock the device and what-not when it gets opened. */
    319 
    320 	raid_softc = (struct raid_softc *)
    321 	    malloc(num * sizeof(struct raid_softc),
    322 	    M_RAIDFRAME, M_NOWAIT);
    323 	if (raid_softc == NULL) {
    324 		printf("WARNING: no memory for RAIDframe driver\n");
    325 		return;
    326 	}
    327 	numraid = num;
    328 	bzero(raid_softc, num * sizeof(struct raid_softc));
    329 
    330 	for (raidID = 0; raidID < num; raidID++) {
    331 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    332 			  (RF_Raid_t *));
    333 		if (raidPtrs[raidID] == NULL) {
    334 			printf("raidPtrs[%d] is NULL\n", raidID);
    335 		}
    336 	}
    337 }
    338 
    339 
    340 int
    341 raidsize(dev)
    342 	dev_t   dev;
    343 {
    344 	struct raid_softc *rs;
    345 	struct disklabel *lp;
    346 	int     part, unit, omask, size;
    347 
    348 	unit = raidunit(dev);
    349 	if (unit >= numraid)
    350 		return (-1);
    351 	rs = &raid_softc[unit];
    352 
    353 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    354 		return (-1);
    355 
    356 	part = DISKPART(dev);
    357 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    358 	lp = rs->sc_dkdev.dk_label;
    359 
    360 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    361 		return (-1);
    362 
    363 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    364 		size = -1;
    365 	else
    366 		size = lp->d_partitions[part].p_size *
    367 		    (lp->d_secsize / DEV_BSIZE);
    368 
    369 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    370 		return (-1);
    371 
    372 	return (size);
    373 
    374 }
    375 
    376 int
    377 raiddump(dev, blkno, va, size)
    378 	dev_t   dev;
    379 	daddr_t blkno;
    380 	caddr_t va;
    381 	size_t  size;
    382 {
    383 	/* Not implemented. */
    384 	return ENXIO;
    385 }
    386 /* ARGSUSED */
    387 int
    388 raidopen(dev, flags, fmt, p)
    389 	dev_t   dev;
    390 	int     flags, fmt;
    391 	struct proc *p;
    392 {
    393 	int     unit = raidunit(dev);
    394 	struct raid_softc *rs;
    395 	struct disklabel *lp;
    396 	int     part, pmask;
    397 	unsigned int raidID;
    398 	int     rc;
    399 	int     error = 0;
    400 
    401 	/* This whole next chunk of code is somewhat suspect... Not sure it's
    402 	 * needed here at all... XXX */
    403 
    404 	if (rf_kbooted == RFK_BOOT_NONE) {
    405 		printf("Doing restart on raidopen.\n");
    406 		rf_kbooted = RFK_BOOT_GOOD;
    407 		rc = rf_boot();
    408 		if (rc) {
    409 			rf_kbooted = RFK_BOOT_BAD;
    410 			printf("Someone is unhappy...\n");
    411 			return (rc);
    412 		}
    413 	}
    414 	if (unit >= numraid)
    415 		return (ENXIO);
    416 	rs = &raid_softc[unit];
    417 
    418 	if ((error = raidlock(rs)) != 0)
    419 		return (error);
    420 	lp = rs->sc_dkdev.dk_label;
    421 
    422 	raidID = raidunit(dev);
    423 
    424 	part = DISKPART(dev);
    425 	pmask = (1 << part);
    426 
    427 	db1_printf(("Opening raid device number: %d partition: %d\n",
    428 		raidID, part));
    429 
    430 
    431 	if ((rs->sc_flags & RAIDF_INITED) &&
    432 	    (rs->sc_dkdev.dk_openmask == 0))
    433 		raidgetdisklabel(dev);
    434 
    435 	/* make sure that this partition exists */
    436 
    437 	if (part != RAW_PART) {
    438 		db1_printf(("Not a raw partition..\n"));
    439 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    440 		    ((part >= lp->d_npartitions) ||
    441 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    442 			error = ENXIO;
    443 			raidunlock(rs);
    444 			db1_printf(("Bailing out...\n"));
    445 			return (error);
    446 		}
    447 	}
    448 	/* Prevent this unit from being unconfigured while open. */
    449 	switch (fmt) {
    450 	case S_IFCHR:
    451 		rs->sc_dkdev.dk_copenmask |= pmask;
    452 		break;
    453 
    454 	case S_IFBLK:
    455 		rs->sc_dkdev.dk_bopenmask |= pmask;
    456 		break;
    457 	}
    458 	rs->sc_dkdev.dk_openmask =
    459 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    460 
    461 	raidunlock(rs);
    462 
    463 	return (error);
    464 
    465 
    466 }
    467 /* ARGSUSED */
    468 int
    469 raidclose(dev, flags, fmt, p)
    470 	dev_t   dev;
    471 	int     flags, fmt;
    472 	struct proc *p;
    473 {
    474 	int     unit = raidunit(dev);
    475 	struct raid_softc *rs;
    476 	int     error = 0;
    477 	int     part;
    478 
    479 	if (unit >= numraid)
    480 		return (ENXIO);
    481 	rs = &raid_softc[unit];
    482 
    483 	if ((error = raidlock(rs)) != 0)
    484 		return (error);
    485 
    486 	part = DISKPART(dev);
    487 
    488 	/* ...that much closer to allowing unconfiguration... */
    489 	switch (fmt) {
    490 	case S_IFCHR:
    491 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    492 		break;
    493 
    494 	case S_IFBLK:
    495 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    496 		break;
    497 	}
    498 	rs->sc_dkdev.dk_openmask =
    499 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    500 
    501 	raidunlock(rs);
    502 	return (0);
    503 
    504 }
    505 
    506 void
    507 raidstrategy(bp)
    508 	register struct buf *bp;
    509 {
    510 	register int s;
    511 
    512 	unsigned int raidID = raidunit(bp->b_dev);
    513 	RF_Raid_t *raidPtr;
    514 	struct raid_softc *rs = &raid_softc[raidID];
    515 	struct disklabel *lp;
    516 	int     wlabel;
    517 
    518 #if 0
    519 	db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
     520 	db1_printf(("Strategy(2): bp->b_bufsize=%d\n", (int) bp->b_bufsize));
     521 	db1_printf(("bp->b_bcount=%d\n", (int) bp->b_bcount));
    522 	db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
    523 	db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
    524 
    525 	if (bp->b_flags & B_READ)
    526 		db1_printf(("READ\n"));
    527 	else
    528 		db1_printf(("WRITE\n"));
    529 #endif
     530 	if (rf_kbooted != RFK_BOOT_GOOD) {
         		bp->b_error = ENXIO;
         		bp->b_flags |= B_ERROR;
         		bp->b_resid = bp->b_bcount;
         		biodone(bp);
     531 		return;
         	}
    532 	if (raidID >= numraid || !raidPtrs[raidID]) {
    533 		bp->b_error = ENODEV;
    534 		bp->b_flags |= B_ERROR;
    535 		bp->b_resid = bp->b_bcount;
    536 		biodone(bp);
    537 		return;
    538 	}
    539 	raidPtr = raidPtrs[raidID];
    540 	if (!raidPtr->valid) {
    541 		bp->b_error = ENODEV;
    542 		bp->b_flags |= B_ERROR;
    543 		bp->b_resid = bp->b_bcount;
    544 		biodone(bp);
    545 		return;
    546 	}
    547 	if (bp->b_bcount == 0) {
    548 		db1_printf(("b_bcount is zero..\n"));
    549 		biodone(bp);
    550 		return;
    551 	}
    552 	lp = rs->sc_dkdev.dk_label;
    553 
    554 	/*
    555 	 * Do bounds checking and adjust transfer.  If there's an
    556 	 * error, the bounds check will flag that for us.
    557 	 */
    558 
    559 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    560 	if (DISKPART(bp->b_dev) != RAW_PART)
    561 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    562 			db1_printf(("Bounds check failed!!:%d %d\n",
    563 				(int) bp->b_blkno, (int) wlabel));
    564 			biodone(bp);
    565 			return;
    566 		}
    567 	s = splbio();		/* XXX Needed? */
    568 	db1_printf(("Beginning strategy...\n"));
    569 
    570 	bp->b_resid = 0;
    571 	bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
    572 	    NULL, NULL, NULL);
    573 	if (bp->b_error) {
    574 		bp->b_flags |= B_ERROR;
    575 		db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
    576 			bp->b_error));
    577 	}
    578 	splx(s);
    579 #if 0
    580 	db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
    581 		bp, bp->b_data,
    582 		(int) bp->b_bcount, (int) bp->b_resid));
    583 #endif
    584 }
    585 /* ARGSUSED */
    586 int
    587 raidread(dev, uio, flags)
    588 	dev_t   dev;
    589 	struct uio *uio;
    590 	int     flags;
    591 {
    592 	int     unit = raidunit(dev);
    593 	struct raid_softc *rs;
    594 	int     part;
    595 
    596 	if (unit >= numraid)
    597 		return (ENXIO);
    598 	rs = &raid_softc[unit];
    599 
    600 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    601 		return (ENXIO);
    602 	part = DISKPART(dev);
    603 
    604 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    605 
    606 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    607 
    608 }
    609 /* ARGSUSED */
    610 int
    611 raidwrite(dev, uio, flags)
    612 	dev_t   dev;
    613 	struct uio *uio;
    614 	int     flags;
    615 {
    616 	int     unit = raidunit(dev);
    617 	struct raid_softc *rs;
    618 
    619 	if (unit >= numraid)
    620 		return (ENXIO);
    621 	rs = &raid_softc[unit];
    622 
    623 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    624 		return (ENXIO);
    625 	db1_printf(("raidwrite\n"));
    626 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    627 
    628 }
    629 
    630 int
    631 raidioctl(dev, cmd, data, flag, p)
    632 	dev_t   dev;
    633 	u_long  cmd;
    634 	caddr_t data;
    635 	int     flag;
    636 	struct proc *p;
    637 {
    638 	int     unit = raidunit(dev);
    639 	int     error = 0;
    640 	int     part, pmask;
    641 	struct raid_softc *rs;
    642 #if 0
    643 	int     r, c;
    644 #endif
    645 	/* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
    646 
    647 	/* struct ccdbuf *cbp; */
    648 	/* struct raidbuf *raidbp; */
    649 	RF_Config_t *k_cfg, *u_cfg;
    650 	u_char *specific_buf;
    651 	int retcode = 0;
    652 	int row;
    653 	int column;
    654 	struct rf_recon_req *rrcopy, *rr;
    655 	RF_ComponentLabel_t *component_label;
    656 	RF_ComponentLabel_t ci_label;
    657 	RF_ComponentLabel_t **c_label_ptr;
    658 	RF_HotSpare_t *sparePtr;
    659 	RF_HotSpare_t hot_spare;
    660 
    661 	if (unit >= numraid)
    662 		return (ENXIO);
    663 	rs = &raid_softc[unit];
    664 
    665 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    666 		(int) DISKPART(dev), (int) unit, (int) cmd));
    667 
    668 	/* Must be open for writes for these commands... */
    669 	switch (cmd) {
    670 	case DIOCSDINFO:
    671 	case DIOCWDINFO:
    672 	case DIOCWLABEL:
    673 		if ((flag & FWRITE) == 0)
    674 			return (EBADF);
    675 	}
    676 
    677 	/* Must be initialized for these... */
    678 	switch (cmd) {
    679 	case DIOCGDINFO:
    680 	case DIOCSDINFO:
    681 	case DIOCWDINFO:
    682 	case DIOCGPART:
    683 	case DIOCWLABEL:
    684 	case DIOCGDEFLABEL:
    685 	case RAIDFRAME_SHUTDOWN:
    686 	case RAIDFRAME_REWRITEPARITY:
    687 	case RAIDFRAME_GET_INFO:
    688 	case RAIDFRAME_RESET_ACCTOTALS:
    689 	case RAIDFRAME_GET_ACCTOTALS:
    690 	case RAIDFRAME_KEEP_ACCTOTALS:
    691 	case RAIDFRAME_GET_SIZE:
    692 	case RAIDFRAME_FAIL_DISK:
    693 	case RAIDFRAME_COPYBACK:
    694 	case RAIDFRAME_CHECKRECON:
    695 	case RAIDFRAME_GET_COMPONENT_LABEL:
    696 	case RAIDFRAME_SET_COMPONENT_LABEL:
    697 	case RAIDFRAME_ADD_HOT_SPARE:
    698 	case RAIDFRAME_REMOVE_HOT_SPARE:
    699 	case RAIDFRAME_INIT_LABELS:
    700 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    701 			return (ENXIO);
    702 	}
    703 
    704 	switch (cmd) {
    705 
    706 
    707 		/* configure the system */
    708 	case RAIDFRAME_CONFIGURE:
    709 
    710 		db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
    711 		/* copy-in the configuration information */
    712 		/* data points to a pointer to the configuration structure */
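         		/* From user space this is presumably invoked roughly as
         		 *	RF_Config_t cfg, *cfg_p = &cfg;
         		 *	ioctl(fd, RAIDFRAME_CONFIGURE, &cfg_p);
         		 * i.e. the ioctl argument is the address of a pointer to a
         		 * fully filled-in RF_Config_t, which is the double
         		 * indirection expected just below. */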
    713 		u_cfg = *((RF_Config_t **) data);
    714 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    715 		if (k_cfg == NULL) {
    716 			db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
    717 			return (ENOMEM);
    718 		}
    719 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    720 		    sizeof(RF_Config_t));
     721 		if (retcode) {
     722 			db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
     723 				retcode));
         			RF_Free(k_cfg, sizeof(RF_Config_t));
     724 			return (retcode);
     725 		}
    726 		/* allocate a buffer for the layout-specific data, and copy it
    727 		 * in */
    728 		if (k_cfg->layoutSpecificSize) {
    729 			if (k_cfg->layoutSpecificSize > 10000) {
    730 				/* sanity check */
    731 				db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
    732 				return (EINVAL);
    733 			}
    734 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    735 			    (u_char *));
    736 			if (specific_buf == NULL) {
    737 				RF_Free(k_cfg, sizeof(RF_Config_t));
    738 				db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
    739 				return (ENOMEM);
    740 			}
    741 			retcode = copyin(k_cfg->layoutSpecific,
    742 			    (caddr_t) specific_buf,
    743 			    k_cfg->layoutSpecificSize);
     744 			if (retcode) {
     745 				db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
     746 					retcode));
         				RF_Free(specific_buf, k_cfg->layoutSpecificSize);
         				RF_Free(k_cfg, sizeof(RF_Config_t));
     747 				return (retcode);
     748 			}
    749 		} else
    750 			specific_buf = NULL;
    751 		k_cfg->layoutSpecific = specific_buf;
    752 
    753 		/* should do some kind of sanity check on the configuration.
    754 		 * Store the sum of all the bytes in the last byte? */
    755 
    756 #if 0
    757 		db1_printf(("Considering configuring the system.:%d 0x%x\n",
    758 			unit, p));
    759 #endif
    760 
    761 		/* We need the pointer to this a little deeper, so stash it
    762 		 * here... */
    763 
    764 		raidPtrs[unit]->proc = p;
    765 
    766 		/* configure the system */
    767 
    768 		raidPtrs[unit]->raidid = unit;
    769 		retcode = rf_Configure(raidPtrs[unit], k_cfg);
    770 
    771 
    772 		if (retcode == 0) {
    773 			retcode = raidinit(dev, raidPtrs[unit], unit);
    774 		}
    775 		/* free the buffers.  No return code here. */
    776 		if (k_cfg->layoutSpecificSize) {
    777 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    778 		}
    779 		RF_Free(k_cfg, sizeof(RF_Config_t));
    780 
    781 		db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
    782 			retcode));
    783 
    784 		return (retcode);
    785 
    786 		/* shutdown the system */
    787 	case RAIDFRAME_SHUTDOWN:
    788 
    789 		if ((error = raidlock(rs)) != 0)
    790 			return (error);
    791 
    792 		/*
    793 		 * If somebody has a partition mounted, we shouldn't
    794 		 * shutdown.
    795 		 */
    796 
    797 		part = DISKPART(dev);
    798 		pmask = (1 << part);
    799 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    800 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    801 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    802 			raidunlock(rs);
    803 			return (EBUSY);
    804 		}
    805 
    806 		if (rf_debugKernelAccess) {
    807 			printf("call shutdown\n");
    808 		}
    809 		raidPtrs[unit]->proc = p;	/* XXX  necessary evil */
    810 
    811 #if 1
    812 		raidmarkclean( raidPtrs[unit]->Disks[0][0].dev, raidPtrs[unit]->raid_cinfo[0][0].ci_vp);
    813 #endif
    814 		retcode = rf_Shutdown(raidPtrs[unit]);
    815 
    816 		db1_printf(("Done main shutdown\n"));
    817 
    818 		pool_destroy(&rs->sc_cbufpool);
    819 		db1_printf(("Done freeing component buffer freelist\n"));
    820 
    821 		/* It's no longer initialized... */
    822 		rs->sc_flags &= ~RAIDF_INITED;
    823 
    824 		/* Detach the disk. */
    825 		disk_detach(&rs->sc_dkdev);
    826 
    827 		raidunlock(rs);
    828 
    829 		return (retcode);
    830 	case RAIDFRAME_GET_COMPONENT_LABEL:
    831 		c_label_ptr = (RF_ComponentLabel_t **) data;
    832 		/* need to read the component label for the disk indicated
    833 		   by row,column in component_label
    834 		   XXX need to sanity check these values!!!
    835 		   */
    836 
     837 		/* For practice, let's get it directly from disk, rather
    838 		   than from the in-core copy */
    839 		RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
    840 			   (RF_ComponentLabel_t *));
    841 		if (component_label == NULL)
    842 			return (ENOMEM);
    843 
    844 		bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
    845 
    846 		retcode = copyin( *c_label_ptr, component_label,
    847 				  sizeof(RF_ComponentLabel_t));
    848 
     849 		if (retcode) {
         			RF_Free( component_label, sizeof(RF_ComponentLabel_t));
     850 			return(retcode);
     851 		}
    852 
    853 		row = component_label->row;
    854 		printf("Row: %d\n",row);
     855 		if (row >= raidPtrs[unit]->numRow) {
    856 			row = 0; /* XXX */
    857 		}
    858 		column = component_label->column;
    859 		printf("Column: %d\n",column);
     860 		if (column >= raidPtrs[unit]->numCol) {
    861 			column = 0; /* XXX */
    862 		}
    863 
    864 		raidread_component_label(
    865                               raidPtrs[unit]->Disks[row][column].dev,
    866 			      raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    867 			      component_label );
    868 
    869 		retcode = copyout((caddr_t) component_label,
    870 				  (caddr_t) *c_label_ptr,
    871 				  sizeof(RF_ComponentLabel_t));
    872 		RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    873 		return (retcode);
    874 
    875 	case RAIDFRAME_SET_COMPONENT_LABEL:
    876 		component_label = (RF_ComponentLabel_t *) data;
    877 
    878 		/* XXX check the label for valid stuff... */
    879 		/* Note that some things *should not* get modified --
    880 		   the user should be re-initing the labels instead of
    881 		   trying to patch things.
    882 		   */
    883 
    884 		printf("Got component label:\n");
    885 		printf("Version: %d\n",component_label->version);
    886 		printf("Serial Number: %d\n",component_label->serial_number);
    887 		printf("Mod counter: %d\n",component_label->mod_counter);
    888 		printf("Row: %d\n", component_label->row);
    889 		printf("Column: %d\n", component_label->column);
    890 		printf("Num Rows: %d\n", component_label->num_rows);
    891 		printf("Num Columns: %d\n", component_label->num_columns);
    892 		printf("Clean: %d\n", component_label->clean);
    893 		printf("Status: %d\n", component_label->status);
    894 
    895 		row = component_label->row;
    896 		printf("Row: %d\n",row);
     897 		if (row >= raidPtrs[unit]->numRow) {
    898 			row = 0; /* XXX */
    899 		}
    900 		column = component_label->column;
    901 		printf("Column: %d\n",column);
     902 		if (column >= raidPtrs[unit]->numCol) {
    903 			column = 0; /* XXX */
    904 		}
    905 		raidwrite_component_label(
    906                             raidPtrs[unit]->Disks[row][column].dev,
    907 			    raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    908 			    component_label );
    909 
    910 		return (retcode);
    911 
    912 	case RAIDFRAME_INIT_LABELS:
    913 		component_label = (RF_ComponentLabel_t *) data;
    914 		/*
    915 		   we only want the serial number from
    916 		   the above.  We get all the rest of the information
    917 		   from the config that was used to create this RAID
    918 		   set.
    919 		   */
    920 		ci_label.version = 1; /* current version number */
    921 		ci_label.serial_number = component_label->serial_number;
    922 		ci_label.mod_counter = 0; /* XXX this should be non-zero.. */
    923 		ci_label.num_rows = raidPtrs[unit]->numRow;
    924 		ci_label.num_columns = raidPtrs[unit]->numCol;
    925 		ci_label.clean = RF_RAID_DIRTY; /* not clean */
    926 		ci_label.status = rf_ds_optimal; /* "It's good!" */
    927 
    928 		for(row=0;row<raidPtrs[unit]->numRow;row++) {
    929 			ci_label.row = row;
    930 			for(column=0;column<raidPtrs[unit]->numCol;column++) {
    931 				ci_label.column = column;
    932 				raidwrite_component_label(
    933 				  raidPtrs[unit]->Disks[row][column].dev,
    934 				  raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    935 				  &ci_label );
    936 			}
    937 		}
    938 
    939 		return (retcode);
    940 
    941 		/* initialize all parity */
    942 	case RAIDFRAME_REWRITEPARITY:
    943 
    944 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0)
    945 			return (EINVAL);
    946 		/* borrow the thread of the requesting process */
    947 		raidPtrs[unit]->proc = p;	/* Blah... :-p GO */
    948 		retcode = rf_RewriteParity(raidPtrs[unit]);
    949 		/* return I/O Error if the parity rewrite fails */
    950 
    951 		if (retcode) {
    952 			retcode = EIO;
    953 		} else {
    954 			/* XXX set the clean bit! */
    955 		}
    956 		return (retcode);
    957 
    958 
    959 	case RAIDFRAME_ADD_HOT_SPARE:
    960 		sparePtr = (RF_HotSpare_t *) data;
    961 #if 0
    962 		retcode = copyin( sparePtr, &hot_spare, sizeof(RF_HotSpare_t));
    963 #endif
    964 		memcpy( &hot_spare, sparePtr, sizeof(RF_HotSpare_t));
    965 		if (!retcode) {
    966 			printf("Adding spare\n");
    967 			retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
    968 		}
    969 		return(retcode);
    970 
    971 	case RAIDFRAME_REMOVE_HOT_SPARE:
    972 		return(retcode);
    973 
    974 		/* issue a test-unit-ready through raidframe to the indicated
    975 		 * device */
    976 #if 0				/* XXX not supported yet (ever?) */
    977 	case RAIDFRAME_TUR:
    978 		/* debug only */
    979 		retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
    980 		return (retcode);
    981 #endif
    982 	case RAIDFRAME_GET_INFO:
    983 		{
    984 			RF_Raid_t *raid = raidPtrs[unit];
    985 			RF_DeviceConfig_t *cfg, **ucfgp;
    986 			int     i, j, d;
    987 
    988 			if (!raid->valid)
    989 				return (ENODEV);
    990 			ucfgp = (RF_DeviceConfig_t **) data;
    991 			RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
    992 				  (RF_DeviceConfig_t *));
    993 			if (cfg == NULL)
    994 				return (ENOMEM);
    995 			bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
    996 			cfg->rows = raid->numRow;
    997 			cfg->cols = raid->numCol;
    998 			cfg->ndevs = raid->numRow * raid->numCol;
     999 			if (cfg->ndevs >= RF_MAX_DISKS) {
    1000 				cfg->ndevs = 0;
         				RF_Free(cfg, sizeof(RF_DeviceConfig_t));
    1001 				return (ENOMEM);
    1002 			}
   1003 			cfg->nspares = raid->numSpare;
    1004 			if (cfg->nspares >= RF_MAX_DISKS) {
    1005 				cfg->nspares = 0;
         				RF_Free(cfg, sizeof(RF_DeviceConfig_t));
    1006 				return (ENOMEM);
    1007 			}
   1008 			cfg->maxqdepth = raid->maxQueueDepth;
   1009 			d = 0;
   1010 			for (i = 0; i < cfg->rows; i++) {
   1011 				for (j = 0; j < cfg->cols; j++) {
   1012 					cfg->devs[d] = raid->Disks[i][j];
   1013 					d++;
   1014 				}
   1015 			}
   1016 			for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
   1017 				cfg->spares[i] = raid->Disks[0][j];
   1018 			}
   1019 			retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
   1020 					  sizeof(RF_DeviceConfig_t));
   1021 			RF_Free(cfg, sizeof(RF_DeviceConfig_t));
   1022 
   1023 			return (retcode);
   1024 		}
   1025 		break;
   1026 
   1027 	case RAIDFRAME_RESET_ACCTOTALS:
   1028 		{
   1029 			RF_Raid_t *raid = raidPtrs[unit];
   1030 
   1031 			bzero(&raid->acc_totals, sizeof(raid->acc_totals));
   1032 			return (0);
   1033 		}
   1034 		break;
   1035 
   1036 	case RAIDFRAME_GET_ACCTOTALS:
   1037 		{
   1038 			RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
   1039 			RF_Raid_t *raid = raidPtrs[unit];
   1040 
   1041 			*totals = raid->acc_totals;
   1042 			return (0);
   1043 		}
   1044 		break;
   1045 
   1046 	case RAIDFRAME_KEEP_ACCTOTALS:
   1047 		{
   1048 			RF_Raid_t *raid = raidPtrs[unit];
   1049 			int    *keep = (int *) data;
   1050 
   1051 			raid->keep_acc_totals = *keep;
   1052 			return (0);
   1053 		}
   1054 		break;
   1055 
   1056 	case RAIDFRAME_GET_SIZE:
   1057 		*(int *) data = raidPtrs[unit]->totalSectors;
   1058 		return (0);
   1059 
   1060 #define RAIDFRAME_RECON 1
   1061 		/* XXX The above should probably be set somewhere else!! GO */
   1062 #if RAIDFRAME_RECON > 0
   1063 
   1064 		/* fail a disk & optionally start reconstruction */
   1065 	case RAIDFRAME_FAIL_DISK:
   1066 		rr = (struct rf_recon_req *) data;
   1067 
   1068 		if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
   1069 		    || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
   1070 			return (EINVAL);
   1071 
   1072 		printf("Failing the disk: row: %d col: %d\n", rr->row, rr->col);
   1073 
   1074 		/* make a copy of the recon request so that we don't rely on
   1075 		 * the user's buffer */
   1076 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1077 		bcopy(rr, rrcopy, sizeof(*rr));
   1078 		rrcopy->raidPtr = (void *) raidPtrs[unit];
   1079 
   1080 		LOCK_RECON_Q_MUTEX();
   1081 		rrcopy->next = recon_queue;
   1082 		recon_queue = rrcopy;
   1083 		wakeup(&recon_queue);
   1084 		UNLOCK_RECON_Q_MUTEX();
   1085 
   1086 		return (0);
   1087 
   1088 		/* invoke a copyback operation after recon on whatever disk
   1089 		 * needs it, if any */
   1090 	case RAIDFRAME_COPYBACK:
   1091 		/* borrow the current thread to get this done */
   1092 		raidPtrs[unit]->proc = p;	/* ICK.. but needed :-p  GO */
   1093 		rf_CopybackReconstructedData(raidPtrs[unit]);
   1094 		return (0);
   1095 
   1096 		/* return the percentage completion of reconstruction */
   1097 	case RAIDFRAME_CHECKRECON:
   1098 		row = *(int *) data;
   1099 		if (row < 0 || row >= raidPtrs[unit]->numRow)
   1100 			return (EINVAL);
   1101 		if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
   1102 			*(int *) data = 100;
   1103 		else
   1104 			*(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
   1105 		return (0);
   1106 
   1107 		/* the sparetable daemon calls this to wait for the kernel to
   1108 		 * need a spare table. this ioctl does not return until a
   1109 		 * spare table is needed. XXX -- calling mpsleep here in the
   1110 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1111 		 * -- I should either compute the spare table in the kernel,
   1112 		 * or have a different -- XXX XXX -- interface (a different
   1113 		 * character device) for delivering the table          -- XXX */
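         		/* The intended protocol (the ioctls below are compiled out in
         		 * this revision) is roughly: the user-level spare-table daemon
         		 * blocks in RAIDFRAME_SPARET_WAIT until a reconstruction needs
         		 * a table, computes it, and delivers it with
         		 * RAIDFRAME_SEND_SPARET; RAIDFRAME_ABORT_SPARET_WAIT wakes the
         		 * waiting daemon with fcol = -1 so that it can exit. */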
   1114 #if 0
   1115 	case RAIDFRAME_SPARET_WAIT:
   1116 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1117 		while (!rf_sparet_wait_queue)
   1118 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1119 		waitreq = rf_sparet_wait_queue;
   1120 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1121 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1122 
   1123 		*((RF_SparetWait_t *) data) = *waitreq;	/* structure assignment */
   1124 
   1125 		RF_Free(waitreq, sizeof(*waitreq));
   1126 		return (0);
   1127 
   1128 
   1129 		/* wakes up a process waiting on SPARET_WAIT and puts an error
    1130 		 * code in it that will cause the daemon to exit */
   1131 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1132 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1133 		waitreq->fcol = -1;
   1134 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1135 		waitreq->next = rf_sparet_wait_queue;
   1136 		rf_sparet_wait_queue = waitreq;
   1137 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1138 		wakeup(&rf_sparet_wait_queue);
   1139 		return (0);
   1140 
   1141 		/* used by the spare table daemon to deliver a spare table
   1142 		 * into the kernel */
   1143 	case RAIDFRAME_SEND_SPARET:
   1144 
   1145 		/* install the spare table */
   1146 		retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
   1147 
   1148 		/* respond to the requestor.  the return status of the spare
   1149 		 * table installation is passed in the "fcol" field */
   1150 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1151 		waitreq->fcol = retcode;
   1152 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1153 		waitreq->next = rf_sparet_resp_queue;
   1154 		rf_sparet_resp_queue = waitreq;
   1155 		wakeup(&rf_sparet_resp_queue);
   1156 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1157 
   1158 		return (retcode);
   1159 #endif
   1160 
   1161 
   1162 #endif				/* RAIDFRAME_RECON > 0 */
   1163 
   1164 	default:
   1165 		break;		/* fall through to the os-specific code below */
   1166 
   1167 	}
   1168 
   1169 	if (!raidPtrs[unit]->valid)
   1170 		return (EINVAL);
   1171 
   1172 	/*
   1173 	 * Add support for "regular" device ioctls here.
   1174 	 */
   1175 
   1176 	switch (cmd) {
   1177 	case DIOCGDINFO:
   1178 		db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
   1179 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1180 		break;
   1181 
   1182 	case DIOCGPART:
   1183 		db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
   1184 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1185 		((struct partinfo *) data)->part =
   1186 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1187 		break;
   1188 
   1189 	case DIOCWDINFO:
   1190 		db1_printf(("DIOCWDINFO\n"));
   1191 	case DIOCSDINFO:
   1192 		db1_printf(("DIOCSDINFO\n"));
   1193 		if ((error = raidlock(rs)) != 0)
   1194 			return (error);
   1195 
   1196 		rs->sc_flags |= RAIDF_LABELLING;
   1197 
   1198 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1199 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1200 		if (error == 0) {
   1201 			if (cmd == DIOCWDINFO)
   1202 				error = writedisklabel(RAIDLABELDEV(dev),
   1203 				    raidstrategy, rs->sc_dkdev.dk_label,
   1204 				    rs->sc_dkdev.dk_cpulabel);
   1205 		}
   1206 		rs->sc_flags &= ~RAIDF_LABELLING;
   1207 
   1208 		raidunlock(rs);
   1209 
   1210 		if (error)
   1211 			return (error);
   1212 		break;
   1213 
   1214 	case DIOCWLABEL:
   1215 		db1_printf(("DIOCWLABEL\n"));
   1216 		if (*(int *) data != 0)
   1217 			rs->sc_flags |= RAIDF_WLABEL;
   1218 		else
   1219 			rs->sc_flags &= ~RAIDF_WLABEL;
   1220 		break;
   1221 
   1222 	case DIOCGDEFLABEL:
   1223 		db1_printf(("DIOCGDEFLABEL\n"));
   1224 		raidgetdefaultlabel(raidPtrs[unit], rs,
   1225 		    (struct disklabel *) data);
   1226 		break;
   1227 
   1228 	default:
   1229 		retcode = ENOTTY;	/* XXXX ?? OR EINVAL ? */
   1230 	}
   1231 	return (retcode);
   1232 
   1233 }
   1234 
   1235 
   1236 /* raidinit -- complete the rest of the initialization for the
   1237    RAIDframe device.  */
   1238 
   1239 
   1240 static int
   1241 raidinit(dev, raidPtr, unit)
   1242 	dev_t   dev;
   1243 	RF_Raid_t *raidPtr;
   1244 	int     unit;
   1245 {
   1246 	int     retcode;
   1247 	/* int ix; */
   1248 	/* struct raidbuf *raidbp; */
   1249 	struct raid_softc *rs;
   1250 
   1251 	retcode = 0;
   1252 
   1253 	rs = &raid_softc[unit];
   1254 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1255 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1256 
   1257 
   1258 	/* XXX should check return code first... */
   1259 	rs->sc_flags |= RAIDF_INITED;
   1260 
   1261 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1262 
   1263 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1264 
   1265 	/* disk_attach actually creates space for the CPU disklabel, among
   1266 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1267 	 * with disklabels. */
   1268 
   1269 	disk_attach(&rs->sc_dkdev);
   1270 
   1271 	/* XXX There may be a weird interaction here between this, and
   1272 	 * protectedSectors, as used in RAIDframe.  */
   1273 
   1274 	rs->sc_size = raidPtr->totalSectors;
   1275 	rs->sc_dev = dev;
   1276 
   1277 	return (retcode);
   1278 }
   1279 
   1280 
   1281 /*********************************************************
   1282  *
   1283  * initialization code called at boot time (startup.c)
   1284  *
   1285  ********************************************************/
   1286 int
   1287 rf_boot()
   1288 {
   1289 	int     i, rc;
   1290 
   1291 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
   1292 	if (rc) {
   1293 		RF_PANIC();
   1294 	}
   1295 
   1296 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
   1297 	recon_queue = NULL;
   1298 
   1299 	for (i = 0; i < numraid; i++)
   1300 		raidPtrs[i] = NULL;
   1301 	rc = rf_BootRaidframe();
   1302 	if (rc == 0)
   1303 		printf("Kernelized RAIDframe activated\n");
   1304 	else
   1305 		rf_kbooted = RFK_BOOT_BAD;
   1306 	return (rc);
   1307 }
   1308 
   1309 /*
   1310  * This kernel thread never exits.  It is created once, and persists
   1311  * until the system reboots.
   1312  */
   1313 
   1314 void
   1315 rf_ReconKernelThread()
   1316 {
   1317 	struct rf_recon_req *req;
   1318 	int     s;
   1319 
   1320 	/* XXX not sure what spl() level we should be at here... probably
   1321 	 * splbio() */
   1322 	s = splbio();
   1323 
   1324 	while (1) {
   1325 		/* grab the next reconstruction request from the queue */
   1326 		LOCK_RECON_Q_MUTEX();
   1327 		while (!recon_queue) {
   1328 			UNLOCK_RECON_Q_MUTEX();
   1329 			tsleep(&recon_queue, PRIBIO | PCATCH,
   1330 			       "raidframe recon", 0);
   1331 			LOCK_RECON_Q_MUTEX();
   1332 		}
   1333 		req = recon_queue;
   1334 		recon_queue = recon_queue->next;
   1335 		UNLOCK_RECON_Q_MUTEX();
   1336 
   1337 		/*
   1338 	         * If flags specifies that we should start recon, this call
   1339 	         * will not return until reconstruction completes, fails,
   1340 		 * or is aborted.
   1341 	         */
   1342 		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   1343 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   1344 
   1345 		RF_Free(req, sizeof(*req));
   1346 	}
   1347 }
   1348 /* wake up the daemon & tell it to get us a spare table
   1349  * XXX
   1350  * the entries in the queues should be tagged with the raidPtr
   1351  * so that in the extremely rare case that two recons happen at once,
    1352  * we know for which device we're requesting a spare table
   1353  * XXX
   1354  */
   1355 int
   1356 rf_GetSpareTableFromDaemon(req)
   1357 	RF_SparetWait_t *req;
   1358 {
   1359 	int     retcode;
   1360 
   1361 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1362 	req->next = rf_sparet_wait_queue;
   1363 	rf_sparet_wait_queue = req;
   1364 	wakeup(&rf_sparet_wait_queue);
   1365 
   1366 	/* mpsleep unlocks the mutex */
   1367 	while (!rf_sparet_resp_queue) {
   1368 		tsleep(&rf_sparet_resp_queue, PRIBIO | PCATCH,
   1369 		    "raidframe getsparetable", 0);
   1370 #if 0
   1371 		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
   1372 			(void *) simple_lock_addr(rf_sparet_wait_mutex),
   1373 			MS_LOCK_SIMPLE);
   1374 #endif
   1375 	}
   1376 	req = rf_sparet_resp_queue;
   1377 	rf_sparet_resp_queue = req->next;
   1378 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1379 
   1380 	retcode = req->fcol;
   1381 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1382 					 * alloc'd */
   1383 	return (retcode);
   1384 }
   1385 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1386  * bp & passes it down.
    1387  * Any calls originating in the kernel must use non-blocking I/O.
    1388  * Do some extra sanity checking to return "appropriate" error values for
    1389  * certain conditions (to make some standard utilities work).
   1390  */
   1391 int
   1392 rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
   1393 	RF_Raid_t *raidPtr;
   1394 	struct buf *bp;
   1395 	RF_RaidAccessFlags_t flags;
   1396 	void    (*cbFunc) (struct buf *);
   1397 	void   *cbArg;
   1398 {
   1399 	RF_SectorCount_t num_blocks, pb, sum;
   1400 	RF_RaidAddr_t raid_addr;
   1401 	int     retcode;
   1402 	struct partition *pp;
   1403 	daddr_t blocknum;
   1404 	int     unit;
   1405 	struct raid_softc *rs;
   1406 	int     do_async;
   1407 
   1408 	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */
   1409 
   1410 	unit = raidPtr->raidid;
   1411 	rs = &raid_softc[unit];
   1412 
   1413 	/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1414 	 * partition.. Need to make it absolute to the underlying device.. */
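         	/* For example (hypothetical numbers): an I/O at b_blkno 10 on a
         	 * partition whose p_offset is 64 goes to RAIDframe at raid_addr 74
         	 * below; on the raw partition b_blkno is used unchanged. */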
   1415 
   1416 	blocknum = bp->b_blkno;
   1417 	if (DISKPART(bp->b_dev) != RAW_PART) {
   1418 		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1419 		blocknum += pp->p_offset;
   1420 		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
   1421 			pp->p_offset));
   1422 	} else {
   1423 		db1_printf(("Is raw..\n"));
   1424 	}
   1425 	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));
   1426 
   1427 	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1428 	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1429 
   1430 	/* *THIS* is where we adjust what block we're going to... but DO NOT
   1431 	 * TOUCH bp->b_blkno!!! */
   1432 	raid_addr = blocknum;
   1433 
   1434 	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1435 	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1436 	sum = raid_addr + num_blocks + pb;
   1437 	if (1 || rf_debugKernelAccess) {
   1438 		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1439 			(int) raid_addr, (int) sum, (int) num_blocks,
   1440 			(int) pb, (int) bp->b_resid));
   1441 	}
   1442 	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1443 	    || (sum < num_blocks) || (sum < pb)) {
   1444 		bp->b_error = ENOSPC;
   1445 		bp->b_flags |= B_ERROR;
   1446 		bp->b_resid = bp->b_bcount;
   1447 		biodone(bp);
   1448 		return (bp->b_error);
   1449 	}
   1450 	/*
   1451 	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1452 	 */
   1453 
   1454 	if (bp->b_bcount & raidPtr->sectorMask) {
   1455 		bp->b_error = EINVAL;
   1456 		bp->b_flags |= B_ERROR;
   1457 		bp->b_resid = bp->b_bcount;
   1458 		biodone(bp);
   1459 		return (bp->b_error);
   1460 	}
   1461 	db1_printf(("Calling DoAccess..\n"));
   1462 
   1463 	/*
   1464 	 * XXX For now, all writes are sync
   1465 	 */
   1466 	do_async = 1;
   1467 	if ((bp->b_flags & B_READ) == 0)
   1468 		do_async = 0;
   1469 
   1470 	/* don't ever condition on bp->b_flags & B_WRITE.  always condition on
   1471 	 * B_READ instead */
   1472 	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1473 	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1474 	    do_async, raid_addr, num_blocks,
   1475 	    bp->b_un.b_addr,
   1476 	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
   1477 	    NULL, cbFunc, cbArg);
   1478 #if 0
   1479 	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
   1480 		bp->b_data, (int) bp->b_resid));
   1481 #endif
   1482 
   1483 	/*
   1484 	 * If we requested sync I/O, sleep here.
   1485 	 */
   1486 	if ((retcode == 0) && (do_async == 0))
   1487 		tsleep(bp, PRIBIO, "raidsyncio", 0);
   1488 
   1489 	return (retcode);
   1490 }
   1491 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1492 
   1493 int
   1494 rf_DispatchKernelIO(queue, req)
   1495 	RF_DiskQueue_t *queue;
   1496 	RF_DiskQueueData_t *req;
   1497 {
   1498 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1499 	struct buf *bp;
   1500 	struct raidbuf *raidbp = NULL;
   1501 	struct raid_softc *rs;
   1502 	int     unit;
   1503 
   1504 	/* XXX along with the vnode, we also need the softc associated with
   1505 	 * this device.. */
   1506 
   1507 	req->queue = queue;
   1508 
   1509 	unit = queue->raidPtr->raidid;
   1510 
   1511 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1512 
   1513 	if (unit >= numraid) {
   1514 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1515 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1516 	}
   1517 	rs = &raid_softc[unit];
   1518 
   1519 	/* XXX is this the right place? */
   1520 	disk_busy(&rs->sc_dkdev);
   1521 
   1522 	bp = req->bp;
   1523 
   1524 	/* XXX when there is a physical disk failure, someone is passing us a
   1525 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1526 	 * without taking a performance hit... (not sure where the real bug
   1527 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1528 
   1529 	if (bp->b_flags & B_ERROR) {
   1530 		bp->b_flags &= ~B_ERROR;
   1531 	}
   1532 	if (bp->b_error != 0) {
   1533 		bp->b_error = 0;
   1534 	}
   1535 	raidbp = RAIDGETBUF(rs);
   1536 
   1537 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1538 
   1539 	/*
   1540 	 * context for raidiodone
   1541 	 */
   1542 	raidbp->rf_obp = bp;
   1543 	raidbp->req = req;
   1544 
   1545 	switch (req->type) {
   1546 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1547 		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
   1548 		 * queue->row, queue->col); */
   1549 		/* XXX need to do something extra here.. */
   1550 		/* I'm leaving this in, as I've never actually seen it used,
   1551 		 * and I'd like folks to report it... GO */
    1552 		printf("WAKEUP CALLED\n");
   1553 		queue->numOutstanding++;
   1554 
   1555 		/* XXX need to glue the original buffer into this??  */
   1556 
   1557 		KernelWakeupFunc(&raidbp->rf_buf);
   1558 		break;
   1559 
   1560 	case RF_IO_TYPE_READ:
   1561 	case RF_IO_TYPE_WRITE:
   1562 
   1563 		if (req->tracerec) {
   1564 			RF_ETIMER_START(req->tracerec->timer);
   1565 		}
   1566 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1567 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1568 		    req->sectorOffset, req->numSector,
   1569 		    req->buf, KernelWakeupFunc, (void *) req,
   1570 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1571 
   1572 		if (rf_debugKernelAccess) {
   1573 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1574 				(long) bp->b_blkno));
   1575 		}
   1576 		queue->numOutstanding++;
   1577 		queue->last_deq_sector = req->sectorOffset;
   1578 		/* acc wouldn't have been let in if there were any pending
   1579 		 * reqs at any other priority */
   1580 		queue->curPriority = req->priority;
   1581 		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
   1582 		 * req->type, queue->row, queue->col); */
   1583 
   1584 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1585 			req->type, unit, queue->row, queue->col));
   1586 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1587 			(int) req->sectorOffset, (int) req->numSector,
   1588 			(int) (req->numSector <<
   1589 			    queue->raidPtr->logBytesPerSector),
   1590 			(int) queue->raidPtr->logBytesPerSector));
   1591 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1592 			raidbp->rf_buf.b_vp->v_numoutput++;
   1593 		}
   1594 		VOP_STRATEGY(&raidbp->rf_buf);
   1595 
   1596 		break;
   1597 
   1598 	default:
   1599 		panic("bad req->type in rf_DispatchKernelIO");
   1600 	}
   1601 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1602 	return (0);
   1603 }
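         /*
          * Overall I/O lifecycle, summarizing the code above and below:
          * rf_DispatchKernelIO() wraps the original buffer in a struct raidbuf,
          * initializes it with InitBP(), and hands it to the component driver
          * via VOP_STRATEGY().  Since InitBP() sets B_CALL and points b_iodone
          * at KernelWakeupFunc, biodone() on the component I/O runs
          * KernelWakeupFunc(), which propagates any error back to the original
          * buffer, updates the disk statistics, calls rf_DiskIOComplete(), and
          * finally invokes the request's CompleteFunc.
          */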
    1604 /* This is the callback function associated with an I/O invoked from
    1605  * kernel code.
    1606  */
   1607 static void
   1608 KernelWakeupFunc(vbp)
   1609 	struct buf *vbp;
   1610 {
   1611 	RF_DiskQueueData_t *req = NULL;
   1612 	RF_DiskQueue_t *queue;
   1613 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1614 	struct buf *bp;
   1615 	struct raid_softc *rs;
   1616 	int     unit;
   1617 	register int s;
   1618 
   1619 	s = splbio();		/* XXX */
   1620 	db1_printf(("recovering the request queue:\n"));
   1621 	req = raidbp->req;
   1622 
   1623 	bp = raidbp->rf_obp;
   1624 #if 0
   1625 	db1_printf(("bp=0x%x\n", bp));
   1626 #endif
   1627 
   1628 	queue = (RF_DiskQueue_t *) req->queue;
   1629 
   1630 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1631 #if 0
   1632 		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
   1633 #endif
   1634 		bp->b_flags |= B_ERROR;
   1635 		bp->b_error = raidbp->rf_buf.b_error ?
   1636 		    raidbp->rf_buf.b_error : EIO;
   1637 	}
   1638 #if 0
   1639 	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
   1640 	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
   1641 	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
   1642 	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
   1643 #endif
   1644 
   1645 	/* XXX methinks this could be wrong... */
   1646 #if 1
   1647 	bp->b_resid = raidbp->rf_buf.b_resid;
   1648 #endif
   1649 
   1650 	if (req->tracerec) {
   1651 		RF_ETIMER_STOP(req->tracerec->timer);
   1652 		RF_ETIMER_EVAL(req->tracerec->timer);
   1653 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1654 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1655 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1656 		req->tracerec->num_phys_ios++;
   1657 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1658 	}
   1659 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1660 
   1661 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1662 
   1663 
   1664 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1665 	 * ballistic, and mark the component as hosed... */
   1666 #if 1
   1667 	if (bp->b_flags & B_ERROR) {
   1668 		/* Mark the disk as dead */
   1669 		/* but only mark it once... */
   1670 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1671 		    rf_ds_optimal) {
   1672 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1673 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1674 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1675 			    rf_ds_failed;
   1676 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1677 			queue->raidPtr->numFailures++;
   1678 			/* XXX here we should bump the version number for each component, and write that data out */
   1679 		} else {	/* Disk is already dead... */
   1680 			/* printf("Disk already marked as dead!\n"); */
   1681 		}
   1682 
   1683 	}
   1684 #endif
   1685 
   1686 	rs = &raid_softc[unit];
   1687 	RAIDPUTBUF(rs, raidbp);
   1688 
   1689 
   1690 	if (bp->b_resid == 0) {
   1691 		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
   1692 			unit, bp->b_resid, bp->b_bcount));
   1693 		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
   1694 		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
   1695 	} else {
   1696 		db1_printf(("b_resid is still %ld\n", bp->b_resid));
   1697 	}
   1698 
   1699 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1700 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1701 	/* printf("Exiting KernelWakeupFunc\n"); */
   1702 
   1703 	splx(s);		/* XXX */
   1704 }
   1705 
   1706 
   1707 
   1708 /*
   1709  * initialize a buf structure for doing an I/O in the kernel.
   1710  */
   1711 static void
   1712 InitBP(
   1713     struct buf * bp,
   1714     struct vnode * b_vp,
   1715     unsigned rw_flag,
   1716     dev_t dev,
   1717     RF_SectorNum_t startSect,
   1718     RF_SectorCount_t numSect,
   1719     caddr_t buf,
   1720     void (*cbFunc) (struct buf *),
   1721     void *cbArg,
   1722     int logBytesPerSector,
   1723     struct proc * b_proc)
   1724 {
   1725 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1726 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1727 	bp->b_bcount = numSect << logBytesPerSector;
   1728 	bp->b_bufsize = bp->b_bcount;
   1729 	bp->b_error = 0;
   1730 	bp->b_dev = dev;
   1731 	db1_printf(("bp->b_dev is %d\n", dev));
   1732 	bp->b_un.b_addr = buf;
   1733 #if 0
   1734 	db1_printf(("bp->b_data=0x%x\n", bp->b_data));
   1735 #endif
   1736 
   1737 	bp->b_blkno = startSect;
   1738 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1739 	db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
   1740 	if (bp->b_bcount == 0) {
   1741 		panic("bp->b_bcount is zero in InitBP!!\n");
   1742 	}
   1743 	bp->b_proc = b_proc;
   1744 	bp->b_iodone = cbFunc;
   1745 	bp->b_vp = b_vp;
   1746 
   1747 }
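         /*
          * Illustrative sketch (not compiled): a typical caller, such as
          * rf_DispatchKernelIO() above, sets up and fires the buffer like this:
          *
          *	InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
          *	    op | bp->b_flags, queue->rf_cinfo->ci_dev,
          *	    req->sectorOffset, req->numSector,
          *	    req->buf, KernelWakeupFunc, (void *) req,
          *	    queue->raidPtr->logBytesPerSector, req->b_proc);
          *	VOP_STRATEGY(&raidbp->rf_buf);
          *
          * KernelWakeupFunc() is then called (via biodone()) once the
          * component I/O finishes.
          */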
   1748 /* Extras... */
   1749 
   1750 unsigned int
   1751 rpcc()
   1752 {
    1753 	/* XXX stub.  Presumably meant to read the CPU cycle counter
    1754 	 * (cf. the Alpha rpcc instruction); nothing portable here, so 0. */
   1755 	/* db1_printf("this is supposed to do something useful too!??\n"); */
   1756 	return (0);
   1757 }
   1758 #if 0
   1759 int
   1760 rf_GetSpareTableFromDaemon(req)
   1761 	RF_SparetWait_t *req;
   1762 {
   1763 	int     retcode = 1;
   1764 	printf("This is supposed to do something useful!!\n");	/* XXX */
   1765 
   1766 	return (retcode);
   1767 
   1768 }
   1769 #endif
   1770 
   1771 static void
   1772 raidgetdefaultlabel(raidPtr, rs, lp)
   1773 	RF_Raid_t *raidPtr;
   1774 	struct raid_softc *rs;
   1775 	struct disklabel *lp;
   1776 {
   1777 	db1_printf(("Building a default label...\n"));
   1778 	bzero(lp, sizeof(*lp));
   1779 
   1780 	/* fabricate a label... */
   1781 	lp->d_secperunit = raidPtr->totalSectors;
   1782 	lp->d_secsize = raidPtr->bytesPerSector;
   1783 	lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
   1784 	lp->d_ntracks = 1;
   1785 	lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
   1786 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1787 
   1788 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1789 	lp->d_type = DTYPE_RAID;
   1790 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1791 	lp->d_rpm = 3600;
   1792 	lp->d_interleave = 1;
   1793 	lp->d_flags = 0;
   1794 
   1795 	lp->d_partitions[RAW_PART].p_offset = 0;
   1796 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1797 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1798 	lp->d_npartitions = RAW_PART + 1;
   1799 
   1800 	lp->d_magic = DISKMAGIC;
   1801 	lp->d_magic2 = DISKMAGIC;
   1802 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1803 
   1804 }
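         /*
          * Worked example of the fabricated geometry (hypothetical numbers):
          * with 512-byte sectors, d_nsectors = 1024 * (1024 / 512) = 2048,
          * i.e. one "track" is 1MB; with d_ntracks = 1, d_secpercyl is also
          * 2048, and a set of, say, 8388608 total sectors (4GB) gets
          * d_ncylinders = 8388608 / 2048 = 4096.  The geometry is purely
          * synthetic; only the total size and the sector size are meaningful.
          */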
   1805 /*
   1806  * Read the disklabel from the raid device.  If one is not present, fake one
   1807  * up.
   1808  */
   1809 static void
   1810 raidgetdisklabel(dev)
   1811 	dev_t   dev;
   1812 {
   1813 	int     unit = raidunit(dev);
   1814 	struct raid_softc *rs = &raid_softc[unit];
   1815 	char   *errstring;
   1816 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1817 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1818 	RF_Raid_t *raidPtr;
   1819 
   1820 	db1_printf(("Getting the disklabel...\n"));
   1821 
   1822 	bzero(clp, sizeof(*clp));
   1823 
   1824 	raidPtr = raidPtrs[unit];
   1825 
   1826 	raidgetdefaultlabel(raidPtr, rs, lp);
   1827 
   1828 	/*
   1829 	 * Call the generic disklabel extraction routine.
   1830 	 */
   1831 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1832 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1833 	if (errstring)
   1834 		raidmakedisklabel(rs);
   1835 	else {
   1836 		int     i;
   1837 		struct partition *pp;
   1838 
    1839 		/*
    1840 		 * Sanity check whether the found disklabel is valid.
    1841 		 *
    1842 		 * This is necessary since the total size of the raid device
    1843 		 * may vary when the interleave is changed, even though exactly
    1844 		 * the same components are used, and an old disklabel may be
    1845 		 * used if one is found.
    1846 		 */
   1847 		if (lp->d_secperunit != rs->sc_size)
   1848 			printf("WARNING: %s: "
    1849 			    "number of sectors in disklabel (%d) != "
   1850 			    "the size of raid (%d)\n", rs->sc_xname,
   1851 			    lp->d_secperunit, rs->sc_size);
   1852 		for (i = 0; i < lp->d_npartitions; i++) {
   1853 			pp = &lp->d_partitions[i];
   1854 			if (pp->p_offset + pp->p_size > rs->sc_size)
   1855 				printf("WARNING: %s: end of partition `%c' "
   1856 				    "exceeds the size of raid (%d)\n",
   1857 				    rs->sc_xname, 'a' + i, rs->sc_size);
   1858 		}
   1859 	}
   1860 
   1861 }
   1862 /*
   1863  * Take care of things one might want to take care of in the event
   1864  * that a disklabel isn't present.
   1865  */
   1866 static void
   1867 raidmakedisklabel(rs)
   1868 	struct raid_softc *rs;
   1869 {
   1870 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1871 	db1_printf(("Making a label..\n"));
   1872 
   1873 	/*
   1874 	 * For historical reasons, if there's no disklabel present
   1875 	 * the raw partition must be marked FS_BSDFFS.
   1876 	 */
   1877 
   1878 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1879 
   1880 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1881 
   1882 	lp->d_checksum = dkcksum(lp);
   1883 }
   1884 /*
    1885  * Look up the provided name in the filesystem.  If the file exists,
   1886  * is a valid block device, and isn't being used by anyone else,
   1887  * set *vpp to the file's vnode.
   1888  * You'll find the original of this in ccd.c
   1889  */
   1890 int
   1891 raidlookup(path, p, vpp)
   1892 	char   *path;
   1893 	struct proc *p;
   1894 	struct vnode **vpp;	/* result */
   1895 {
   1896 	struct nameidata nd;
   1897 	struct vnode *vp;
   1898 	struct vattr va;
   1899 	int     error;
   1900 
   1901 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   1902 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   1903 #ifdef DEBUG
   1904 		printf("RAIDframe: vn_open returned %d\n", error);
   1905 #endif
   1906 		return (error);
   1907 	}
   1908 	vp = nd.ni_vp;
   1909 	if (vp->v_usecount > 1) {
   1910 		VOP_UNLOCK(vp, 0);
   1911 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1912 		return (EBUSY);
   1913 	}
   1914 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   1915 		VOP_UNLOCK(vp, 0);
   1916 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1917 		return (error);
   1918 	}
   1919 	/* XXX: eventually we should handle VREG, too. */
   1920 	if (va.va_type != VBLK) {
   1921 		VOP_UNLOCK(vp, 0);
   1922 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1923 		return (ENOTBLK);
   1924 	}
   1925 	VOP_UNLOCK(vp, 0);
   1926 	*vpp = vp;
   1927 	return (0);
   1928 }
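         /*
          * Illustrative sketch (hypothetical caller): configuration code
          * would typically use this as
          *
          *	struct vnode *vp;
          *	if ((error = raidlookup(component_name, p, &vp)) != 0)
          *		return (error);
          *
          * and is then responsible for vn_close()ing the vnode once the
          * component is no longer in use.
          */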
   1929 /*
   1930  * Wait interruptibly for an exclusive lock.
   1931  *
   1932  * XXX
   1933  * Several drivers do this; it should be abstracted and made MP-safe.
   1934  * (Hmm... where have we seen this warning before :->  GO )
   1935  */
   1936 static int
   1937 raidlock(rs)
   1938 	struct raid_softc *rs;
   1939 {
   1940 	int     error;
   1941 
   1942 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   1943 		rs->sc_flags |= RAIDF_WANTED;
   1944 		if ((error =
   1945 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   1946 			return (error);
   1947 	}
   1948 	rs->sc_flags |= RAIDF_LOCKED;
   1949 	return (0);
   1950 }
   1951 /*
   1952  * Unlock and wake up any waiters.
   1953  */
   1954 static void
   1955 raidunlock(rs)
   1956 	struct raid_softc *rs;
   1957 {
   1958 
   1959 	rs->sc_flags &= ~RAIDF_LOCKED;
   1960 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   1961 		rs->sc_flags &= ~RAIDF_WANTED;
   1962 		wakeup(rs);
   1963 	}
   1964 }
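         /*
          * Sketch of the intended usage of the lock pair above (hypothetical
          * caller, e.g. an open or ioctl routine):
          *
          *	if ((error = raidlock(rs)) != 0)
          *		return (error);
          *	... manipulate the softc ...
          *	raidunlock(rs);
          */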
   1965 
   1966 
   1967 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   1968 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
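         /*
          * In other words (assuming DEV_BSIZE is 512): the component label
          * lives at block 16384 / 512 = 32 of the component and occupies
          * 1024 bytes (two 512-byte blocks), as used by the read/write
          * routines below.
          */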
   1969 
   1970 int
   1971 raidmarkclean(dev_t dev, struct vnode *b_vp)
   1972 {
   1973 	return(0);
   1974 }
   1975 
   1976 /* ARGSUSED */
   1977 int
   1978 raidread_component_label(dev, b_vp, component_label)
   1979 	dev_t dev;
   1980 	struct vnode *b_vp;
   1981 	RF_ComponentLabel_t *component_label;
   1982 {
   1983 	struct buf *bp;
   1984 	int error;
   1985 
   1986 	/* XXX should probably ensure that we don't try to do this if
   1987 	   someone has changed rf_protected_sectors. */
   1988 
   1989 	/* get a block of the appropriate size... */
   1990 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   1991 	bp->b_dev = dev;
   1992 
   1993 	/* get our ducks in a row for the read */
   1994 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   1995 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   1996 	bp->b_flags = B_BUSY | B_READ;
    1997 	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   1998 
   1999 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2000 
   2001 	error = biowait(bp);
   2002 
   2003 	if (!error) {
   2004 		memcpy(component_label, bp->b_un.b_addr,
   2005 		       sizeof(RF_ComponentLabel_t));
   2006 #if 1
   2007 		printf("raidread_component_label: got component label:\n");
   2008 		printf("Version: %d\n",component_label->version);
   2009 		printf("Serial Number: %d\n",component_label->serial_number);
   2010 		printf("Mod counter: %d\n",component_label->mod_counter);
   2011 		printf("Row: %d\n", component_label->row);
   2012 		printf("Column: %d\n", component_label->column);
   2013 		printf("Num Rows: %d\n", component_label->num_rows);
   2014 		printf("Num Columns: %d\n", component_label->num_columns);
   2015 		printf("Clean: %d\n", component_label->clean);
   2016 		printf("Status: %d\n", component_label->status);
   2017 #endif
    2018 	} else {
   2019 		printf("Failed to read RAID component label!\n");
   2020 	}
   2021 
    2022 	bp->b_flags = B_INVAL | B_AGE;
   2023 	brelse(bp);
   2024 	return(error);
   2025 }
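         /*
          * Illustrative sketch (hypothetical caller): reading a component's
          * label might look like
          *
          *	RF_ComponentLabel_t clabel;
          *	if (raidread_component_label(dev, vp, &clabel) == 0)
          *		printf("mod_counter: %d\n", clabel.mod_counter);
          *
          * where dev and vp identify the component, e.g. as obtained via
          * raidlookup() above.
          */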
   2026 /* ARGSUSED */
   2027 int
   2028 raidwrite_component_label(dev, b_vp, component_label)
   2029 	dev_t dev;
   2030 	struct vnode *b_vp;
   2031 	RF_ComponentLabel_t *component_label;
   2032 {
   2033 	struct buf *bp;
   2034 	int error;
   2035 
   2036 	/* get a block of the appropriate size... */
   2037 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2038 	bp->b_dev = dev;
   2039 
   2040 	/* get our ducks in a row for the write */
   2041 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2042 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2043 	bp->b_flags = B_BUSY | B_WRITE;
    2044 	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2045 
    2046 	memset(bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE);
    2047 
    2048 	memcpy(bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));
   2049 
   2050 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2051 	error = biowait(bp);
    2052 	bp->b_flags = B_INVAL | B_AGE;
   2053 	brelse(bp);
   2054 	if (error) {
   2055 		printf("Failed to write RAID component info!\n");
   2056 	}
   2057 
   2058 	return(error);
   2059 }
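         /*
          * Illustrative sketch (hypothetical sequence; raidmarkclean() above
          * is still a stub): a typical "mark clean" operation would read the
          * label, update it, and write it back:
          *
          *	RF_ComponentLabel_t clabel;
          *	if (raidread_component_label(dev, vp, &clabel) == 0) {
          *		clabel.clean = 1;	/* exact value may be a named constant */
          *		clabel.mod_counter++;
          *		(void) raidwrite_component_label(dev, vp, &clabel);
          *	}
          */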
   2060