Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.104.2.12
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.104.2.12 2002/07/12 01:40:10 nathanw Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/cdefs.h>
    117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.104.2.12 2002/07/12 01:40:10 nathanw Exp $");
    118 
    119 #include <sys/param.h>
    120 #include <sys/errno.h>
    121 #include <sys/pool.h>
    122 #include <sys/proc.h>
    123 #include <sys/queue.h>
    124 #include <sys/disk.h>
    125 #include <sys/device.h>
    126 #include <sys/stat.h>
    127 #include <sys/ioctl.h>
    128 #include <sys/fcntl.h>
    129 #include <sys/systm.h>
    130 #include <sys/namei.h>
    131 #include <sys/vnode.h>
    132 #include <sys/disklabel.h>
    133 #include <sys/conf.h>
    134 #include <sys/lock.h>
    135 #include <sys/buf.h>
    136 #include <sys/user.h>
    137 #include <sys/reboot.h>
    138 
    139 #include <dev/raidframe/raidframevar.h>
    140 #include <dev/raidframe/raidframeio.h>
    141 #include "raid.h"
    142 #include "opt_raid_autoconfig.h"
    143 #include "rf_raid.h"
    144 #include "rf_copyback.h"
    145 #include "rf_dag.h"
    146 #include "rf_dagflags.h"
    147 #include "rf_desc.h"
    148 #include "rf_diskqueue.h"
    149 #include "rf_acctrace.h"
    150 #include "rf_etimer.h"
    151 #include "rf_general.h"
    152 #include "rf_debugMem.h"
    153 #include "rf_kintf.h"
    154 #include "rf_options.h"
    155 #include "rf_driver.h"
    156 #include "rf_parityscan.h"
    157 #include "rf_debugprint.h"
    158 #include "rf_threadstuff.h"
    159 
/* Debug verbosity knob: db1_printf() output appears only when this is > 0. */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- conditional debug printf.
 *
 * The argument is a complete, parenthesized printf argument list, hence
 * the doubled parentheses at every call site.  Both variants expand to a
 * single do { } while (0) statement so that the macro behaves like an
 * ordinary function call when used as the body of an if/else: a bare
 * `if (...) printf a' would capture a following `else', and a bare
 * `{ }' followed by `;' would make a following `else' a syntax error.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    167 
    168 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    169 
    170 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    171 
    172 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    173 						 * spare table */
    174 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    175 						 * installation process */
    176 
    177 /* prototypes */
    178 static void KernelWakeupFunc(struct buf * bp);
    179 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    180 		   dev_t dev, RF_SectorNum_t startSect,
    181 		   RF_SectorCount_t numSect, caddr_t buf,
    182 		   void (*cbFunc) (struct buf *), void *cbArg,
    183 		   int logBytesPerSector, struct proc * b_proc);
    184 static void raidinit(RF_Raid_t *);
    185 
    186 void raidattach(int);
    187 int raidsize(dev_t);
    188 int raidopen(dev_t, int, int, struct proc *);
    189 int raidclose(dev_t, int, int, struct proc *);
    190 int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
    191 int raidwrite(dev_t, struct uio *, int);
    192 int raidread(dev_t, struct uio *, int);
    193 void raidstrategy(struct buf *);
    194 int raiddump(dev_t, daddr_t, caddr_t, size_t);
    195 
    196 /*
    197  * Pilfered from ccd.c
    198  */
    199 
/*
 * Per-component I/O buffer wrapper.  Objects of this type are what the
 * component buffer pool (raidframe_cbufpool) is sized for.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
    206 
    207 /* component buffer pool */
    208 struct pool raidframe_cbufpool;
    209 
    210 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
    211 #define	RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
    212 
    213 /* XXX Not sure if the following should be replacing the raidPtrs above,
    214    or if it should be used in conjunction with that...
    215 */
    216 
/*
 * Per-unit driver state.  raidattach() allocates an array of these, one
 * entry per requested unit, indexed by raidunit(dev).
 */
struct raid_softc {
	int     sc_flags;	/* flags (RAIDF_* values below) */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;        /* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct buf_queue buf_queue;	/* used for the device queue;
					 * raidstrategy() appends to it */
};
    225 /* sc_flags */
    226 #define RAIDF_INITED	0x01	/* unit has been initialized */
    227 #define RAIDF_WLABEL	0x02	/* label area is writable */
    228 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    229 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    230 #define RAIDF_LOCKED	0x80	/* unit is locked */
    231 
    232 #define	raidunit(x)	DISKUNIT(x)
    233 int numraid = 0;
    234 
    235 /*
    236  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    237  * Be aware that large numbers can allow the driver to consume a lot of
    238  * kernel memory, especially on writes, and in degraded mode reads.
    239  *
    240  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    241  * a single 64K write will typically require 64K for the old data,
    242  * 64K for the old parity, and 64K for the new parity, for a total
    243  * of 192K (if the parity buffer is not re-used immediately).
    244  * Even if it is used immediately, that's still 128K, which when multiplied
    245  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    246  *
    247  * Now in degraded mode, for example, a 64K read on the above setup may
    248  * require data reconstruction, which will require *all* of the 4 remaining
    249  * disks to participate -- 4 * 32K/disk == 128K again.
    250  */
    251 
    252 #ifndef RAIDOUTSTANDING
    253 #define RAIDOUTSTANDING   6
    254 #endif
    255 
    256 #define RAIDLABELDEV(dev)	\
    257 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    258 
    259 /* declared here, and made public, for the benefit of KVM stuff.. */
    260 struct raid_softc *raid_softc;
    261 
    262 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    263 				     struct disklabel *);
    264 static void raidgetdisklabel(dev_t);
    265 static void raidmakedisklabel(struct raid_softc *);
    266 
    267 static int raidlock(struct raid_softc *);
    268 static void raidunlock(struct raid_softc *);
    269 
    270 static void rf_markalldirty(RF_Raid_t *);
    271 void rf_mountroot_hook(struct device *);
    272 
    273 struct device *raidrootdev;
    274 
    275 void rf_ReconThread(struct rf_recon_req *);
    276 /* XXX what I want is: */
    277 /*void rf_ReconThread(RF_Raid_t *raidPtr);  */
    278 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    279 void rf_CopybackThread(RF_Raid_t *raidPtr);
    280 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    281 void rf_buildroothack(void *);
    282 
    283 RF_AutoConfig_t *rf_find_raid_components(void);
    284 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    285 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    286 static int rf_reasonable_label(RF_ComponentLabel_t *);
    287 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    288 int rf_set_autoconfig(RF_Raid_t *, int);
    289 int rf_set_rootpartition(RF_Raid_t *, int);
    290 void rf_release_all_vps(RF_ConfigSet_t *);
    291 void rf_cleanup_config_set(RF_ConfigSet_t *);
    292 int rf_have_enough_components(RF_ConfigSet_t *);
    293 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    294 
    295 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    296 				  allow autoconfig to take place.
    297 			          Note that this is overridden by having
    298 			          RAID_AUTOCONFIG as an option in the
    299 			          kernel config file.  */
    300 
    301 void
    302 raidattach(num)
    303 	int     num;
    304 {
    305 	int raidID;
    306 	int i, rc;
    307 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    308 	RF_ConfigSet_t *config_sets;
    309 
    310 #ifdef DEBUG
    311 	printf("raidattach: Asked for %d units\n", num);
    312 #endif
    313 
    314 	if (num <= 0) {
    315 #ifdef DIAGNOSTIC
    316 		panic("raidattach: count <= 0");
    317 #endif
    318 		return;
    319 	}
    320 	/* This is where all the initialization stuff gets done. */
    321 
    322 	numraid = num;
    323 
    324 	/* Make some space for requested number of units... */
    325 
    326 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    327 	if (raidPtrs == NULL) {
    328 		panic("raidPtrs is NULL!!\n");
    329 	}
    330 
    331 	/* Initialize the component buffer pool. */
    332 	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
    333 	    0, 0, "raidpl", NULL);
    334 
    335 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    336 	if (rc) {
    337 		RF_PANIC();
    338 	}
    339 
    340 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    341 
    342 	for (i = 0; i < num; i++)
    343 		raidPtrs[i] = NULL;
    344 	rc = rf_BootRaidframe();
    345 	if (rc == 0)
    346 		printf("Kernelized RAIDframe activated\n");
    347 	else
    348 		panic("Serious error booting RAID!!\n");
    349 
    350 	/* put together some datastructures like the CCD device does.. This
    351 	 * lets us lock the device and what-not when it gets opened. */
    352 
    353 	raid_softc = (struct raid_softc *)
    354 		malloc(num * sizeof(struct raid_softc),
    355 		       M_RAIDFRAME, M_NOWAIT);
    356 	if (raid_softc == NULL) {
    357 		printf("WARNING: no memory for RAIDframe driver\n");
    358 		return;
    359 	}
    360 
    361 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    362 
    363 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    364 					      M_RAIDFRAME, M_NOWAIT);
    365 	if (raidrootdev == NULL) {
    366 		panic("No memory for RAIDframe driver!!?!?!\n");
    367 	}
    368 
    369 	for (raidID = 0; raidID < num; raidID++) {
    370 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    371 
    372 		raidrootdev[raidID].dv_class  = DV_DISK;
    373 		raidrootdev[raidID].dv_cfdata = NULL;
    374 		raidrootdev[raidID].dv_unit   = raidID;
    375 		raidrootdev[raidID].dv_parent = NULL;
    376 		raidrootdev[raidID].dv_flags  = 0;
    377 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    378 
    379 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    380 			  (RF_Raid_t *));
    381 		if (raidPtrs[raidID] == NULL) {
    382 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    383 			numraid = raidID;
    384 			return;
    385 		}
    386 	}
    387 
    388 #ifdef RAID_AUTOCONFIG
    389 	raidautoconfig = 1;
    390 #endif
    391 
    392 if (raidautoconfig) {
    393 	/* 1. locate all RAID components on the system */
    394 
    395 #if DEBUG
    396 	printf("Searching for raid components...\n");
    397 #endif
    398 	ac_list = rf_find_raid_components();
    399 
    400 	/* 2. sort them into their respective sets */
    401 
    402 	config_sets = rf_create_auto_sets(ac_list);
    403 
    404 	/* 3. evaluate each set and configure the valid ones
    405 	   This gets done in rf_buildroothack() */
    406 
    407 	/* schedule the creation of the thread to do the
    408 	   "/ on RAID" stuff */
    409 
    410 	kthread_create(rf_buildroothack,config_sets);
    411 
    412 #if 0
    413 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    414 #endif
    415 }
    416 
    417 }
    418 
    419 void
    420 rf_buildroothack(arg)
    421 	void *arg;
    422 {
    423 	RF_ConfigSet_t *config_sets = arg;
    424 	RF_ConfigSet_t *cset;
    425 	RF_ConfigSet_t *next_cset;
    426 	int retcode;
    427 	int raidID;
    428 	int rootID;
    429 	int num_root;
    430 
    431 	rootID = 0;
    432 	num_root = 0;
    433 	cset = config_sets;
    434 	while(cset != NULL ) {
    435 		next_cset = cset->next;
    436 		if (rf_have_enough_components(cset) &&
    437 		    cset->ac->clabel->autoconfigure==1) {
    438 			retcode = rf_auto_config_set(cset,&raidID);
    439 			if (!retcode) {
    440 				if (cset->rootable) {
    441 					rootID = raidID;
    442 					num_root++;
    443 				}
    444 			} else {
    445 				/* The autoconfig didn't work :( */
    446 #if DEBUG
    447 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    448 #endif
    449 				rf_release_all_vps(cset);
    450 			}
    451 		} else {
    452 			/* we're not autoconfiguring this set...
    453 			   release the associated resources */
    454 			rf_release_all_vps(cset);
    455 		}
    456 		/* cleanup */
    457 		rf_cleanup_config_set(cset);
    458 		cset = next_cset;
    459 	}
    460 	if (boothowto & RB_ASKNAME) {
    461 		/* We don't auto-config... */
    462 	} else {
    463 		/* They didn't ask, and we found something bootable... */
    464 
    465 		if (num_root == 1) {
    466 			booted_device = &raidrootdev[rootID];
    467 		} else if (num_root > 1) {
    468 			/* we can't guess.. require the user to answer... */
    469 			boothowto |= RB_ASKNAME;
    470 		}
    471 	}
    472 }
    473 
    474 
    475 int
    476 raidsize(dev)
    477 	dev_t   dev;
    478 {
    479 	struct raid_softc *rs;
    480 	struct disklabel *lp;
    481 	int     part, unit, omask, size;
    482 
    483 	unit = raidunit(dev);
    484 	if (unit >= numraid)
    485 		return (-1);
    486 	rs = &raid_softc[unit];
    487 
    488 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    489 		return (-1);
    490 
    491 	part = DISKPART(dev);
    492 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    493 	lp = rs->sc_dkdev.dk_label;
    494 
    495 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    496 		return (-1);
    497 
    498 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    499 		size = -1;
    500 	else
    501 		size = lp->d_partitions[part].p_size *
    502 		    (lp->d_secsize / DEV_BSIZE);
    503 
    504 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    505 		return (-1);
    506 
    507 	return (size);
    508 
    509 }
    510 
    511 int
    512 raiddump(dev, blkno, va, size)
    513 	dev_t   dev;
    514 	daddr_t blkno;
    515 	caddr_t va;
    516 	size_t  size;
    517 {
    518 	/* Not implemented. */
    519 	return ENXIO;
    520 }
    521 /* ARGSUSED */
    522 int
    523 raidopen(dev, flags, fmt, p)
    524 	dev_t   dev;
    525 	int     flags, fmt;
    526 	struct proc *p;
    527 {
    528 	int     unit = raidunit(dev);
    529 	struct raid_softc *rs;
    530 	struct disklabel *lp;
    531 	int     part, pmask;
    532 	int     error = 0;
    533 
    534 	if (unit >= numraid)
    535 		return (ENXIO);
    536 	rs = &raid_softc[unit];
    537 
    538 	if ((error = raidlock(rs)) != 0)
    539 		return (error);
    540 	lp = rs->sc_dkdev.dk_label;
    541 
    542 	part = DISKPART(dev);
    543 	pmask = (1 << part);
    544 
    545 	db1_printf(("Opening raid device number: %d partition: %d\n",
    546 		unit, part));
    547 
    548 
    549 	if ((rs->sc_flags & RAIDF_INITED) &&
    550 	    (rs->sc_dkdev.dk_openmask == 0))
    551 		raidgetdisklabel(dev);
    552 
    553 	/* make sure that this partition exists */
    554 
    555 	if (part != RAW_PART) {
    556 		db1_printf(("Not a raw partition..\n"));
    557 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    558 		    ((part >= lp->d_npartitions) ||
    559 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    560 			error = ENXIO;
    561 			raidunlock(rs);
    562 			db1_printf(("Bailing out...\n"));
    563 			return (error);
    564 		}
    565 	}
    566 	/* Prevent this unit from being unconfigured while open. */
    567 	switch (fmt) {
    568 	case S_IFCHR:
    569 		rs->sc_dkdev.dk_copenmask |= pmask;
    570 		break;
    571 
    572 	case S_IFBLK:
    573 		rs->sc_dkdev.dk_bopenmask |= pmask;
    574 		break;
    575 	}
    576 
    577 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    578 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    579 		/* First one... mark things as dirty... Note that we *MUST*
    580 		 have done a configure before this.  I DO NOT WANT TO BE
    581 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    582 		 THAT THEY BELONG TOGETHER!!!!! */
    583 		/* XXX should check to see if we're only open for reading
    584 		   here... If so, we needn't do this, but then need some
    585 		   other way of keeping track of what's happened.. */
    586 
    587 		rf_markalldirty( raidPtrs[unit] );
    588 	}
    589 
    590 
    591 	rs->sc_dkdev.dk_openmask =
    592 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    593 
    594 	raidunlock(rs);
    595 
    596 	return (error);
    597 
    598 
    599 }
    600 /* ARGSUSED */
    601 int
    602 raidclose(dev, flags, fmt, p)
    603 	dev_t   dev;
    604 	int     flags, fmt;
    605 	struct proc *p;
    606 {
    607 	int     unit = raidunit(dev);
    608 	struct raid_softc *rs;
    609 	int     error = 0;
    610 	int     part;
    611 
    612 	if (unit >= numraid)
    613 		return (ENXIO);
    614 	rs = &raid_softc[unit];
    615 
    616 	if ((error = raidlock(rs)) != 0)
    617 		return (error);
    618 
    619 	part = DISKPART(dev);
    620 
    621 	/* ...that much closer to allowing unconfiguration... */
    622 	switch (fmt) {
    623 	case S_IFCHR:
    624 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    625 		break;
    626 
    627 	case S_IFBLK:
    628 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    629 		break;
    630 	}
    631 	rs->sc_dkdev.dk_openmask =
    632 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    633 
    634 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    635 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    636 		/* Last one... device is not unconfigured yet.
    637 		   Device shutdown has taken care of setting the
    638 		   clean bits if RAIDF_INITED is not set
    639 		   mark things as clean... */
    640 #if 0
    641 		printf("Last one on raid%d.  Updating status.\n",unit);
    642 #endif
    643 		rf_update_component_labels(raidPtrs[unit],
    644 						 RF_FINAL_COMPONENT_UPDATE);
    645 		if (doing_shutdown) {
    646 			/* last one, and we're going down, so
    647 			   lights out for this RAID set too. */
    648 			error = rf_Shutdown(raidPtrs[unit]);
    649 
    650 			/* It's no longer initialized... */
    651 			rs->sc_flags &= ~RAIDF_INITED;
    652 
    653 			/* Detach the disk. */
    654 			disk_detach(&rs->sc_dkdev);
    655 		}
    656 	}
    657 
    658 	raidunlock(rs);
    659 	return (0);
    660 
    661 }
    662 
    663 void
    664 raidstrategy(bp)
    665 	struct buf *bp;
    666 {
    667 	int s;
    668 
    669 	unsigned int raidID = raidunit(bp->b_dev);
    670 	RF_Raid_t *raidPtr;
    671 	struct raid_softc *rs = &raid_softc[raidID];
    672 	struct disklabel *lp;
    673 	int     wlabel;
    674 
    675 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    676 		bp->b_error = ENXIO;
    677 		bp->b_flags |= B_ERROR;
    678 		bp->b_resid = bp->b_bcount;
    679 		biodone(bp);
    680 		return;
    681 	}
    682 	if (raidID >= numraid || !raidPtrs[raidID]) {
    683 		bp->b_error = ENODEV;
    684 		bp->b_flags |= B_ERROR;
    685 		bp->b_resid = bp->b_bcount;
    686 		biodone(bp);
    687 		return;
    688 	}
    689 	raidPtr = raidPtrs[raidID];
    690 	if (!raidPtr->valid) {
    691 		bp->b_error = ENODEV;
    692 		bp->b_flags |= B_ERROR;
    693 		bp->b_resid = bp->b_bcount;
    694 		biodone(bp);
    695 		return;
    696 	}
    697 	if (bp->b_bcount == 0) {
    698 		db1_printf(("b_bcount is zero..\n"));
    699 		biodone(bp);
    700 		return;
    701 	}
    702 	lp = rs->sc_dkdev.dk_label;
    703 
    704 	/*
    705 	 * Do bounds checking and adjust transfer.  If there's an
    706 	 * error, the bounds check will flag that for us.
    707 	 */
    708 
    709 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    710 	if (DISKPART(bp->b_dev) != RAW_PART)
    711 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    712 			db1_printf(("Bounds check failed!!:%d %d\n",
    713 				(int) bp->b_blkno, (int) wlabel));
    714 			biodone(bp);
    715 			return;
    716 		}
    717 	s = splbio();
    718 
    719 	bp->b_resid = 0;
    720 
    721 	/* stuff it onto our queue */
    722 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    723 
    724 	raidstart(raidPtrs[raidID]);
    725 
    726 	splx(s);
    727 }
    728 /* ARGSUSED */
    729 int
    730 raidread(dev, uio, flags)
    731 	dev_t   dev;
    732 	struct uio *uio;
    733 	int     flags;
    734 {
    735 	int     unit = raidunit(dev);
    736 	struct raid_softc *rs;
    737 	int     part;
    738 
    739 	if (unit >= numraid)
    740 		return (ENXIO);
    741 	rs = &raid_softc[unit];
    742 
    743 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    744 		return (ENXIO);
    745 	part = DISKPART(dev);
    746 
    747 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    748 
    749 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    750 
    751 }
    752 /* ARGSUSED */
    753 int
    754 raidwrite(dev, uio, flags)
    755 	dev_t   dev;
    756 	struct uio *uio;
    757 	int     flags;
    758 {
    759 	int     unit = raidunit(dev);
    760 	struct raid_softc *rs;
    761 
    762 	if (unit >= numraid)
    763 		return (ENXIO);
    764 	rs = &raid_softc[unit];
    765 
    766 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    767 		return (ENXIO);
    768 	db1_printf(("raidwrite\n"));
    769 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    770 
    771 }
    772 
    773 int
    774 raidioctl(dev, cmd, data, flag, p)
    775 	dev_t   dev;
    776 	u_long  cmd;
    777 	caddr_t data;
    778 	int     flag;
    779 	struct proc *p;
    780 {
    781 	int     unit = raidunit(dev);
    782 	int     error = 0;
    783 	int     part, pmask;
    784 	struct raid_softc *rs;
    785 	RF_Config_t *k_cfg, *u_cfg;
    786 	RF_Raid_t *raidPtr;
    787 	RF_RaidDisk_t *diskPtr;
    788 	RF_AccTotals_t *totals;
    789 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    790 	u_char *specific_buf;
    791 	int retcode = 0;
    792 	int row;
    793 	int column;
    794 	struct rf_recon_req *rrcopy, *rr;
    795 	RF_ComponentLabel_t *clabel;
    796 	RF_ComponentLabel_t ci_label;
    797 	RF_ComponentLabel_t **clabel_ptr;
    798 	RF_SingleComponent_t *sparePtr,*componentPtr;
    799 	RF_SingleComponent_t hot_spare;
    800 	RF_SingleComponent_t component;
    801 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    802 	int i, j, d;
    803 #ifdef __HAVE_OLD_DISKLABEL
    804 	struct disklabel newlabel;
    805 #endif
    806 
    807 	if (unit >= numraid)
    808 		return (ENXIO);
    809 	rs = &raid_softc[unit];
    810 	raidPtr = raidPtrs[unit];
    811 
    812 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    813 		(int) DISKPART(dev), (int) unit, (int) cmd));
    814 
    815 	/* Must be open for writes for these commands... */
    816 	switch (cmd) {
    817 	case DIOCSDINFO:
    818 	case DIOCWDINFO:
    819 #ifdef __HAVE_OLD_DISKLABEL
    820 	case ODIOCWDINFO:
    821 	case ODIOCSDINFO:
    822 #endif
    823 	case DIOCWLABEL:
    824 		if ((flag & FWRITE) == 0)
    825 			return (EBADF);
    826 	}
    827 
    828 	/* Must be initialized for these... */
    829 	switch (cmd) {
    830 	case DIOCGDINFO:
    831 	case DIOCSDINFO:
    832 	case DIOCWDINFO:
    833 #ifdef __HAVE_OLD_DISKLABEL
    834 	case ODIOCGDINFO:
    835 	case ODIOCWDINFO:
    836 	case ODIOCSDINFO:
    837 	case ODIOCGDEFLABEL:
    838 #endif
    839 	case DIOCGPART:
    840 	case DIOCWLABEL:
    841 	case DIOCGDEFLABEL:
    842 	case RAIDFRAME_SHUTDOWN:
    843 	case RAIDFRAME_REWRITEPARITY:
    844 	case RAIDFRAME_GET_INFO:
    845 	case RAIDFRAME_RESET_ACCTOTALS:
    846 	case RAIDFRAME_GET_ACCTOTALS:
    847 	case RAIDFRAME_KEEP_ACCTOTALS:
    848 	case RAIDFRAME_GET_SIZE:
    849 	case RAIDFRAME_FAIL_DISK:
    850 	case RAIDFRAME_COPYBACK:
    851 	case RAIDFRAME_CHECK_RECON_STATUS:
    852 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
    853 	case RAIDFRAME_GET_COMPONENT_LABEL:
    854 	case RAIDFRAME_SET_COMPONENT_LABEL:
    855 	case RAIDFRAME_ADD_HOT_SPARE:
    856 	case RAIDFRAME_REMOVE_HOT_SPARE:
    857 	case RAIDFRAME_INIT_LABELS:
    858 	case RAIDFRAME_REBUILD_IN_PLACE:
    859 	case RAIDFRAME_CHECK_PARITY:
    860 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    861 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
    862 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    863 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
    864 	case RAIDFRAME_SET_AUTOCONFIG:
    865 	case RAIDFRAME_SET_ROOT:
    866 	case RAIDFRAME_DELETE_COMPONENT:
    867 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
    868 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    869 			return (ENXIO);
    870 	}
    871 
    872 	switch (cmd) {
    873 
    874 		/* configure the system */
    875 	case RAIDFRAME_CONFIGURE:
    876 
    877 		if (raidPtr->valid) {
    878 			/* There is a valid RAID set running on this unit! */
    879 			printf("raid%d: Device already configured!\n",unit);
    880 			return(EINVAL);
    881 		}
    882 
    883 		/* copy-in the configuration information */
    884 		/* data points to a pointer to the configuration structure */
    885 
    886 		u_cfg = *((RF_Config_t **) data);
    887 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    888 		if (k_cfg == NULL) {
    889 			return (ENOMEM);
    890 		}
    891 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    892 		    sizeof(RF_Config_t));
    893 		if (retcode) {
    894 			RF_Free(k_cfg, sizeof(RF_Config_t));
    895 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    896 				retcode));
    897 			return (retcode);
    898 		}
    899 		/* allocate a buffer for the layout-specific data, and copy it
    900 		 * in */
    901 		if (k_cfg->layoutSpecificSize) {
    902 			if (k_cfg->layoutSpecificSize > 10000) {
    903 				/* sanity check */
    904 				RF_Free(k_cfg, sizeof(RF_Config_t));
    905 				return (EINVAL);
    906 			}
    907 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    908 			    (u_char *));
    909 			if (specific_buf == NULL) {
    910 				RF_Free(k_cfg, sizeof(RF_Config_t));
    911 				return (ENOMEM);
    912 			}
    913 			retcode = copyin(k_cfg->layoutSpecific,
    914 			    (caddr_t) specific_buf,
    915 			    k_cfg->layoutSpecificSize);
    916 			if (retcode) {
    917 				RF_Free(k_cfg, sizeof(RF_Config_t));
    918 				RF_Free(specific_buf,
    919 					k_cfg->layoutSpecificSize);
    920 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    921 					retcode));
    922 				return (retcode);
    923 			}
    924 		} else
    925 			specific_buf = NULL;
    926 		k_cfg->layoutSpecific = specific_buf;
    927 
    928 		/* should do some kind of sanity check on the configuration.
    929 		 * Store the sum of all the bytes in the last byte? */
    930 
    931 		/* configure the system */
    932 
    933 		/*
    934 		 * Clear the entire RAID descriptor, just to make sure
    935 		 *  there is no stale data left in the case of a
    936 		 *  reconfiguration
    937 		 */
    938 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
    939 		raidPtr->raidid = unit;
    940 
    941 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    942 
    943 		if (retcode == 0) {
    944 
    945 			/* allow this many simultaneous IO's to
    946 			   this RAID device */
    947 			raidPtr->openings = RAIDOUTSTANDING;
    948 
    949 			raidinit(raidPtr);
    950 			rf_markalldirty(raidPtr);
    951 		}
    952 		/* free the buffers.  No return code here. */
    953 		if (k_cfg->layoutSpecificSize) {
    954 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    955 		}
    956 		RF_Free(k_cfg, sizeof(RF_Config_t));
    957 
    958 		return (retcode);
    959 
    960 		/* shutdown the system */
    961 	case RAIDFRAME_SHUTDOWN:
    962 
    963 		if ((error = raidlock(rs)) != 0)
    964 			return (error);
    965 
    966 		/*
    967 		 * If somebody has a partition mounted, we shouldn't
    968 		 * shutdown.
    969 		 */
    970 
    971 		part = DISKPART(dev);
    972 		pmask = (1 << part);
    973 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    974 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    975 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    976 			raidunlock(rs);
    977 			return (EBUSY);
    978 		}
    979 
    980 		retcode = rf_Shutdown(raidPtr);
    981 
    982 		/* It's no longer initialized... */
    983 		rs->sc_flags &= ~RAIDF_INITED;
    984 
    985 		/* Detach the disk. */
    986 		disk_detach(&rs->sc_dkdev);
    987 
    988 		raidunlock(rs);
    989 
    990 		return (retcode);
    991 	case RAIDFRAME_GET_COMPONENT_LABEL:
    992 		clabel_ptr = (RF_ComponentLabel_t **) data;
    993 		/* need to read the component label for the disk indicated
    994 		   by row,column in clabel */
    995 
    996 		/* For practice, let's get it directly from disk, rather
    997 		   than from the in-core copy */
    998 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    999 			   (RF_ComponentLabel_t *));
   1000 		if (clabel == NULL)
   1001 			return (ENOMEM);
   1002 
   1003 		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
   1004 
   1005 		retcode = copyin( *clabel_ptr, clabel,
   1006 				  sizeof(RF_ComponentLabel_t));
   1007 
   1008 		if (retcode) {
   1009 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1010 			return(retcode);
   1011 		}
   1012 
   1013 		row = clabel->row;
   1014 		column = clabel->column;
   1015 
   1016 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1017 		    (column < 0) || (column >= raidPtr->numCol +
   1018 				     raidPtr->numSpare)) {
   1019 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1020 			return(EINVAL);
   1021 		}
   1022 
   1023 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1024 				raidPtr->raid_cinfo[row][column].ci_vp,
   1025 				clabel );
   1026 
   1027 		retcode = copyout((caddr_t) clabel,
   1028 				  (caddr_t) *clabel_ptr,
   1029 				  sizeof(RF_ComponentLabel_t));
   1030 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1031 		return (retcode);
   1032 
   1033 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1034 		clabel = (RF_ComponentLabel_t *) data;
   1035 
   1036 		/* XXX check the label for valid stuff... */
   1037 		/* Note that some things *should not* get modified --
   1038 		   the user should be re-initing the labels instead of
   1039 		   trying to patch things.
   1040 		   */
   1041 
   1042 		printf("Got component label:\n");
   1043 		printf("Version: %d\n",clabel->version);
   1044 		printf("Serial Number: %d\n",clabel->serial_number);
   1045 		printf("Mod counter: %d\n",clabel->mod_counter);
   1046 		printf("Row: %d\n", clabel->row);
   1047 		printf("Column: %d\n", clabel->column);
   1048 		printf("Num Rows: %d\n", clabel->num_rows);
   1049 		printf("Num Columns: %d\n", clabel->num_columns);
   1050 		printf("Clean: %d\n", clabel->clean);
   1051 		printf("Status: %d\n", clabel->status);
   1052 
   1053 		row = clabel->row;
   1054 		column = clabel->column;
   1055 
   1056 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1057 		    (column < 0) || (column >= raidPtr->numCol)) {
   1058 			return(EINVAL);
   1059 		}
   1060 
   1061 		/* XXX this isn't allowed to do anything for now :-) */
   1062 
   1063 		/* XXX and before it is, we need to fill in the rest
   1064 		   of the fields!?!?!?! */
   1065 #if 0
   1066 		raidwrite_component_label(
   1067                             raidPtr->Disks[row][column].dev,
   1068 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1069 			    clabel );
   1070 #endif
   1071 		return (0);
   1072 
   1073 	case RAIDFRAME_INIT_LABELS:
   1074 		clabel = (RF_ComponentLabel_t *) data;
   1075 		/*
   1076 		   we only want the serial number from
   1077 		   the above.  We get all the rest of the information
   1078 		   from the config that was used to create this RAID
   1079 		   set.
   1080 		   */
   1081 
   1082 		raidPtr->serial_number = clabel->serial_number;
   1083 
   1084 		raid_init_component_label(raidPtr, &ci_label);
   1085 		ci_label.serial_number = clabel->serial_number;
   1086 
   1087 		for(row=0;row<raidPtr->numRow;row++) {
   1088 			ci_label.row = row;
   1089 			for(column=0;column<raidPtr->numCol;column++) {
   1090 				diskPtr = &raidPtr->Disks[row][column];
   1091 				if (!RF_DEAD_DISK(diskPtr->status)) {
   1092 					ci_label.partitionSize = diskPtr->partitionSize;
   1093 					ci_label.column = column;
   1094 					raidwrite_component_label(
   1095 					  raidPtr->Disks[row][column].dev,
   1096 					  raidPtr->raid_cinfo[row][column].ci_vp,
   1097 					  &ci_label );
   1098 				}
   1099 			}
   1100 		}
   1101 
   1102 		return (retcode);
   1103 	case RAIDFRAME_SET_AUTOCONFIG:
   1104 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1105 		printf("New autoconfig value is: %d\n", d);
   1106 		*(int *) data = d;
   1107 		return (retcode);
   1108 
   1109 	case RAIDFRAME_SET_ROOT:
   1110 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1111 		printf("New rootpartition value is: %d\n", d);
   1112 		*(int *) data = d;
   1113 		return (retcode);
   1114 
   1115 		/* initialize all parity */
   1116 	case RAIDFRAME_REWRITEPARITY:
   1117 
   1118 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1119 			/* Parity for RAID 0 is trivially correct */
   1120 			raidPtr->parity_good = RF_RAID_CLEAN;
   1121 			return(0);
   1122 		}
   1123 
   1124 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1125 			/* Re-write is already in progress! */
   1126 			return(EINVAL);
   1127 		}
   1128 
   1129 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1130 					   rf_RewriteParityThread,
   1131 					   raidPtr,"raid_parity");
   1132 		return (retcode);
   1133 
   1134 
   1135 	case RAIDFRAME_ADD_HOT_SPARE:
   1136 		sparePtr = (RF_SingleComponent_t *) data;
   1137 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1138 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1139 		return(retcode);
   1140 
   1141 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1142 		return(retcode);
   1143 
   1144 	case RAIDFRAME_DELETE_COMPONENT:
   1145 		componentPtr = (RF_SingleComponent_t *)data;
   1146 		memcpy( &component, componentPtr,
   1147 			sizeof(RF_SingleComponent_t));
   1148 		retcode = rf_delete_component(raidPtr, &component);
   1149 		return(retcode);
   1150 
   1151 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1152 		componentPtr = (RF_SingleComponent_t *)data;
   1153 		memcpy( &component, componentPtr,
   1154 			sizeof(RF_SingleComponent_t));
   1155 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1156 		return(retcode);
   1157 
   1158 	case RAIDFRAME_REBUILD_IN_PLACE:
   1159 
   1160 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1161 			/* Can't do this on a RAID 0!! */
   1162 			return(EINVAL);
   1163 		}
   1164 
   1165 		if (raidPtr->recon_in_progress == 1) {
   1166 			/* a reconstruct is already in progress! */
   1167 			return(EINVAL);
   1168 		}
   1169 
   1170 		componentPtr = (RF_SingleComponent_t *) data;
   1171 		memcpy( &component, componentPtr,
   1172 			sizeof(RF_SingleComponent_t));
   1173 		row = component.row;
   1174 		column = component.column;
   1175 		printf("Rebuild: %d %d\n",row, column);
   1176 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1177 		    (column < 0) || (column >= raidPtr->numCol)) {
   1178 			return(EINVAL);
   1179 		}
   1180 
   1181 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1182 		if (rrcopy == NULL)
   1183 			return(ENOMEM);
   1184 
   1185 		rrcopy->raidPtr = (void *) raidPtr;
   1186 		rrcopy->row = row;
   1187 		rrcopy->col = column;
   1188 
   1189 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1190 					   rf_ReconstructInPlaceThread,
   1191 					   rrcopy,"raid_reconip");
   1192 		return(retcode);
   1193 
   1194 	case RAIDFRAME_GET_INFO:
   1195 		if (!raidPtr->valid)
   1196 			return (ENODEV);
   1197 		ucfgp = (RF_DeviceConfig_t **) data;
   1198 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1199 			  (RF_DeviceConfig_t *));
   1200 		if (d_cfg == NULL)
   1201 			return (ENOMEM);
   1202 		memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
   1203 		d_cfg->rows = raidPtr->numRow;
   1204 		d_cfg->cols = raidPtr->numCol;
   1205 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1206 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1207 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1208 			return (ENOMEM);
   1209 		}
   1210 		d_cfg->nspares = raidPtr->numSpare;
   1211 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1212 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1213 			return (ENOMEM);
   1214 		}
   1215 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1216 		d = 0;
   1217 		for (i = 0; i < d_cfg->rows; i++) {
   1218 			for (j = 0; j < d_cfg->cols; j++) {
   1219 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1220 				d++;
   1221 			}
   1222 		}
   1223 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1224 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1225 		}
   1226 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1227 				  sizeof(RF_DeviceConfig_t));
   1228 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1229 
   1230 		return (retcode);
   1231 
   1232 	case RAIDFRAME_CHECK_PARITY:
   1233 		*(int *) data = raidPtr->parity_good;
   1234 		return (0);
   1235 
   1236 	case RAIDFRAME_RESET_ACCTOTALS:
   1237 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1238 		return (0);
   1239 
   1240 	case RAIDFRAME_GET_ACCTOTALS:
   1241 		totals = (RF_AccTotals_t *) data;
   1242 		*totals = raidPtr->acc_totals;
   1243 		return (0);
   1244 
   1245 	case RAIDFRAME_KEEP_ACCTOTALS:
   1246 		raidPtr->keep_acc_totals = *(int *)data;
   1247 		return (0);
   1248 
   1249 	case RAIDFRAME_GET_SIZE:
   1250 		*(int *) data = raidPtr->totalSectors;
   1251 		return (0);
   1252 
   1253 		/* fail a disk & optionally start reconstruction */
   1254 	case RAIDFRAME_FAIL_DISK:
   1255 
   1256 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1257 			/* Can't do this on a RAID 0!! */
   1258 			return(EINVAL);
   1259 		}
   1260 
   1261 		rr = (struct rf_recon_req *) data;
   1262 
   1263 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1264 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1265 			return (EINVAL);
   1266 
   1267 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1268 		       unit, rr->row, rr->col);
   1269 
   1270 		/* make a copy of the recon request so that we don't rely on
   1271 		 * the user's buffer */
   1272 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1273 		if (rrcopy == NULL)
   1274 			return(ENOMEM);
   1275 		memcpy(rrcopy, rr, sizeof(*rr));
   1276 		rrcopy->raidPtr = (void *) raidPtr;
   1277 
   1278 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1279 					   rf_ReconThread,
   1280 					   rrcopy,"raid_recon");
   1281 		return (0);
   1282 
   1283 		/* invoke a copyback operation after recon on whatever disk
   1284 		 * needs it, if any */
   1285 	case RAIDFRAME_COPYBACK:
   1286 
   1287 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1288 			/* This makes no sense on a RAID 0!! */
   1289 			return(EINVAL);
   1290 		}
   1291 
   1292 		if (raidPtr->copyback_in_progress == 1) {
   1293 			/* Copyback is already in progress! */
   1294 			return(EINVAL);
   1295 		}
   1296 
   1297 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1298 					   rf_CopybackThread,
   1299 					   raidPtr,"raid_copyback");
   1300 		return (retcode);
   1301 
   1302 		/* return the percentage completion of reconstruction */
   1303 	case RAIDFRAME_CHECK_RECON_STATUS:
   1304 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1305 			/* This makes no sense on a RAID 0, so tell the
   1306 			   user it's done. */
   1307 			*(int *) data = 100;
   1308 			return(0);
   1309 		}
   1310 		row = 0; /* XXX we only consider a single row... */
   1311 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1312 			*(int *) data = 100;
   1313 		else
   1314 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1315 		return (0);
   1316 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1317 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1318 		row = 0; /* XXX we only consider a single row... */
   1319 		if (raidPtr->status[row] != rf_rs_reconstructing) {
   1320 			progressInfo.remaining = 0;
   1321 			progressInfo.completed = 100;
   1322 			progressInfo.total = 100;
   1323 		} else {
   1324 			progressInfo.total =
   1325 				raidPtr->reconControl[row]->numRUsTotal;
   1326 			progressInfo.completed =
   1327 				raidPtr->reconControl[row]->numRUsComplete;
   1328 			progressInfo.remaining = progressInfo.total -
   1329 				progressInfo.completed;
   1330 		}
   1331 		retcode = copyout((caddr_t) &progressInfo,
   1332 				  (caddr_t) *progressInfoPtr,
   1333 				  sizeof(RF_ProgressInfo_t));
   1334 		return (retcode);
   1335 
   1336 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1337 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1338 			/* This makes no sense on a RAID 0, so tell the
   1339 			   user it's done. */
   1340 			*(int *) data = 100;
   1341 			return(0);
   1342 		}
   1343 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1344 			*(int *) data = 100 *
   1345 				raidPtr->parity_rewrite_stripes_done /
   1346 				raidPtr->Layout.numStripe;
   1347 		} else {
   1348 			*(int *) data = 100;
   1349 		}
   1350 		return (0);
   1351 
   1352 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1353 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1354 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1355 			progressInfo.total = raidPtr->Layout.numStripe;
   1356 			progressInfo.completed =
   1357 				raidPtr->parity_rewrite_stripes_done;
   1358 			progressInfo.remaining = progressInfo.total -
   1359 				progressInfo.completed;
   1360 		} else {
   1361 			progressInfo.remaining = 0;
   1362 			progressInfo.completed = 100;
   1363 			progressInfo.total = 100;
   1364 		}
   1365 		retcode = copyout((caddr_t) &progressInfo,
   1366 				  (caddr_t) *progressInfoPtr,
   1367 				  sizeof(RF_ProgressInfo_t));
   1368 		return (retcode);
   1369 
   1370 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1371 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1372 			/* This makes no sense on a RAID 0 */
   1373 			*(int *) data = 100;
   1374 			return(0);
   1375 		}
   1376 		if (raidPtr->copyback_in_progress == 1) {
   1377 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1378 				raidPtr->Layout.numStripe;
   1379 		} else {
   1380 			*(int *) data = 100;
   1381 		}
   1382 		return (0);
   1383 
   1384 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1385 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1386 		if (raidPtr->copyback_in_progress == 1) {
   1387 			progressInfo.total = raidPtr->Layout.numStripe;
   1388 			progressInfo.completed =
   1389 				raidPtr->copyback_stripes_done;
   1390 			progressInfo.remaining = progressInfo.total -
   1391 				progressInfo.completed;
   1392 		} else {
   1393 			progressInfo.remaining = 0;
   1394 			progressInfo.completed = 100;
   1395 			progressInfo.total = 100;
   1396 		}
   1397 		retcode = copyout((caddr_t) &progressInfo,
   1398 				  (caddr_t) *progressInfoPtr,
   1399 				  sizeof(RF_ProgressInfo_t));
   1400 		return (retcode);
   1401 
   1402 		/* the sparetable daemon calls this to wait for the kernel to
   1403 		 * need a spare table. this ioctl does not return until a
   1404 		 * spare table is needed. XXX -- calling mpsleep here in the
   1405 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1406 		 * -- I should either compute the spare table in the kernel,
   1407 		 * or have a different -- XXX XXX -- interface (a different
   1408 		 * character device) for delivering the table     -- XXX */
   1409 #if 0
   1410 	case RAIDFRAME_SPARET_WAIT:
   1411 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1412 		while (!rf_sparet_wait_queue)
   1413 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1414 		waitreq = rf_sparet_wait_queue;
   1415 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1416 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1417 
   1418 		/* structure assignment */
   1419 		*((RF_SparetWait_t *) data) = *waitreq;
   1420 
   1421 		RF_Free(waitreq, sizeof(*waitreq));
   1422 		return (0);
   1423 
   1424 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1425 		 * code in it that will cause the daemon to exit */
   1426 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1427 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1428 		waitreq->fcol = -1;
   1429 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1430 		waitreq->next = rf_sparet_wait_queue;
   1431 		rf_sparet_wait_queue = waitreq;
   1432 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1433 		wakeup(&rf_sparet_wait_queue);
   1434 		return (0);
   1435 
   1436 		/* used by the spare table daemon to deliver a spare table
   1437 		 * into the kernel */
   1438 	case RAIDFRAME_SEND_SPARET:
   1439 
   1440 		/* install the spare table */
   1441 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1442 
   1443 		/* respond to the requestor.  the return status of the spare
   1444 		 * table installation is passed in the "fcol" field */
   1445 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1446 		waitreq->fcol = retcode;
   1447 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1448 		waitreq->next = rf_sparet_resp_queue;
   1449 		rf_sparet_resp_queue = waitreq;
   1450 		wakeup(&rf_sparet_resp_queue);
   1451 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1452 
   1453 		return (retcode);
   1454 #endif
   1455 
   1456 	default:
   1457 		break; /* fall through to the os-specific code below */
   1458 
   1459 	}
   1460 
   1461 	if (!raidPtr->valid)
   1462 		return (EINVAL);
   1463 
   1464 	/*
   1465 	 * Add support for "regular" device ioctls here.
   1466 	 */
   1467 
   1468 	switch (cmd) {
   1469 	case DIOCGDINFO:
   1470 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1471 		break;
   1472 #ifdef __HAVE_OLD_DISKLABEL
   1473 	case ODIOCGDINFO:
   1474 		newlabel = *(rs->sc_dkdev.dk_label);
   1475 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1476 			return ENOTTY;
   1477 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1478 		break;
   1479 #endif
   1480 
   1481 	case DIOCGPART:
   1482 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1483 		((struct partinfo *) data)->part =
   1484 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1485 		break;
   1486 
   1487 	case DIOCWDINFO:
   1488 	case DIOCSDINFO:
   1489 #ifdef __HAVE_OLD_DISKLABEL
   1490 	case ODIOCWDINFO:
   1491 	case ODIOCSDINFO:
   1492 #endif
   1493 	{
   1494 		struct disklabel *lp;
   1495 #ifdef __HAVE_OLD_DISKLABEL
   1496 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1497 			memset(&newlabel, 0, sizeof newlabel);
   1498 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1499 			lp = &newlabel;
   1500 		} else
   1501 #endif
   1502 		lp = (struct disklabel *)data;
   1503 
   1504 		if ((error = raidlock(rs)) != 0)
   1505 			return (error);
   1506 
   1507 		rs->sc_flags |= RAIDF_LABELLING;
   1508 
   1509 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1510 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1511 		if (error == 0) {
   1512 			if (cmd == DIOCWDINFO
   1513 #ifdef __HAVE_OLD_DISKLABEL
   1514 			    || cmd == ODIOCWDINFO
   1515 #endif
   1516 			   )
   1517 				error = writedisklabel(RAIDLABELDEV(dev),
   1518 				    raidstrategy, rs->sc_dkdev.dk_label,
   1519 				    rs->sc_dkdev.dk_cpulabel);
   1520 		}
   1521 		rs->sc_flags &= ~RAIDF_LABELLING;
   1522 
   1523 		raidunlock(rs);
   1524 
   1525 		if (error)
   1526 			return (error);
   1527 		break;
   1528 	}
   1529 
   1530 	case DIOCWLABEL:
   1531 		if (*(int *) data != 0)
   1532 			rs->sc_flags |= RAIDF_WLABEL;
   1533 		else
   1534 			rs->sc_flags &= ~RAIDF_WLABEL;
   1535 		break;
   1536 
   1537 	case DIOCGDEFLABEL:
   1538 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1539 		break;
   1540 
   1541 #ifdef __HAVE_OLD_DISKLABEL
   1542 	case ODIOCGDEFLABEL:
   1543 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1544 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1545 			return ENOTTY;
   1546 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1547 		break;
   1548 #endif
   1549 
   1550 	default:
   1551 		retcode = ENOTTY;
   1552 	}
   1553 	return (retcode);
   1554 
   1555 }
   1556 
   1557 
   1558 /* raidinit -- complete the rest of the initialization for the
   1559    RAIDframe device.  */
   1560 
   1561 
   1562 static void
   1563 raidinit(raidPtr)
   1564 	RF_Raid_t *raidPtr;
   1565 {
   1566 	struct raid_softc *rs;
   1567 	int     unit;
   1568 
   1569 	unit = raidPtr->raidid;
   1570 
   1571 	rs = &raid_softc[unit];
   1572 
   1573 	/* XXX should check return code first... */
   1574 	rs->sc_flags |= RAIDF_INITED;
   1575 
   1576 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1577 
   1578 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1579 
   1580 	/* disk_attach actually creates space for the CPU disklabel, among
   1581 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1582 	 * with disklabels. */
   1583 
   1584 	disk_attach(&rs->sc_dkdev);
   1585 
   1586 	/* XXX There may be a weird interaction here between this, and
   1587 	 * protectedSectors, as used in RAIDframe.  */
   1588 
   1589 	rs->sc_size = raidPtr->totalSectors;
   1590 
   1591 }
   1592 
   1593 /* wake up the daemon & tell it to get us a spare table
   1594  * XXX
   1595  * the entries in the queues should be tagged with the raidPtr
   1596  * so that in the extremely rare case that two recons happen at once,
   1597  * we know for which device were requesting a spare table
   1598  * XXX
   1599  *
   1600  * XXX This code is not currently used. GO
   1601  */
   1602 int
   1603 rf_GetSpareTableFromDaemon(req)
   1604 	RF_SparetWait_t *req;
   1605 {
   1606 	int     retcode;
   1607 
   1608 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1609 	req->next = rf_sparet_wait_queue;
   1610 	rf_sparet_wait_queue = req;
   1611 	wakeup(&rf_sparet_wait_queue);
   1612 
   1613 	/* mpsleep unlocks the mutex */
   1614 	while (!rf_sparet_resp_queue) {
   1615 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1616 		    "raidframe getsparetable", 0);
   1617 	}
   1618 	req = rf_sparet_resp_queue;
   1619 	rf_sparet_resp_queue = req->next;
   1620 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1621 
   1622 	retcode = req->fcol;
   1623 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1624 					 * alloc'd */
   1625 	return (retcode);
   1626 }
   1627 
   1628 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1629  * bp & passes it down.
   1630  * any calls originating in the kernel must use non-blocking I/O
   1631  * do some extra sanity checking to return "appropriate" error values for
   1632  * certain conditions (to make some standard utilities work)
   1633  *
   1634  * Formerly known as: rf_DoAccessKernel
   1635  */
   1636 void
   1637 raidstart(raidPtr)
   1638 	RF_Raid_t *raidPtr;
   1639 {
   1640 	RF_SectorCount_t num_blocks, pb, sum;
   1641 	RF_RaidAddr_t raid_addr;
   1642 	int     retcode;
   1643 	struct partition *pp;
   1644 	daddr_t blocknum;
   1645 	int     unit;
   1646 	struct raid_softc *rs;
   1647 	int     do_async;
   1648 	struct buf *bp;
   1649 
   1650 	unit = raidPtr->raidid;
   1651 	rs = &raid_softc[unit];
   1652 
   1653 	/* quick check to see if anything has died recently */
   1654 	RF_LOCK_MUTEX(raidPtr->mutex);
   1655 	if (raidPtr->numNewFailures > 0) {
   1656 		rf_update_component_labels(raidPtr,
   1657 					   RF_NORMAL_COMPONENT_UPDATE);
   1658 		raidPtr->numNewFailures--;
   1659 	}
   1660 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1661 
   1662 	/* Check to see if we're at the limit... */
   1663 	RF_LOCK_MUTEX(raidPtr->mutex);
   1664 	while (raidPtr->openings > 0) {
   1665 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1666 
   1667 		/* get the next item, if any, from the queue */
   1668 		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
   1669 			/* nothing more to do */
   1670 			return;
   1671 		}
   1672 		BUFQ_REMOVE(&rs->buf_queue, bp);
   1673 
   1674 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1675 		 * partition.. Need to make it absolute to the underlying
   1676 		 * device.. */
   1677 
   1678 		blocknum = bp->b_blkno;
   1679 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1680 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1681 			blocknum += pp->p_offset;
   1682 		}
   1683 
   1684 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1685 			    (int) blocknum));
   1686 
   1687 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1688 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1689 
   1690 		/* *THIS* is where we adjust what block we're going to...
   1691 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1692 		raid_addr = blocknum;
   1693 
   1694 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1695 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1696 		sum = raid_addr + num_blocks + pb;
   1697 		if (1 || rf_debugKernelAccess) {
   1698 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1699 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1700 				    (int) pb, (int) bp->b_resid));
   1701 		}
   1702 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1703 		    || (sum < num_blocks) || (sum < pb)) {
   1704 			bp->b_error = ENOSPC;
   1705 			bp->b_flags |= B_ERROR;
   1706 			bp->b_resid = bp->b_bcount;
   1707 			biodone(bp);
   1708 			RF_LOCK_MUTEX(raidPtr->mutex);
   1709 			continue;
   1710 		}
   1711 		/*
   1712 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1713 		 */
   1714 
   1715 		if (bp->b_bcount & raidPtr->sectorMask) {
   1716 			bp->b_error = EINVAL;
   1717 			bp->b_flags |= B_ERROR;
   1718 			bp->b_resid = bp->b_bcount;
   1719 			biodone(bp);
   1720 			RF_LOCK_MUTEX(raidPtr->mutex);
   1721 			continue;
   1722 
   1723 		}
   1724 		db1_printf(("Calling DoAccess..\n"));
   1725 
   1726 
   1727 		RF_LOCK_MUTEX(raidPtr->mutex);
   1728 		raidPtr->openings--;
   1729 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1730 
   1731 		/*
   1732 		 * Everything is async.
   1733 		 */
   1734 		do_async = 1;
   1735 
   1736 		disk_busy(&rs->sc_dkdev);
   1737 
   1738 		/* XXX we're still at splbio() here... do we *really*
   1739 		   need to be? */
   1740 
   1741 		/* don't ever condition on bp->b_flags & B_WRITE.
   1742 		 * always condition on B_READ instead */
   1743 
   1744 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1745 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1746 				      do_async, raid_addr, num_blocks,
   1747 				      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   1748 
   1749 		RF_LOCK_MUTEX(raidPtr->mutex);
   1750 	}
   1751 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1752 }
   1753 
   1754 
   1755 
   1756 
/*
 * Invoke an I/O from kernel mode.  Disk queue should be locked upon entry.
 *
 * queue: disk queue of the component the request is aimed at; it supplies
 *        the RAID set (raidPtr), the component vnode and device (rf_cinfo)
 *        and the row/col shown in the debug output.
 * req:   the request to issue; req->bp is the originating buf.
 *
 * A struct raidbuf is obtained with RAIDGETBUF() and records both the
 * original buf and the request, so that KernelWakeupFunc() can find them
 * again when the I/O finishes.  Reads and writes are handed to
 * VOP_STRATEGY(); a NOP request calls KernelWakeupFunc() directly.
 *
 * Always returns 0.  An out-of-range unit or an unknown request type
 * panics.
 */

int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int     unit;
	int s;

	/* 's' is only a placeholder while the splbio()/splx() pair below
	 * stays commented out. */
	s=0;
	/* s = splbio();*/ /* want to test this */
	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	/* KernelWakeupFunc() recovers the queue through the request. */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	/* NOTE(review): only the upper bound is checked; a negative raidid
	 * would index before raid_softc[]. */
	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	/* Clear any stale error state left on the original buffer. */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * Context for the completion callback: the original buffer and the
	 * request this component I/O belongs to.
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this??  */

		/* NOTE(review): rf_buf has not been through InitBP() on this
		 * path, yet KernelWakeupFunc() reads its b_flags, b_error,
		 * b_resid and b_bcount.  Whether RAIDGETBUF() hands back a
		 * cleared buffer is not visible here -- confirm. */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* The component buf inherits the original buffer's flags in
		 * addition to the read/write direction. */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			/* NOTE(review): this prints the block number of the
			 * original buffer, not of the component buffer that
			 * was just set up. */
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* Writes are accounted on the component vnode before the
		 * request is issued. */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	/* splx(s); */ /* want to test this */
	return (0);
}
/*
 * This is the callback function associated with an I/O invoked from
 * kernel code.
 *
 * rf_DispatchKernelIO() passes it to InitBP() as the b_iodone handler of
 * the component buf, and calls it directly for NOP requests.
 *
 * vbp: the buf embedded in a struct raidbuf.  The cast below relies on
 *      rf_buf sitting at the start of struct raidbuf -- confirm against
 *      the structure definition.
 *
 * Copies error state, residual and byte count back to the original buf,
 * updates the trace record if there is one, marks the component failed on
 * error, gives the raidbuf back with RAIDPUTBUF() and then completes the
 * request.  The whole body runs between splbio() and splx().
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int     unit;
	int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	/* Context stored by rf_DispatchKernelIO(). */
	req = raidbp->req;

	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	/* Propagate a component error to the original buffer; fall back to
	 * EIO when the component did not record a specific error code. */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		/* The elapsed time is added to both the disk-wait and the
		 * physical-I/O totals. */
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* The raidbuf is handed back here; from this point on only bp, req
	 * and queue are used. */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);

	/* Both completion hooks get 1 on error and 0 on success. */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);

	splx(s);
}
   1947 
   1948 
   1949 
   1950 /*
   1951  * initialize a buf structure for doing an I/O in the kernel.
   1952  */
   1953 static void
   1954 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
   1955        logBytesPerSector, b_proc)
   1956 	struct buf *bp;
   1957 	struct vnode *b_vp;
   1958 	unsigned rw_flag;
   1959 	dev_t dev;
   1960 	RF_SectorNum_t startSect;
   1961 	RF_SectorCount_t numSect;
   1962 	caddr_t buf;
   1963 	void (*cbFunc) (struct buf *);
   1964 	void *cbArg;
   1965 	int logBytesPerSector;
   1966 	struct proc *b_proc;
   1967 {
   1968 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1969 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1970 	bp->b_bcount = numSect << logBytesPerSector;
   1971 	bp->b_bufsize = bp->b_bcount;
   1972 	bp->b_error = 0;
   1973 	bp->b_dev = dev;
   1974 	bp->b_data = buf;
   1975 	bp->b_blkno = startSect;
   1976 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1977 	if (bp->b_bcount == 0) {
   1978 		panic("bp->b_bcount is zero in InitBP!!\n");
   1979 	}
   1980 	bp->b_proc = b_proc;
   1981 	bp->b_iodone = cbFunc;
   1982 	bp->b_vp = b_vp;
   1983 
   1984 }
   1985 
   1986 static void
   1987 raidgetdefaultlabel(raidPtr, rs, lp)
   1988 	RF_Raid_t *raidPtr;
   1989 	struct raid_softc *rs;
   1990 	struct disklabel *lp;
   1991 {
   1992 	db1_printf(("Building a default label...\n"));
   1993 	memset(lp, 0, sizeof(*lp));
   1994 
   1995 	/* fabricate a label... */
   1996 	lp->d_secperunit = raidPtr->totalSectors;
   1997 	lp->d_secsize = raidPtr->bytesPerSector;
   1998 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1999 	lp->d_ntracks = 4 * raidPtr->numCol;
   2000 	lp->d_ncylinders = raidPtr->totalSectors /
   2001 		(lp->d_nsectors * lp->d_ntracks);
   2002 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   2003 
   2004 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2005 	lp->d_type = DTYPE_RAID;
   2006 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2007 	lp->d_rpm = 3600;
   2008 	lp->d_interleave = 1;
   2009 	lp->d_flags = 0;
   2010 
   2011 	lp->d_partitions[RAW_PART].p_offset = 0;
   2012 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2013 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2014 	lp->d_npartitions = RAW_PART + 1;
   2015 
   2016 	lp->d_magic = DISKMAGIC;
   2017 	lp->d_magic2 = DISKMAGIC;
   2018 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2019 
   2020 }
   2021 /*
   2022  * Read the disklabel from the raid device.  If one is not present, fake one
   2023  * up.
   2024  */
   2025 static void
   2026 raidgetdisklabel(dev)
   2027 	dev_t   dev;
   2028 {
   2029 	int     unit = raidunit(dev);
   2030 	struct raid_softc *rs = &raid_softc[unit];
   2031 	char   *errstring;
   2032 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2033 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2034 	RF_Raid_t *raidPtr;
   2035 
   2036 	db1_printf(("Getting the disklabel...\n"));
   2037 
   2038 	memset(clp, 0, sizeof(*clp));
   2039 
   2040 	raidPtr = raidPtrs[unit];
   2041 
   2042 	raidgetdefaultlabel(raidPtr, rs, lp);
   2043 
   2044 	/*
   2045 	 * Call the generic disklabel extraction routine.
   2046 	 */
   2047 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2048 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2049 	if (errstring)
   2050 		raidmakedisklabel(rs);
   2051 	else {
   2052 		int     i;
   2053 		struct partition *pp;
   2054 
   2055 		/*
   2056 		 * Sanity check whether the found disklabel is valid.
   2057 		 *
   2058 		 * This is necessary since total size of the raid device
   2059 		 * may vary when an interleave is changed even though exactly
   2060 		 * same componets are used, and old disklabel may used
   2061 		 * if that is found.
   2062 		 */
   2063 		if (lp->d_secperunit != rs->sc_size)
   2064 			printf("WARNING: %s: "
   2065 			    "total sector size in disklabel (%d) != "
   2066 			    "the size of raid (%ld)\n", rs->sc_xname,
   2067 			    lp->d_secperunit, (long) rs->sc_size);
   2068 		for (i = 0; i < lp->d_npartitions; i++) {
   2069 			pp = &lp->d_partitions[i];
   2070 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2071 				printf("WARNING: %s: end of partition `%c' "
   2072 				    "exceeds the size of raid (%ld)\n",
   2073 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2074 		}
   2075 	}
   2076 
   2077 }
   2078 /*
   2079  * Take care of things one might want to take care of in the event
   2080  * that a disklabel isn't present.
   2081  */
   2082 static void
   2083 raidmakedisklabel(rs)
   2084 	struct raid_softc *rs;
   2085 {
   2086 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2087 	db1_printf(("Making a label..\n"));
   2088 
   2089 	/*
   2090 	 * For historical reasons, if there's no disklabel present
   2091 	 * the raw partition must be marked FS_BSDFFS.
   2092 	 */
   2093 
   2094 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2095 
   2096 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2097 
   2098 	lp->d_checksum = dkcksum(lp);
   2099 }
   2100 /*
   2101  * Lookup the provided name in the filesystem.  If the file exists,
   2102  * is a valid block device, and isn't being used by anyone else,
   2103  * set *vpp to the file's vnode.
   2104  * You'll find the original of this in ccd.c
   2105  */
   2106 int
   2107 raidlookup(path, p, vpp)
   2108 	char   *path;
   2109 	struct proc *p;
   2110 	struct vnode **vpp;	/* result */
   2111 {
   2112 	struct nameidata nd;
   2113 	struct vnode *vp;
   2114 	struct vattr va;
   2115 	int     error;
   2116 
   2117 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2118 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2119 #ifdef DEBUG
   2120 		printf("RAIDframe: vn_open returned %d\n", error);
   2121 #endif
   2122 		return (error);
   2123 	}
   2124 	vp = nd.ni_vp;
   2125 	if (vp->v_usecount > 1) {
   2126 		VOP_UNLOCK(vp, 0);
   2127 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2128 		return (EBUSY);
   2129 	}
   2130 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2131 		VOP_UNLOCK(vp, 0);
   2132 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2133 		return (error);
   2134 	}
   2135 	/* XXX: eventually we should handle VREG, too. */
   2136 	if (va.va_type != VBLK) {
   2137 		VOP_UNLOCK(vp, 0);
   2138 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2139 		return (ENOTBLK);
   2140 	}
   2141 	VOP_UNLOCK(vp, 0);
   2142 	*vpp = vp;
   2143 	return (0);
   2144 }
   2145 /*
   2146  * Wait interruptibly for an exclusive lock.
   2147  *
   2148  * XXX
   2149  * Several drivers do this; it should be abstracted and made MP-safe.
   2150  * (Hmm... where have we seen this warning before :->  GO )
   2151  */
   2152 static int
   2153 raidlock(rs)
   2154 	struct raid_softc *rs;
   2155 {
   2156 	int     error;
   2157 
   2158 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2159 		rs->sc_flags |= RAIDF_WANTED;
   2160 		if ((error =
   2161 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2162 			return (error);
   2163 	}
   2164 	rs->sc_flags |= RAIDF_LOCKED;
   2165 	return (0);
   2166 }
   2167 /*
   2168  * Unlock and wake up any waiters.
   2169  */
   2170 static void
   2171 raidunlock(rs)
   2172 	struct raid_softc *rs;
   2173 {
   2174 
   2175 	rs->sc_flags &= ~RAIDF_LOCKED;
   2176 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2177 		rs->sc_flags &= ~RAIDF_WANTED;
   2178 		wakeup(rs);
   2179 	}
   2180 }
   2181 
   2182 
/*
 * Location and size of the component label on each component, in bytes.
 * raidread_component_label() and raidwrite_component_label() divide the
 * offset by DEV_BSIZE to get the block number of the transfer.
 */
#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2185 
   2186 int
   2187 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2188 {
   2189 	RF_ComponentLabel_t clabel;
   2190 	raidread_component_label(dev, b_vp, &clabel);
   2191 	clabel.mod_counter = mod_counter;
   2192 	clabel.clean = RF_RAID_CLEAN;
   2193 	raidwrite_component_label(dev, b_vp, &clabel);
   2194 	return(0);
   2195 }
   2196 
   2197 
   2198 int
   2199 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2200 {
   2201 	RF_ComponentLabel_t clabel;
   2202 	raidread_component_label(dev, b_vp, &clabel);
   2203 	clabel.mod_counter = mod_counter;
   2204 	clabel.clean = RF_RAID_DIRTY;
   2205 	raidwrite_component_label(dev, b_vp, &clabel);
   2206 	return(0);
   2207 }
   2208 
/*
 * Read the component label of one component.
 *
 * dev:    device to read from; the driver's strategy routine is found
 *         through bdevsw[major(dev)]
 * b_vp:   vnode of the component; only checked for NULL here
 * clabel: receives the label on success and is left untouched on failure
 *
 * Returns 0 on success, EINVAL when b_vp is NULL, or the biowait() error.
 */
/* ARGSUSED */
int
raidread_component_label(dev, b_vp, clabel)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *clabel;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags |= B_READ;
	/* NOTE(review): b_resid is set to a block count here although
	 * b_bcount is in bytes -- confirm this is intended. */
 	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* Hand the buffer straight to the block driver and wait for it. */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		/* Only the leading sizeof(RF_ComponentLabel_t) bytes of the
		 * label area are copied out. */
		memcpy(clabel, bp->b_data,
		       sizeof(RF_ComponentLabel_t));
#if 0
		rf_print_component_label( clabel );
#endif
        } else {
#if 0
		printf("Failed to read RAID component label!\n");
#endif
	}

	brelse(bp);
	return(error);
}
/*
 * Write the component label of one component.
 *
 * dev:    device to write to; the driver's strategy routine is found
 *         through bdevsw[major(dev)]
 * b_vp:   vnode of the component; not used by this routine
 * clabel: label to write
 *
 * The label area is zeroed before the label is copied in, so the rest of
 * the area beyond the label is written as zeroes.
 *
 * Returns the biowait() result (0 on success); a failure is also reported
 * with printf().
 */
/* ARGSUSED */
int
raidwrite_component_label(dev, b_vp, clabel)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *clabel;
{
	struct buf *bp;
	int error;

	/* NOTE(review): unlike raidread_component_label(), a NULL b_vp is
	 * not rejected here. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags |= B_WRITE;
	/* NOTE(review): b_resid is set to a block count here although
	 * b_bcount is in bytes -- confirm this is intended. */
 	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );

	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));

	/* Hand the buffer straight to the block driver and wait for it. */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	brelse(bp);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}
   2292 
   2293 void
   2294 rf_markalldirty(raidPtr)
   2295 	RF_Raid_t *raidPtr;
   2296 {
   2297 	RF_ComponentLabel_t clabel;
   2298 	int r,c;
   2299 
   2300 	raidPtr->mod_counter++;
   2301 	for (r = 0; r < raidPtr->numRow; r++) {
   2302 		for (c = 0; c < raidPtr->numCol; c++) {
   2303 			/* we don't want to touch (at all) a disk that has
   2304 			   failed */
   2305 			if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
   2306 				raidread_component_label(
   2307 					raidPtr->Disks[r][c].dev,
   2308 					raidPtr->raid_cinfo[r][c].ci_vp,
   2309 					&clabel);
   2310 				if (clabel.status == rf_ds_spared) {
   2311 					/* XXX do something special...
   2312 					 but whatever you do, don't
   2313 					 try to access it!! */
   2314 				} else {
   2315 #if 0
   2316 				clabel.status =
   2317 					raidPtr->Disks[r][c].status;
   2318 				raidwrite_component_label(
   2319 					raidPtr->Disks[r][c].dev,
   2320 					raidPtr->raid_cinfo[r][c].ci_vp,
   2321 					&clabel);
   2322 #endif
   2323 				raidmarkdirty(
   2324 				       raidPtr->Disks[r][c].dev,
   2325 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2326 				       raidPtr->mod_counter);
   2327 				}
   2328 			}
   2329 		}
   2330 	}
   2331 	/* printf("Component labels marked dirty.\n"); */
   2332 #if 0
   2333 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2334 		sparecol = raidPtr->numCol + c;
   2335 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2336 			/*
   2337 
   2338 			   XXX this is where we get fancy and map this spare
   2339 			   into it's correct spot in the array.
   2340 
   2341 			 */
   2342 			/*
   2343 
   2344 			   we claim this disk is "optimal" if it's
   2345 			   rf_ds_used_spare, as that means it should be
   2346 			   directly substitutable for the disk it replaced.
   2347 			   We note that too...
   2348 
   2349 			 */
   2350 
   2351 			for(i=0;i<raidPtr->numRow;i++) {
   2352 				for(j=0;j<raidPtr->numCol;j++) {
   2353 					if ((raidPtr->Disks[i][j].spareRow ==
   2354 					     r) &&
   2355 					    (raidPtr->Disks[i][j].spareCol ==
   2356 					     sparecol)) {
   2357 						srow = r;
   2358 						scol = sparecol;
   2359 						break;
   2360 					}
   2361 				}
   2362 			}
   2363 
   2364 			raidread_component_label(
   2365 				      raidPtr->Disks[r][sparecol].dev,
   2366 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2367 				      &clabel);
   2368 			/* make sure status is noted */
   2369 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2370 			clabel.mod_counter = raidPtr->mod_counter;
   2371 			clabel.serial_number = raidPtr->serial_number;
   2372 			clabel.row = srow;
   2373 			clabel.column = scol;
   2374 			clabel.num_rows = raidPtr->numRow;
   2375 			clabel.num_columns = raidPtr->numCol;
   2376 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2377 			clabel.status = rf_ds_optimal;
   2378 			raidwrite_component_label(
   2379 				      raidPtr->Disks[r][sparecol].dev,
   2380 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2381 				      &clabel);
   2382 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2383 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2384 		}
   2385 	}
   2386 
   2387 #endif
   2388 }
   2389 
   2390 
   2391 void
   2392 rf_update_component_labels(raidPtr, final)
   2393 	RF_Raid_t *raidPtr;
   2394 	int final;
   2395 {
   2396 	RF_ComponentLabel_t clabel;
   2397 	int sparecol;
   2398 	int r,c;
   2399 	int i,j;
   2400 	int srow, scol;
   2401 
   2402 	srow = -1;
   2403 	scol = -1;
   2404 
   2405 	/* XXX should do extra checks to make sure things really are clean,
   2406 	   rather than blindly setting the clean bit... */
   2407 
   2408 	raidPtr->mod_counter++;
   2409 
   2410 	for (r = 0; r < raidPtr->numRow; r++) {
   2411 		for (c = 0; c < raidPtr->numCol; c++) {
   2412 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2413 				raidread_component_label(
   2414 					raidPtr->Disks[r][c].dev,
   2415 					raidPtr->raid_cinfo[r][c].ci_vp,
   2416 					&clabel);
   2417 				/* make sure status is noted */
   2418 				clabel.status = rf_ds_optimal;
   2419 				/* bump the counter */
   2420 				clabel.mod_counter = raidPtr->mod_counter;
   2421 
   2422 				raidwrite_component_label(
   2423 					raidPtr->Disks[r][c].dev,
   2424 					raidPtr->raid_cinfo[r][c].ci_vp,
   2425 					&clabel);
   2426 				if (final == RF_FINAL_COMPONENT_UPDATE) {
   2427 					if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2428 						raidmarkclean(
   2429 							      raidPtr->Disks[r][c].dev,
   2430 							      raidPtr->raid_cinfo[r][c].ci_vp,
   2431 							      raidPtr->mod_counter);
   2432 					}
   2433 				}
   2434 			}
   2435 			/* else we don't touch it.. */
   2436 		}
   2437 	}
   2438 
   2439 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2440 		sparecol = raidPtr->numCol + c;
   2441 		/* Need to ensure that the reconstruct actually completed! */
   2442 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2443 			/*
   2444 
   2445 			   we claim this disk is "optimal" if it's
   2446 			   rf_ds_used_spare, as that means it should be
   2447 			   directly substitutable for the disk it replaced.
   2448 			   We note that too...
   2449 
   2450 			 */
   2451 
   2452 			for(i=0;i<raidPtr->numRow;i++) {
   2453 				for(j=0;j<raidPtr->numCol;j++) {
   2454 					if ((raidPtr->Disks[i][j].spareRow ==
   2455 					     0) &&
   2456 					    (raidPtr->Disks[i][j].spareCol ==
   2457 					     sparecol)) {
   2458 						srow = i;
   2459 						scol = j;
   2460 						break;
   2461 					}
   2462 				}
   2463 			}
   2464 
   2465 			/* XXX shouldn't *really* need this... */
   2466 			raidread_component_label(
   2467 				      raidPtr->Disks[0][sparecol].dev,
   2468 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2469 				      &clabel);
   2470 			/* make sure status is noted */
   2471 
   2472 			raid_init_component_label(raidPtr, &clabel);
   2473 
   2474 			clabel.mod_counter = raidPtr->mod_counter;
   2475 			clabel.row = srow;
   2476 			clabel.column = scol;
   2477 			clabel.status = rf_ds_optimal;
   2478 
   2479 			raidwrite_component_label(
   2480 				      raidPtr->Disks[0][sparecol].dev,
   2481 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2482 				      &clabel);
   2483 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2484 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2485 					raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2486 						       raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2487 						       raidPtr->mod_counter);
   2488 				}
   2489 			}
   2490 		}
   2491 	}
   2492 	/* 	printf("Component labels updated\n"); */
   2493 }
   2494 
   2495 void
   2496 rf_close_component(raidPtr, vp, auto_configured)
   2497 	RF_Raid_t *raidPtr;
   2498 	struct vnode *vp;
   2499 	int auto_configured;
   2500 {
   2501 	struct proc *p;
   2502 
   2503 	p = raidPtr->engine_thread;
   2504 
   2505 	if (vp != NULL) {
   2506 		if (auto_configured == 1) {
   2507 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2508 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2509 			vput(vp);
   2510 
   2511 		} else {
   2512 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2513 		}
   2514 	} else {
   2515 		printf("vnode was NULL\n");
   2516 	}
   2517 }
   2518 
   2519 
   2520 void
   2521 rf_UnconfigureVnodes(raidPtr)
   2522 	RF_Raid_t *raidPtr;
   2523 {
   2524 	int r,c;
   2525 	struct proc *p;
   2526 	struct vnode *vp;
   2527 	int acd;
   2528 
   2529 
   2530 	/* We take this opportunity to close the vnodes like we should.. */
   2531 
   2532 	p = raidPtr->engine_thread;
   2533 
   2534 	for (r = 0; r < raidPtr->numRow; r++) {
   2535 		for (c = 0; c < raidPtr->numCol; c++) {
   2536 			printf("Closing vnode for row: %d col: %d\n", r, c);
   2537 			vp = raidPtr->raid_cinfo[r][c].ci_vp;
   2538 			acd = raidPtr->Disks[r][c].auto_configured;
   2539 			rf_close_component(raidPtr, vp, acd);
   2540 			raidPtr->raid_cinfo[r][c].ci_vp = NULL;
   2541 			raidPtr->Disks[r][c].auto_configured = 0;
   2542 		}
   2543 	}
   2544 	for (r = 0; r < raidPtr->numSpare; r++) {
   2545 		printf("Closing vnode for spare: %d\n", r);
   2546 		vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
   2547 		acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
   2548 		rf_close_component(raidPtr, vp, acd);
   2549 		raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
   2550 		raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
   2551 	}
   2552 }
   2553 
   2554 
   2555 void
   2556 rf_ReconThread(req)
   2557 	struct rf_recon_req *req;
   2558 {
   2559 	int     s;
   2560 	RF_Raid_t *raidPtr;
   2561 
   2562 	s = splbio();
   2563 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2564 	raidPtr->recon_in_progress = 1;
   2565 
   2566 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2567 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2568 
   2569 	/* XXX get rid of this! we don't need it at all.. */
   2570 	RF_Free(req, sizeof(*req));
   2571 
   2572 	raidPtr->recon_in_progress = 0;
   2573 	splx(s);
   2574 
   2575 	/* That's all... */
   2576 	kthread_exit(0);        /* does not return */
   2577 }
   2578 
   2579 void
   2580 rf_RewriteParityThread(raidPtr)
   2581 	RF_Raid_t *raidPtr;
   2582 {
   2583 	int retcode;
   2584 	int s;
   2585 
   2586 	raidPtr->parity_rewrite_in_progress = 1;
   2587 	s = splbio();
   2588 	retcode = rf_RewriteParity(raidPtr);
   2589 	splx(s);
   2590 	if (retcode) {
   2591 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2592 	} else {
   2593 		/* set the clean bit!  If we shutdown correctly,
   2594 		   the clean bit on each component label will get
   2595 		   set */
   2596 		raidPtr->parity_good = RF_RAID_CLEAN;
   2597 	}
   2598 	raidPtr->parity_rewrite_in_progress = 0;
   2599 
   2600 	/* Anyone waiting for us to stop?  If so, inform them... */
   2601 	if (raidPtr->waitShutdown) {
   2602 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2603 	}
   2604 
   2605 	/* That's all... */
   2606 	kthread_exit(0);        /* does not return */
   2607 }
   2608 
   2609 
   2610 void
   2611 rf_CopybackThread(raidPtr)
   2612 	RF_Raid_t *raidPtr;
   2613 {
   2614 	int s;
   2615 
   2616 	raidPtr->copyback_in_progress = 1;
   2617 	s = splbio();
   2618 	rf_CopybackReconstructedData(raidPtr);
   2619 	splx(s);
   2620 	raidPtr->copyback_in_progress = 0;
   2621 
   2622 	/* That's all... */
   2623 	kthread_exit(0);        /* does not return */
   2624 }
   2625 
   2626 
   2627 void
   2628 rf_ReconstructInPlaceThread(req)
   2629 	struct rf_recon_req *req;
   2630 {
   2631 	int retcode;
   2632 	int s;
   2633 	RF_Raid_t *raidPtr;
   2634 
   2635 	s = splbio();
   2636 	raidPtr = req->raidPtr;
   2637 	raidPtr->recon_in_progress = 1;
   2638 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2639 	RF_Free(req, sizeof(*req));
   2640 	raidPtr->recon_in_progress = 0;
   2641 	splx(s);
   2642 
   2643 	/* That's all... */
   2644 	kthread_exit(0);        /* does not return */
   2645 }
   2646 
/*
 * Mount-root hook for the RAID device.  Intentionally empty: there is
 * nothing to do here, and dev is not used.
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{

}
   2653 
   2654 
   2655 RF_AutoConfig_t *
   2656 rf_find_raid_components()
   2657 {
   2658 	struct devnametobdevmaj *dtobdm;
   2659 	struct vnode *vp;
   2660 	struct disklabel label;
   2661 	struct device *dv;
   2662 	char *cd_name;
   2663 	dev_t dev;
   2664 	int error;
   2665 	int i;
   2666 	int good_one;
   2667 	RF_ComponentLabel_t *clabel;
   2668 	RF_AutoConfig_t *ac_list;
   2669 	RF_AutoConfig_t *ac;
   2670 
   2671 
   2672 	/* initialize the AutoConfig list */
   2673 	ac_list = NULL;
   2674 
   2675 	/* we begin by trolling through *all* the devices on the system */
   2676 
   2677 	for (dv = alldevs.tqh_first; dv != NULL;
   2678 	     dv = dv->dv_list.tqe_next) {
   2679 
   2680 		/* we are only interested in disks... */
   2681 		if (dv->dv_class != DV_DISK)
   2682 			continue;
   2683 
   2684 		/* we don't care about floppies... */
   2685 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2686 			continue;
   2687 		}
   2688 		/* hdfd is the Atari/Hades floppy driver */
   2689 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
   2690 			continue;
   2691 		}
   2692 
   2693 		/* need to find the device_name_to_block_device_major stuff */
   2694 		cd_name = dv->dv_cfdata->cf_driver->cd_name;
   2695 		dtobdm = dev_name2blk;
   2696 		while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
   2697 			dtobdm++;
   2698 		}
   2699 
   2700 		/* get a vnode for the raw partition of this disk */
   2701 
   2702 		dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
   2703 		if (bdevvp(dev, &vp))
   2704 			panic("RAID can't alloc vnode");
   2705 
   2706 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2707 
   2708 		if (error) {
   2709 			/* "Who cares."  Continue looking
   2710 			   for something that exists*/
   2711 			vput(vp);
   2712 			continue;
   2713 		}
   2714 
   2715 		/* Ok, the disk exists.  Go get the disklabel. */
   2716 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2717 				  FREAD, NOCRED, 0);
   2718 		if (error) {
   2719 			/*
   2720 			 * XXX can't happen - open() would
   2721 			 * have errored out (or faked up one)
   2722 			 */
   2723 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2724 			       dv->dv_xname, 'a' + RAW_PART, error);
   2725 		}
   2726 
   2727 		/* don't need this any more.  We'll allocate it again
   2728 		   a little later if we really do... */
   2729 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2730 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2731 		vput(vp);
   2732 
   2733 		for (i=0; i < label.d_npartitions; i++) {
   2734 			/* We only support partitions marked as RAID */
   2735 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2736 				continue;
   2737 
   2738 			dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
   2739 			if (bdevvp(dev, &vp))
   2740 				panic("RAID can't alloc vnode");
   2741 
   2742 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2743 			if (error) {
   2744 				/* Whatever... */
   2745 				vput(vp);
   2746 				continue;
   2747 			}
   2748 
   2749 			good_one = 0;
   2750 
   2751 			clabel = (RF_ComponentLabel_t *)
   2752 				malloc(sizeof(RF_ComponentLabel_t),
   2753 				       M_RAIDFRAME, M_NOWAIT);
   2754 			if (clabel == NULL) {
   2755 				/* XXX CLEANUP HERE */
   2756 				printf("RAID auto config: out of memory!\n");
   2757 				return(NULL); /* XXX probably should panic? */
   2758 			}
   2759 
   2760 			if (!raidread_component_label(dev, vp, clabel)) {
   2761 				/* Got the label.  Does it look reasonable? */
   2762 				if (rf_reasonable_label(clabel) &&
   2763 				    (clabel->partitionSize <=
   2764 				     label.d_partitions[i].p_size)) {
   2765 #if DEBUG
   2766 					printf("Component on: %s%c: %d\n",
   2767 					       dv->dv_xname, 'a'+i,
   2768 					       label.d_partitions[i].p_size);
   2769 					rf_print_component_label(clabel);
   2770 #endif
   2771 					/* if it's reasonable, add it,
   2772 					   else ignore it. */
   2773 					ac = (RF_AutoConfig_t *)
   2774 						malloc(sizeof(RF_AutoConfig_t),
   2775 						       M_RAIDFRAME,
   2776 						       M_NOWAIT);
   2777 					if (ac == NULL) {
   2778 						/* XXX should panic?? */
   2779 						return(NULL);
   2780 					}
   2781 
   2782 					sprintf(ac->devname, "%s%c",
   2783 						dv->dv_xname, 'a'+i);
   2784 					ac->dev = dev;
   2785 					ac->vp = vp;
   2786 					ac->clabel = clabel;
   2787 					ac->next = ac_list;
   2788 					ac_list = ac;
   2789 					good_one = 1;
   2790 				}
   2791 			}
   2792 			if (!good_one) {
   2793 				/* cleanup */
   2794 				free(clabel, M_RAIDFRAME);
   2795 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2796 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2797 				vput(vp);
   2798 			}
   2799 		}
   2800 	}
   2801 	return(ac_list);
   2802 }
   2803 
   2804 static int
   2805 rf_reasonable_label(clabel)
   2806 	RF_ComponentLabel_t *clabel;
   2807 {
   2808 
   2809 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2810 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2811 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2812 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2813 	    clabel->row >=0 &&
   2814 	    clabel->column >= 0 &&
   2815 	    clabel->num_rows > 0 &&
   2816 	    clabel->num_columns > 0 &&
   2817 	    clabel->row < clabel->num_rows &&
   2818 	    clabel->column < clabel->num_columns &&
   2819 	    clabel->blockSize > 0 &&
   2820 	    clabel->numBlocks > 0) {
   2821 		/* label looks reasonable enough... */
   2822 		return(1);
   2823 	}
   2824 	return(0);
   2825 }
   2826 
   2827 
   2828 void
   2829 rf_print_component_label(clabel)
   2830 	RF_ComponentLabel_t *clabel;
   2831 {
   2832 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2833 	       clabel->row, clabel->column,
   2834 	       clabel->num_rows, clabel->num_columns);
   2835 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2836 	       clabel->version, clabel->serial_number,
   2837 	       clabel->mod_counter);
   2838 	printf("   Clean: %s Status: %d\n",
   2839 	       clabel->clean ? "Yes" : "No", clabel->status );
   2840 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2841 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2842 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2843 	       (char) clabel->parityConfig, clabel->blockSize,
   2844 	       clabel->numBlocks);
   2845 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2846 	printf("   Contains root partition: %s\n",
   2847 	       clabel->root_partition ? "Yes" : "No" );
   2848 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2849 #if 0
   2850 	   printf("   Config order: %d\n", clabel->config_order);
   2851 #endif
   2852 
   2853 }
   2854 
   2855 RF_ConfigSet_t *
   2856 rf_create_auto_sets(ac_list)
   2857 	RF_AutoConfig_t *ac_list;
   2858 {
   2859 	RF_AutoConfig_t *ac;
   2860 	RF_ConfigSet_t *config_sets;
   2861 	RF_ConfigSet_t *cset;
   2862 	RF_AutoConfig_t *ac_next;
   2863 
   2864 
   2865 	config_sets = NULL;
   2866 
   2867 	/* Go through the AutoConfig list, and figure out which components
   2868 	   belong to what sets.  */
   2869 	ac = ac_list;
   2870 	while(ac!=NULL) {
   2871 		/* we're going to putz with ac->next, so save it here
   2872 		   for use at the end of the loop */
   2873 		ac_next = ac->next;
   2874 
   2875 		if (config_sets == NULL) {
   2876 			/* will need at least this one... */
   2877 			config_sets = (RF_ConfigSet_t *)
   2878 				malloc(sizeof(RF_ConfigSet_t),
   2879 				       M_RAIDFRAME, M_NOWAIT);
   2880 			if (config_sets == NULL) {
   2881 				panic("rf_create_auto_sets: No memory!\n");
   2882 			}
   2883 			/* this one is easy :) */
   2884 			config_sets->ac = ac;
   2885 			config_sets->next = NULL;
   2886 			config_sets->rootable = 0;
   2887 			ac->next = NULL;
   2888 		} else {
   2889 			/* which set does this component fit into? */
   2890 			cset = config_sets;
   2891 			while(cset!=NULL) {
   2892 				if (rf_does_it_fit(cset, ac)) {
   2893 					/* looks like it matches... */
   2894 					ac->next = cset->ac;
   2895 					cset->ac = ac;
   2896 					break;
   2897 				}
   2898 				cset = cset->next;
   2899 			}
   2900 			if (cset==NULL) {
   2901 				/* didn't find a match above... new set..*/
   2902 				cset = (RF_ConfigSet_t *)
   2903 					malloc(sizeof(RF_ConfigSet_t),
   2904 					       M_RAIDFRAME, M_NOWAIT);
   2905 				if (cset == NULL) {
   2906 					panic("rf_create_auto_sets: No memory!\n");
   2907 				}
   2908 				cset->ac = ac;
   2909 				ac->next = NULL;
   2910 				cset->next = config_sets;
   2911 				cset->rootable = 0;
   2912 				config_sets = cset;
   2913 			}
   2914 		}
   2915 		ac = ac_next;
   2916 	}
   2917 
   2918 
   2919 	return(config_sets);
   2920 }
   2921 
   2922 static int
   2923 rf_does_it_fit(cset, ac)
   2924 	RF_ConfigSet_t *cset;
   2925 	RF_AutoConfig_t *ac;
   2926 {
   2927 	RF_ComponentLabel_t *clabel1, *clabel2;
   2928 
   2929 	/* If this one matches the *first* one in the set, that's good
   2930 	   enough, since the other members of the set would have been
   2931 	   through here too... */
   2932 	/* note that we are not checking partitionSize here..
   2933 
   2934 	   Note that we are also not checking the mod_counters here.
   2935 	   If everything else matches execpt the mod_counter, that's
   2936 	   good enough for this test.  We will deal with the mod_counters
   2937 	   a little later in the autoconfiguration process.
   2938 
   2939 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2940 
   2941 	   The reason we don't check for this is that failed disks
   2942 	   will have lower modification counts.  If those disks are
   2943 	   not added to the set they used to belong to, then they will
   2944 	   form their own set, which may result in 2 different sets,
   2945 	   for example, competing to be configured at raid0, and
   2946 	   perhaps competing to be the root filesystem set.  If the
   2947 	   wrong ones get configured, or both attempt to become /,
   2948 	   weird behaviour and or serious lossage will occur.  Thus we
   2949 	   need to bring them into the fold here, and kick them out at
   2950 	   a later point.
   2951 
   2952 	*/
   2953 
   2954 	clabel1 = cset->ac->clabel;
   2955 	clabel2 = ac->clabel;
   2956 	if ((clabel1->version == clabel2->version) &&
   2957 	    (clabel1->serial_number == clabel2->serial_number) &&
   2958 	    (clabel1->num_rows == clabel2->num_rows) &&
   2959 	    (clabel1->num_columns == clabel2->num_columns) &&
   2960 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2961 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2962 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2963 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2964 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2965 	    (clabel1->blockSize == clabel2->blockSize) &&
   2966 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2967 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2968 	    (clabel1->root_partition == clabel2->root_partition) &&
   2969 	    (clabel1->last_unit == clabel2->last_unit) &&
   2970 	    (clabel1->config_order == clabel2->config_order)) {
   2971 		/* if it get's here, it almost *has* to be a match */
   2972 	} else {
   2973 		/* it's not consistent with somebody in the set..
   2974 		   punt */
   2975 		return(0);
   2976 	}
   2977 	/* all was fine.. it must fit... */
   2978 	return(1);
   2979 }
   2980 
   2981 int
   2982 rf_have_enough_components(cset)
   2983 	RF_ConfigSet_t *cset;
   2984 {
   2985 	RF_AutoConfig_t *ac;
   2986 	RF_AutoConfig_t *auto_config;
   2987 	RF_ComponentLabel_t *clabel;
   2988 	int r,c;
   2989 	int num_rows;
   2990 	int num_cols;
   2991 	int num_missing;
   2992 	int mod_counter;
   2993 	int mod_counter_found;
   2994 	int even_pair_failed;
   2995 	char parity_type;
   2996 
   2997 
   2998 	/* check to see that we have enough 'live' components
   2999 	   of this set.  If so, we can configure it if necessary */
   3000 
   3001 	num_rows = cset->ac->clabel->num_rows;
   3002 	num_cols = cset->ac->clabel->num_columns;
   3003 	parity_type = cset->ac->clabel->parityConfig;
   3004 
   3005 	/* XXX Check for duplicate components!?!?!? */
   3006 
   3007 	/* Determine what the mod_counter is supposed to be for this set. */
   3008 
   3009 	mod_counter_found = 0;
   3010 	mod_counter = 0;
   3011 	ac = cset->ac;
   3012 	while(ac!=NULL) {
   3013 		if (mod_counter_found==0) {
   3014 			mod_counter = ac->clabel->mod_counter;
   3015 			mod_counter_found = 1;
   3016 		} else {
   3017 			if (ac->clabel->mod_counter > mod_counter) {
   3018 				mod_counter = ac->clabel->mod_counter;
   3019 			}
   3020 		}
   3021 		ac = ac->next;
   3022 	}
   3023 
   3024 	num_missing = 0;
   3025 	auto_config = cset->ac;
   3026 
   3027 	for(r=0; r<num_rows; r++) {
   3028 		even_pair_failed = 0;
   3029 		for(c=0; c<num_cols; c++) {
   3030 			ac = auto_config;
   3031 			while(ac!=NULL) {
   3032 				if ((ac->clabel->row == r) &&
   3033 				    (ac->clabel->column == c) &&
   3034 				    (ac->clabel->mod_counter == mod_counter)) {
   3035 					/* it's this one... */
   3036 #if DEBUG
   3037 					printf("Found: %s at %d,%d\n",
   3038 					       ac->devname,r,c);
   3039 #endif
   3040 					break;
   3041 				}
   3042 				ac=ac->next;
   3043 			}
   3044 			if (ac==NULL) {
   3045 				/* Didn't find one here! */
   3046 				/* special case for RAID 1, especially
   3047 				   where there are more than 2
   3048 				   components (where RAIDframe treats
   3049 				   things a little differently :( ) */
   3050 				if (parity_type == '1') {
   3051 					if (c%2 == 0) { /* even component */
   3052 						even_pair_failed = 1;
   3053 					} else { /* odd component.  If
   3054                                                     we're failed, and
   3055                                                     so is the even
   3056                                                     component, it's
   3057                                                     "Good Night, Charlie" */
   3058 						if (even_pair_failed == 1) {
   3059 							return(0);
   3060 						}
   3061 					}
   3062 				} else {
   3063 					/* normal accounting */
   3064 					num_missing++;
   3065 				}
   3066 			}
   3067 			if ((parity_type == '1') && (c%2 == 1)) {
   3068 				/* Just did an even component, and we didn't
   3069 				   bail.. reset the even_pair_failed flag,
   3070 				   and go on to the next component.... */
   3071 				even_pair_failed = 0;
   3072 			}
   3073 		}
   3074 	}
   3075 
   3076 	clabel = cset->ac->clabel;
   3077 
   3078 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3079 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3080 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3081 		/* XXX this needs to be made *much* more general */
   3082 		/* Too many failures */
   3083 		return(0);
   3084 	}
   3085 	/* otherwise, all is well, and we've got enough to take a kick
   3086 	   at autoconfiguring this set */
   3087 	return(1);
   3088 }
   3089 
   3090 void
   3091 rf_create_configuration(ac,config,raidPtr)
   3092 	RF_AutoConfig_t *ac;
   3093 	RF_Config_t *config;
   3094 	RF_Raid_t *raidPtr;
   3095 {
   3096 	RF_ComponentLabel_t *clabel;
   3097 	int i;
   3098 
   3099 	clabel = ac->clabel;
   3100 
   3101 	/* 1. Fill in the common stuff */
   3102 	config->numRow = clabel->num_rows;
   3103 	config->numCol = clabel->num_columns;
   3104 	config->numSpare = 0; /* XXX should this be set here? */
   3105 	config->sectPerSU = clabel->sectPerSU;
   3106 	config->SUsPerPU = clabel->SUsPerPU;
   3107 	config->SUsPerRU = clabel->SUsPerRU;
   3108 	config->parityConfig = clabel->parityConfig;
   3109 	/* XXX... */
   3110 	strcpy(config->diskQueueType,"fifo");
   3111 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3112 	config->layoutSpecificSize = 0; /* XXX ?? */
   3113 
   3114 	while(ac!=NULL) {
   3115 		/* row/col values will be in range due to the checks
   3116 		   in reasonable_label() */
   3117 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   3118 		       ac->devname);
   3119 		ac = ac->next;
   3120 	}
   3121 
   3122 	for(i=0;i<RF_MAXDBGV;i++) {
   3123 		config->debugVars[i][0] = NULL;
   3124 	}
   3125 }
   3126 
   3127 int
   3128 rf_set_autoconfig(raidPtr, new_value)
   3129 	RF_Raid_t *raidPtr;
   3130 	int new_value;
   3131 {
   3132 	RF_ComponentLabel_t clabel;
   3133 	struct vnode *vp;
   3134 	dev_t dev;
   3135 	int row, column;
   3136 
   3137 	raidPtr->autoconfigure = new_value;
   3138 	for(row=0; row<raidPtr->numRow; row++) {
   3139 		for(column=0; column<raidPtr->numCol; column++) {
   3140 			if (raidPtr->Disks[row][column].status ==
   3141 			    rf_ds_optimal) {
   3142 				dev = raidPtr->Disks[row][column].dev;
   3143 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3144 				raidread_component_label(dev, vp, &clabel);
   3145 				clabel.autoconfigure = new_value;
   3146 				raidwrite_component_label(dev, vp, &clabel);
   3147 			}
   3148 		}
   3149 	}
   3150 	return(new_value);
   3151 }
   3152 
   3153 int
   3154 rf_set_rootpartition(raidPtr, new_value)
   3155 	RF_Raid_t *raidPtr;
   3156 	int new_value;
   3157 {
   3158 	RF_ComponentLabel_t clabel;
   3159 	struct vnode *vp;
   3160 	dev_t dev;
   3161 	int row, column;
   3162 
   3163 	raidPtr->root_partition = new_value;
   3164 	for(row=0; row<raidPtr->numRow; row++) {
   3165 		for(column=0; column<raidPtr->numCol; column++) {
   3166 			if (raidPtr->Disks[row][column].status ==
   3167 			    rf_ds_optimal) {
   3168 				dev = raidPtr->Disks[row][column].dev;
   3169 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3170 				raidread_component_label(dev, vp, &clabel);
   3171 				clabel.root_partition = new_value;
   3172 				raidwrite_component_label(dev, vp, &clabel);
   3173 			}
   3174 		}
   3175 	}
   3176 	return(new_value);
   3177 }
   3178 
   3179 void
   3180 rf_release_all_vps(cset)
   3181 	RF_ConfigSet_t *cset;
   3182 {
   3183 	RF_AutoConfig_t *ac;
   3184 
   3185 	ac = cset->ac;
   3186 	while(ac!=NULL) {
   3187 		/* Close the vp, and give it back */
   3188 		if (ac->vp) {
   3189 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3190 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3191 			vput(ac->vp);
   3192 			ac->vp = NULL;
   3193 		}
   3194 		ac = ac->next;
   3195 	}
   3196 }
   3197 
   3198 
   3199 void
   3200 rf_cleanup_config_set(cset)
   3201 	RF_ConfigSet_t *cset;
   3202 {
   3203 	RF_AutoConfig_t *ac;
   3204 	RF_AutoConfig_t *next_ac;
   3205 
   3206 	ac = cset->ac;
   3207 	while(ac!=NULL) {
   3208 		next_ac = ac->next;
   3209 		/* nuke the label */
   3210 		free(ac->clabel, M_RAIDFRAME);
   3211 		/* cleanup the config structure */
   3212 		free(ac, M_RAIDFRAME);
   3213 		/* "next.." */
   3214 		ac = next_ac;
   3215 	}
   3216 	/* and, finally, nuke the config set */
   3217 	free(cset, M_RAIDFRAME);
   3218 }
   3219 
   3220 
   3221 void
   3222 raid_init_component_label(raidPtr, clabel)
   3223 	RF_Raid_t *raidPtr;
   3224 	RF_ComponentLabel_t *clabel;
   3225 {
   3226 	/* current version number */
   3227 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3228 	clabel->serial_number = raidPtr->serial_number;
   3229 	clabel->mod_counter = raidPtr->mod_counter;
   3230 	clabel->num_rows = raidPtr->numRow;
   3231 	clabel->num_columns = raidPtr->numCol;
   3232 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3233 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3234 
   3235 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3236 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3237 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3238 
   3239 	clabel->blockSize = raidPtr->bytesPerSector;
   3240 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3241 
   3242 	/* XXX not portable */
   3243 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3244 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3245 	clabel->autoconfigure = raidPtr->autoconfigure;
   3246 	clabel->root_partition = raidPtr->root_partition;
   3247 	clabel->last_unit = raidPtr->raidid;
   3248 	clabel->config_order = raidPtr->config_order;
   3249 }
   3250 
   3251 int
   3252 rf_auto_config_set(cset,unit)
   3253 	RF_ConfigSet_t *cset;
   3254 	int *unit;
   3255 {
   3256 	RF_Raid_t *raidPtr;
   3257 	RF_Config_t *config;
   3258 	int raidID;
   3259 	int retcode;
   3260 
   3261 	printf("RAID autoconfigure\n");
   3262 
   3263 	retcode = 0;
   3264 	*unit = -1;
   3265 
   3266 	/* 1. Create a config structure */
   3267 
   3268 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3269 				       M_RAIDFRAME,
   3270 				       M_NOWAIT);
   3271 	if (config==NULL) {
   3272 		printf("Out of mem!?!?\n");
   3273 				/* XXX do something more intelligent here. */
   3274 		return(1);
   3275 	}
   3276 
   3277 	memset(config, 0, sizeof(RF_Config_t));
   3278 
   3279 	/* XXX raidID needs to be set correctly.. */
   3280 
   3281 	/*
   3282 	   2. Figure out what RAID ID this one is supposed to live at
   3283 	   See if we can get the same RAID dev that it was configured
   3284 	   on last time..
   3285 	*/
   3286 
   3287 	raidID = cset->ac->clabel->last_unit;
   3288 	if ((raidID < 0) || (raidID >= numraid)) {
   3289 		/* let's not wander off into lala land. */
   3290 		raidID = numraid - 1;
   3291 	}
   3292 	if (raidPtrs[raidID]->valid != 0) {
   3293 
   3294 		/*
   3295 		   Nope... Go looking for an alternative...
   3296 		   Start high so we don't immediately use raid0 if that's
   3297 		   not taken.
   3298 		*/
   3299 
   3300 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3301 			if (raidPtrs[raidID]->valid == 0) {
   3302 				/* can use this one! */
   3303 				break;
   3304 			}
   3305 		}
   3306 	}
   3307 
   3308 	if (raidID < 0) {
   3309 		/* punt... */
   3310 		printf("Unable to auto configure this set!\n");
   3311 		printf("(Out of RAID devs!)\n");
   3312 		return(1);
   3313 	}
   3314 	printf("Configuring raid%d:\n",raidID);
   3315 	raidPtr = raidPtrs[raidID];
   3316 
   3317 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3318 	raidPtr->raidid = raidID;
   3319 	raidPtr->openings = RAIDOUTSTANDING;
   3320 
   3321 	/* 3. Build the configuration structure */
   3322 	rf_create_configuration(cset->ac, config, raidPtr);
   3323 
   3324 	/* 4. Do the configuration */
   3325 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3326 
   3327 	if (retcode == 0) {
   3328 
   3329 		raidinit(raidPtrs[raidID]);
   3330 
   3331 		rf_markalldirty(raidPtrs[raidID]);
   3332 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3333 		if (cset->ac->clabel->root_partition==1) {
   3334 			/* everything configured just fine.  Make a note
   3335 			   that this set is eligible to be root. */
   3336 			cset->rootable = 1;
   3337 			/* XXX do this here? */
   3338 			raidPtrs[raidID]->root_partition = 1;
   3339 		}
   3340 	}
   3341 
   3342 	/* 5. Cleanup */
   3343 	free(config, M_RAIDFRAME);
   3344 
   3345 	*unit = raidID;
   3346 	return(retcode);
   3347 }
   3348 
   3349 void
   3350 rf_disk_unbusy(desc)
   3351 	RF_RaidAccessDesc_t *desc;
   3352 {
   3353 	struct buf *bp;
   3354 
   3355 	bp = (struct buf *)desc->bp;
   3356 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3357 			    (bp->b_bcount - bp->b_resid));
   3358 }
   3359