Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.101
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.101 2000/12/05 01:35:56 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 #include <sys/reboot.h>
    137 
    138 #include "raid.h"
    139 #include "opt_raid_autoconfig.h"
    140 #include "rf_raid.h"
    141 #include "rf_raidframe.h"
    142 #include "rf_copyback.h"
    143 #include "rf_dag.h"
    144 #include "rf_dagflags.h"
    145 #include "rf_desc.h"
    146 #include "rf_diskqueue.h"
    147 #include "rf_acctrace.h"
    148 #include "rf_etimer.h"
    149 #include "rf_general.h"
    150 #include "rf_debugMem.h"
    151 #include "rf_kintf.h"
    152 #include "rf_options.h"
    153 #include "rf_driver.h"
    154 #include "rf_parityscan.h"
    155 #include "rf_debugprint.h"
    156 #include "rf_threadstuff.h"
    157 #include "rf_configure.h"
    158 
    159 int     rf_kdebug_level = 0;
    160 
/*
 * db1_printf((fmt, args...)) -- debug printf, note the doubled parentheses.
 *
 * With DEBUG defined the message is printed when rf_kdebug_level > 0;
 * without DEBUG the call expands to nothing.
 *
 * Both variants are wrapped in do { } while (0) so that the macro plus its
 * trailing semicolon is exactly one statement.  That keeps
 *	if (x) db1_printf(("...")); else ...
 * compiling, and stops the DEBUG variant's internal `if' from capturing an
 * `else' that belongs to the caller.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    166 
    167 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    168 
    169 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    170 
    171 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    172 						 * spare table */
    173 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    174 						 * installation process */
    175 
    176 /* prototypes */
    177 static void KernelWakeupFunc(struct buf * bp);
    178 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    179 		   dev_t dev, RF_SectorNum_t startSect,
    180 		   RF_SectorCount_t numSect, caddr_t buf,
    181 		   void (*cbFunc) (struct buf *), void *cbArg,
    182 		   int logBytesPerSector, struct proc * b_proc);
    183 static void raidinit __P((RF_Raid_t *));
    184 
    185 void raidattach __P((int));
    186 int raidsize __P((dev_t));
    187 int raidopen __P((dev_t, int, int, struct proc *));
    188 int raidclose __P((dev_t, int, int, struct proc *));
    189 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
    190 int raidwrite __P((dev_t, struct uio *, int));
    191 int raidread __P((dev_t, struct uio *, int));
    192 void raidstrategy __P((struct buf *));
    193 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    194 
    195 /*
    196  * Pilfered from ccd.c
    197  */
    198 
    199 struct raidbuf {	/* a struct buf plus RAIDframe bookkeeping for one I/O */
    200 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!!
				 * (presumably so a struct buf * can be cast
				 * back to a struct raidbuf * -- confirm
				 * against the users of this struct) */
    201 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    202 	int     rf_flags;	/* misc. flags */
    203 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
    204 };
    205 
    206 
    207 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
    208 #define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    209 
    210 /* XXX Not sure if the following should be replacing the raidPtrs above,
    211    or if it should be used in conjunction with that...
    212 */
    213 
    214 struct raid_softc {	/* per-unit driver state; the raid_softc array is indexed by unit number */
    215 	int     sc_flags;	/* flags (RAIDF_* bits defined below) */
    216 	int     sc_cflags;	/* configuration flags */
    217 	size_t  sc_size;        /* size of the raid device */
    218 	char    sc_xname[20];	/* XXX external name */
    219 	struct disk sc_dkdev;	/* generic disk device info (open masks and
				 * disklabel are read through this) */
    220 	struct pool sc_cbufpool;	/* component buffer pool; RAIDGETBUF and
					 * RAIDPUTBUF allocate from / return to it */
    221 	struct buf_queue buf_queue;	/* used for the device queue; raidstrategy
					 * appends incoming bufs here */
    222 };
    223 /* sc_flags */
    224 #define RAIDF_INITED	0x01	/* unit has been initialized */
    225 #define RAIDF_WLABEL	0x02	/* label area is writable */
    226 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    227 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    228 #define RAIDF_LOCKED	0x80	/* unit is locked */
    229 
    230 #define	raidunit(x)	DISKUNIT(x)
    231 int numraid = 0;
    232 
    233 /*
    234  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    235  * Be aware that large numbers can allow the driver to consume a lot of
    236  * kernel memory, especially on writes, and in degraded mode reads.
    237  *
    238  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    239  * a single 64K write will typically require 64K for the old data,
    240  * 64K for the old parity, and 64K for the new parity, for a total
    241  * of 192K (if the parity buffer is not re-used immediately).
    242  * Even if it is used immediately, that's still 128K, which when multiplied
    243  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    244  *
    245  * Now in degraded mode, for example, a 64K read on the above setup may
    246  * require data reconstruction, which will require *all* of the 4 remaining
    247  * disks to participate -- 4 * 32K/disk == 128K again.
    248  */
    249 
    250 #ifndef RAIDOUTSTANDING
    251 #define RAIDOUTSTANDING   6
    252 #endif
    253 
    254 #define RAIDLABELDEV(dev)	\
    255 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    256 
    257 /* declared here, and made public, for the benefit of KVM stuff.. */
    258 struct raid_softc *raid_softc;
    259 
    260 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
    261 				     struct disklabel *));
    262 static void raidgetdisklabel __P((dev_t));
    263 static void raidmakedisklabel __P((struct raid_softc *));
    264 
    265 static int raidlock __P((struct raid_softc *));
    266 static void raidunlock __P((struct raid_softc *));
    267 
    268 static void rf_markalldirty __P((RF_Raid_t *));
    269 void rf_mountroot_hook __P((struct device *));
    270 
    271 struct device *raidrootdev;
    272 
    273 void rf_ReconThread __P((struct rf_recon_req *));
    274 /* XXX what I want is: */
    275 /*void rf_ReconThread __P((RF_Raid_t *raidPtr));  */
    276 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
    277 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
    278 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
    279 void rf_buildroothack __P((void *));
    280 
    281 RF_AutoConfig_t *rf_find_raid_components __P((void));
    282 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
    283 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
    284 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
    285 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
    286 				  RF_Raid_t *));
    287 int rf_set_autoconfig __P((RF_Raid_t *, int));
    288 int rf_set_rootpartition __P((RF_Raid_t *, int));
    289 void rf_release_all_vps __P((RF_ConfigSet_t *));
    290 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
    291 int rf_have_enough_components __P((RF_ConfigSet_t *));
    292 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
    293 
    294 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    295 				  allow autoconfig to take place.
    296 			          Note that this is overridden by having
    297 			          RAID_AUTOCONFIG as an option in the
    298 			          kernel config file.  */
    299 
    300 void
    301 raidattach(num)
    302 	int     num;
    303 {
    304 	int raidID;
    305 	int i, rc;
    306 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    307 	RF_ConfigSet_t *config_sets;
    308 
    309 #ifdef DEBUG
    310 	printf("raidattach: Asked for %d units\n", num);
    311 #endif
    312 
    313 	if (num <= 0) {
    314 #ifdef DIAGNOSTIC
    315 		panic("raidattach: count <= 0");
    316 #endif
    317 		return;
    318 	}
    319 	/* This is where all the initialization stuff gets done. */
    320 
    321 	numraid = num;
    322 
    323 	/* Make some space for requested number of units... */
    324 
    325 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    326 	if (raidPtrs == NULL) {
    327 		panic("raidPtrs is NULL!!\n");
    328 	}
    329 
    330 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    331 	if (rc) {
    332 		RF_PANIC();
    333 	}
    334 
    335 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    336 
    337 	for (i = 0; i < num; i++)
    338 		raidPtrs[i] = NULL;
    339 	rc = rf_BootRaidframe();
    340 	if (rc == 0)
    341 		printf("Kernelized RAIDframe activated\n");
    342 	else
    343 		panic("Serious error booting RAID!!\n");
    344 
    345 	/* put together some datastructures like the CCD device does.. This
    346 	 * lets us lock the device and what-not when it gets opened. */
    347 
    348 	raid_softc = (struct raid_softc *)
    349 		malloc(num * sizeof(struct raid_softc),
    350 		       M_RAIDFRAME, M_NOWAIT);
    351 	if (raid_softc == NULL) {
    352 		printf("WARNING: no memory for RAIDframe driver\n");
    353 		return;
    354 	}
    355 
    356 	bzero(raid_softc, num * sizeof(struct raid_softc));
    357 
    358 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    359 					      M_RAIDFRAME, M_NOWAIT);
    360 	if (raidrootdev == NULL) {
    361 		panic("No memory for RAIDframe driver!!?!?!\n");
    362 	}
    363 
    364 	for (raidID = 0; raidID < num; raidID++) {
    365 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    366 
    367 		raidrootdev[raidID].dv_class  = DV_DISK;
    368 		raidrootdev[raidID].dv_cfdata = NULL;
    369 		raidrootdev[raidID].dv_unit   = raidID;
    370 		raidrootdev[raidID].dv_parent = NULL;
    371 		raidrootdev[raidID].dv_flags  = 0;
    372 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    373 
    374 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    375 			  (RF_Raid_t *));
    376 		if (raidPtrs[raidID] == NULL) {
    377 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    378 			numraid = raidID;
    379 			return;
    380 		}
    381 	}
    382 
    383 #if RAID_AUTOCONFIG
    384 	raidautoconfig = 1;
    385 #endif
    386 
    387 if (raidautoconfig) {
    388 	/* 1. locate all RAID components on the system */
    389 
    390 #if DEBUG
    391 	printf("Searching for raid components...\n");
    392 #endif
    393 	ac_list = rf_find_raid_components();
    394 
    395 	/* 2. sort them into their respective sets */
    396 
    397 	config_sets = rf_create_auto_sets(ac_list);
    398 
    399 	/* 3. evaluate each set and configure the valid ones
    400 	   This gets done in rf_buildroothack() */
    401 
    402 	/* schedule the creation of the thread to do the
    403 	   "/ on RAID" stuff */
    404 
    405 	kthread_create(rf_buildroothack,config_sets);
    406 
    407 #if 0
    408 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    409 #endif
    410 }
    411 
    412 }
    413 
    414 void
    415 rf_buildroothack(arg)
    416 	void *arg;
    417 {
    418 	RF_ConfigSet_t *config_sets = arg;
    419 	RF_ConfigSet_t *cset;
    420 	RF_ConfigSet_t *next_cset;
    421 	int retcode;
    422 	int raidID;
    423 	int rootID;
    424 	int num_root;
    425 
    426 	rootID = 0;
    427 	num_root = 0;
    428 	cset = config_sets;
    429 	while(cset != NULL ) {
    430 		next_cset = cset->next;
    431 		if (rf_have_enough_components(cset) &&
    432 		    cset->ac->clabel->autoconfigure==1) {
    433 			retcode = rf_auto_config_set(cset,&raidID);
    434 			if (!retcode) {
    435 				if (cset->rootable) {
    436 					rootID = raidID;
    437 					num_root++;
    438 				}
    439 			} else {
    440 				/* The autoconfig didn't work :( */
    441 #if DEBUG
    442 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    443 #endif
    444 				rf_release_all_vps(cset);
    445 			}
    446 		} else {
    447 			/* we're not autoconfiguring this set...
    448 			   release the associated resources */
    449 			rf_release_all_vps(cset);
    450 		}
    451 		/* cleanup */
    452 		rf_cleanup_config_set(cset);
    453 		cset = next_cset;
    454 	}
    455 	if (boothowto & RB_ASKNAME) {
    456 		/* We don't auto-config... */
    457 	} else {
    458 		/* They didn't ask, and we found something bootable... */
    459 
    460 		if (num_root == 1) {
    461 			booted_device = &raidrootdev[rootID];
    462 		} else if (num_root > 1) {
    463 			/* we can't guess.. require the user to answer... */
    464 			boothowto |= RB_ASKNAME;
    465 		}
    466 	}
    467 }
    468 
    469 
    470 int
    471 raidsize(dev)
    472 	dev_t   dev;
    473 {
    474 	struct raid_softc *rs;
    475 	struct disklabel *lp;
    476 	int     part, unit, omask, size;
    477 
    478 	unit = raidunit(dev);
    479 	if (unit >= numraid)
    480 		return (-1);
    481 	rs = &raid_softc[unit];
    482 
    483 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    484 		return (-1);
    485 
    486 	part = DISKPART(dev);
    487 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    488 	lp = rs->sc_dkdev.dk_label;
    489 
    490 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    491 		return (-1);
    492 
    493 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    494 		size = -1;
    495 	else
    496 		size = lp->d_partitions[part].p_size *
    497 		    (lp->d_secsize / DEV_BSIZE);
    498 
    499 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    500 		return (-1);
    501 
    502 	return (size);
    503 
    504 }
    505 
    506 int
    507 raiddump(dev, blkno, va, size)
    508 	dev_t   dev;
    509 	daddr_t blkno;
    510 	caddr_t va;
    511 	size_t  size;
    512 {
    513 	/* Not implemented. */
    514 	return ENXIO;
    515 }
    516 /* ARGSUSED */
    517 int
    518 raidopen(dev, flags, fmt, p)
    519 	dev_t   dev;
    520 	int     flags, fmt;
    521 	struct proc *p;
    522 {
    523 	int     unit = raidunit(dev);
    524 	struct raid_softc *rs;
    525 	struct disklabel *lp;
    526 	int     part, pmask;
    527 	int     error = 0;
    528 
    529 	if (unit >= numraid)
    530 		return (ENXIO);
    531 	rs = &raid_softc[unit];
    532 
    533 	if ((error = raidlock(rs)) != 0)
    534 		return (error);
    535 	lp = rs->sc_dkdev.dk_label;
    536 
    537 	part = DISKPART(dev);
    538 	pmask = (1 << part);
    539 
    540 	db1_printf(("Opening raid device number: %d partition: %d\n",
    541 		unit, part));
    542 
    543 
    544 	if ((rs->sc_flags & RAIDF_INITED) &&
    545 	    (rs->sc_dkdev.dk_openmask == 0))
    546 		raidgetdisklabel(dev);
    547 
    548 	/* make sure that this partition exists */
    549 
    550 	if (part != RAW_PART) {
    551 		db1_printf(("Not a raw partition..\n"));
    552 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    553 		    ((part >= lp->d_npartitions) ||
    554 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    555 			error = ENXIO;
    556 			raidunlock(rs);
    557 			db1_printf(("Bailing out...\n"));
    558 			return (error);
    559 		}
    560 	}
    561 	/* Prevent this unit from being unconfigured while open. */
    562 	switch (fmt) {
    563 	case S_IFCHR:
    564 		rs->sc_dkdev.dk_copenmask |= pmask;
    565 		break;
    566 
    567 	case S_IFBLK:
    568 		rs->sc_dkdev.dk_bopenmask |= pmask;
    569 		break;
    570 	}
    571 
    572 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    573 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    574 		/* First one... mark things as dirty... Note that we *MUST*
    575 		 have done a configure before this.  I DO NOT WANT TO BE
    576 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    577 		 THAT THEY BELONG TOGETHER!!!!! */
    578 		/* XXX should check to see if we're only open for reading
    579 		   here... If so, we needn't do this, but then need some
    580 		   other way of keeping track of what's happened.. */
    581 
    582 		rf_markalldirty( raidPtrs[unit] );
    583 	}
    584 
    585 
    586 	rs->sc_dkdev.dk_openmask =
    587 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    588 
    589 	raidunlock(rs);
    590 
    591 	return (error);
    592 
    593 
    594 }
    595 /* ARGSUSED */
    596 int
    597 raidclose(dev, flags, fmt, p)
    598 	dev_t   dev;
    599 	int     flags, fmt;
    600 	struct proc *p;
    601 {
    602 	int     unit = raidunit(dev);
    603 	struct raid_softc *rs;
    604 	int     error = 0;
    605 	int     part;
    606 
    607 	if (unit >= numraid)
    608 		return (ENXIO);
    609 	rs = &raid_softc[unit];
    610 
    611 	if ((error = raidlock(rs)) != 0)
    612 		return (error);
    613 
    614 	part = DISKPART(dev);
    615 
    616 	/* ...that much closer to allowing unconfiguration... */
    617 	switch (fmt) {
    618 	case S_IFCHR:
    619 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    620 		break;
    621 
    622 	case S_IFBLK:
    623 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    624 		break;
    625 	}
    626 	rs->sc_dkdev.dk_openmask =
    627 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    628 
    629 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    630 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    631 		/* Last one... device is not unconfigured yet.
    632 		   Device shutdown has taken care of setting the
    633 		   clean bits if RAIDF_INITED is not set
    634 		   mark things as clean... */
    635 #if 0
    636 		printf("Last one on raid%d.  Updating status.\n",unit);
    637 #endif
    638 		rf_update_component_labels(raidPtrs[unit],
    639 						 RF_FINAL_COMPONENT_UPDATE);
    640 	}
    641 
    642 	raidunlock(rs);
    643 	return (0);
    644 
    645 }
    646 
    647 void
    648 raidstrategy(bp)
    649 	struct buf *bp;
    650 {
    651 	int s;
    652 
    653 	unsigned int raidID = raidunit(bp->b_dev);
    654 	RF_Raid_t *raidPtr;
    655 	struct raid_softc *rs = &raid_softc[raidID];
    656 	struct disklabel *lp;
    657 	int     wlabel;
    658 
    659 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    660 		bp->b_error = ENXIO;
    661 		bp->b_flags |= B_ERROR;
    662 		bp->b_resid = bp->b_bcount;
    663 		biodone(bp);
    664 		return;
    665 	}
    666 	if (raidID >= numraid || !raidPtrs[raidID]) {
    667 		bp->b_error = ENODEV;
    668 		bp->b_flags |= B_ERROR;
    669 		bp->b_resid = bp->b_bcount;
    670 		biodone(bp);
    671 		return;
    672 	}
    673 	raidPtr = raidPtrs[raidID];
    674 	if (!raidPtr->valid) {
    675 		bp->b_error = ENODEV;
    676 		bp->b_flags |= B_ERROR;
    677 		bp->b_resid = bp->b_bcount;
    678 		biodone(bp);
    679 		return;
    680 	}
    681 	if (bp->b_bcount == 0) {
    682 		db1_printf(("b_bcount is zero..\n"));
    683 		biodone(bp);
    684 		return;
    685 	}
    686 	lp = rs->sc_dkdev.dk_label;
    687 
    688 	/*
    689 	 * Do bounds checking and adjust transfer.  If there's an
    690 	 * error, the bounds check will flag that for us.
    691 	 */
    692 
    693 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    694 	if (DISKPART(bp->b_dev) != RAW_PART)
    695 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    696 			db1_printf(("Bounds check failed!!:%d %d\n",
    697 				(int) bp->b_blkno, (int) wlabel));
    698 			biodone(bp);
    699 			return;
    700 		}
    701 	s = splbio();
    702 
    703 	bp->b_resid = 0;
    704 
    705 	/* stuff it onto our queue */
    706 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    707 
    708 	raidstart(raidPtrs[raidID]);
    709 
    710 	splx(s);
    711 }
    712 /* ARGSUSED */
    713 int
    714 raidread(dev, uio, flags)
    715 	dev_t   dev;
    716 	struct uio *uio;
    717 	int     flags;
    718 {
    719 	int     unit = raidunit(dev);
    720 	struct raid_softc *rs;
    721 	int     part;
    722 
    723 	if (unit >= numraid)
    724 		return (ENXIO);
    725 	rs = &raid_softc[unit];
    726 
    727 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    728 		return (ENXIO);
    729 	part = DISKPART(dev);
    730 
    731 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    732 
    733 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    734 
    735 }
    736 /* ARGSUSED */
    737 int
    738 raidwrite(dev, uio, flags)
    739 	dev_t   dev;
    740 	struct uio *uio;
    741 	int     flags;
    742 {
    743 	int     unit = raidunit(dev);
    744 	struct raid_softc *rs;
    745 
    746 	if (unit >= numraid)
    747 		return (ENXIO);
    748 	rs = &raid_softc[unit];
    749 
    750 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    751 		return (ENXIO);
    752 	db1_printf(("raidwrite\n"));
    753 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    754 
    755 }
    756 
    757 int
    758 raidioctl(dev, cmd, data, flag, p)
    759 	dev_t   dev;
    760 	u_long  cmd;
    761 	caddr_t data;
    762 	int     flag;
    763 	struct proc *p;
    764 {
    765 	int     unit = raidunit(dev);
    766 	int     error = 0;
    767 	int     part, pmask;
    768 	struct raid_softc *rs;
    769 	RF_Config_t *k_cfg, *u_cfg;
    770 	RF_Raid_t *raidPtr;
    771 	RF_RaidDisk_t *diskPtr;
    772 	RF_AccTotals_t *totals;
    773 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    774 	u_char *specific_buf;
    775 	int retcode = 0;
    776 	int row;
    777 	int column;
    778 	struct rf_recon_req *rrcopy, *rr;
    779 	RF_ComponentLabel_t *clabel;
    780 	RF_ComponentLabel_t ci_label;
    781 	RF_ComponentLabel_t **clabel_ptr;
    782 	RF_SingleComponent_t *sparePtr,*componentPtr;
    783 	RF_SingleComponent_t hot_spare;
    784 	RF_SingleComponent_t component;
    785 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    786 	int i, j, d;
    787 
    788 	if (unit >= numraid)
    789 		return (ENXIO);
    790 	rs = &raid_softc[unit];
    791 	raidPtr = raidPtrs[unit];
    792 
    793 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    794 		(int) DISKPART(dev), (int) unit, (int) cmd));
    795 
    796 	/* Must be open for writes for these commands... */
    797 	switch (cmd) {
    798 	case DIOCSDINFO:
    799 	case DIOCWDINFO:
    800 	case DIOCWLABEL:
    801 		if ((flag & FWRITE) == 0)
    802 			return (EBADF);
    803 	}
    804 
    805 	/* Must be initialized for these... */
    806 	switch (cmd) {
    807 	case DIOCGDINFO:
    808 	case DIOCSDINFO:
    809 	case DIOCWDINFO:
    810 	case DIOCGPART:
    811 	case DIOCWLABEL:
    812 	case DIOCGDEFLABEL:
    813 	case RAIDFRAME_SHUTDOWN:
    814 	case RAIDFRAME_REWRITEPARITY:
    815 	case RAIDFRAME_GET_INFO:
    816 	case RAIDFRAME_RESET_ACCTOTALS:
    817 	case RAIDFRAME_GET_ACCTOTALS:
    818 	case RAIDFRAME_KEEP_ACCTOTALS:
    819 	case RAIDFRAME_GET_SIZE:
    820 	case RAIDFRAME_FAIL_DISK:
    821 	case RAIDFRAME_COPYBACK:
    822 	case RAIDFRAME_CHECK_RECON_STATUS:
    823 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
    824 	case RAIDFRAME_GET_COMPONENT_LABEL:
    825 	case RAIDFRAME_SET_COMPONENT_LABEL:
    826 	case RAIDFRAME_ADD_HOT_SPARE:
    827 	case RAIDFRAME_REMOVE_HOT_SPARE:
    828 	case RAIDFRAME_INIT_LABELS:
    829 	case RAIDFRAME_REBUILD_IN_PLACE:
    830 	case RAIDFRAME_CHECK_PARITY:
    831 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    832 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
    833 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    834 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
    835 	case RAIDFRAME_SET_AUTOCONFIG:
    836 	case RAIDFRAME_SET_ROOT:
    837 	case RAIDFRAME_DELETE_COMPONENT:
    838 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
    839 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    840 			return (ENXIO);
    841 	}
    842 
    843 	switch (cmd) {
    844 
    845 		/* configure the system */
    846 	case RAIDFRAME_CONFIGURE:
    847 
    848 		if (raidPtr->valid) {
    849 			/* There is a valid RAID set running on this unit! */
    850 			printf("raid%d: Device already configured!\n",unit);
    851 			return(EINVAL);
    852 		}
    853 
    854 		/* copy-in the configuration information */
    855 		/* data points to a pointer to the configuration structure */
    856 
    857 		u_cfg = *((RF_Config_t **) data);
    858 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    859 		if (k_cfg == NULL) {
    860 			return (ENOMEM);
    861 		}
    862 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    863 		    sizeof(RF_Config_t));
    864 		if (retcode) {
    865 			RF_Free(k_cfg, sizeof(RF_Config_t));
    866 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    867 				retcode));
    868 			return (retcode);
    869 		}
    870 		/* allocate a buffer for the layout-specific data, and copy it
    871 		 * in */
    872 		if (k_cfg->layoutSpecificSize) {
    873 			if (k_cfg->layoutSpecificSize > 10000) {
    874 				/* sanity check */
    875 				RF_Free(k_cfg, sizeof(RF_Config_t));
    876 				return (EINVAL);
    877 			}
    878 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    879 			    (u_char *));
    880 			if (specific_buf == NULL) {
    881 				RF_Free(k_cfg, sizeof(RF_Config_t));
    882 				return (ENOMEM);
    883 			}
    884 			retcode = copyin(k_cfg->layoutSpecific,
    885 			    (caddr_t) specific_buf,
    886 			    k_cfg->layoutSpecificSize);
    887 			if (retcode) {
    888 				RF_Free(k_cfg, sizeof(RF_Config_t));
    889 				RF_Free(specific_buf,
    890 					k_cfg->layoutSpecificSize);
    891 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    892 					retcode));
    893 				return (retcode);
    894 			}
    895 		} else
    896 			specific_buf = NULL;
    897 		k_cfg->layoutSpecific = specific_buf;
    898 
    899 		/* should do some kind of sanity check on the configuration.
    900 		 * Store the sum of all the bytes in the last byte? */
    901 
    902 		/* configure the system */
    903 
    904 		/*
    905 		 * Clear the entire RAID descriptor, just to make sure
    906 		 *  there is no stale data left in the case of a
    907 		 *  reconfiguration
    908 		 */
    909 		bzero((char *) raidPtr, sizeof(RF_Raid_t));
    910 		raidPtr->raidid = unit;
    911 
    912 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    913 
    914 		if (retcode == 0) {
    915 
    916 			/* allow this many simultaneous IO's to
    917 			   this RAID device */
    918 			raidPtr->openings = RAIDOUTSTANDING;
    919 
    920 			raidinit(raidPtr);
    921 			rf_markalldirty(raidPtr);
    922 		}
    923 		/* free the buffers.  No return code here. */
    924 		if (k_cfg->layoutSpecificSize) {
    925 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    926 		}
    927 		RF_Free(k_cfg, sizeof(RF_Config_t));
    928 
    929 		return (retcode);
    930 
    931 		/* shutdown the system */
    932 	case RAIDFRAME_SHUTDOWN:
    933 
    934 		if ((error = raidlock(rs)) != 0)
    935 			return (error);
    936 
    937 		/*
    938 		 * If somebody has a partition mounted, we shouldn't
    939 		 * shutdown.
    940 		 */
    941 
    942 		part = DISKPART(dev);
    943 		pmask = (1 << part);
    944 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    945 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    946 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    947 			raidunlock(rs);
    948 			return (EBUSY);
    949 		}
    950 
    951 		retcode = rf_Shutdown(raidPtr);
    952 
    953 		pool_destroy(&rs->sc_cbufpool);
    954 
    955 		/* It's no longer initialized... */
    956 		rs->sc_flags &= ~RAIDF_INITED;
    957 
    958 		/* Detach the disk. */
    959 		disk_detach(&rs->sc_dkdev);
    960 
    961 		raidunlock(rs);
    962 
    963 		return (retcode);
    964 	case RAIDFRAME_GET_COMPONENT_LABEL:
    965 		clabel_ptr = (RF_ComponentLabel_t **) data;
    966 		/* need to read the component label for the disk indicated
    967 		   by row,column in clabel */
    968 
    969 		/* For practice, let's get it directly from disk, rather
    970 		   than from the in-core copy */
    971 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    972 			   (RF_ComponentLabel_t *));
    973 		if (clabel == NULL)
    974 			return (ENOMEM);
    975 
    976 		bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
    977 
    978 		retcode = copyin( *clabel_ptr, clabel,
    979 				  sizeof(RF_ComponentLabel_t));
    980 
    981 		if (retcode) {
    982 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
    983 			return(retcode);
    984 		}
    985 
    986 		row = clabel->row;
    987 		column = clabel->column;
    988 
    989 		if ((row < 0) || (row >= raidPtr->numRow) ||
    990 		    (column < 0) || (column >= raidPtr->numCol +
    991 				     raidPtr->numSpare)) {
    992 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
    993 			return(EINVAL);
    994 		}
    995 
    996 		raidread_component_label(raidPtr->Disks[row][column].dev,
    997 				raidPtr->raid_cinfo[row][column].ci_vp,
    998 				clabel );
    999 
   1000 		retcode = copyout((caddr_t) clabel,
   1001 				  (caddr_t) *clabel_ptr,
   1002 				  sizeof(RF_ComponentLabel_t));
   1003 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1004 		return (retcode);
   1005 
   1006 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1007 		clabel = (RF_ComponentLabel_t *) data;
   1008 
   1009 		/* XXX check the label for valid stuff... */
   1010 		/* Note that some things *should not* get modified --
   1011 		   the user should be re-initing the labels instead of
   1012 		   trying to patch things.
   1013 		   */
   1014 
   1015 		printf("Got component label:\n");
   1016 		printf("Version: %d\n",clabel->version);
   1017 		printf("Serial Number: %d\n",clabel->serial_number);
   1018 		printf("Mod counter: %d\n",clabel->mod_counter);
   1019 		printf("Row: %d\n", clabel->row);
   1020 		printf("Column: %d\n", clabel->column);
   1021 		printf("Num Rows: %d\n", clabel->num_rows);
   1022 		printf("Num Columns: %d\n", clabel->num_columns);
   1023 		printf("Clean: %d\n", clabel->clean);
   1024 		printf("Status: %d\n", clabel->status);
   1025 
   1026 		row = clabel->row;
   1027 		column = clabel->column;
   1028 
   1029 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1030 		    (column < 0) || (column >= raidPtr->numCol)) {
   1031 			return(EINVAL);
   1032 		}
   1033 
   1034 		/* XXX this isn't allowed to do anything for now :-) */
   1035 
   1036 		/* XXX and before it is, we need to fill in the rest
   1037 		   of the fields!?!?!?! */
   1038 #if 0
   1039 		raidwrite_component_label(
   1040                             raidPtr->Disks[row][column].dev,
   1041 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1042 			    clabel );
   1043 #endif
   1044 		return (0);
   1045 
   1046 	case RAIDFRAME_INIT_LABELS:
   1047 		clabel = (RF_ComponentLabel_t *) data;
   1048 		/*
   1049 		   we only want the serial number from
   1050 		   the above.  We get all the rest of the information
   1051 		   from the config that was used to create this RAID
   1052 		   set.
   1053 		   */
   1054 
   1055 		raidPtr->serial_number = clabel->serial_number;
   1056 
   1057 		raid_init_component_label(raidPtr, &ci_label);
   1058 		ci_label.serial_number = clabel->serial_number;
   1059 
   1060 		for(row=0;row<raidPtr->numRow;row++) {
   1061 			ci_label.row = row;
   1062 			for(column=0;column<raidPtr->numCol;column++) {
   1063 				diskPtr = &raidPtr->Disks[row][column];
   1064 				if (!RF_DEAD_DISK(diskPtr->status)) {
   1065 					ci_label.partitionSize = diskPtr->partitionSize;
   1066 					ci_label.column = column;
   1067 					raidwrite_component_label(
   1068 					  raidPtr->Disks[row][column].dev,
   1069 					  raidPtr->raid_cinfo[row][column].ci_vp,
   1070 					  &ci_label );
   1071 				}
   1072 			}
   1073 		}
   1074 
   1075 		return (retcode);
   1076 	case RAIDFRAME_SET_AUTOCONFIG:
   1077 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1078 		printf("New autoconfig value is: %d\n", d);
   1079 		*(int *) data = d;
   1080 		return (retcode);
   1081 
   1082 	case RAIDFRAME_SET_ROOT:
   1083 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1084 		printf("New rootpartition value is: %d\n", d);
   1085 		*(int *) data = d;
   1086 		return (retcode);
   1087 
   1088 		/* initialize all parity */
   1089 	case RAIDFRAME_REWRITEPARITY:
   1090 
   1091 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1092 			/* Parity for RAID 0 is trivially correct */
   1093 			raidPtr->parity_good = RF_RAID_CLEAN;
   1094 			return(0);
   1095 		}
   1096 
   1097 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1098 			/* Re-write is already in progress! */
   1099 			return(EINVAL);
   1100 		}
   1101 
   1102 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1103 					   rf_RewriteParityThread,
   1104 					   raidPtr,"raid_parity");
   1105 		return (retcode);
   1106 
   1107 
   1108 	case RAIDFRAME_ADD_HOT_SPARE:
   1109 		sparePtr = (RF_SingleComponent_t *) data;
   1110 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1111 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1112 		return(retcode);
   1113 
   1114 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1115 		return(retcode);
   1116 
   1117 	case RAIDFRAME_DELETE_COMPONENT:
   1118 		componentPtr = (RF_SingleComponent_t *)data;
   1119 		memcpy( &component, componentPtr,
   1120 			sizeof(RF_SingleComponent_t));
   1121 		retcode = rf_delete_component(raidPtr, &component);
   1122 		return(retcode);
   1123 
   1124 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1125 		componentPtr = (RF_SingleComponent_t *)data;
   1126 		memcpy( &component, componentPtr,
   1127 			sizeof(RF_SingleComponent_t));
   1128 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1129 		return(retcode);
   1130 
   1131 	case RAIDFRAME_REBUILD_IN_PLACE:
   1132 
   1133 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1134 			/* Can't do this on a RAID 0!! */
   1135 			return(EINVAL);
   1136 		}
   1137 
   1138 		if (raidPtr->recon_in_progress == 1) {
   1139 			/* a reconstruct is already in progress! */
   1140 			return(EINVAL);
   1141 		}
   1142 
   1143 		componentPtr = (RF_SingleComponent_t *) data;
   1144 		memcpy( &component, componentPtr,
   1145 			sizeof(RF_SingleComponent_t));
   1146 		row = component.row;
   1147 		column = component.column;
   1148 		printf("Rebuild: %d %d\n",row, column);
   1149 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1150 		    (column < 0) || (column >= raidPtr->numCol)) {
   1151 			return(EINVAL);
   1152 		}
   1153 
   1154 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1155 		if (rrcopy == NULL)
   1156 			return(ENOMEM);
   1157 
   1158 		rrcopy->raidPtr = (void *) raidPtr;
   1159 		rrcopy->row = row;
   1160 		rrcopy->col = column;
   1161 
   1162 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1163 					   rf_ReconstructInPlaceThread,
   1164 					   rrcopy,"raid_reconip");
   1165 		return(retcode);
   1166 
   1167 	case RAIDFRAME_GET_INFO:
   1168 		if (!raidPtr->valid)
   1169 			return (ENODEV);
   1170 		ucfgp = (RF_DeviceConfig_t **) data;
   1171 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1172 			  (RF_DeviceConfig_t *));
   1173 		if (d_cfg == NULL)
   1174 			return (ENOMEM);
   1175 		bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
   1176 		d_cfg->rows = raidPtr->numRow;
   1177 		d_cfg->cols = raidPtr->numCol;
   1178 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1179 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1180 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1181 			return (ENOMEM);
   1182 		}
   1183 		d_cfg->nspares = raidPtr->numSpare;
   1184 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1185 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1186 			return (ENOMEM);
   1187 		}
   1188 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1189 		d = 0;
   1190 		for (i = 0; i < d_cfg->rows; i++) {
   1191 			for (j = 0; j < d_cfg->cols; j++) {
   1192 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1193 				d++;
   1194 			}
   1195 		}
   1196 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1197 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1198 		}
   1199 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1200 				  sizeof(RF_DeviceConfig_t));
   1201 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1202 
   1203 		return (retcode);
   1204 
   1205 	case RAIDFRAME_CHECK_PARITY:
   1206 		*(int *) data = raidPtr->parity_good;
   1207 		return (0);
   1208 
   1209 	case RAIDFRAME_RESET_ACCTOTALS:
   1210 		bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
   1211 		return (0);
   1212 
   1213 	case RAIDFRAME_GET_ACCTOTALS:
   1214 		totals = (RF_AccTotals_t *) data;
   1215 		*totals = raidPtr->acc_totals;
   1216 		return (0);
   1217 
   1218 	case RAIDFRAME_KEEP_ACCTOTALS:
   1219 		raidPtr->keep_acc_totals = *(int *)data;
   1220 		return (0);
   1221 
   1222 	case RAIDFRAME_GET_SIZE:
   1223 		*(int *) data = raidPtr->totalSectors;
   1224 		return (0);
   1225 
   1226 		/* fail a disk & optionally start reconstruction */
   1227 	case RAIDFRAME_FAIL_DISK:
   1228 
   1229 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1230 			/* Can't do this on a RAID 0!! */
   1231 			return(EINVAL);
   1232 		}
   1233 
   1234 		rr = (struct rf_recon_req *) data;
   1235 
   1236 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1237 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1238 			return (EINVAL);
   1239 
   1240 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1241 		       unit, rr->row, rr->col);
   1242 
   1243 		/* make a copy of the recon request so that we don't rely on
   1244 		 * the user's buffer */
   1245 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1246 		if (rrcopy == NULL)
   1247 			return(ENOMEM);
   1248 		bcopy(rr, rrcopy, sizeof(*rr));
   1249 		rrcopy->raidPtr = (void *) raidPtr;
   1250 
   1251 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1252 					   rf_ReconThread,
   1253 					   rrcopy,"raid_recon");
   1254 		return (0);
   1255 
   1256 		/* invoke a copyback operation after recon on whatever disk
   1257 		 * needs it, if any */
   1258 	case RAIDFRAME_COPYBACK:
   1259 
   1260 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1261 			/* This makes no sense on a RAID 0!! */
   1262 			return(EINVAL);
   1263 		}
   1264 
   1265 		if (raidPtr->copyback_in_progress == 1) {
   1266 			/* Copyback is already in progress! */
   1267 			return(EINVAL);
   1268 		}
   1269 
   1270 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1271 					   rf_CopybackThread,
   1272 					   raidPtr,"raid_copyback");
   1273 		return (retcode);
   1274 
   1275 		/* return the percentage completion of reconstruction */
   1276 	case RAIDFRAME_CHECK_RECON_STATUS:
   1277 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1278 			/* This makes no sense on a RAID 0, so tell the
   1279 			   user it's done. */
   1280 			*(int *) data = 100;
   1281 			return(0);
   1282 		}
   1283 		row = 0; /* XXX we only consider a single row... */
   1284 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1285 			*(int *) data = 100;
   1286 		else
   1287 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1288 		return (0);
   1289 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1290 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1291 		row = 0; /* XXX we only consider a single row... */
   1292 		if (raidPtr->status[row] != rf_rs_reconstructing) {
   1293 			progressInfo.remaining = 0;
   1294 			progressInfo.completed = 100;
   1295 			progressInfo.total = 100;
   1296 		} else {
   1297 			progressInfo.total =
   1298 				raidPtr->reconControl[row]->numRUsTotal;
   1299 			progressInfo.completed =
   1300 				raidPtr->reconControl[row]->numRUsComplete;
   1301 			progressInfo.remaining = progressInfo.total -
   1302 				progressInfo.completed;
   1303 		}
   1304 		retcode = copyout((caddr_t) &progressInfo,
   1305 				  (caddr_t) *progressInfoPtr,
   1306 				  sizeof(RF_ProgressInfo_t));
   1307 		return (retcode);
   1308 
   1309 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1310 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1311 			/* This makes no sense on a RAID 0, so tell the
   1312 			   user it's done. */
   1313 			*(int *) data = 100;
   1314 			return(0);
   1315 		}
   1316 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1317 			*(int *) data = 100 *
   1318 				raidPtr->parity_rewrite_stripes_done /
   1319 				raidPtr->Layout.numStripe;
   1320 		} else {
   1321 			*(int *) data = 100;
   1322 		}
   1323 		return (0);
   1324 
   1325 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1326 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1327 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1328 			progressInfo.total = raidPtr->Layout.numStripe;
   1329 			progressInfo.completed =
   1330 				raidPtr->parity_rewrite_stripes_done;
   1331 			progressInfo.remaining = progressInfo.total -
   1332 				progressInfo.completed;
   1333 		} else {
   1334 			progressInfo.remaining = 0;
   1335 			progressInfo.completed = 100;
   1336 			progressInfo.total = 100;
   1337 		}
   1338 		retcode = copyout((caddr_t) &progressInfo,
   1339 				  (caddr_t) *progressInfoPtr,
   1340 				  sizeof(RF_ProgressInfo_t));
   1341 		return (retcode);
   1342 
   1343 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1344 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1345 			/* This makes no sense on a RAID 0 */
   1346 			*(int *) data = 100;
   1347 			return(0);
   1348 		}
   1349 		if (raidPtr->copyback_in_progress == 1) {
   1350 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1351 				raidPtr->Layout.numStripe;
   1352 		} else {
   1353 			*(int *) data = 100;
   1354 		}
   1355 		return (0);
   1356 
   1357 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1358 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1359 		if (raidPtr->copyback_in_progress == 1) {
   1360 			progressInfo.total = raidPtr->Layout.numStripe;
   1361 			progressInfo.completed =
   1362 				raidPtr->copyback_stripes_done;
   1363 			progressInfo.remaining = progressInfo.total -
   1364 				progressInfo.completed;
   1365 		} else {
   1366 			progressInfo.remaining = 0;
   1367 			progressInfo.completed = 100;
   1368 			progressInfo.total = 100;
   1369 		}
   1370 		retcode = copyout((caddr_t) &progressInfo,
   1371 				  (caddr_t) *progressInfoPtr,
   1372 				  sizeof(RF_ProgressInfo_t));
   1373 		return (retcode);
   1374 
   1375 		/* the sparetable daemon calls this to wait for the kernel to
   1376 		 * need a spare table. this ioctl does not return until a
   1377 		 * spare table is needed. XXX -- calling mpsleep here in the
   1378 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1379 		 * -- I should either compute the spare table in the kernel,
   1380 		 * or have a different -- XXX XXX -- interface (a different
   1381 		 * character device) for delivering the table     -- XXX */
   1382 #if 0
   1383 	case RAIDFRAME_SPARET_WAIT:
   1384 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1385 		while (!rf_sparet_wait_queue)
   1386 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1387 		waitreq = rf_sparet_wait_queue;
   1388 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1389 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1390 
   1391 		/* structure assignment */
   1392 		*((RF_SparetWait_t *) data) = *waitreq;
   1393 
   1394 		RF_Free(waitreq, sizeof(*waitreq));
   1395 		return (0);
   1396 
   1397 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1398 		 * code in it that will cause the daemon to exit */
   1399 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1400 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1401 		waitreq->fcol = -1;
   1402 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1403 		waitreq->next = rf_sparet_wait_queue;
   1404 		rf_sparet_wait_queue = waitreq;
   1405 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1406 		wakeup(&rf_sparet_wait_queue);
   1407 		return (0);
   1408 
   1409 		/* used by the spare table daemon to deliver a spare table
   1410 		 * into the kernel */
   1411 	case RAIDFRAME_SEND_SPARET:
   1412 
   1413 		/* install the spare table */
   1414 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1415 
   1416 		/* respond to the requestor.  the return status of the spare
   1417 		 * table installation is passed in the "fcol" field */
   1418 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1419 		waitreq->fcol = retcode;
   1420 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1421 		waitreq->next = rf_sparet_resp_queue;
   1422 		rf_sparet_resp_queue = waitreq;
   1423 		wakeup(&rf_sparet_resp_queue);
   1424 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1425 
   1426 		return (retcode);
   1427 #endif
   1428 
   1429 	default:
   1430 		break; /* fall through to the os-specific code below */
   1431 
   1432 	}
   1433 
   1434 	if (!raidPtr->valid)
   1435 		return (EINVAL);
   1436 
   1437 	/*
   1438 	 * Add support for "regular" device ioctls here.
   1439 	 */
   1440 
   1441 	switch (cmd) {
   1442 	case DIOCGDINFO:
   1443 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1444 		break;
   1445 
   1446 	case DIOCGPART:
   1447 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1448 		((struct partinfo *) data)->part =
   1449 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1450 		break;
   1451 
   1452 	case DIOCWDINFO:
   1453 	case DIOCSDINFO:
   1454 		if ((error = raidlock(rs)) != 0)
   1455 			return (error);
   1456 
   1457 		rs->sc_flags |= RAIDF_LABELLING;
   1458 
   1459 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1460 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1461 		if (error == 0) {
   1462 			if (cmd == DIOCWDINFO)
   1463 				error = writedisklabel(RAIDLABELDEV(dev),
   1464 				    raidstrategy, rs->sc_dkdev.dk_label,
   1465 				    rs->sc_dkdev.dk_cpulabel);
   1466 		}
   1467 		rs->sc_flags &= ~RAIDF_LABELLING;
   1468 
   1469 		raidunlock(rs);
   1470 
   1471 		if (error)
   1472 			return (error);
   1473 		break;
   1474 
   1475 	case DIOCWLABEL:
   1476 		if (*(int *) data != 0)
   1477 			rs->sc_flags |= RAIDF_WLABEL;
   1478 		else
   1479 			rs->sc_flags &= ~RAIDF_WLABEL;
   1480 		break;
   1481 
   1482 	case DIOCGDEFLABEL:
   1483 		raidgetdefaultlabel(raidPtr, rs,
   1484 		    (struct disklabel *) data);
   1485 		break;
   1486 
   1487 	default:
   1488 		retcode = ENOTTY;
   1489 	}
   1490 	return (retcode);
   1491 
   1492 }
   1493 
   1494 
   1495 /* raidinit -- complete the rest of the initialization for the
   1496    RAIDframe device whose descriptor is raidPtr.  */
   1497 /* Sets up raid_softc[raidPtr->raidid]: buffer pool, RAIDF_INITED flag, */
   1498 /* "raid<unit>" name, disk attachment and sc_size.  Returns nothing. */
   1499 static void
   1500 raidinit(raidPtr)
   1501 	RF_Raid_t *raidPtr;
   1502 {
   1503 	struct raid_softc *rs;
   1504 	int     unit;
   1505
   1506 	unit = raidPtr->raidid;
   1507 	/* raidid doubles as the index into raid_softc[]; it is not range-checked here */
   1508 	rs = &raid_softc[unit];
   1509 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1510 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);	/* this unit's pool of struct raidbuf-sized items */
   1511
   1512
   1513 	/* XXX should check return code first... */
   1514 	rs->sc_flags |= RAIDF_INITED;
   1515 	/* build the unit name; dk_name below points at this same sc_xname buffer */
   1516 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1517
   1518 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1519
   1520 	/* disk_attach actually creates space for the CPU disklabel, among
   1521 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1522 	 * with disklabels. */
   1523
   1524 	disk_attach(&rs->sc_dkdev);
   1525
   1526 	/* XXX There may be a weird interaction here between this, and
   1527 	 * protectedSectors, as used in RAIDframe.  */
   1528 	/* the softc size is taken straight from the RAID set's totalSectors */
   1529 	rs->sc_size = raidPtr->totalSectors;
   1530
   1531 }
   1532 
   1533 /* wake up the daemon & tell it to get us a spare table:
   1534  * push req onto rf_sparet_wait_queue, sleep until rf_sparet_resp_queue
   1535  * is non-empty, then free the dequeued response and return its fcol.
   1536  * XXX the entries in the queues should be tagged with the raidPtr so
   1537  * that, in the extremely rare case that two recons happen at once, we
   1538  * know for which device we're requesting a spare table
   1539  *
   1540  * XXX This code is not currently used. GO
   1541  */
   1542 int
   1543 rf_GetSpareTableFromDaemon(req)
   1544 	RF_SparetWait_t *req;
   1545 {
   1546 	int     retcode;
   1547 	/* link req in at the head of the wait queue and wake any sleeper on it */
   1548 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1549 	req->next = rf_sparet_wait_queue;
   1550 	rf_sparet_wait_queue = req;
   1551 	wakeup(&rf_sparet_wait_queue);
   1552
   1553 	/* XXX stale note: "mpsleep unlocks the mutex" -- the tsleep() call below is not handed the mutex; NOTE(review): confirm the locking before reviving this code */
   1554 	while (!rf_sparet_resp_queue) {
   1555 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1556 		    "raidframe getsparetable", 0);
   1557 	}
   1558 	req = rf_sparet_resp_queue;	/* req is reused: it now names the response entry */
   1559 	rf_sparet_resp_queue = req->next;
   1560 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1561 	/* the response carries its status in the fcol field */
   1562 	retcode = req->fcol;
   1563 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1564 					 * alloc'd */
   1565 	return (retcode);
   1566 }
   1567 
   1568 /* raidstart -- a wrapper around rf_DoAccess: while the RAID set still
   1569  * has openings, take the next bp off the softc's buf_queue, extract the
   1570  * appropriate info from it & pass it down.
   1571  * any calls originating in the kernel must use non-blocking I/O
   1572  * do some extra sanity checking to return "appropriate" error values
   1573  * (ENOSPC, EINVAL) for certain conditions (to make some standard
   1574  * utilities work).  Formerly known as: rf_DoAccessKernel
   1575  */
   1576 void
   1577 raidstart(raidPtr)
   1578 	RF_Raid_t *raidPtr;
   1579 {
   1580 	RF_SectorCount_t num_blocks, pb, sum;
   1581 	RF_RaidAddr_t raid_addr;
   1582 	int     retcode;
   1583 	struct partition *pp;
   1584 	daddr_t blocknum;
   1585 	int     unit;
   1586 	struct raid_softc *rs;
   1587 	int     do_async;
   1588 	struct buf *bp;
   1589
   1590 	unit = raidPtr->raidid;
   1591 	rs = &raid_softc[unit];
   1592
   1593 	/* quick check to see if anything has died recently; one label update (and one decrement of numNewFailures) is done per call */
   1594 	RF_LOCK_MUTEX(raidPtr->mutex);
   1595 	if (raidPtr->numNewFailures > 0) {
   1596 		rf_update_component_labels(raidPtr,
   1597 					   RF_NORMAL_COMPONENT_UPDATE);
   1598 		raidPtr->numNewFailures--;
   1599 	}
   1600 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1601
   1602 	/* Check to see if we're at the limit...  openings is tested with the mutex held; the mutex is dropped while a buf is handled and re-taken before the next test */
   1603 	RF_LOCK_MUTEX(raidPtr->mutex);
   1604 	while (raidPtr->openings > 0) {
   1605 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1606
   1607 		/* get the next item, if any, from the queue */
   1608 		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
   1609 			/* nothing more to do (the mutex was already released above) */
   1610 			return;
   1611 		}
   1612 		BUFQ_REMOVE(&rs->buf_queue, bp);
   1613
   1614 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1615 		 * partition.. Need to make it absolute to the underlying
   1616 		 * device.. */
   1617
   1618 		blocknum = bp->b_blkno;
   1619 		if (DISKPART(bp->b_dev) != RAW_PART) {	/* the raw partition gets no offset */
   1620 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1621 			blocknum += pp->p_offset;
   1622 		}
   1623
   1624 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1625 			    (int) blocknum));
   1626
   1627 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1628 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1629
   1630 		/* *THIS* is where we adjust what block we're going to...
   1631 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1632 		raid_addr = blocknum;
   1633 		/* num_blocks: byte count shifted down to sectors; pb: 1 if any sectorMask bits are set in the byte count (presumably a partial trailing sector); sum: end address of the transfer */
   1634 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1635 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1636 		sum = raid_addr + num_blocks + pb;
   1637 		if (1 || rf_debugKernelAccess) {	/* the leading "1 ||" makes this unconditional */
   1638 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1639 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1640 				    (int) pb, (int) bp->b_resid));
   1641 		}
   1642 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1643 		    || (sum < num_blocks) || (sum < pb)) {	/* runs past totalSectors, or the addition wrapped around */
   1644 			bp->b_error = ENOSPC;
   1645 			bp->b_flags |= B_ERROR;
   1646 			bp->b_resid = bp->b_bcount;	/* nothing was transferred */
   1647 			biodone(bp);
   1648 			RF_LOCK_MUTEX(raidPtr->mutex);	/* the loop test expects the mutex held */
   1649 			continue;
   1650 		}
   1651 		/*
   1652 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1653 		 */
   1654 		/* fail with EINVAL any byte count that has sectorMask bits set */
   1655 		if (bp->b_bcount & raidPtr->sectorMask) {
   1656 			bp->b_error = EINVAL;
   1657 			bp->b_flags |= B_ERROR;
   1658 			bp->b_resid = bp->b_bcount;
   1659 			biodone(bp);
   1660 			RF_LOCK_MUTEX(raidPtr->mutex);	/* the loop test expects the mutex held */
   1661 			continue;
   1662
   1663 		}
   1664 		db1_printf(("Calling DoAccess..\n"));
   1665
   1666 		/* claim one opening for this access; where it is handed back is not visible in this function */
   1667 		RF_LOCK_MUTEX(raidPtr->mutex);
   1668 		raidPtr->openings--;
   1669 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1670
   1671 		/*
   1672 		 * Everything is async.
   1673 		 */
   1674 		do_async = 1;
   1675
   1676 		disk_busy(&rs->sc_dkdev);
   1677
   1678 		/* XXX we're still at splbio() here... do we *really*
   1679 		   need to be? */
   1680
   1681 		/* don't ever condition on bp->b_flags & B_WRITE.
   1682 		 * always condition on B_READ instead */
   1683
   1684 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1685 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1686 				      do_async, raid_addr, num_blocks,
   1687 				      bp->b_data, bp, NULL, NULL,
   1688 				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
   1689 		/* NOTE(review): retcode is stored but never examined in this function */
   1690
   1691 		RF_LOCK_MUTEX(raidPtr->mutex);	/* re-take the mutex for the next openings test */
   1692 	}
   1693 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1694 }
   1695 
   1696 
   1697 
   1698 
   1699 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1700 /* req is tied to a raidbuf from RAIDGETBUF(); NOP requests are completed at once, reads/writes go to VOP_STRATEGY().  Always returns 0; a bad unit or request type panics. */
   1701 int
   1702 rf_DispatchKernelIO(queue, req)
   1703 	RF_DiskQueue_t *queue;
   1704 	RF_DiskQueueData_t *req;
   1705 {
   1706 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1707 	struct buf *bp;
   1708 	struct raidbuf *raidbp = NULL;
   1709 	struct raid_softc *rs;
   1710 	int     unit;
   1711 	int s;
   1712 	/* s is only assigned; the splbio()/splx() pair that would use it is commented out */
   1713 	s=0;
   1714 	/* s = splbio();*/ /* want to test this */
   1715 	/* XXX along with the vnode, we also need the softc associated with
   1716 	 * this device.. */
   1717
   1718 	req->queue = queue;	/* the completion callback reads the queue back from req */
   1719
   1720 	unit = queue->raidPtr->raidid;
   1721
   1722 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1723 	/* never index raid_softc[] with a unit number >= numraid */
   1724 	if (unit >= numraid) {
   1725 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1726 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1727 	}
   1728 	rs = &raid_softc[unit];
   1729
   1730 	bp = req->bp;
   1731 #if 1
   1732 	/* XXX when there is a physical disk failure, someone is passing us a
   1733 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1734 	 * without taking a performance hit... (not sure where the real bug
   1735 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1736
   1737 	if (bp->b_flags & B_ERROR) {
   1738 		bp->b_flags &= ~B_ERROR;
   1739 	}
   1740 	if (bp->b_error != 0) {
   1741 		bp->b_error = 0;
   1742 	}
   1743 #endif
   1744 	raidbp = RAIDGETBUF(rs);	/* NOTE(review): the result is used below without a NULL check */
   1745
   1746 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1747
   1748 	/*
   1749 	 * context for raidiodone
   1750 	 */
   1751 	raidbp->rf_obp = bp;
   1752 	raidbp->req = req;
   1753
   1754 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1755
   1756 	switch (req->type) {
   1757 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1758 		/* XXX need to do something extra here.. */
   1759 		/* I'm leaving this in, as I've never actually seen it used,
   1760 		 * and I'd like folks to report it... GO */
   1761 		printf(("WAKEUP CALLED\n"));	/* the doubled parentheses just wrap the string literal */
   1762 		queue->numOutstanding++;
   1763
   1764 		/* XXX need to glue the original buffer into this??  */
   1765 		/* no device I/O is issued for a NOP: the completion callback is called directly */
   1766 		KernelWakeupFunc(&raidbp->rf_buf);
   1767 		break;
   1768
   1769 	case RF_IO_TYPE_READ:
   1770 	case RF_IO_TYPE_WRITE:
   1771
   1772 		if (req->tracerec) {
   1773 			RF_ETIMER_START(req->tracerec->timer);	/* time the physical I/O when tracing is attached */
   1774 		}
   1775 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1776 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1777 		    req->sectorOffset, req->numSector,
   1778 		    req->buf, KernelWakeupFunc, (void *) req,
   1779 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1780
   1781 		if (rf_debugKernelAccess) {
   1782 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1783 				(long) bp->b_blkno));
   1784 		}
   1785 		queue->numOutstanding++;
   1786 		queue->last_deq_sector = req->sectorOffset;
   1787 		/* acc wouldn't have been let in if there were any pending
   1788 		 * reqs at any other priority */
   1789 		queue->curPriority = req->priority;
   1790
   1791 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1792 			req->type, unit, queue->row, queue->col));
   1793 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1794 			(int) req->sectorOffset, (int) req->numSector,
   1795 			(int) (req->numSector <<
   1796 			    queue->raidPtr->logBytesPerSector),
   1797 			(int) queue->raidPtr->logBytesPerSector));
   1798 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {	/* a write: count it in the vnode's v_numoutput before issuing */
   1799 			raidbp->rf_buf.b_vp->v_numoutput++;
   1800 		}
   1801 		VOP_STRATEGY(&raidbp->rf_buf);
   1802
   1803 		break;
   1804
   1805 	default:
   1806 		panic("bad req->type in rf_DispatchKernelIO");
   1807 	}
   1808 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1809 	/* splx(s); */ /* want to test this */
   1810 	return (0);
   1811 }
   1812 /* this is the callback function associated with a I/O invoked from
   1813    kernel code.
   1814  */
   1815 static void
   1816 KernelWakeupFunc(vbp)
   1817 	struct buf *vbp;
   1818 {
   1819 	RF_DiskQueueData_t *req = NULL;
   1820 	RF_DiskQueue_t *queue;
   1821 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1822 	struct buf *bp;
   1823 	struct raid_softc *rs;
   1824 	int     unit;
   1825 	int s;
   1826 
   1827 	s = splbio();
   1828 	db1_printf(("recovering the request queue:\n"));
   1829 	req = raidbp->req;
   1830 
   1831 	bp = raidbp->rf_obp;
   1832 
   1833 	queue = (RF_DiskQueue_t *) req->queue;
   1834 
   1835 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1836 		bp->b_flags |= B_ERROR;
   1837 		bp->b_error = raidbp->rf_buf.b_error ?
   1838 		    raidbp->rf_buf.b_error : EIO;
   1839 	}
   1840 
   1841 	/* XXX methinks this could be wrong... */
   1842 #if 1
   1843 	bp->b_resid = raidbp->rf_buf.b_resid;
   1844 #endif
   1845 
   1846 	if (req->tracerec) {
   1847 		RF_ETIMER_STOP(req->tracerec->timer);
   1848 		RF_ETIMER_EVAL(req->tracerec->timer);
   1849 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1850 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1851 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1852 		req->tracerec->num_phys_ios++;
   1853 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1854 	}
   1855 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1856 
   1857 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1858 
   1859 
   1860 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1861 	 * ballistic, and mark the component as hosed... */
   1862 
   1863 	if (bp->b_flags & B_ERROR) {
   1864 		/* Mark the disk as dead */
   1865 		/* but only mark it once... */
   1866 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1867 		    rf_ds_optimal) {
   1868 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1869 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1870 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1871 			    rf_ds_failed;
   1872 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1873 			queue->raidPtr->numFailures++;
   1874 			queue->raidPtr->numNewFailures++;
   1875 		} else {	/* Disk is already dead... */
   1876 			/* printf("Disk already marked as dead!\n"); */
   1877 		}
   1878 
   1879 	}
   1880 
   1881 	rs = &raid_softc[unit];
   1882 	RAIDPUTBUF(rs, raidbp);
   1883 
   1884 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1885 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1886 
   1887 	splx(s);
   1888 }
   1889 
   1890 
   1891 
   1892 /*
   1893  * initialize a buf structure for doing an I/O in the kernel.
   1894  */
   1895 static void
   1896 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
   1897        logBytesPerSector, b_proc)
   1898 	struct buf *bp;
   1899 	struct vnode *b_vp;
   1900 	unsigned rw_flag;
   1901 	dev_t dev;
   1902 	RF_SectorNum_t startSect;
   1903 	RF_SectorCount_t numSect;
   1904 	caddr_t buf;
   1905 	void (*cbFunc) (struct buf *);
   1906 	void *cbArg;
   1907 	int logBytesPerSector;
   1908 	struct proc *b_proc;
   1909 {
   1910 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1911 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1912 	bp->b_bcount = numSect << logBytesPerSector;
   1913 	bp->b_bufsize = bp->b_bcount;
   1914 	bp->b_error = 0;
   1915 	bp->b_dev = dev;
   1916 	bp->b_data = buf;
   1917 	bp->b_blkno = startSect;
   1918 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1919 	if (bp->b_bcount == 0) {
   1920 		panic("bp->b_bcount is zero in InitBP!!\n");
   1921 	}
   1922 	bp->b_proc = b_proc;
   1923 	bp->b_iodone = cbFunc;
   1924 	bp->b_vp = b_vp;
   1925 
   1926 }
   1927 
   1928 static void
   1929 raidgetdefaultlabel(raidPtr, rs, lp)
   1930 	RF_Raid_t *raidPtr;
   1931 	struct raid_softc *rs;
   1932 	struct disklabel *lp;
   1933 {
   1934 	db1_printf(("Building a default label...\n"));
   1935 	bzero(lp, sizeof(*lp));
   1936 
   1937 	/* fabricate a label... */
   1938 	lp->d_secperunit = raidPtr->totalSectors;
   1939 	lp->d_secsize = raidPtr->bytesPerSector;
   1940 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1941 	lp->d_ntracks = 1;
   1942 	lp->d_ncylinders = raidPtr->totalSectors /
   1943 		(lp->d_nsectors * lp->d_ntracks);
   1944 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1945 
   1946 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1947 	lp->d_type = DTYPE_RAID;
   1948 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1949 	lp->d_rpm = 3600;
   1950 	lp->d_interleave = 1;
   1951 	lp->d_flags = 0;
   1952 
   1953 	lp->d_partitions[RAW_PART].p_offset = 0;
   1954 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1955 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1956 	lp->d_npartitions = RAW_PART + 1;
   1957 
   1958 	lp->d_magic = DISKMAGIC;
   1959 	lp->d_magic2 = DISKMAGIC;
   1960 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1961 
   1962 }
   1963 /*
   1964  * Read the disklabel from the raid device.  If one is not present, fake one
   1965  * up.
   1966  */
   1967 static void
   1968 raidgetdisklabel(dev)
   1969 	dev_t   dev;
   1970 {
   1971 	int     unit = raidunit(dev);
   1972 	struct raid_softc *rs = &raid_softc[unit];
   1973 	char   *errstring;
   1974 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1975 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1976 	RF_Raid_t *raidPtr;
   1977 
   1978 	db1_printf(("Getting the disklabel...\n"));
   1979 
   1980 	bzero(clp, sizeof(*clp));
   1981 
   1982 	raidPtr = raidPtrs[unit];
   1983 
   1984 	raidgetdefaultlabel(raidPtr, rs, lp);
   1985 
   1986 	/*
   1987 	 * Call the generic disklabel extraction routine.
   1988 	 */
   1989 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1990 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1991 	if (errstring)
   1992 		raidmakedisklabel(rs);
   1993 	else {
   1994 		int     i;
   1995 		struct partition *pp;
   1996 
   1997 		/*
   1998 		 * Sanity check whether the found disklabel is valid.
   1999 		 *
   2000 		 * This is necessary since total size of the raid device
   2001 		 * may vary when an interleave is changed even though exactly
   2002 		 * same componets are used, and old disklabel may used
   2003 		 * if that is found.
   2004 		 */
   2005 		if (lp->d_secperunit != rs->sc_size)
   2006 			printf("WARNING: %s: "
   2007 			    "total sector size in disklabel (%d) != "
   2008 			    "the size of raid (%ld)\n", rs->sc_xname,
   2009 			    lp->d_secperunit, (long) rs->sc_size);
   2010 		for (i = 0; i < lp->d_npartitions; i++) {
   2011 			pp = &lp->d_partitions[i];
   2012 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2013 				printf("WARNING: %s: end of partition `%c' "
   2014 				    "exceeds the size of raid (%ld)\n",
   2015 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2016 		}
   2017 	}
   2018 
   2019 }
   2020 /*
   2021  * Take care of things one might want to take care of in the event
   2022  * that a disklabel isn't present.
   2023  */
   2024 static void
   2025 raidmakedisklabel(rs)
   2026 	struct raid_softc *rs;
   2027 {
   2028 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2029 	db1_printf(("Making a label..\n"));
   2030 
   2031 	/*
   2032 	 * For historical reasons, if there's no disklabel present
   2033 	 * the raw partition must be marked FS_BSDFFS.
   2034 	 */
   2035 
   2036 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2037 
   2038 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2039 
   2040 	lp->d_checksum = dkcksum(lp);
   2041 }
   2042 /*
   2043  * Lookup the provided name in the filesystem.  If the file exists,
   2044  * is a valid block device, and isn't being used by anyone else,
   2045  * set *vpp to the file's vnode.
   2046  * You'll find the original of this in ccd.c
   2047  */
   2048 int
   2049 raidlookup(path, p, vpp)
   2050 	char   *path;
   2051 	struct proc *p;
   2052 	struct vnode **vpp;	/* result */
   2053 {
   2054 	struct nameidata nd;
   2055 	struct vnode *vp;
   2056 	struct vattr va;
   2057 	int     error;
   2058 
   2059 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2060 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2061 #ifdef DEBUG
   2062 		printf("RAIDframe: vn_open returned %d\n", error);
   2063 #endif
   2064 		return (error);
   2065 	}
   2066 	vp = nd.ni_vp;
   2067 	if (vp->v_usecount > 1) {
   2068 		VOP_UNLOCK(vp, 0);
   2069 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2070 		return (EBUSY);
   2071 	}
   2072 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2073 		VOP_UNLOCK(vp, 0);
   2074 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2075 		return (error);
   2076 	}
   2077 	/* XXX: eventually we should handle VREG, too. */
   2078 	if (va.va_type != VBLK) {
   2079 		VOP_UNLOCK(vp, 0);
   2080 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2081 		return (ENOTBLK);
   2082 	}
   2083 	VOP_UNLOCK(vp, 0);
   2084 	*vpp = vp;
   2085 	return (0);
   2086 }
   2087 /*
   2088  * Wait interruptibly for an exclusive lock.
   2089  *
   2090  * XXX
   2091  * Several drivers do this; it should be abstracted and made MP-safe.
   2092  * (Hmm... where have we seen this warning before :->  GO )
   2093  */
   2094 static int
   2095 raidlock(rs)
   2096 	struct raid_softc *rs;
   2097 {
   2098 	int     error;
   2099 
   2100 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2101 		rs->sc_flags |= RAIDF_WANTED;
   2102 		if ((error =
   2103 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2104 			return (error);
   2105 	}
   2106 	rs->sc_flags |= RAIDF_LOCKED;
   2107 	return (0);
   2108 }
   2109 /*
   2110  * Unlock and wake up any waiters.
   2111  */
   2112 static void
   2113 raidunlock(rs)
   2114 	struct raid_softc *rs;
   2115 {
   2116 
   2117 	rs->sc_flags &= ~RAIDF_LOCKED;
   2118 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2119 		rs->sc_flags &= ~RAIDF_WANTED;
   2120 		wakeup(rs);
   2121 	}
   2122 }
   2123 
   2124 
/*
 * Placement of the component label on each component: the read/write
 * routines below transfer RF_COMPONENT_INFO_SIZE bytes at block
 * RF_COMPONENT_INFO_OFFSET / DEV_BSIZE of the component device.
 */
#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2127 
   2128 int
   2129 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2130 {
   2131 	RF_ComponentLabel_t clabel;
   2132 	raidread_component_label(dev, b_vp, &clabel);
   2133 	clabel.mod_counter = mod_counter;
   2134 	clabel.clean = RF_RAID_CLEAN;
   2135 	raidwrite_component_label(dev, b_vp, &clabel);
   2136 	return(0);
   2137 }
   2138 
   2139 
   2140 int
   2141 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2142 {
   2143 	RF_ComponentLabel_t clabel;
   2144 	raidread_component_label(dev, b_vp, &clabel);
   2145 	clabel.mod_counter = mod_counter;
   2146 	clabel.clean = RF_RAID_DIRTY;
   2147 	raidwrite_component_label(dev, b_vp, &clabel);
   2148 	return(0);
   2149 }
   2150 
   2151 /* ARGSUSED */
   2152 int
   2153 raidread_component_label(dev, b_vp, clabel)
   2154 	dev_t dev;
   2155 	struct vnode *b_vp;
   2156 	RF_ComponentLabel_t *clabel;
   2157 {
   2158 	struct buf *bp;
   2159 	int error;
   2160 
   2161 	/* XXX should probably ensure that we don't try to do this if
   2162 	   someone has changed rf_protected_sectors. */
   2163 
   2164 	if (b_vp == NULL) {
   2165 		/* For whatever reason, this component is not valid.
   2166 		   Don't try to read a component label from it. */
   2167 		return(EINVAL);
   2168 	}
   2169 
   2170 	/* get a block of the appropriate size... */
   2171 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2172 	bp->b_dev = dev;
   2173 
   2174 	/* get our ducks in a row for the read */
   2175 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2176 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2177 	bp->b_flags |= B_READ;
   2178  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2179 
   2180 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2181 
   2182 	error = biowait(bp);
   2183 
   2184 	if (!error) {
   2185 		memcpy(clabel, bp->b_data,
   2186 		       sizeof(RF_ComponentLabel_t));
   2187 #if 0
   2188 		rf_print_component_label( clabel );
   2189 #endif
   2190         } else {
   2191 #if 0
   2192 		printf("Failed to read RAID component label!\n");
   2193 #endif
   2194 	}
   2195 
   2196 	brelse(bp);
   2197 	return(error);
   2198 }
   2199 /* ARGSUSED */
   2200 int
   2201 raidwrite_component_label(dev, b_vp, clabel)
   2202 	dev_t dev;
   2203 	struct vnode *b_vp;
   2204 	RF_ComponentLabel_t *clabel;
   2205 {
   2206 	struct buf *bp;
   2207 	int error;
   2208 
   2209 	/* get a block of the appropriate size... */
   2210 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2211 	bp->b_dev = dev;
   2212 
   2213 	/* get our ducks in a row for the write */
   2214 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2215 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2216 	bp->b_flags |= B_WRITE;
   2217  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2218 
   2219 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2220 
   2221 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2222 
   2223 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2224 	error = biowait(bp);
   2225 	brelse(bp);
   2226 	if (error) {
   2227 #if 1
   2228 		printf("Failed to write RAID component info!\n");
   2229 #endif
   2230 	}
   2231 
   2232 	return(error);
   2233 }
   2234 
   2235 void
   2236 rf_markalldirty(raidPtr)
   2237 	RF_Raid_t *raidPtr;
   2238 {
   2239 	RF_ComponentLabel_t clabel;
   2240 	int r,c;
   2241 
   2242 	raidPtr->mod_counter++;
   2243 	for (r = 0; r < raidPtr->numRow; r++) {
   2244 		for (c = 0; c < raidPtr->numCol; c++) {
   2245 			/* we don't want to touch (at all) a disk that has
   2246 			   failed */
   2247 			if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
   2248 				raidread_component_label(
   2249 					raidPtr->Disks[r][c].dev,
   2250 					raidPtr->raid_cinfo[r][c].ci_vp,
   2251 					&clabel);
   2252 				if (clabel.status == rf_ds_spared) {
   2253 					/* XXX do something special...
   2254 					 but whatever you do, don't
   2255 					 try to access it!! */
   2256 				} else {
   2257 #if 0
   2258 				clabel.status =
   2259 					raidPtr->Disks[r][c].status;
   2260 				raidwrite_component_label(
   2261 					raidPtr->Disks[r][c].dev,
   2262 					raidPtr->raid_cinfo[r][c].ci_vp,
   2263 					&clabel);
   2264 #endif
   2265 				raidmarkdirty(
   2266 				       raidPtr->Disks[r][c].dev,
   2267 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2268 				       raidPtr->mod_counter);
   2269 				}
   2270 			}
   2271 		}
   2272 	}
   2273 	/* printf("Component labels marked dirty.\n"); */
   2274 #if 0
   2275 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2276 		sparecol = raidPtr->numCol + c;
   2277 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2278 			/*
   2279 
   2280 			   XXX this is where we get fancy and map this spare
   2281 			   into it's correct spot in the array.
   2282 
   2283 			 */
   2284 			/*
   2285 
   2286 			   we claim this disk is "optimal" if it's
   2287 			   rf_ds_used_spare, as that means it should be
   2288 			   directly substitutable for the disk it replaced.
   2289 			   We note that too...
   2290 
   2291 			 */
   2292 
   2293 			for(i=0;i<raidPtr->numRow;i++) {
   2294 				for(j=0;j<raidPtr->numCol;j++) {
   2295 					if ((raidPtr->Disks[i][j].spareRow ==
   2296 					     r) &&
   2297 					    (raidPtr->Disks[i][j].spareCol ==
   2298 					     sparecol)) {
   2299 						srow = r;
   2300 						scol = sparecol;
   2301 						break;
   2302 					}
   2303 				}
   2304 			}
   2305 
   2306 			raidread_component_label(
   2307 				      raidPtr->Disks[r][sparecol].dev,
   2308 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2309 				      &clabel);
   2310 			/* make sure status is noted */
   2311 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2312 			clabel.mod_counter = raidPtr->mod_counter;
   2313 			clabel.serial_number = raidPtr->serial_number;
   2314 			clabel.row = srow;
   2315 			clabel.column = scol;
   2316 			clabel.num_rows = raidPtr->numRow;
   2317 			clabel.num_columns = raidPtr->numCol;
   2318 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2319 			clabel.status = rf_ds_optimal;
   2320 			raidwrite_component_label(
   2321 				      raidPtr->Disks[r][sparecol].dev,
   2322 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2323 				      &clabel);
   2324 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2325 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2326 		}
   2327 	}
   2328 
   2329 #endif
   2330 }
   2331 
   2332 
   2333 void
   2334 rf_update_component_labels(raidPtr, final)
   2335 	RF_Raid_t *raidPtr;
   2336 	int final;
   2337 {
   2338 	RF_ComponentLabel_t clabel;
   2339 	int sparecol;
   2340 	int r,c;
   2341 	int i,j;
   2342 	int srow, scol;
   2343 
   2344 	srow = -1;
   2345 	scol = -1;
   2346 
   2347 	/* XXX should do extra checks to make sure things really are clean,
   2348 	   rather than blindly setting the clean bit... */
   2349 
   2350 	raidPtr->mod_counter++;
   2351 
   2352 	for (r = 0; r < raidPtr->numRow; r++) {
   2353 		for (c = 0; c < raidPtr->numCol; c++) {
   2354 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2355 				raidread_component_label(
   2356 					raidPtr->Disks[r][c].dev,
   2357 					raidPtr->raid_cinfo[r][c].ci_vp,
   2358 					&clabel);
   2359 				/* make sure status is noted */
   2360 				clabel.status = rf_ds_optimal;
   2361 				/* bump the counter */
   2362 				clabel.mod_counter = raidPtr->mod_counter;
   2363 
   2364 				raidwrite_component_label(
   2365 					raidPtr->Disks[r][c].dev,
   2366 					raidPtr->raid_cinfo[r][c].ci_vp,
   2367 					&clabel);
   2368 				if (final == RF_FINAL_COMPONENT_UPDATE) {
   2369 					if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2370 						raidmarkclean(
   2371 							      raidPtr->Disks[r][c].dev,
   2372 							      raidPtr->raid_cinfo[r][c].ci_vp,
   2373 							      raidPtr->mod_counter);
   2374 					}
   2375 				}
   2376 			}
   2377 			/* else we don't touch it.. */
   2378 		}
   2379 	}
   2380 
   2381 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2382 		sparecol = raidPtr->numCol + c;
   2383 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2384 			/*
   2385 
   2386 			   we claim this disk is "optimal" if it's
   2387 			   rf_ds_used_spare, as that means it should be
   2388 			   directly substitutable for the disk it replaced.
   2389 			   We note that too...
   2390 
   2391 			 */
   2392 
   2393 			for(i=0;i<raidPtr->numRow;i++) {
   2394 				for(j=0;j<raidPtr->numCol;j++) {
   2395 					if ((raidPtr->Disks[i][j].spareRow ==
   2396 					     0) &&
   2397 					    (raidPtr->Disks[i][j].spareCol ==
   2398 					     sparecol)) {
   2399 						srow = i;
   2400 						scol = j;
   2401 						break;
   2402 					}
   2403 				}
   2404 			}
   2405 
   2406 			/* XXX shouldn't *really* need this... */
   2407 			raidread_component_label(
   2408 				      raidPtr->Disks[0][sparecol].dev,
   2409 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2410 				      &clabel);
   2411 			/* make sure status is noted */
   2412 
   2413 			raid_init_component_label(raidPtr, &clabel);
   2414 
   2415 			clabel.mod_counter = raidPtr->mod_counter;
   2416 			clabel.row = srow;
   2417 			clabel.column = scol;
   2418 			clabel.status = rf_ds_optimal;
   2419 
   2420 			raidwrite_component_label(
   2421 				      raidPtr->Disks[0][sparecol].dev,
   2422 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2423 				      &clabel);
   2424 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2425 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2426 					raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2427 						       raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2428 						       raidPtr->mod_counter);
   2429 				}
   2430 			}
   2431 		}
   2432 	}
   2433 	/* 	printf("Component labels updated\n"); */
   2434 }
   2435 
   2436 void
   2437 rf_close_component(raidPtr, vp, auto_configured)
   2438 	RF_Raid_t *raidPtr;
   2439 	struct vnode *vp;
   2440 	int auto_configured;
   2441 {
   2442 	struct proc *p;
   2443 
   2444 	p = raidPtr->engine_thread;
   2445 
   2446 	if (vp != NULL) {
   2447 		if (auto_configured == 1) {
   2448 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2449 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2450 			vput(vp);
   2451 
   2452 		} else {
   2453 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2454 		}
   2455 	} else {
   2456 		printf("vnode was NULL\n");
   2457 	}
   2458 }
   2459 
   2460 
   2461 void
   2462 rf_UnconfigureVnodes(raidPtr)
   2463 	RF_Raid_t *raidPtr;
   2464 {
   2465 	int r,c;
   2466 	struct proc *p;
   2467 	struct vnode *vp;
   2468 	int acd;
   2469 
   2470 
   2471 	/* We take this opportunity to close the vnodes like we should.. */
   2472 
   2473 	p = raidPtr->engine_thread;
   2474 
   2475 	for (r = 0; r < raidPtr->numRow; r++) {
   2476 		for (c = 0; c < raidPtr->numCol; c++) {
   2477 			printf("Closing vnode for row: %d col: %d\n", r, c);
   2478 			vp = raidPtr->raid_cinfo[r][c].ci_vp;
   2479 			acd = raidPtr->Disks[r][c].auto_configured;
   2480 			rf_close_component(raidPtr, vp, acd);
   2481 			raidPtr->raid_cinfo[r][c].ci_vp = NULL;
   2482 			raidPtr->Disks[r][c].auto_configured = 0;
   2483 		}
   2484 	}
   2485 	for (r = 0; r < raidPtr->numSpare; r++) {
   2486 		printf("Closing vnode for spare: %d\n", r);
   2487 		vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
   2488 		acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
   2489 		rf_close_component(raidPtr, vp, acd);
   2490 		raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
   2491 		raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
   2492 	}
   2493 }
   2494 
   2495 
   2496 void
   2497 rf_ReconThread(req)
   2498 	struct rf_recon_req *req;
   2499 {
   2500 	int     s;
   2501 	RF_Raid_t *raidPtr;
   2502 
   2503 	s = splbio();
   2504 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2505 	raidPtr->recon_in_progress = 1;
   2506 
   2507 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2508 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2509 
   2510 	/* XXX get rid of this! we don't need it at all.. */
   2511 	RF_Free(req, sizeof(*req));
   2512 
   2513 	raidPtr->recon_in_progress = 0;
   2514 	splx(s);
   2515 
   2516 	/* That's all... */
   2517 	kthread_exit(0);        /* does not return */
   2518 }
   2519 
   2520 void
   2521 rf_RewriteParityThread(raidPtr)
   2522 	RF_Raid_t *raidPtr;
   2523 {
   2524 	int retcode;
   2525 	int s;
   2526 
   2527 	raidPtr->parity_rewrite_in_progress = 1;
   2528 	s = splbio();
   2529 	retcode = rf_RewriteParity(raidPtr);
   2530 	splx(s);
   2531 	if (retcode) {
   2532 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2533 	} else {
   2534 		/* set the clean bit!  If we shutdown correctly,
   2535 		   the clean bit on each component label will get
   2536 		   set */
   2537 		raidPtr->parity_good = RF_RAID_CLEAN;
   2538 	}
   2539 	raidPtr->parity_rewrite_in_progress = 0;
   2540 
   2541 	/* Anyone waiting for us to stop?  If so, inform them... */
   2542 	if (raidPtr->waitShutdown) {
   2543 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2544 	}
   2545 
   2546 	/* That's all... */
   2547 	kthread_exit(0);        /* does not return */
   2548 }
   2549 
   2550 
   2551 void
   2552 rf_CopybackThread(raidPtr)
   2553 	RF_Raid_t *raidPtr;
   2554 {
   2555 	int s;
   2556 
   2557 	raidPtr->copyback_in_progress = 1;
   2558 	s = splbio();
   2559 	rf_CopybackReconstructedData(raidPtr);
   2560 	splx(s);
   2561 	raidPtr->copyback_in_progress = 0;
   2562 
   2563 	/* That's all... */
   2564 	kthread_exit(0);        /* does not return */
   2565 }
   2566 
   2567 
   2568 void
   2569 rf_ReconstructInPlaceThread(req)
   2570 	struct rf_recon_req *req;
   2571 {
   2572 	int retcode;
   2573 	int s;
   2574 	RF_Raid_t *raidPtr;
   2575 
   2576 	s = splbio();
   2577 	raidPtr = req->raidPtr;
   2578 	raidPtr->recon_in_progress = 1;
   2579 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2580 	RF_Free(req, sizeof(*req));
   2581 	raidPtr->recon_in_progress = 0;
   2582 	splx(s);
   2583 
   2584 	/* That's all... */
   2585 	kthread_exit(0);        /* does not return */
   2586 }
   2587 
/*
 * Mount-root hook.  Currently a placeholder: `dev' is ignored and
 * nothing is done.
 */
void
rf_mountroot_hook(struct device *dev)
{
	/* intentionally empty */
}
   2594 
   2595 
   2596 RF_AutoConfig_t *
   2597 rf_find_raid_components()
   2598 {
   2599 	struct devnametobdevmaj *dtobdm;
   2600 	struct vnode *vp;
   2601 	struct disklabel label;
   2602 	struct device *dv;
   2603 	char *cd_name;
   2604 	dev_t dev;
   2605 	int error;
   2606 	int i;
   2607 	int good_one;
   2608 	RF_ComponentLabel_t *clabel;
   2609 	RF_AutoConfig_t *ac_list;
   2610 	RF_AutoConfig_t *ac;
   2611 
   2612 
   2613 	/* initialize the AutoConfig list */
   2614 	ac_list = NULL;
   2615 
   2616 if (raidautoconfig) {
   2617 
   2618 	/* we begin by trolling through *all* the devices on the system */
   2619 
   2620 	for (dv = alldevs.tqh_first; dv != NULL;
   2621 	     dv = dv->dv_list.tqe_next) {
   2622 
   2623 		/* we are only interested in disks... */
   2624 		if (dv->dv_class != DV_DISK)
   2625 			continue;
   2626 
   2627 		/* we don't care about floppies... */
   2628 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2629 			continue;
   2630 		}
   2631 
   2632 		/* need to find the device_name_to_block_device_major stuff */
   2633 		cd_name = dv->dv_cfdata->cf_driver->cd_name;
   2634 		dtobdm = dev_name2blk;
   2635 		while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
   2636 			dtobdm++;
   2637 		}
   2638 
   2639 		/* get a vnode for the raw partition of this disk */
   2640 
   2641 		dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
   2642 		if (bdevvp(dev, &vp))
   2643 			panic("RAID can't alloc vnode");
   2644 
   2645 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2646 
   2647 		if (error) {
   2648 			/* "Who cares."  Continue looking
   2649 			   for something that exists*/
   2650 			vput(vp);
   2651 			continue;
   2652 		}
   2653 
   2654 		/* Ok, the disk exists.  Go get the disklabel. */
   2655 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2656 				  FREAD, NOCRED, 0);
   2657 		if (error) {
   2658 			/*
   2659 			 * XXX can't happen - open() would
   2660 			 * have errored out (or faked up one)
   2661 			 */
   2662 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2663 			       dv->dv_xname, 'a' + RAW_PART, error);
   2664 		}
   2665 
   2666 		/* don't need this any more.  We'll allocate it again
   2667 		   a little later if we really do... */
   2668 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2669 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2670 		vput(vp);
   2671 
   2672 		for (i=0; i < label.d_npartitions; i++) {
   2673 			/* We only support partitions marked as RAID */
   2674 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2675 				continue;
   2676 
   2677 			dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
   2678 			if (bdevvp(dev, &vp))
   2679 				panic("RAID can't alloc vnode");
   2680 
   2681 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2682 			if (error) {
   2683 				/* Whatever... */
   2684 				vput(vp);
   2685 				continue;
   2686 			}
   2687 
   2688 			good_one = 0;
   2689 
   2690 			clabel = (RF_ComponentLabel_t *)
   2691 				malloc(sizeof(RF_ComponentLabel_t),
   2692 				       M_RAIDFRAME, M_NOWAIT);
   2693 			if (clabel == NULL) {
   2694 				/* XXX CLEANUP HERE */
   2695 				printf("RAID auto config: out of memory!\n");
   2696 				return(NULL); /* XXX probably should panic? */
   2697 			}
   2698 
   2699 			if (!raidread_component_label(dev, vp, clabel)) {
   2700 				/* Got the label.  Does it look reasonable? */
   2701 				if (rf_reasonable_label(clabel) &&
   2702 				    (clabel->partitionSize <=
   2703 				     label.d_partitions[i].p_size)) {
   2704 #if DEBUG
   2705 					printf("Component on: %s%c: %d\n",
   2706 					       dv->dv_xname, 'a'+i,
   2707 					       label.d_partitions[i].p_size);
   2708 					rf_print_component_label(clabel);
   2709 #endif
   2710 					/* if it's reasonable, add it,
   2711 					   else ignore it. */
   2712 					ac = (RF_AutoConfig_t *)
   2713 						malloc(sizeof(RF_AutoConfig_t),
   2714 						       M_RAIDFRAME,
   2715 						       M_NOWAIT);
   2716 					if (ac == NULL) {
   2717 						/* XXX should panic?? */
   2718 						return(NULL);
   2719 					}
   2720 
   2721 					sprintf(ac->devname, "%s%c",
   2722 						dv->dv_xname, 'a'+i);
   2723 					ac->dev = dev;
   2724 					ac->vp = vp;
   2725 					ac->clabel = clabel;
   2726 					ac->next = ac_list;
   2727 					ac_list = ac;
   2728 					good_one = 1;
   2729 				}
   2730 			}
   2731 			if (!good_one) {
   2732 				/* cleanup */
   2733 				free(clabel, M_RAIDFRAME);
   2734 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2735 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2736 				vput(vp);
   2737 			}
   2738 		}
   2739 	}
   2740 }
   2741 return(ac_list);
   2742 }
   2743 
   2744 static int
   2745 rf_reasonable_label(clabel)
   2746 	RF_ComponentLabel_t *clabel;
   2747 {
   2748 
   2749 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2750 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2751 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2752 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2753 	    clabel->row >=0 &&
   2754 	    clabel->column >= 0 &&
   2755 	    clabel->num_rows > 0 &&
   2756 	    clabel->num_columns > 0 &&
   2757 	    clabel->row < clabel->num_rows &&
   2758 	    clabel->column < clabel->num_columns &&
   2759 	    clabel->blockSize > 0 &&
   2760 	    clabel->numBlocks > 0) {
   2761 		/* label looks reasonable enough... */
   2762 		return(1);
   2763 	}
   2764 	return(0);
   2765 }
   2766 
   2767 
   2768 void
   2769 rf_print_component_label(clabel)
   2770 	RF_ComponentLabel_t *clabel;
   2771 {
   2772 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2773 	       clabel->row, clabel->column,
   2774 	       clabel->num_rows, clabel->num_columns);
   2775 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2776 	       clabel->version, clabel->serial_number,
   2777 	       clabel->mod_counter);
   2778 	printf("   Clean: %s Status: %d\n",
   2779 	       clabel->clean ? "Yes" : "No", clabel->status );
   2780 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2781 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2782 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2783 	       (char) clabel->parityConfig, clabel->blockSize,
   2784 	       clabel->numBlocks);
   2785 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2786 	printf("   Contains root partition: %s\n",
   2787 	       clabel->root_partition ? "Yes" : "No" );
   2788 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2789 #if 0
   2790 	   printf("   Config order: %d\n", clabel->config_order);
   2791 #endif
   2792 
   2793 }
   2794 
   2795 RF_ConfigSet_t *
   2796 rf_create_auto_sets(ac_list)
   2797 	RF_AutoConfig_t *ac_list;
   2798 {
   2799 	RF_AutoConfig_t *ac;
   2800 	RF_ConfigSet_t *config_sets;
   2801 	RF_ConfigSet_t *cset;
   2802 	RF_AutoConfig_t *ac_next;
   2803 
   2804 
   2805 	config_sets = NULL;
   2806 
   2807 	/* Go through the AutoConfig list, and figure out which components
   2808 	   belong to what sets.  */
   2809 	ac = ac_list;
   2810 	while(ac!=NULL) {
   2811 		/* we're going to putz with ac->next, so save it here
   2812 		   for use at the end of the loop */
   2813 		ac_next = ac->next;
   2814 
   2815 		if (config_sets == NULL) {
   2816 			/* will need at least this one... */
   2817 			config_sets = (RF_ConfigSet_t *)
   2818 				malloc(sizeof(RF_ConfigSet_t),
   2819 				       M_RAIDFRAME, M_NOWAIT);
   2820 			if (config_sets == NULL) {
   2821 				panic("rf_create_auto_sets: No memory!\n");
   2822 			}
   2823 			/* this one is easy :) */
   2824 			config_sets->ac = ac;
   2825 			config_sets->next = NULL;
   2826 			config_sets->rootable = 0;
   2827 			ac->next = NULL;
   2828 		} else {
   2829 			/* which set does this component fit into? */
   2830 			cset = config_sets;
   2831 			while(cset!=NULL) {
   2832 				if (rf_does_it_fit(cset, ac)) {
   2833 					/* looks like it matches... */
   2834 					ac->next = cset->ac;
   2835 					cset->ac = ac;
   2836 					break;
   2837 				}
   2838 				cset = cset->next;
   2839 			}
   2840 			if (cset==NULL) {
   2841 				/* didn't find a match above... new set..*/
   2842 				cset = (RF_ConfigSet_t *)
   2843 					malloc(sizeof(RF_ConfigSet_t),
   2844 					       M_RAIDFRAME, M_NOWAIT);
   2845 				if (cset == NULL) {
   2846 					panic("rf_create_auto_sets: No memory!\n");
   2847 				}
   2848 				cset->ac = ac;
   2849 				ac->next = NULL;
   2850 				cset->next = config_sets;
   2851 				cset->rootable = 0;
   2852 				config_sets = cset;
   2853 			}
   2854 		}
   2855 		ac = ac_next;
   2856 	}
   2857 
   2858 
   2859 	return(config_sets);
   2860 }
   2861 
   2862 static int
   2863 rf_does_it_fit(cset, ac)
   2864 	RF_ConfigSet_t *cset;
   2865 	RF_AutoConfig_t *ac;
   2866 {
   2867 	RF_ComponentLabel_t *clabel1, *clabel2;
   2868 
   2869 	/* If this one matches the *first* one in the set, that's good
   2870 	   enough, since the other members of the set would have been
   2871 	   through here too... */
   2872 	/* note that we are not checking partitionSize here..
   2873 
   2874 	   Note that we are also not checking the mod_counters here.
   2875 	   If everything else matches execpt the mod_counter, that's
   2876 	   good enough for this test.  We will deal with the mod_counters
   2877 	   a little later in the autoconfiguration process.
   2878 
   2879 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2880 
   2881 	   The reason we don't check for this is that failed disks
   2882 	   will have lower modification counts.  If those disks are
   2883 	   not added to the set they used to belong to, then they will
   2884 	   form their own set, which may result in 2 different sets,
   2885 	   for example, competing to be configured at raid0, and
   2886 	   perhaps competing to be the root filesystem set.  If the
   2887 	   wrong ones get configured, or both attempt to become /,
   2888 	   weird behaviour and or serious lossage will occur.  Thus we
   2889 	   need to bring them into the fold here, and kick them out at
   2890 	   a later point.
   2891 
   2892 	*/
   2893 
   2894 	clabel1 = cset->ac->clabel;
   2895 	clabel2 = ac->clabel;
   2896 	if ((clabel1->version == clabel2->version) &&
   2897 	    (clabel1->serial_number == clabel2->serial_number) &&
   2898 	    (clabel1->num_rows == clabel2->num_rows) &&
   2899 	    (clabel1->num_columns == clabel2->num_columns) &&
   2900 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2901 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2902 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2903 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2904 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2905 	    (clabel1->blockSize == clabel2->blockSize) &&
   2906 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2907 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2908 	    (clabel1->root_partition == clabel2->root_partition) &&
   2909 	    (clabel1->last_unit == clabel2->last_unit) &&
   2910 	    (clabel1->config_order == clabel2->config_order)) {
   2911 		/* if it get's here, it almost *has* to be a match */
   2912 	} else {
   2913 		/* it's not consistent with somebody in the set..
   2914 		   punt */
   2915 		return(0);
   2916 	}
   2917 	/* all was fine.. it must fit... */
   2918 	return(1);
   2919 }
   2920 
   2921 int
   2922 rf_have_enough_components(cset)
   2923 	RF_ConfigSet_t *cset;
   2924 {
   2925 	RF_AutoConfig_t *ac;
   2926 	RF_AutoConfig_t *auto_config;
   2927 	RF_ComponentLabel_t *clabel;
   2928 	int r,c;
   2929 	int num_rows;
   2930 	int num_cols;
   2931 	int num_missing;
   2932 	int mod_counter;
   2933 	int mod_counter_found;
   2934 	int even_pair_failed;
   2935 	char parity_type;
   2936 
   2937 
   2938 	/* check to see that we have enough 'live' components
   2939 	   of this set.  If so, we can configure it if necessary */
   2940 
   2941 	num_rows = cset->ac->clabel->num_rows;
   2942 	num_cols = cset->ac->clabel->num_columns;
   2943 	parity_type = cset->ac->clabel->parityConfig;
   2944 
   2945 	/* XXX Check for duplicate components!?!?!? */
   2946 
   2947 	/* Determine what the mod_counter is supposed to be for this set. */
   2948 
   2949 	mod_counter_found = 0;
   2950 	mod_counter = 0;
   2951 	ac = cset->ac;
   2952 	while(ac!=NULL) {
   2953 		if (mod_counter_found==0) {
   2954 			mod_counter = ac->clabel->mod_counter;
   2955 			mod_counter_found = 1;
   2956 		} else {
   2957 			if (ac->clabel->mod_counter > mod_counter) {
   2958 				mod_counter = ac->clabel->mod_counter;
   2959 			}
   2960 		}
   2961 		ac = ac->next;
   2962 	}
   2963 
   2964 	num_missing = 0;
   2965 	auto_config = cset->ac;
   2966 
   2967 	for(r=0; r<num_rows; r++) {
   2968 		even_pair_failed = 0;
   2969 		for(c=0; c<num_cols; c++) {
   2970 			ac = auto_config;
   2971 			while(ac!=NULL) {
   2972 				if ((ac->clabel->row == r) &&
   2973 				    (ac->clabel->column == c) &&
   2974 				    (ac->clabel->mod_counter == mod_counter)) {
   2975 					/* it's this one... */
   2976 #if DEBUG
   2977 					printf("Found: %s at %d,%d\n",
   2978 					       ac->devname,r,c);
   2979 #endif
   2980 					break;
   2981 				}
   2982 				ac=ac->next;
   2983 			}
   2984 			if (ac==NULL) {
   2985 				/* Didn't find one here! */
   2986 				/* special case for RAID 1, especially
   2987 				   where there are more than 2
   2988 				   components (where RAIDframe treats
   2989 				   things a little differently :( ) */
   2990 				if (parity_type == '1') {
   2991 					if (c%2 == 0) { /* even component */
   2992 						even_pair_failed = 1;
   2993 					} else { /* odd component.  If
   2994                                                     we're failed, and
   2995                                                     so is the even
   2996                                                     component, it's
   2997                                                     "Good Night, Charlie" */
   2998 						if (even_pair_failed == 1) {
   2999 							return(0);
   3000 						}
   3001 					}
   3002 				} else {
   3003 					/* normal accounting */
   3004 					num_missing++;
   3005 				}
   3006 			}
   3007 			if ((parity_type == '1') && (c%2 == 1)) {
   3008 				/* Just did an even component, and we didn't
   3009 				   bail.. reset the even_pair_failed flag,
   3010 				   and go on to the next component.... */
   3011 				even_pair_failed = 0;
   3012 			}
   3013 		}
   3014 	}
   3015 
   3016 	clabel = cset->ac->clabel;
   3017 
   3018 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3019 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3020 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3021 		/* XXX this needs to be made *much* more general */
   3022 		/* Too many failures */
   3023 		return(0);
   3024 	}
   3025 	/* otherwise, all is well, and we've got enough to take a kick
   3026 	   at autoconfiguring this set */
   3027 	return(1);
   3028 }
   3029 
   3030 void
   3031 rf_create_configuration(ac,config,raidPtr)
   3032 	RF_AutoConfig_t *ac;
   3033 	RF_Config_t *config;
   3034 	RF_Raid_t *raidPtr;
   3035 {
   3036 	RF_ComponentLabel_t *clabel;
   3037 	int i;
   3038 
   3039 	clabel = ac->clabel;
   3040 
   3041 	/* 1. Fill in the common stuff */
   3042 	config->numRow = clabel->num_rows;
   3043 	config->numCol = clabel->num_columns;
   3044 	config->numSpare = 0; /* XXX should this be set here? */
   3045 	config->sectPerSU = clabel->sectPerSU;
   3046 	config->SUsPerPU = clabel->SUsPerPU;
   3047 	config->SUsPerRU = clabel->SUsPerRU;
   3048 	config->parityConfig = clabel->parityConfig;
   3049 	/* XXX... */
   3050 	strcpy(config->diskQueueType,"fifo");
   3051 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3052 	config->layoutSpecificSize = 0; /* XXX ?? */
   3053 
   3054 	while(ac!=NULL) {
   3055 		/* row/col values will be in range due to the checks
   3056 		   in reasonable_label() */
   3057 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   3058 		       ac->devname);
   3059 		ac = ac->next;
   3060 	}
   3061 
   3062 	for(i=0;i<RF_MAXDBGV;i++) {
   3063 		config->debugVars[i][0] = NULL;
   3064 	}
   3065 }
   3066 
   3067 int
   3068 rf_set_autoconfig(raidPtr, new_value)
   3069 	RF_Raid_t *raidPtr;
   3070 	int new_value;
   3071 {
   3072 	RF_ComponentLabel_t clabel;
   3073 	struct vnode *vp;
   3074 	dev_t dev;
   3075 	int row, column;
   3076 
   3077 	raidPtr->autoconfigure = new_value;
   3078 	for(row=0; row<raidPtr->numRow; row++) {
   3079 		for(column=0; column<raidPtr->numCol; column++) {
   3080 			if (raidPtr->Disks[row][column].status ==
   3081 			    rf_ds_optimal) {
   3082 				dev = raidPtr->Disks[row][column].dev;
   3083 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3084 				raidread_component_label(dev, vp, &clabel);
   3085 				clabel.autoconfigure = new_value;
   3086 				raidwrite_component_label(dev, vp, &clabel);
   3087 			}
   3088 		}
   3089 	}
   3090 	return(new_value);
   3091 }
   3092 
   3093 int
   3094 rf_set_rootpartition(raidPtr, new_value)
   3095 	RF_Raid_t *raidPtr;
   3096 	int new_value;
   3097 {
   3098 	RF_ComponentLabel_t clabel;
   3099 	struct vnode *vp;
   3100 	dev_t dev;
   3101 	int row, column;
   3102 
   3103 	raidPtr->root_partition = new_value;
   3104 	for(row=0; row<raidPtr->numRow; row++) {
   3105 		for(column=0; column<raidPtr->numCol; column++) {
   3106 			if (raidPtr->Disks[row][column].status ==
   3107 			    rf_ds_optimal) {
   3108 				dev = raidPtr->Disks[row][column].dev;
   3109 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3110 				raidread_component_label(dev, vp, &clabel);
   3111 				clabel.root_partition = new_value;
   3112 				raidwrite_component_label(dev, vp, &clabel);
   3113 			}
   3114 		}
   3115 	}
   3116 	return(new_value);
   3117 }
   3118 
   3119 void
   3120 rf_release_all_vps(cset)
   3121 	RF_ConfigSet_t *cset;
   3122 {
   3123 	RF_AutoConfig_t *ac;
   3124 
   3125 	ac = cset->ac;
   3126 	while(ac!=NULL) {
   3127 		/* Close the vp, and give it back */
   3128 		if (ac->vp) {
   3129 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3130 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3131 			vput(ac->vp);
   3132 			ac->vp = NULL;
   3133 		}
   3134 		ac = ac->next;
   3135 	}
   3136 }
   3137 
   3138 
   3139 void
   3140 rf_cleanup_config_set(cset)
   3141 	RF_ConfigSet_t *cset;
   3142 {
   3143 	RF_AutoConfig_t *ac;
   3144 	RF_AutoConfig_t *next_ac;
   3145 
   3146 	ac = cset->ac;
   3147 	while(ac!=NULL) {
   3148 		next_ac = ac->next;
   3149 		/* nuke the label */
   3150 		free(ac->clabel, M_RAIDFRAME);
   3151 		/* cleanup the config structure */
   3152 		free(ac, M_RAIDFRAME);
   3153 		/* "next.." */
   3154 		ac = next_ac;
   3155 	}
   3156 	/* and, finally, nuke the config set */
   3157 	free(cset, M_RAIDFRAME);
   3158 }
   3159 
   3160 
   3161 void
   3162 raid_init_component_label(raidPtr, clabel)
   3163 	RF_Raid_t *raidPtr;
   3164 	RF_ComponentLabel_t *clabel;
   3165 {
   3166 	/* current version number */
   3167 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3168 	clabel->serial_number = raidPtr->serial_number;
   3169 	clabel->mod_counter = raidPtr->mod_counter;
   3170 	clabel->num_rows = raidPtr->numRow;
   3171 	clabel->num_columns = raidPtr->numCol;
   3172 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3173 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3174 
   3175 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3176 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3177 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3178 
   3179 	clabel->blockSize = raidPtr->bytesPerSector;
   3180 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3181 
   3182 	/* XXX not portable */
   3183 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3184 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3185 	clabel->autoconfigure = raidPtr->autoconfigure;
   3186 	clabel->root_partition = raidPtr->root_partition;
   3187 	clabel->last_unit = raidPtr->raidid;
   3188 	clabel->config_order = raidPtr->config_order;
   3189 }
   3190 
   3191 int
   3192 rf_auto_config_set(cset,unit)
   3193 	RF_ConfigSet_t *cset;
   3194 	int *unit;
   3195 {
   3196 	RF_Raid_t *raidPtr;
   3197 	RF_Config_t *config;
   3198 	int raidID;
   3199 	int retcode;
   3200 
   3201 	printf("RAID autoconfigure\n");
   3202 
   3203 	retcode = 0;
   3204 	*unit = -1;
   3205 
   3206 	/* 1. Create a config structure */
   3207 
   3208 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3209 				       M_RAIDFRAME,
   3210 				       M_NOWAIT);
   3211 	if (config==NULL) {
   3212 		printf("Out of mem!?!?\n");
   3213 				/* XXX do something more intelligent here. */
   3214 		return(1);
   3215 	}
   3216 
   3217 	memset(config, 0, sizeof(RF_Config_t));
   3218 
   3219 	/* XXX raidID needs to be set correctly.. */
   3220 
   3221 	/*
   3222 	   2. Figure out what RAID ID this one is supposed to live at
   3223 	   See if we can get the same RAID dev that it was configured
   3224 	   on last time..
   3225 	*/
   3226 
   3227 	raidID = cset->ac->clabel->last_unit;
   3228 	if ((raidID < 0) || (raidID >= numraid)) {
   3229 		/* let's not wander off into lala land. */
   3230 		raidID = numraid - 1;
   3231 	}
   3232 	if (raidPtrs[raidID]->valid != 0) {
   3233 
   3234 		/*
   3235 		   Nope... Go looking for an alternative...
   3236 		   Start high so we don't immediately use raid0 if that's
   3237 		   not taken.
   3238 		*/
   3239 
   3240 		for(raidID = numraid; raidID >= 0; raidID--) {
   3241 			if (raidPtrs[raidID]->valid == 0) {
   3242 				/* can use this one! */
   3243 				break;
   3244 			}
   3245 		}
   3246 	}
   3247 
   3248 	if (raidID < 0) {
   3249 		/* punt... */
   3250 		printf("Unable to auto configure this set!\n");
   3251 		printf("(Out of RAID devs!)\n");
   3252 		return(1);
   3253 	}
   3254 	printf("Configuring raid%d:\n",raidID);
   3255 	raidPtr = raidPtrs[raidID];
   3256 
   3257 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3258 	raidPtr->raidid = raidID;
   3259 	raidPtr->openings = RAIDOUTSTANDING;
   3260 
   3261 	/* 3. Build the configuration structure */
   3262 	rf_create_configuration(cset->ac, config, raidPtr);
   3263 
   3264 	/* 4. Do the configuration */
   3265 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3266 
   3267 	if (retcode == 0) {
   3268 
   3269 		raidinit(raidPtrs[raidID]);
   3270 
   3271 		rf_markalldirty(raidPtrs[raidID]);
   3272 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3273 		if (cset->ac->clabel->root_partition==1) {
   3274 			/* everything configured just fine.  Make a note
   3275 			   that this set is eligible to be root. */
   3276 			cset->rootable = 1;
   3277 			/* XXX do this here? */
   3278 			raidPtrs[raidID]->root_partition = 1;
   3279 		}
   3280 	}
   3281 
   3282 	/* 5. Cleanup */
   3283 	free(config, M_RAIDFRAME);
   3284 
   3285 	*unit = raidID;
   3286 	return(retcode);
   3287 }
   3288 
   3289 void
   3290 rf_disk_unbusy(desc)
   3291 	RF_RaidAccessDesc_t *desc;
   3292 {
   3293 	struct buf *bp;
   3294 
   3295 	bp = (struct buf *)desc->bp;
   3296 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3297 			    (bp->b_bcount - bp->b_resid));
   3298 }
   3299