Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.64
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.64 2000/02/27 02:35:33 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "opt_raid_autoconfig.h"
    139 #include "rf_raid.h"
    140 #include "rf_raidframe.h"
    141 #include "rf_copyback.h"
    142 #include "rf_dag.h"
    143 #include "rf_dagflags.h"
    144 #include "rf_diskqueue.h"
    145 #include "rf_acctrace.h"
    146 #include "rf_etimer.h"
    147 #include "rf_general.h"
    148 #include "rf_debugMem.h"
    149 #include "rf_kintf.h"
    150 #include "rf_options.h"
    151 #include "rf_driver.h"
    152 #include "rf_parityscan.h"
    153 #include "rf_debugprint.h"
    154 #include "rf_threadstuff.h"
    155 
/*
 * Verbosity switch for the db1_printf() debug messages.  Messages are
 * compiled in only when DEBUG is defined, and are then printed only
 * while this value is greater than zero.
 */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- conditional debug printf.
 *
 * The single macro argument is a complete, parenthesized printf()
 * argument list, hence the double parentheses at every call site.
 *
 * Both variants expand to exactly one do { } while (0) statement, so
 * the macro is safe as the body of an unbraced if/else: the DEBUG form
 * cannot capture a following "else", and the non-DEBUG form does not
 * leave a stray ';' behind a compound statement.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    163 
static RF_Raid_t **raidPtrs;	/* global raid device descriptors: one
				 * pointer per unit, indexed by unit
				 * number; allocated in raidattach() */

/*
 * Mutex initialized in raidattach().
 * NOTE(review): presumably guards the two spare-table queues below --
 * confirm against the code that manipulates them.
 */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

/* Both queues start out empty (NULL); see raidattach(). */
static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
    172 
/* prototypes */

/*
 * File-local helpers.  Their definitions are not in this part of the
 * file; the parameter names are the only description available here.
 */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);
static void raidinit __P((RF_Raid_t *));

/*
 * Externally visible driver entry points: attach, size, open/close,
 * ioctl, read/write, strategy and dump.
 */
void raidattach __P((int));
int raidsize __P((dev_t));
int raidopen __P((dev_t, int, int, struct proc *));
int raidclose __P((dev_t, int, int, struct proc *));
int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int raidwrite __P((dev_t, struct uio *, int));
int raidread __P((dev_t, struct uio *, int));
void raidstrategy __P((struct buf *));
int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    191 
/*
 * Pilfered from ccd.c
 */

/*
 * A struct buf extended with RAIDframe bookkeeping: the embedded buf,
 * a pointer back to the original buf, and the disk-queue request it
 * belongs to.
 *
 * NOTE(review): rf_buf presumably has to be the first member so that a
 * struct buf pointer can be converted back to the enclosing struct
 * raidbuf -- confirm against the code that uses it.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
    202 
    203 
/*
 * Get/return an item from the per-unit component buffer pool
 * (sc_cbufpool) of the softc 'rs'.
 *
 * RAIDGETBUF passes PR_NOWAIT to pool_get().
 * NOTE(review): per pool(9) that means the call does not sleep and may
 * return NULL -- callers should check the result.
 */
#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    206 
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
*/

/*
 * Per-unit software state.  raidattach() allocates an array of these,
 * one per unit, which the entry points index by unit number.
 */
struct raid_softc {
	int     sc_flags;	/* RAIDF_* state flags, see below */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;        /* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info; holds the
				 * disklabel pointer and the open masks */
	struct pool sc_cbufpool;	/* component buffer pool, see
					 * RAIDGETBUF/RAIDPUTBUF */
	struct buf_queue buf_queue;	/* used for the device queue;
					 * raidstrategy() appends bufs here */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
    226 
/* Unit number encoded in a raid dev_t. */
#define	raidunit(x)	DISKUNIT(x)
/*
 * Number of usable units.  Set by raidattach(); the entry points reject
 * any unit number that is not below this value.
 */
int numraid = 0;
    229 
/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is re-used immediately, that's still 128K, which when
 * multiplied by say 10 requests, is 1280K, *on top* of the 640K of
 * incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 *
 * The value may be overridden at build time by defining RAIDOUTSTANDING
 * before this point; 6 is only the default.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif
    250 
/*
 * Device number of the raw partition (RAW_PART) on the same major
 * number and raid unit as 'dev'.
 */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    253 
/* declared here, and made public, for the benefit of KVM stuff.. */
/* Array of per-unit soft state; allocated and zeroed in raidattach(). */
struct raid_softc *raid_softc;

/* Disklabel helpers (definitions are not in this part of the file). */
static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
				     struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

/*
 * Per-unit lock.  raidlock() returns 0 on success or an error number;
 * raidopen()/raidclose() hold the lock while changing the open masks.
 */
static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));

static void rf_markalldirty __P((RF_Raid_t *));
void rf_mountroot_hook __P((struct device *));

/*
 * One struct device per unit, filled in by raidattach();
 * rf_buildroothack() may point booted_device at one of them.
 */
struct device *raidrootdev;
struct cfdata cf_raidrootdev;
struct cfdriver cfdrv;
    271 /* XXX these should be moved up */
    272 #include "rf_configure.h"
    273 #include <sys/reboot.h>
    274 
/* Worker-thread entry points (definitions are not in this part of the file). */
void rf_ReconThread __P((struct rf_recon_req *));
/* XXX what I want is: */
/*void rf_ReconThread __P((RF_Raid_t *raidPtr));  */
void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
void rf_CopybackThread __P((RF_Raid_t *raidPtr));
void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
/* Scheduled by raidattach(); configures the autoconfig sets. */
void rf_buildroothack __P((void *));
/* Called by raidclose() when the last opener of a configured unit leaves. */
void rf_final_update_component_labels __P((RF_Raid_t *));

/* Autoconfiguration support. */
RF_AutoConfig_t *rf_find_raid_components __P((void));
void print_component_label __P((RF_ComponentLabel_t *));
RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
static int rf_reasonable_label __P((RF_ComponentLabel_t *));
void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
				  RF_Raid_t *));
int rf_set_autoconfig __P((RF_Raid_t *, int));
int rf_set_rootpartition __P((RF_Raid_t *, int));
void rf_release_all_vps __P((RF_ConfigSet_t *));
void rf_cleanup_config_set __P((RF_ConfigSet_t *));
int rf_have_enough_components __P((RF_ConfigSet_t *));
int rf_auto_config_set __P((RF_ConfigSet_t *, int *));

static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
				  allow autoconfig to take place.
			          Note that this is overridden by having
			          RAID_AUTOCONFIG as an option in the
			          kernel config file: raidattach() then
			          forces it to 1.  */
/* Set by rf_buildroothack() when exactly one rootable set was configured. */
extern struct device *booted_device;
    304 
    305 void
    306 raidattach(num)
    307 	int     num;
    308 {
    309 	int raidID;
    310 	int i, rc;
    311 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    312 	RF_ConfigSet_t *config_sets;
    313 
    314 #ifdef DEBUG
    315 	printf("raidattach: Asked for %d units\n", num);
    316 #endif
    317 
    318 	if (num <= 0) {
    319 #ifdef DIAGNOSTIC
    320 		panic("raidattach: count <= 0");
    321 #endif
    322 		return;
    323 	}
    324 	/* This is where all the initialization stuff gets done. */
    325 
    326 	numraid = num;
    327 
    328 	/* Make some space for requested number of units... */
    329 
    330 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    331 	if (raidPtrs == NULL) {
    332 		panic("raidPtrs is NULL!!\n");
    333 	}
    334 
    335 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    336 	if (rc) {
    337 		RF_PANIC();
    338 	}
    339 
    340 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    341 
    342 	for (i = 0; i < num; i++)
    343 		raidPtrs[i] = NULL;
    344 	rc = rf_BootRaidframe();
    345 	if (rc == 0)
    346 		printf("Kernelized RAIDframe activated\n");
    347 	else
    348 		panic("Serious error booting RAID!!\n");
    349 
    350 	/* put together some datastructures like the CCD device does.. This
    351 	 * lets us lock the device and what-not when it gets opened. */
    352 
    353 	raid_softc = (struct raid_softc *)
    354 		malloc(num * sizeof(struct raid_softc),
    355 		       M_RAIDFRAME, M_NOWAIT);
    356 	if (raid_softc == NULL) {
    357 		printf("WARNING: no memory for RAIDframe driver\n");
    358 		return;
    359 	}
    360 
    361 	bzero(raid_softc, num * sizeof(struct raid_softc));
    362 
    363 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    364 					      M_RAIDFRAME, M_NOWAIT);
    365 	if (raidrootdev == NULL) {
    366 		panic("No memory for RAIDframe driver!!?!?!\n");
    367 	}
    368 
    369 	for (raidID = 0; raidID < num; raidID++) {
    370 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    371 
    372 		raidrootdev[raidID].dv_class  = DV_DISK;
    373 		raidrootdev[raidID].dv_cfdata = NULL;
    374 		raidrootdev[raidID].dv_unit   = raidID;
    375 		raidrootdev[raidID].dv_parent = NULL;
    376 		raidrootdev[raidID].dv_flags  = 0;
    377 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    378 
    379 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    380 			  (RF_Raid_t *));
    381 		if (raidPtrs[raidID] == NULL) {
    382 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    383 			numraid = raidID;
    384 			return;
    385 		}
    386 	}
    387 
    388 #if RAID_AUTOCONFIG
    389 	raidautoconfig = 1;
    390 #endif
    391 
    392 if (raidautoconfig) {
    393 	/* 1. locate all RAID components on the system */
    394 
    395 #if DEBUG
    396 	printf("Searching for raid components...\n");
    397 #endif
    398 	ac_list = rf_find_raid_components();
    399 
    400 	/* 2. sort them into their respective sets */
    401 
    402 	config_sets = rf_create_auto_sets(ac_list);
    403 
    404 	/* 3. evaluate each set and configure the valid ones
    405 	   This gets done in rf_buildroothack() */
    406 
    407 	/* schedule the creation of the thread to do the
    408 	   "/ on RAID" stuff */
    409 
    410 	kthread_create(rf_buildroothack,config_sets);
    411 
    412 #if 0
    413 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    414 #endif
    415 }
    416 
    417 }
    418 
    419 void
    420 rf_buildroothack(arg)
    421 	void *arg;
    422 {
    423 	RF_ConfigSet_t *config_sets = arg;
    424 	RF_ConfigSet_t *cset;
    425 	RF_ConfigSet_t *next_cset;
    426 	int retcode;
    427 	int raidID;
    428 	int rootID;
    429 	int num_root;
    430 
    431 	num_root = 0;
    432 	cset = config_sets;
    433 	while(cset != NULL ) {
    434 		next_cset = cset->next;
    435 		if (rf_have_enough_components(cset) &&
    436 		    cset->ac->clabel->autoconfigure==1) {
    437 			retcode = rf_auto_config_set(cset,&raidID);
    438 			if (!retcode) {
    439 				if (cset->rootable) {
    440 					rootID = raidID;
    441 					num_root++;
    442 				}
    443 			} else {
    444 				/* The autoconfig didn't work :( */
    445 #if DEBUG
    446 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    447 #endif
    448 				rf_release_all_vps(cset);
    449 			}
    450 		} else {
    451 			/* we're not autoconfiguring this set...
    452 			   release the associated resources */
    453 			rf_release_all_vps(cset);
    454 		}
    455 		/* cleanup */
    456 		rf_cleanup_config_set(cset);
    457 		cset = next_cset;
    458 	}
    459 	if (boothowto & RB_ASKNAME) {
    460 		/* We don't auto-config... */
    461 	} else {
    462 		/* They didn't ask, and we found something bootable... */
    463 
    464 		if (num_root == 1) {
    465 			booted_device = &raidrootdev[rootID];
    466 		} else if (num_root > 1) {
    467 			/* we can't guess.. require the user to answer... */
    468 			boothowto |= RB_ASKNAME;
    469 		}
    470 	}
    471 }
    472 
    473 
    474 int
    475 raidsize(dev)
    476 	dev_t   dev;
    477 {
    478 	struct raid_softc *rs;
    479 	struct disklabel *lp;
    480 	int     part, unit, omask, size;
    481 
    482 	unit = raidunit(dev);
    483 	if (unit >= numraid)
    484 		return (-1);
    485 	rs = &raid_softc[unit];
    486 
    487 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    488 		return (-1);
    489 
    490 	part = DISKPART(dev);
    491 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    492 	lp = rs->sc_dkdev.dk_label;
    493 
    494 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    495 		return (-1);
    496 
    497 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    498 		size = -1;
    499 	else
    500 		size = lp->d_partitions[part].p_size *
    501 		    (lp->d_secsize / DEV_BSIZE);
    502 
    503 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    504 		return (-1);
    505 
    506 	return (size);
    507 
    508 }
    509 
    510 int
    511 raiddump(dev, blkno, va, size)
    512 	dev_t   dev;
    513 	daddr_t blkno;
    514 	caddr_t va;
    515 	size_t  size;
    516 {
    517 	/* Not implemented. */
    518 	return ENXIO;
    519 }
    520 /* ARGSUSED */
    521 int
    522 raidopen(dev, flags, fmt, p)
    523 	dev_t   dev;
    524 	int     flags, fmt;
    525 	struct proc *p;
    526 {
    527 	int     unit = raidunit(dev);
    528 	struct raid_softc *rs;
    529 	struct disklabel *lp;
    530 	int     part, pmask;
    531 	int     error = 0;
    532 
    533 	if (unit >= numraid)
    534 		return (ENXIO);
    535 	rs = &raid_softc[unit];
    536 
    537 	if ((error = raidlock(rs)) != 0)
    538 		return (error);
    539 	lp = rs->sc_dkdev.dk_label;
    540 
    541 	part = DISKPART(dev);
    542 	pmask = (1 << part);
    543 
    544 	db1_printf(("Opening raid device number: %d partition: %d\n",
    545 		unit, part));
    546 
    547 
    548 	if ((rs->sc_flags & RAIDF_INITED) &&
    549 	    (rs->sc_dkdev.dk_openmask == 0))
    550 		raidgetdisklabel(dev);
    551 
    552 	/* make sure that this partition exists */
    553 
    554 	if (part != RAW_PART) {
    555 		db1_printf(("Not a raw partition..\n"));
    556 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    557 		    ((part >= lp->d_npartitions) ||
    558 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    559 			error = ENXIO;
    560 			raidunlock(rs);
    561 			db1_printf(("Bailing out...\n"));
    562 			return (error);
    563 		}
    564 	}
    565 	/* Prevent this unit from being unconfigured while open. */
    566 	switch (fmt) {
    567 	case S_IFCHR:
    568 		rs->sc_dkdev.dk_copenmask |= pmask;
    569 		break;
    570 
    571 	case S_IFBLK:
    572 		rs->sc_dkdev.dk_bopenmask |= pmask;
    573 		break;
    574 	}
    575 
    576 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    577 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    578 		/* First one... mark things as dirty... Note that we *MUST*
    579 		 have done a configure before this.  I DO NOT WANT TO BE
    580 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    581 		 THAT THEY BELONG TOGETHER!!!!! */
    582 		/* XXX should check to see if we're only open for reading
    583 		   here... If so, we needn't do this, but then need some
    584 		   other way of keeping track of what's happened.. */
    585 
    586 		rf_markalldirty( raidPtrs[unit] );
    587 	}
    588 
    589 
    590 	rs->sc_dkdev.dk_openmask =
    591 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    592 
    593 	raidunlock(rs);
    594 
    595 	return (error);
    596 
    597 
    598 }
    599 /* ARGSUSED */
    600 int
    601 raidclose(dev, flags, fmt, p)
    602 	dev_t   dev;
    603 	int     flags, fmt;
    604 	struct proc *p;
    605 {
    606 	int     unit = raidunit(dev);
    607 	struct raid_softc *rs;
    608 	int     error = 0;
    609 	int     part;
    610 
    611 	if (unit >= numraid)
    612 		return (ENXIO);
    613 	rs = &raid_softc[unit];
    614 
    615 	if ((error = raidlock(rs)) != 0)
    616 		return (error);
    617 
    618 	part = DISKPART(dev);
    619 
    620 	/* ...that much closer to allowing unconfiguration... */
    621 	switch (fmt) {
    622 	case S_IFCHR:
    623 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    624 		break;
    625 
    626 	case S_IFBLK:
    627 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    628 		break;
    629 	}
    630 	rs->sc_dkdev.dk_openmask =
    631 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    632 
    633 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    634 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    635 		/* Last one... device is not unconfigured yet.
    636 		   Device shutdown has taken care of setting the
    637 		   clean bits if RAIDF_INITED is not set
    638 		   mark things as clean... */
    639 #if 0
    640 		printf("Last one on raid%d.  Updating status.\n",unit);
    641 #endif
    642 		rf_final_update_component_labels( raidPtrs[unit] );
    643 	}
    644 
    645 	raidunlock(rs);
    646 	return (0);
    647 
    648 }
    649 
    650 void
    651 raidstrategy(bp)
    652 	register struct buf *bp;
    653 {
    654 	register int s;
    655 
    656 	unsigned int raidID = raidunit(bp->b_dev);
    657 	RF_Raid_t *raidPtr;
    658 	struct raid_softc *rs = &raid_softc[raidID];
    659 	struct disklabel *lp;
    660 	int     wlabel;
    661 
    662 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    663 		bp->b_error = ENXIO;
    664 		bp->b_flags = B_ERROR;
    665 		bp->b_resid = bp->b_bcount;
    666 		biodone(bp);
    667 		return;
    668 	}
    669 	if (raidID >= numraid || !raidPtrs[raidID]) {
    670 		bp->b_error = ENODEV;
    671 		bp->b_flags |= B_ERROR;
    672 		bp->b_resid = bp->b_bcount;
    673 		biodone(bp);
    674 		return;
    675 	}
    676 	raidPtr = raidPtrs[raidID];
    677 	if (!raidPtr->valid) {
    678 		bp->b_error = ENODEV;
    679 		bp->b_flags |= B_ERROR;
    680 		bp->b_resid = bp->b_bcount;
    681 		biodone(bp);
    682 		return;
    683 	}
    684 	if (bp->b_bcount == 0) {
    685 		db1_printf(("b_bcount is zero..\n"));
    686 		biodone(bp);
    687 		return;
    688 	}
    689 	lp = rs->sc_dkdev.dk_label;
    690 
    691 	/*
    692 	 * Do bounds checking and adjust transfer.  If there's an
    693 	 * error, the bounds check will flag that for us.
    694 	 */
    695 
    696 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    697 	if (DISKPART(bp->b_dev) != RAW_PART)
    698 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    699 			db1_printf(("Bounds check failed!!:%d %d\n",
    700 				(int) bp->b_blkno, (int) wlabel));
    701 			biodone(bp);
    702 			return;
    703 		}
    704 	s = splbio();
    705 
    706 	bp->b_resid = 0;
    707 
    708 	/* stuff it onto our queue */
    709 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    710 
    711 	raidstart(raidPtrs[raidID]);
    712 
    713 	splx(s);
    714 }
    715 /* ARGSUSED */
    716 int
    717 raidread(dev, uio, flags)
    718 	dev_t   dev;
    719 	struct uio *uio;
    720 	int     flags;
    721 {
    722 	int     unit = raidunit(dev);
    723 	struct raid_softc *rs;
    724 	int     part;
    725 
    726 	if (unit >= numraid)
    727 		return (ENXIO);
    728 	rs = &raid_softc[unit];
    729 
    730 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    731 		return (ENXIO);
    732 	part = DISKPART(dev);
    733 
    734 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    735 
    736 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    737 
    738 }
    739 /* ARGSUSED */
    740 int
    741 raidwrite(dev, uio, flags)
    742 	dev_t   dev;
    743 	struct uio *uio;
    744 	int     flags;
    745 {
    746 	int     unit = raidunit(dev);
    747 	struct raid_softc *rs;
    748 
    749 	if (unit >= numraid)
    750 		return (ENXIO);
    751 	rs = &raid_softc[unit];
    752 
    753 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    754 		return (ENXIO);
    755 	db1_printf(("raidwrite\n"));
    756 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    757 
    758 }
    759 
    760 int
    761 raidioctl(dev, cmd, data, flag, p)
    762 	dev_t   dev;
    763 	u_long  cmd;
    764 	caddr_t data;
    765 	int     flag;
    766 	struct proc *p;
    767 {
    768 	int     unit = raidunit(dev);
    769 	int     error = 0;
    770 	int     part, pmask;
    771 	struct raid_softc *rs;
    772 	RF_Config_t *k_cfg, *u_cfg;
    773 	RF_Raid_t *raidPtr;
    774 	RF_RaidDisk_t *diskPtr;
    775 	RF_AccTotals_t *totals;
    776 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    777 	u_char *specific_buf;
    778 	int retcode = 0;
    779 	int row;
    780 	int column;
    781 	struct rf_recon_req *rrcopy, *rr;
    782 	RF_ComponentLabel_t *clabel;
    783 	RF_ComponentLabel_t ci_label;
    784 	RF_ComponentLabel_t **clabel_ptr;
    785 	RF_SingleComponent_t *sparePtr,*componentPtr;
    786 	RF_SingleComponent_t hot_spare;
    787 	RF_SingleComponent_t component;
    788 	int i, j, d;
    789 
    790 	if (unit >= numraid)
    791 		return (ENXIO);
    792 	rs = &raid_softc[unit];
    793 	raidPtr = raidPtrs[unit];
    794 
    795 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    796 		(int) DISKPART(dev), (int) unit, (int) cmd));
    797 
    798 	/* Must be open for writes for these commands... */
    799 	switch (cmd) {
    800 	case DIOCSDINFO:
    801 	case DIOCWDINFO:
    802 	case DIOCWLABEL:
    803 		if ((flag & FWRITE) == 0)
    804 			return (EBADF);
    805 	}
    806 
    807 	/* Must be initialized for these... */
    808 	switch (cmd) {
    809 	case DIOCGDINFO:
    810 	case DIOCSDINFO:
    811 	case DIOCWDINFO:
    812 	case DIOCGPART:
    813 	case DIOCWLABEL:
    814 	case DIOCGDEFLABEL:
    815 	case RAIDFRAME_SHUTDOWN:
    816 	case RAIDFRAME_REWRITEPARITY:
    817 	case RAIDFRAME_GET_INFO:
    818 	case RAIDFRAME_RESET_ACCTOTALS:
    819 	case RAIDFRAME_GET_ACCTOTALS:
    820 	case RAIDFRAME_KEEP_ACCTOTALS:
    821 	case RAIDFRAME_GET_SIZE:
    822 	case RAIDFRAME_FAIL_DISK:
    823 	case RAIDFRAME_COPYBACK:
    824 	case RAIDFRAME_CHECK_RECON_STATUS:
    825 	case RAIDFRAME_GET_COMPONENT_LABEL:
    826 	case RAIDFRAME_SET_COMPONENT_LABEL:
    827 	case RAIDFRAME_ADD_HOT_SPARE:
    828 	case RAIDFRAME_REMOVE_HOT_SPARE:
    829 	case RAIDFRAME_INIT_LABELS:
    830 	case RAIDFRAME_REBUILD_IN_PLACE:
    831 	case RAIDFRAME_CHECK_PARITY:
    832 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    833 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    834 	case RAIDFRAME_SET_AUTOCONFIG:
    835 	case RAIDFRAME_SET_ROOT:
    836 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    837 			return (ENXIO);
    838 	}
    839 
    840 	switch (cmd) {
    841 
    842 		/* configure the system */
    843 	case RAIDFRAME_CONFIGURE:
    844 
    845 		if (raidPtr->valid) {
    846 			/* There is a valid RAID set running on this unit! */
    847 			printf("raid%d: Device already configured!\n",unit);
    848 		}
    849 
    850 		/* copy-in the configuration information */
    851 		/* data points to a pointer to the configuration structure */
    852 
    853 		u_cfg = *((RF_Config_t **) data);
    854 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    855 		if (k_cfg == NULL) {
    856 			return (ENOMEM);
    857 		}
    858 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    859 		    sizeof(RF_Config_t));
    860 		if (retcode) {
    861 			RF_Free(k_cfg, sizeof(RF_Config_t));
    862 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    863 				retcode));
    864 			return (retcode);
    865 		}
    866 		/* allocate a buffer for the layout-specific data, and copy it
    867 		 * in */
    868 		if (k_cfg->layoutSpecificSize) {
    869 			if (k_cfg->layoutSpecificSize > 10000) {
    870 				/* sanity check */
    871 				RF_Free(k_cfg, sizeof(RF_Config_t));
    872 				return (EINVAL);
    873 			}
    874 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    875 			    (u_char *));
    876 			if (specific_buf == NULL) {
    877 				RF_Free(k_cfg, sizeof(RF_Config_t));
    878 				return (ENOMEM);
    879 			}
    880 			retcode = copyin(k_cfg->layoutSpecific,
    881 			    (caddr_t) specific_buf,
    882 			    k_cfg->layoutSpecificSize);
    883 			if (retcode) {
    884 				RF_Free(k_cfg, sizeof(RF_Config_t));
    885 				RF_Free(specific_buf,
    886 					k_cfg->layoutSpecificSize);
    887 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    888 					retcode));
    889 				return (retcode);
    890 			}
    891 		} else
    892 			specific_buf = NULL;
    893 		k_cfg->layoutSpecific = specific_buf;
    894 
    895 		/* should do some kind of sanity check on the configuration.
    896 		 * Store the sum of all the bytes in the last byte? */
    897 
    898 		/* configure the system */
    899 
    900 		/*
    901 		 * Clear the entire RAID descriptor, just to make sure
    902 		 *  there is no stale data left in the case of a
    903 		 *  reconfiguration
    904 		 */
    905 		bzero((char *) raidPtr, sizeof(RF_Raid_t));
    906 		raidPtr->raidid = unit;
    907 
    908 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    909 
    910 		if (retcode == 0) {
    911 
    912 			/* allow this many simultaneous IO's to
    913 			   this RAID device */
    914 			raidPtr->openings = RAIDOUTSTANDING;
    915 
    916 			raidinit(raidPtr);
    917 			rf_markalldirty(raidPtr);
    918 		}
    919 		/* free the buffers.  No return code here. */
    920 		if (k_cfg->layoutSpecificSize) {
    921 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    922 		}
    923 		RF_Free(k_cfg, sizeof(RF_Config_t));
    924 
    925 		return (retcode);
    926 
    927 		/* shutdown the system */
    928 	case RAIDFRAME_SHUTDOWN:
    929 
    930 		if ((error = raidlock(rs)) != 0)
    931 			return (error);
    932 
    933 		/*
    934 		 * If somebody has a partition mounted, we shouldn't
    935 		 * shutdown.
    936 		 */
    937 
    938 		part = DISKPART(dev);
    939 		pmask = (1 << part);
    940 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    941 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    942 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    943 			raidunlock(rs);
    944 			return (EBUSY);
    945 		}
    946 
    947 		retcode = rf_Shutdown(raidPtr);
    948 
    949 		pool_destroy(&rs->sc_cbufpool);
    950 
    951 		/* It's no longer initialized... */
    952 		rs->sc_flags &= ~RAIDF_INITED;
    953 
    954 		/* Detach the disk. */
    955 		disk_detach(&rs->sc_dkdev);
    956 
    957 		raidunlock(rs);
    958 
    959 		return (retcode);
    960 	case RAIDFRAME_GET_COMPONENT_LABEL:
    961 		clabel_ptr = (RF_ComponentLabel_t **) data;
    962 		/* need to read the component label for the disk indicated
    963 		   by row,column in clabel */
    964 
    965 		/* For practice, let's get it directly from disk, rather
    966 		   than from the in-core copy */
    967 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    968 			   (RF_ComponentLabel_t *));
    969 		if (clabel == NULL)
    970 			return (ENOMEM);
    971 
    972 		bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
    973 
    974 		retcode = copyin( *clabel_ptr, clabel,
    975 				  sizeof(RF_ComponentLabel_t));
    976 
    977 		if (retcode) {
    978 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
    979 			return(retcode);
    980 		}
    981 
    982 		row = clabel->row;
    983 		column = clabel->column;
    984 
    985 		if ((row < 0) || (row >= raidPtr->numRow) ||
    986 		    (column < 0) || (column >= raidPtr->numCol)) {
    987 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
    988 			return(EINVAL);
    989 		}
    990 
    991 		raidread_component_label(raidPtr->Disks[row][column].dev,
    992 				raidPtr->raid_cinfo[row][column].ci_vp,
    993 				clabel );
    994 
    995 		retcode = copyout((caddr_t) clabel,
    996 				  (caddr_t) *clabel_ptr,
    997 				  sizeof(RF_ComponentLabel_t));
    998 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
    999 		return (retcode);
   1000 
   1001 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1002 		clabel = (RF_ComponentLabel_t *) data;
   1003 
   1004 		/* XXX check the label for valid stuff... */
   1005 		/* Note that some things *should not* get modified --
   1006 		   the user should be re-initing the labels instead of
   1007 		   trying to patch things.
   1008 		   */
   1009 
   1010 		printf("Got component label:\n");
   1011 		printf("Version: %d\n",clabel->version);
   1012 		printf("Serial Number: %d\n",clabel->serial_number);
   1013 		printf("Mod counter: %d\n",clabel->mod_counter);
   1014 		printf("Row: %d\n", clabel->row);
   1015 		printf("Column: %d\n", clabel->column);
   1016 		printf("Num Rows: %d\n", clabel->num_rows);
   1017 		printf("Num Columns: %d\n", clabel->num_columns);
   1018 		printf("Clean: %d\n", clabel->clean);
   1019 		printf("Status: %d\n", clabel->status);
   1020 
   1021 		row = clabel->row;
   1022 		column = clabel->column;
   1023 
   1024 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1025 		    (column < 0) || (column >= raidPtr->numCol)) {
   1026 			return(EINVAL);
   1027 		}
   1028 
   1029 		/* XXX this isn't allowed to do anything for now :-) */
   1030 
   1031 		/* XXX and before it is, we need to fill in the rest
   1032 		   of the fields!?!?!?! */
   1033 #if 0
   1034 		raidwrite_component_label(
   1035                             raidPtr->Disks[row][column].dev,
   1036 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1037 			    clabel );
   1038 #endif
   1039 		return (0);
   1040 
   1041 	case RAIDFRAME_INIT_LABELS:
   1042 		clabel = (RF_ComponentLabel_t *) data;
   1043 		/*
   1044 		   we only want the serial number from
   1045 		   the above.  We get all the rest of the information
   1046 		   from the config that was used to create this RAID
   1047 		   set.
   1048 		   */
   1049 
   1050 		raidPtr->serial_number = clabel->serial_number;
   1051 
   1052 		raid_init_component_label(raidPtr, &ci_label);
   1053 		ci_label.serial_number = clabel->serial_number;
   1054 
   1055 		for(row=0;row<raidPtr->numRow;row++) {
   1056 			ci_label.row = row;
   1057 			for(column=0;column<raidPtr->numCol;column++) {
   1058 				diskPtr = &raidPtr->Disks[row][column];
   1059 				ci_label.partitionSize = diskPtr->partitionSize;
   1060 				ci_label.column = column;
   1061 				raidwrite_component_label(
   1062 				  raidPtr->Disks[row][column].dev,
   1063 				  raidPtr->raid_cinfo[row][column].ci_vp,
   1064 				  &ci_label );
   1065 			}
   1066 		}
   1067 
   1068 		return (retcode);
   1069 	case RAIDFRAME_SET_AUTOCONFIG:
   1070 		d = rf_set_autoconfig(raidPtr, *data);
   1071 		printf("New autoconfig value is: %d\n", d);
   1072 		*data = d;
   1073 		return (retcode);
   1074 
   1075 	case RAIDFRAME_SET_ROOT:
   1076 		d = rf_set_rootpartition(raidPtr, *data);
   1077 		printf("New rootpartition value is: %d\n", d);
   1078 		*data = d;
   1079 		return (retcode);
   1080 
   1081 		/* initialize all parity */
   1082 	case RAIDFRAME_REWRITEPARITY:
   1083 
   1084 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1085 			/* Parity for RAID 0 is trivially correct */
   1086 			raidPtr->parity_good = RF_RAID_CLEAN;
   1087 			return(0);
   1088 		}
   1089 
   1090 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1091 			/* Re-write is already in progress! */
   1092 			return(EINVAL);
   1093 		}
   1094 
   1095 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1096 					   rf_RewriteParityThread,
   1097 					   raidPtr,"raid_parity");
   1098 		return (retcode);
   1099 
   1100 
   1101 	case RAIDFRAME_ADD_HOT_SPARE:
   1102 		sparePtr = (RF_SingleComponent_t *) data;
   1103 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1104 		printf("Adding spare\n");
   1105 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1106 		return(retcode);
   1107 
   1108 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1109 		return(retcode);
   1110 
   1111 	case RAIDFRAME_REBUILD_IN_PLACE:
   1112 
   1113 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1114 			/* Can't do this on a RAID 0!! */
   1115 			return(EINVAL);
   1116 		}
   1117 
   1118 		if (raidPtr->recon_in_progress == 1) {
   1119 			/* a reconstruct is already in progress! */
   1120 			return(EINVAL);
   1121 		}
   1122 
   1123 		componentPtr = (RF_SingleComponent_t *) data;
   1124 		memcpy( &component, componentPtr,
   1125 			sizeof(RF_SingleComponent_t));
   1126 		row = component.row;
   1127 		column = component.column;
   1128 		printf("Rebuild: %d %d\n",row, column);
   1129 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1130 		    (column < 0) || (column >= raidPtr->numCol)) {
   1131 			return(EINVAL);
   1132 		}
   1133 
   1134 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1135 		if (rrcopy == NULL)
   1136 			return(ENOMEM);
   1137 
   1138 		rrcopy->raidPtr = (void *) raidPtr;
   1139 		rrcopy->row = row;
   1140 		rrcopy->col = column;
   1141 
   1142 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1143 					   rf_ReconstructInPlaceThread,
   1144 					   rrcopy,"raid_reconip");
   1145 		return(retcode);
   1146 
   1147 	case RAIDFRAME_GET_INFO:
   1148 		if (!raidPtr->valid)
   1149 			return (ENODEV);
   1150 		ucfgp = (RF_DeviceConfig_t **) data;
   1151 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1152 			  (RF_DeviceConfig_t *));
   1153 		if (d_cfg == NULL)
   1154 			return (ENOMEM);
   1155 		bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
   1156 		d_cfg->rows = raidPtr->numRow;
   1157 		d_cfg->cols = raidPtr->numCol;
   1158 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1159 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1160 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1161 			return (ENOMEM);
   1162 		}
   1163 		d_cfg->nspares = raidPtr->numSpare;
   1164 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1165 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1166 			return (ENOMEM);
   1167 		}
   1168 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1169 		d = 0;
   1170 		for (i = 0; i < d_cfg->rows; i++) {
   1171 			for (j = 0; j < d_cfg->cols; j++) {
   1172 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1173 				d++;
   1174 			}
   1175 		}
   1176 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1177 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1178 		}
   1179 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1180 				  sizeof(RF_DeviceConfig_t));
   1181 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1182 
   1183 		return (retcode);
   1184 
   1185 	case RAIDFRAME_CHECK_PARITY:
   1186 		*(int *) data = raidPtr->parity_good;
   1187 		return (0);
   1188 
   1189 	case RAIDFRAME_RESET_ACCTOTALS:
   1190 		bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
   1191 		return (0);
   1192 
   1193 	case RAIDFRAME_GET_ACCTOTALS:
   1194 		totals = (RF_AccTotals_t *) data;
   1195 		*totals = raidPtr->acc_totals;
   1196 		return (0);
   1197 
   1198 	case RAIDFRAME_KEEP_ACCTOTALS:
   1199 		raidPtr->keep_acc_totals = *(int *)data;
   1200 		return (0);
   1201 
   1202 	case RAIDFRAME_GET_SIZE:
   1203 		*(int *) data = raidPtr->totalSectors;
   1204 		return (0);
   1205 
   1206 		/* fail a disk & optionally start reconstruction */
   1207 	case RAIDFRAME_FAIL_DISK:
   1208 
   1209 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1210 			/* Can't do this on a RAID 0!! */
   1211 			return(EINVAL);
   1212 		}
   1213 
   1214 		rr = (struct rf_recon_req *) data;
   1215 
   1216 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1217 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1218 			return (EINVAL);
   1219 
   1220 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1221 		       unit, rr->row, rr->col);
   1222 
   1223 		/* make a copy of the recon request so that we don't rely on
   1224 		 * the user's buffer */
   1225 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1226 		if (rrcopy == NULL)
   1227 			return(ENOMEM);
   1228 		bcopy(rr, rrcopy, sizeof(*rr));
   1229 		rrcopy->raidPtr = (void *) raidPtr;
   1230 
   1231 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1232 					   rf_ReconThread,
   1233 					   rrcopy,"raid_recon");
   1234 		return (0);
   1235 
   1236 		/* invoke a copyback operation after recon on whatever disk
   1237 		 * needs it, if any */
   1238 	case RAIDFRAME_COPYBACK:
   1239 
   1240 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1241 			/* This makes no sense on a RAID 0!! */
   1242 			return(EINVAL);
   1243 		}
   1244 
   1245 		if (raidPtr->copyback_in_progress == 1) {
   1246 			/* Copyback is already in progress! */
   1247 			return(EINVAL);
   1248 		}
   1249 
   1250 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1251 					   rf_CopybackThread,
   1252 					   raidPtr,"raid_copyback");
   1253 		return (retcode);
   1254 
   1255 		/* return the percentage completion of reconstruction */
   1256 	case RAIDFRAME_CHECK_RECON_STATUS:
   1257 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1258 			/* This makes no sense on a RAID 0 */
   1259 			return(EINVAL);
   1260 		}
   1261 		row = 0; /* XXX we only consider a single row... */
   1262 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1263 			*(int *) data = 100;
   1264 		else
   1265 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1266 		return (0);
   1267 
   1268 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1269 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1270 			/* This makes no sense on a RAID 0 */
   1271 			return(EINVAL);
   1272 		}
   1273 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1274 			*(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
   1275 		} else {
   1276 			*(int *) data = 100;
   1277 		}
   1278 		return (0);
   1279 
   1280 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1281 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1282 			/* This makes no sense on a RAID 0 */
   1283 			return(EINVAL);
   1284 		}
   1285 		if (raidPtr->copyback_in_progress == 1) {
   1286 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1287 				raidPtr->Layout.numStripe;
   1288 		} else {
   1289 			*(int *) data = 100;
   1290 		}
   1291 		return (0);
   1292 
   1293 
   1294 		/* the sparetable daemon calls this to wait for the kernel to
   1295 		 * need a spare table. this ioctl does not return until a
   1296 		 * spare table is needed. XXX -- calling mpsleep here in the
   1297 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1298 		 * -- I should either compute the spare table in the kernel,
   1299 		 * or have a different -- XXX XXX -- interface (a different
   1300 		 * character device) for delivering the table     -- XXX */
   1301 #if 0
   1302 	case RAIDFRAME_SPARET_WAIT:
   1303 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1304 		while (!rf_sparet_wait_queue)
   1305 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1306 		waitreq = rf_sparet_wait_queue;
   1307 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1308 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1309 
   1310 		/* structure assignment */
   1311 		*((RF_SparetWait_t *) data) = *waitreq;
   1312 
   1313 		RF_Free(waitreq, sizeof(*waitreq));
   1314 		return (0);
   1315 
   1316 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1317 		 * code in it that will cause the daemon to exit */
   1318 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1319 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1320 		waitreq->fcol = -1;
   1321 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1322 		waitreq->next = rf_sparet_wait_queue;
   1323 		rf_sparet_wait_queue = waitreq;
   1324 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1325 		wakeup(&rf_sparet_wait_queue);
   1326 		return (0);
   1327 
   1328 		/* used by the spare table daemon to deliver a spare table
   1329 		 * into the kernel */
   1330 	case RAIDFRAME_SEND_SPARET:
   1331 
   1332 		/* install the spare table */
   1333 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1334 
   1335 		/* respond to the requestor.  the return status of the spare
   1336 		 * table installation is passed in the "fcol" field */
   1337 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1338 		waitreq->fcol = retcode;
   1339 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1340 		waitreq->next = rf_sparet_resp_queue;
   1341 		rf_sparet_resp_queue = waitreq;
   1342 		wakeup(&rf_sparet_resp_queue);
   1343 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1344 
   1345 		return (retcode);
   1346 #endif
   1347 
   1348 	default:
   1349 		break; /* fall through to the os-specific code below */
   1350 
   1351 	}
   1352 
   1353 	if (!raidPtr->valid)
   1354 		return (EINVAL);
   1355 
   1356 	/*
   1357 	 * Add support for "regular" device ioctls here.
   1358 	 */
   1359 
   1360 	switch (cmd) {
   1361 	case DIOCGDINFO:
   1362 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1363 		break;
   1364 
   1365 	case DIOCGPART:
   1366 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1367 		((struct partinfo *) data)->part =
   1368 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1369 		break;
   1370 
   1371 	case DIOCWDINFO:
   1372 	case DIOCSDINFO:
   1373 		if ((error = raidlock(rs)) != 0)
   1374 			return (error);
   1375 
   1376 		rs->sc_flags |= RAIDF_LABELLING;
   1377 
   1378 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1379 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1380 		if (error == 0) {
   1381 			if (cmd == DIOCWDINFO)
   1382 				error = writedisklabel(RAIDLABELDEV(dev),
   1383 				    raidstrategy, rs->sc_dkdev.dk_label,
   1384 				    rs->sc_dkdev.dk_cpulabel);
   1385 		}
   1386 		rs->sc_flags &= ~RAIDF_LABELLING;
   1387 
   1388 		raidunlock(rs);
   1389 
   1390 		if (error)
   1391 			return (error);
   1392 		break;
   1393 
   1394 	case DIOCWLABEL:
   1395 		if (*(int *) data != 0)
   1396 			rs->sc_flags |= RAIDF_WLABEL;
   1397 		else
   1398 			rs->sc_flags &= ~RAIDF_WLABEL;
   1399 		break;
   1400 
   1401 	case DIOCGDEFLABEL:
   1402 		raidgetdefaultlabel(raidPtr, rs,
   1403 		    (struct disklabel *) data);
   1404 		break;
   1405 
   1406 	default:
   1407 		retcode = ENOTTY;
   1408 	}
   1409 	return (retcode);
   1410 
   1411 }
   1412 
   1413 
   1414 /* raidinit -- complete the rest of the initialization for the
   1415    RAIDframe device.  */
   1416 
   1417 
   1418 static void
   1419 raidinit(raidPtr)
   1420 	RF_Raid_t *raidPtr;
   1421 {
   1422 	struct raid_softc *rs;
   1423 	int     unit;
   1424 
   1425 	unit = raidPtr->raidid;
   1426 
   1427 	rs = &raid_softc[unit];
   1428 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1429 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1430 
   1431 
   1432 	/* XXX should check return code first... */
   1433 	rs->sc_flags |= RAIDF_INITED;
   1434 
   1435 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1436 
   1437 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1438 
   1439 	/* disk_attach actually creates space for the CPU disklabel, among
   1440 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1441 	 * with disklabels. */
   1442 
   1443 	disk_attach(&rs->sc_dkdev);
   1444 
   1445 	/* XXX There may be a weird interaction here between this, and
   1446 	 * protectedSectors, as used in RAIDframe.  */
   1447 
   1448 	rs->sc_size = raidPtr->totalSectors;
   1449 
   1450 }
   1451 
   1452 /* wake up the daemon & tell it to get us a spare table
   1453  * XXX
   1454  * the entries in the queues should be tagged with the raidPtr
   1455  * so that in the extremely rare case that two recons happen at once,
   1456  * we know for which device were requesting a spare table
   1457  * XXX
   1458  *
   1459  * XXX This code is not currently used. GO
   1460  */
   1461 int
   1462 rf_GetSpareTableFromDaemon(req)
   1463 	RF_SparetWait_t *req;
   1464 {
   1465 	int     retcode;
   1466 
   1467 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1468 	req->next = rf_sparet_wait_queue;
   1469 	rf_sparet_wait_queue = req;
   1470 	wakeup(&rf_sparet_wait_queue);
   1471 
   1472 	/* mpsleep unlocks the mutex */
   1473 	while (!rf_sparet_resp_queue) {
   1474 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1475 		    "raidframe getsparetable", 0);
   1476 	}
   1477 	req = rf_sparet_resp_queue;
   1478 	rf_sparet_resp_queue = req->next;
   1479 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1480 
   1481 	retcode = req->fcol;
   1482 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1483 					 * alloc'd */
   1484 	return (retcode);
   1485 }
   1486 
   1487 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1488  * bp & passes it down.
   1489  * any calls originating in the kernel must use non-blocking I/O
   1490  * do some extra sanity checking to return "appropriate" error values for
   1491  * certain conditions (to make some standard utilities work)
   1492  *
   1493  * Formerly known as: rf_DoAccessKernel
   1494  */
   1495 void
   1496 raidstart(raidPtr)
   1497 	RF_Raid_t *raidPtr;
   1498 {
   1499 	RF_SectorCount_t num_blocks, pb, sum;
   1500 	RF_RaidAddr_t raid_addr;
   1501 	int     retcode;
   1502 	struct partition *pp;
   1503 	daddr_t blocknum;
   1504 	int     unit;
   1505 	struct raid_softc *rs;
   1506 	int     do_async;
   1507 	struct buf *bp;
   1508 
   1509 	unit = raidPtr->raidid;
   1510 	rs = &raid_softc[unit];
   1511 
   1512 	/* quick check to see if anything has died recently */
   1513 	RF_LOCK_MUTEX(raidPtr->mutex);
   1514 	if (raidPtr->numNewFailures > 0) {
   1515 		rf_update_component_labels(raidPtr);
   1516 		raidPtr->numNewFailures--;
   1517 	}
   1518 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1519 
   1520 	/* Check to see if we're at the limit... */
   1521 	RF_LOCK_MUTEX(raidPtr->mutex);
   1522 	while (raidPtr->openings > 0) {
   1523 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1524 
   1525 		/* get the next item, if any, from the queue */
   1526 		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
   1527 			/* nothing more to do */
   1528 			return;
   1529 		}
   1530 		BUFQ_REMOVE(&rs->buf_queue, bp);
   1531 
   1532 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1533 		 * partition.. Need to make it absolute to the underlying
   1534 		 * device.. */
   1535 
   1536 		blocknum = bp->b_blkno;
   1537 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1538 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1539 			blocknum += pp->p_offset;
   1540 		}
   1541 
   1542 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1543 			    (int) blocknum));
   1544 
   1545 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1546 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1547 
   1548 		/* *THIS* is where we adjust what block we're going to...
   1549 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1550 		raid_addr = blocknum;
   1551 
   1552 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1553 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1554 		sum = raid_addr + num_blocks + pb;
   1555 		if (1 || rf_debugKernelAccess) {
   1556 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1557 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1558 				    (int) pb, (int) bp->b_resid));
   1559 		}
   1560 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1561 		    || (sum < num_blocks) || (sum < pb)) {
   1562 			bp->b_error = ENOSPC;
   1563 			bp->b_flags |= B_ERROR;
   1564 			bp->b_resid = bp->b_bcount;
   1565 			biodone(bp);
   1566 			RF_LOCK_MUTEX(raidPtr->mutex);
   1567 			continue;
   1568 		}
   1569 		/*
   1570 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1571 		 */
   1572 
   1573 		if (bp->b_bcount & raidPtr->sectorMask) {
   1574 			bp->b_error = EINVAL;
   1575 			bp->b_flags |= B_ERROR;
   1576 			bp->b_resid = bp->b_bcount;
   1577 			biodone(bp);
   1578 			RF_LOCK_MUTEX(raidPtr->mutex);
   1579 			continue;
   1580 
   1581 		}
   1582 		db1_printf(("Calling DoAccess..\n"));
   1583 
   1584 
   1585 		RF_LOCK_MUTEX(raidPtr->mutex);
   1586 		raidPtr->openings--;
   1587 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1588 
   1589 		/*
   1590 		 * Everything is async.
   1591 		 */
   1592 		do_async = 1;
   1593 
   1594 		/* don't ever condition on bp->b_flags & B_WRITE.
   1595 		 * always condition on B_READ instead */
   1596 
   1597 		/* XXX we're still at splbio() here... do we *really*
   1598 		   need to be? */
   1599 
   1600 
   1601 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1602 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1603 				      do_async, raid_addr, num_blocks,
   1604 				      bp->b_un.b_addr, bp, NULL, NULL,
   1605 				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
   1606 
   1607 
   1608 		RF_LOCK_MUTEX(raidPtr->mutex);
   1609 	}
   1610 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1611 }
   1612 
   1613 
   1614 
   1615 
   1616 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1617 
   1618 int
   1619 rf_DispatchKernelIO(queue, req)
   1620 	RF_DiskQueue_t *queue;
   1621 	RF_DiskQueueData_t *req;
   1622 {
   1623 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1624 	struct buf *bp;
   1625 	struct raidbuf *raidbp = NULL;
   1626 	struct raid_softc *rs;
   1627 	int     unit;
   1628 	int s;
   1629 
   1630 	s=0;
   1631 	/* s = splbio();*/ /* want to test this */
   1632 	/* XXX along with the vnode, we also need the softc associated with
   1633 	 * this device.. */
   1634 
   1635 	req->queue = queue;
   1636 
   1637 	unit = queue->raidPtr->raidid;
   1638 
   1639 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1640 
   1641 	if (unit >= numraid) {
   1642 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1643 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1644 	}
   1645 	rs = &raid_softc[unit];
   1646 
   1647 	/* XXX is this the right place? */
   1648 	disk_busy(&rs->sc_dkdev);
   1649 
   1650 	bp = req->bp;
   1651 #if 1
   1652 	/* XXX when there is a physical disk failure, someone is passing us a
   1653 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1654 	 * without taking a performance hit... (not sure where the real bug
   1655 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1656 
   1657 	if (bp->b_flags & B_ERROR) {
   1658 		bp->b_flags &= ~B_ERROR;
   1659 	}
   1660 	if (bp->b_error != 0) {
   1661 		bp->b_error = 0;
   1662 	}
   1663 #endif
   1664 	raidbp = RAIDGETBUF(rs);
   1665 
   1666 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1667 
   1668 	/*
   1669 	 * context for raidiodone
   1670 	 */
   1671 	raidbp->rf_obp = bp;
   1672 	raidbp->req = req;
   1673 
   1674 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1675 
   1676 	switch (req->type) {
   1677 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1678 		/* XXX need to do something extra here.. */
   1679 		/* I'm leaving this in, as I've never actually seen it used,
   1680 		 * and I'd like folks to report it... GO */
   1681 		printf(("WAKEUP CALLED\n"));
   1682 		queue->numOutstanding++;
   1683 
   1684 		/* XXX need to glue the original buffer into this??  */
   1685 
   1686 		KernelWakeupFunc(&raidbp->rf_buf);
   1687 		break;
   1688 
   1689 	case RF_IO_TYPE_READ:
   1690 	case RF_IO_TYPE_WRITE:
   1691 
   1692 		if (req->tracerec) {
   1693 			RF_ETIMER_START(req->tracerec->timer);
   1694 		}
   1695 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1696 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1697 		    req->sectorOffset, req->numSector,
   1698 		    req->buf, KernelWakeupFunc, (void *) req,
   1699 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1700 
   1701 		if (rf_debugKernelAccess) {
   1702 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1703 				(long) bp->b_blkno));
   1704 		}
   1705 		queue->numOutstanding++;
   1706 		queue->last_deq_sector = req->sectorOffset;
   1707 		/* acc wouldn't have been let in if there were any pending
   1708 		 * reqs at any other priority */
   1709 		queue->curPriority = req->priority;
   1710 
   1711 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1712 			req->type, unit, queue->row, queue->col));
   1713 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1714 			(int) req->sectorOffset, (int) req->numSector,
   1715 			(int) (req->numSector <<
   1716 			    queue->raidPtr->logBytesPerSector),
   1717 			(int) queue->raidPtr->logBytesPerSector));
   1718 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1719 			raidbp->rf_buf.b_vp->v_numoutput++;
   1720 		}
   1721 		VOP_STRATEGY(&raidbp->rf_buf);
   1722 
   1723 		break;
   1724 
   1725 	default:
   1726 		panic("bad req->type in rf_DispatchKernelIO");
   1727 	}
   1728 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1729 	/* splx(s); */ /* want to test this */
   1730 	return (0);
   1731 }
   1732 /* this is the callback function associated with a I/O invoked from
   1733    kernel code.
   1734  */
   1735 static void
   1736 KernelWakeupFunc(vbp)
   1737 	struct buf *vbp;
   1738 {
   1739 	RF_DiskQueueData_t *req = NULL;
   1740 	RF_DiskQueue_t *queue;
   1741 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1742 	struct buf *bp;
   1743 	struct raid_softc *rs;
   1744 	int     unit;
   1745 	register int s;
   1746 
   1747 	s = splbio();
   1748 	db1_printf(("recovering the request queue:\n"));
   1749 	req = raidbp->req;
   1750 
   1751 	bp = raidbp->rf_obp;
   1752 
   1753 	queue = (RF_DiskQueue_t *) req->queue;
   1754 
   1755 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1756 		bp->b_flags |= B_ERROR;
   1757 		bp->b_error = raidbp->rf_buf.b_error ?
   1758 		    raidbp->rf_buf.b_error : EIO;
   1759 	}
   1760 
   1761 	/* XXX methinks this could be wrong... */
   1762 #if 1
   1763 	bp->b_resid = raidbp->rf_buf.b_resid;
   1764 #endif
   1765 
   1766 	if (req->tracerec) {
   1767 		RF_ETIMER_STOP(req->tracerec->timer);
   1768 		RF_ETIMER_EVAL(req->tracerec->timer);
   1769 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1770 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1771 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1772 		req->tracerec->num_phys_ios++;
   1773 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1774 	}
   1775 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1776 
   1777 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1778 
   1779 
   1780 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1781 	 * ballistic, and mark the component as hosed... */
   1782 
   1783 	if (bp->b_flags & B_ERROR) {
   1784 		/* Mark the disk as dead */
   1785 		/* but only mark it once... */
   1786 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1787 		    rf_ds_optimal) {
   1788 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1789 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1790 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1791 			    rf_ds_failed;
   1792 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1793 			queue->raidPtr->numFailures++;
   1794 			queue->raidPtr->numNewFailures++;
   1795 			/* XXX here we should bump the version number for each component, and write that data out */
   1796 		} else {	/* Disk is already dead... */
   1797 			/* printf("Disk already marked as dead!\n"); */
   1798 		}
   1799 
   1800 	}
   1801 
   1802 	rs = &raid_softc[unit];
   1803 	RAIDPUTBUF(rs, raidbp);
   1804 
   1805 
   1806 	if (bp->b_resid == 0) {
   1807 		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
   1808 		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
   1809 	}
   1810 
   1811 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1812 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1813 
   1814 	splx(s);
   1815 }
   1816 
   1817 
   1818 
   1819 /*
   1820  * initialize a buf structure for doing an I/O in the kernel.
   1821  */
   1822 static void
   1823 InitBP(
   1824     struct buf * bp,
   1825     struct vnode * b_vp,
   1826     unsigned rw_flag,
   1827     dev_t dev,
   1828     RF_SectorNum_t startSect,
   1829     RF_SectorCount_t numSect,
   1830     caddr_t buf,
   1831     void (*cbFunc) (struct buf *),
   1832     void *cbArg,
   1833     int logBytesPerSector,
   1834     struct proc * b_proc)
   1835 {
   1836 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1837 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1838 	bp->b_bcount = numSect << logBytesPerSector;
   1839 	bp->b_bufsize = bp->b_bcount;
   1840 	bp->b_error = 0;
   1841 	bp->b_dev = dev;
   1842 	bp->b_un.b_addr = buf;
   1843 	bp->b_blkno = startSect;
   1844 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1845 	if (bp->b_bcount == 0) {
   1846 		panic("bp->b_bcount is zero in InitBP!!\n");
   1847 	}
   1848 	bp->b_proc = b_proc;
   1849 	bp->b_iodone = cbFunc;
   1850 	bp->b_vp = b_vp;
   1851 
   1852 }
   1853 
   1854 static void
   1855 raidgetdefaultlabel(raidPtr, rs, lp)
   1856 	RF_Raid_t *raidPtr;
   1857 	struct raid_softc *rs;
   1858 	struct disklabel *lp;
   1859 {
   1860 	db1_printf(("Building a default label...\n"));
   1861 	bzero(lp, sizeof(*lp));
   1862 
   1863 	/* fabricate a label... */
   1864 	lp->d_secperunit = raidPtr->totalSectors;
   1865 	lp->d_secsize = raidPtr->bytesPerSector;
   1866 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1867 	lp->d_ntracks = 1;
   1868 	lp->d_ncylinders = raidPtr->totalSectors /
   1869 		(lp->d_nsectors * lp->d_ntracks);
   1870 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1871 
   1872 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1873 	lp->d_type = DTYPE_RAID;
   1874 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1875 	lp->d_rpm = 3600;
   1876 	lp->d_interleave = 1;
   1877 	lp->d_flags = 0;
   1878 
   1879 	lp->d_partitions[RAW_PART].p_offset = 0;
   1880 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1881 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1882 	lp->d_npartitions = RAW_PART + 1;
   1883 
   1884 	lp->d_magic = DISKMAGIC;
   1885 	lp->d_magic2 = DISKMAGIC;
   1886 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1887 
   1888 }
   1889 /*
   1890  * Read the disklabel from the raid device.  If one is not present, fake one
   1891  * up.
   1892  */
   1893 static void
   1894 raidgetdisklabel(dev)
   1895 	dev_t   dev;
   1896 {
   1897 	int     unit = raidunit(dev);
   1898 	struct raid_softc *rs = &raid_softc[unit];
   1899 	char   *errstring;
   1900 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1901 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1902 	RF_Raid_t *raidPtr;
   1903 
   1904 	db1_printf(("Getting the disklabel...\n"));
   1905 
   1906 	bzero(clp, sizeof(*clp));
   1907 
   1908 	raidPtr = raidPtrs[unit];
   1909 
   1910 	raidgetdefaultlabel(raidPtr, rs, lp);
   1911 
   1912 	/*
   1913 	 * Call the generic disklabel extraction routine.
   1914 	 */
   1915 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1916 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1917 	if (errstring)
   1918 		raidmakedisklabel(rs);
   1919 	else {
   1920 		int     i;
   1921 		struct partition *pp;
   1922 
   1923 		/*
   1924 		 * Sanity check whether the found disklabel is valid.
   1925 		 *
   1926 		 * This is necessary since total size of the raid device
   1927 		 * may vary when an interleave is changed even though exactly
   1928 		 * same componets are used, and old disklabel may used
   1929 		 * if that is found.
   1930 		 */
   1931 		if (lp->d_secperunit != rs->sc_size)
   1932 			printf("WARNING: %s: "
   1933 			    "total sector size in disklabel (%d) != "
   1934 			    "the size of raid (%ld)\n", rs->sc_xname,
   1935 			    lp->d_secperunit, (long) rs->sc_size);
   1936 		for (i = 0; i < lp->d_npartitions; i++) {
   1937 			pp = &lp->d_partitions[i];
   1938 			if (pp->p_offset + pp->p_size > rs->sc_size)
   1939 				printf("WARNING: %s: end of partition `%c' "
   1940 				    "exceeds the size of raid (%ld)\n",
   1941 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   1942 		}
   1943 	}
   1944 
   1945 }
   1946 /*
   1947  * Take care of things one might want to take care of in the event
   1948  * that a disklabel isn't present.
   1949  */
   1950 static void
   1951 raidmakedisklabel(rs)
   1952 	struct raid_softc *rs;
   1953 {
   1954 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1955 	db1_printf(("Making a label..\n"));
   1956 
   1957 	/*
   1958 	 * For historical reasons, if there's no disklabel present
   1959 	 * the raw partition must be marked FS_BSDFFS.
   1960 	 */
   1961 
   1962 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1963 
   1964 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1965 
   1966 	lp->d_checksum = dkcksum(lp);
   1967 }
   1968 /*
   1969  * Lookup the provided name in the filesystem.  If the file exists,
   1970  * is a valid block device, and isn't being used by anyone else,
   1971  * set *vpp to the file's vnode.
   1972  * You'll find the original of this in ccd.c
   1973  */
   1974 int
   1975 raidlookup(path, p, vpp)
   1976 	char   *path;
   1977 	struct proc *p;
   1978 	struct vnode **vpp;	/* result */
   1979 {
   1980 	struct nameidata nd;
   1981 	struct vnode *vp;
   1982 	struct vattr va;
   1983 	int     error;
   1984 
   1985 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   1986 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   1987 #ifdef DEBUG
   1988 		printf("RAIDframe: vn_open returned %d\n", error);
   1989 #endif
   1990 		return (error);
   1991 	}
   1992 	vp = nd.ni_vp;
   1993 	if (vp->v_usecount > 1) {
   1994 		VOP_UNLOCK(vp, 0);
   1995 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1996 		return (EBUSY);
   1997 	}
   1998 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   1999 		VOP_UNLOCK(vp, 0);
   2000 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2001 		return (error);
   2002 	}
   2003 	/* XXX: eventually we should handle VREG, too. */
   2004 	if (va.va_type != VBLK) {
   2005 		VOP_UNLOCK(vp, 0);
   2006 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2007 		return (ENOTBLK);
   2008 	}
   2009 	VOP_UNLOCK(vp, 0);
   2010 	*vpp = vp;
   2011 	return (0);
   2012 }
   2013 /*
   2014  * Wait interruptibly for an exclusive lock.
   2015  *
   2016  * XXX
   2017  * Several drivers do this; it should be abstracted and made MP-safe.
   2018  * (Hmm... where have we seen this warning before :->  GO )
   2019  */
   2020 static int
   2021 raidlock(rs)
   2022 	struct raid_softc *rs;
   2023 {
   2024 	int     error;
   2025 
   2026 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2027 		rs->sc_flags |= RAIDF_WANTED;
   2028 		if ((error =
   2029 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2030 			return (error);
   2031 	}
   2032 	rs->sc_flags |= RAIDF_LOCKED;
   2033 	return (0);
   2034 }
   2035 /*
   2036  * Unlock and wake up any waiters.
   2037  */
   2038 static void
   2039 raidunlock(rs)
   2040 	struct raid_softc *rs;
   2041 {
   2042 
   2043 	rs->sc_flags &= ~RAIDF_LOCKED;
   2044 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2045 		rs->sc_flags &= ~RAIDF_WANTED;
   2046 		wakeup(rs);
   2047 	}
   2048 }
   2049 
   2050 
/*
 * Placement of the component label on a component: the label I/O
 * routines below start their transfer at byte offset
 * RF_COMPONENT_INFO_OFFSET (converted to DEV_BSIZE blocks) and move
 * RF_COMPONENT_INFO_SIZE bytes.
 */
#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2053 
   2054 int
   2055 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2056 {
   2057 	RF_ComponentLabel_t clabel;
   2058 	raidread_component_label(dev, b_vp, &clabel);
   2059 	clabel.mod_counter = mod_counter;
   2060 	clabel.clean = RF_RAID_CLEAN;
   2061 	raidwrite_component_label(dev, b_vp, &clabel);
   2062 	return(0);
   2063 }
   2064 
   2065 
   2066 int
   2067 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2068 {
   2069 	RF_ComponentLabel_t clabel;
   2070 	raidread_component_label(dev, b_vp, &clabel);
   2071 	clabel.mod_counter = mod_counter;
   2072 	clabel.clean = RF_RAID_DIRTY;
   2073 	raidwrite_component_label(dev, b_vp, &clabel);
   2074 	return(0);
   2075 }
   2076 
   2077 /* ARGSUSED */
   2078 int
   2079 raidread_component_label(dev, b_vp, clabel)
   2080 	dev_t dev;
   2081 	struct vnode *b_vp;
   2082 	RF_ComponentLabel_t *clabel;
   2083 {
   2084 	struct buf *bp;
   2085 	int error;
   2086 
   2087 	/* XXX should probably ensure that we don't try to do this if
   2088 	   someone has changed rf_protected_sectors. */
   2089 
   2090 	/* get a block of the appropriate size... */
   2091 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2092 	bp->b_dev = dev;
   2093 
   2094 	/* get our ducks in a row for the read */
   2095 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2096 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2097 	bp->b_flags = B_BUSY | B_READ;
   2098  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2099 
   2100 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2101 
   2102 	error = biowait(bp);
   2103 
   2104 	if (!error) {
   2105 		memcpy(clabel, bp->b_un.b_addr,
   2106 		       sizeof(RF_ComponentLabel_t));
   2107 #if 0
   2108 		print_component_label( clabel );
   2109 #endif
   2110         } else {
   2111 #if 0
   2112 		printf("Failed to read RAID component label!\n");
   2113 #endif
   2114 	}
   2115 
   2116         bp->b_flags = B_INVAL | B_AGE;
   2117 	brelse(bp);
   2118 	return(error);
   2119 }
   2120 /* ARGSUSED */
   2121 int
   2122 raidwrite_component_label(dev, b_vp, clabel)
   2123 	dev_t dev;
   2124 	struct vnode *b_vp;
   2125 	RF_ComponentLabel_t *clabel;
   2126 {
   2127 	struct buf *bp;
   2128 	int error;
   2129 
   2130 	/* get a block of the appropriate size... */
   2131 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2132 	bp->b_dev = dev;
   2133 
   2134 	/* get our ducks in a row for the write */
   2135 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2136 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2137 	bp->b_flags = B_BUSY | B_WRITE;
   2138  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2139 
   2140 	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
   2141 
   2142 	memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
   2143 
   2144 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2145 	error = biowait(bp);
   2146         bp->b_flags = B_INVAL | B_AGE;
   2147 	brelse(bp);
   2148 	if (error) {
   2149 #if 1
   2150 		printf("Failed to write RAID component info!\n");
   2151 #endif
   2152 	}
   2153 
   2154 	return(error);
   2155 }
   2156 
   2157 void
   2158 rf_markalldirty( raidPtr )
   2159 	RF_Raid_t *raidPtr;
   2160 {
   2161 	RF_ComponentLabel_t clabel;
   2162 	int r,c;
   2163 
   2164 	raidPtr->mod_counter++;
   2165 	for (r = 0; r < raidPtr->numRow; r++) {
   2166 		for (c = 0; c < raidPtr->numCol; c++) {
   2167 			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
   2168 				raidread_component_label(
   2169 					raidPtr->Disks[r][c].dev,
   2170 					raidPtr->raid_cinfo[r][c].ci_vp,
   2171 					&clabel);
   2172 				if (clabel.status == rf_ds_spared) {
   2173 					/* XXX do something special...
   2174 					 but whatever you do, don't
   2175 					 try to access it!! */
   2176 				} else {
   2177 #if 0
   2178 				clabel.status =
   2179 					raidPtr->Disks[r][c].status;
   2180 				raidwrite_component_label(
   2181 					raidPtr->Disks[r][c].dev,
   2182 					raidPtr->raid_cinfo[r][c].ci_vp,
   2183 					&clabel);
   2184 #endif
   2185 				raidmarkdirty(
   2186 				       raidPtr->Disks[r][c].dev,
   2187 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2188 				       raidPtr->mod_counter);
   2189 				}
   2190 			}
   2191 		}
   2192 	}
   2193 	/* printf("Component labels marked dirty.\n"); */
   2194 #if 0
   2195 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2196 		sparecol = raidPtr->numCol + c;
   2197 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2198 			/*
   2199 
   2200 			   XXX this is where we get fancy and map this spare
   2201 			   into it's correct spot in the array.
   2202 
   2203 			 */
   2204 			/*
   2205 
   2206 			   we claim this disk is "optimal" if it's
   2207 			   rf_ds_used_spare, as that means it should be
   2208 			   directly substitutable for the disk it replaced.
   2209 			   We note that too...
   2210 
   2211 			 */
   2212 
   2213 			for(i=0;i<raidPtr->numRow;i++) {
   2214 				for(j=0;j<raidPtr->numCol;j++) {
   2215 					if ((raidPtr->Disks[i][j].spareRow ==
   2216 					     r) &&
   2217 					    (raidPtr->Disks[i][j].spareCol ==
   2218 					     sparecol)) {
   2219 						srow = r;
   2220 						scol = sparecol;
   2221 						break;
   2222 					}
   2223 				}
   2224 			}
   2225 
   2226 			raidread_component_label(
   2227 				      raidPtr->Disks[r][sparecol].dev,
   2228 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2229 				      &clabel);
   2230 			/* make sure status is noted */
   2231 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2232 			clabel.mod_counter = raidPtr->mod_counter;
   2233 			clabel.serial_number = raidPtr->serial_number;
   2234 			clabel.row = srow;
   2235 			clabel.column = scol;
   2236 			clabel.num_rows = raidPtr->numRow;
   2237 			clabel.num_columns = raidPtr->numCol;
   2238 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2239 			clabel.status = rf_ds_optimal;
   2240 			raidwrite_component_label(
   2241 				      raidPtr->Disks[r][sparecol].dev,
   2242 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2243 				      &clabel);
   2244 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2245 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2246 		}
   2247 	}
   2248 
   2249 #endif
   2250 }
   2251 
   2252 
   2253 void
   2254 rf_update_component_labels( raidPtr )
   2255 	RF_Raid_t *raidPtr;
   2256 {
   2257 	RF_ComponentLabel_t clabel;
   2258 	int sparecol;
   2259 	int r,c;
   2260 	int i,j;
   2261 	int srow, scol;
   2262 
   2263 	srow = -1;
   2264 	scol = -1;
   2265 
   2266 	/* XXX should do extra checks to make sure things really are clean,
   2267 	   rather than blindly setting the clean bit... */
   2268 
   2269 	raidPtr->mod_counter++;
   2270 
   2271 	for (r = 0; r < raidPtr->numRow; r++) {
   2272 		for (c = 0; c < raidPtr->numCol; c++) {
   2273 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2274 				raidread_component_label(
   2275 					raidPtr->Disks[r][c].dev,
   2276 					raidPtr->raid_cinfo[r][c].ci_vp,
   2277 					&clabel);
   2278 				/* make sure status is noted */
   2279 				clabel.status = rf_ds_optimal;
   2280 				/* bump the counter */
   2281 				clabel.mod_counter = raidPtr->mod_counter;
   2282 
   2283 				raidwrite_component_label(
   2284 					raidPtr->Disks[r][c].dev,
   2285 					raidPtr->raid_cinfo[r][c].ci_vp,
   2286 					&clabel);
   2287 			}
   2288 			/* else we don't touch it.. */
   2289 		}
   2290 	}
   2291 
   2292 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2293 		sparecol = raidPtr->numCol + c;
   2294 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2295 			/*
   2296 
   2297 			   we claim this disk is "optimal" if it's
   2298 			   rf_ds_used_spare, as that means it should be
   2299 			   directly substitutable for the disk it replaced.
   2300 			   We note that too...
   2301 
   2302 			 */
   2303 
   2304 			for(i=0;i<raidPtr->numRow;i++) {
   2305 				for(j=0;j<raidPtr->numCol;j++) {
   2306 					if ((raidPtr->Disks[i][j].spareRow ==
   2307 					     0) &&
   2308 					    (raidPtr->Disks[i][j].spareCol ==
   2309 					     sparecol)) {
   2310 						srow = i;
   2311 						scol = j;
   2312 						break;
   2313 					}
   2314 				}
   2315 			}
   2316 
   2317 			/* XXX shouldn't *really* need this... */
   2318 			raidread_component_label(
   2319 				      raidPtr->Disks[0][sparecol].dev,
   2320 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2321 				      &clabel);
   2322 			/* make sure status is noted */
   2323 
   2324 			raid_init_component_label(raidPtr, &clabel);
   2325 
   2326 			clabel.mod_counter = raidPtr->mod_counter;
   2327 			clabel.row = srow;
   2328 			clabel.column = scol;
   2329 			clabel.status = rf_ds_optimal;
   2330 
   2331 			raidwrite_component_label(
   2332 				      raidPtr->Disks[0][sparecol].dev,
   2333 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2334 				      &clabel);
   2335 		}
   2336 	}
   2337 	/* 	printf("Component labels updated\n"); */
   2338 }
   2339 
   2340 
   2341 void
   2342 rf_final_update_component_labels( raidPtr )
   2343 	RF_Raid_t *raidPtr;
   2344 {
   2345 	RF_ComponentLabel_t clabel;
   2346 	int sparecol;
   2347 	int r,c;
   2348 	int i,j;
   2349 	int srow, scol;
   2350 
   2351 	srow = -1;
   2352 	scol = -1;
   2353 
   2354 	/* XXX should do extra checks to make sure things really are clean,
   2355 	   rather than blindly setting the clean bit... */
   2356 
   2357 	raidPtr->mod_counter++;
   2358 
   2359 	for (r = 0; r < raidPtr->numRow; r++) {
   2360 		for (c = 0; c < raidPtr->numCol; c++) {
   2361 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2362 				raidread_component_label(
   2363 					raidPtr->Disks[r][c].dev,
   2364 					raidPtr->raid_cinfo[r][c].ci_vp,
   2365 					&clabel);
   2366 				/* make sure status is noted */
   2367 				clabel.status = rf_ds_optimal;
   2368 				/* bump the counter */
   2369 				clabel.mod_counter = raidPtr->mod_counter;
   2370 
   2371 				raidwrite_component_label(
   2372 					raidPtr->Disks[r][c].dev,
   2373 					raidPtr->raid_cinfo[r][c].ci_vp,
   2374 					&clabel);
   2375 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2376 					raidmarkclean(
   2377 					      raidPtr->Disks[r][c].dev,
   2378 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2379 					      raidPtr->mod_counter);
   2380 				}
   2381 			}
   2382 			/* else we don't touch it.. */
   2383 		}
   2384 	}
   2385 
   2386 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2387 		sparecol = raidPtr->numCol + c;
   2388 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2389 			/*
   2390 
   2391 			   we claim this disk is "optimal" if it's
   2392 			   rf_ds_used_spare, as that means it should be
   2393 			   directly substitutable for the disk it replaced.
   2394 			   We note that too...
   2395 
   2396 			 */
   2397 
   2398 			for(i=0;i<raidPtr->numRow;i++) {
   2399 				for(j=0;j<raidPtr->numCol;j++) {
   2400 					if ((raidPtr->Disks[i][j].spareRow ==
   2401 					     0) &&
   2402 					    (raidPtr->Disks[i][j].spareCol ==
   2403 					     sparecol)) {
   2404 						srow = i;
   2405 						scol = j;
   2406 						break;
   2407 					}
   2408 				}
   2409 			}
   2410 
   2411 			/* XXX shouldn't *really* need this... */
   2412 			raidread_component_label(
   2413 				      raidPtr->Disks[0][sparecol].dev,
   2414 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2415 				      &clabel);
   2416 			/* make sure status is noted */
   2417 
   2418 			raid_init_component_label(raidPtr, &clabel);
   2419 
   2420 			clabel.mod_counter = raidPtr->mod_counter;
   2421 			clabel.row = srow;
   2422 			clabel.column = scol;
   2423 			clabel.status = rf_ds_optimal;
   2424 
   2425 			raidwrite_component_label(
   2426 				      raidPtr->Disks[0][sparecol].dev,
   2427 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2428 				      &clabel);
   2429 			if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2430 				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2431 			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2432 					       raidPtr->mod_counter);
   2433 			}
   2434 		}
   2435 	}
   2436 	/* 	printf("Component labels updated\n"); */
   2437 }
   2438 
   2439 
   2440 void
   2441 rf_ReconThread(req)
   2442 	struct rf_recon_req *req;
   2443 {
   2444 	int     s;
   2445 	RF_Raid_t *raidPtr;
   2446 
   2447 	s = splbio();
   2448 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2449 	raidPtr->recon_in_progress = 1;
   2450 
   2451 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2452 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2453 
   2454 	/* XXX get rid of this! we don't need it at all.. */
   2455 	RF_Free(req, sizeof(*req));
   2456 
   2457 	raidPtr->recon_in_progress = 0;
   2458 	splx(s);
   2459 
   2460 	/* That's all... */
   2461 	kthread_exit(0);        /* does not return */
   2462 }
   2463 
   2464 void
   2465 rf_RewriteParityThread(raidPtr)
   2466 	RF_Raid_t *raidPtr;
   2467 {
   2468 	int retcode;
   2469 	int s;
   2470 
   2471 	raidPtr->parity_rewrite_in_progress = 1;
   2472 	s = splbio();
   2473 	retcode = rf_RewriteParity(raidPtr);
   2474 	splx(s);
   2475 	if (retcode) {
   2476 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2477 	} else {
   2478 		/* set the clean bit!  If we shutdown correctly,
   2479 		   the clean bit on each component label will get
   2480 		   set */
   2481 		raidPtr->parity_good = RF_RAID_CLEAN;
   2482 	}
   2483 	raidPtr->parity_rewrite_in_progress = 0;
   2484 
   2485 	/* That's all... */
   2486 	kthread_exit(0);        /* does not return */
   2487 }
   2488 
   2489 
   2490 void
   2491 rf_CopybackThread(raidPtr)
   2492 	RF_Raid_t *raidPtr;
   2493 {
   2494 	int s;
   2495 
   2496 	raidPtr->copyback_in_progress = 1;
   2497 	s = splbio();
   2498 	rf_CopybackReconstructedData(raidPtr);
   2499 	splx(s);
   2500 	raidPtr->copyback_in_progress = 0;
   2501 
   2502 	/* That's all... */
   2503 	kthread_exit(0);        /* does not return */
   2504 }
   2505 
   2506 
   2507 void
   2508 rf_ReconstructInPlaceThread(req)
   2509 	struct rf_recon_req *req;
   2510 {
   2511 	int retcode;
   2512 	int s;
   2513 	RF_Raid_t *raidPtr;
   2514 
   2515 	s = splbio();
   2516 	raidPtr = req->raidPtr;
   2517 	raidPtr->recon_in_progress = 1;
   2518 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2519 	RF_Free(req, sizeof(*req));
   2520 	raidPtr->recon_in_progress = 0;
   2521 	splx(s);
   2522 
   2523 	/* That's all... */
   2524 	kthread_exit(0);        /* does not return */
   2525 }
   2526 
/*
 * Mount-root hook.  Currently does nothing with `dev'.
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	(void) dev;
}
   2533 
   2534 
   2535 RF_AutoConfig_t *
   2536 rf_find_raid_components()
   2537 {
   2538 	struct devnametobdevmaj *dtobdm;
   2539 	struct vnode *vp;
   2540 	struct disklabel label;
   2541 	struct device *dv;
   2542 	char *cd_name;
   2543 	dev_t dev;
   2544 	int error;
   2545 	int i;
   2546 	int good_one;
   2547 	RF_ComponentLabel_t *clabel;
   2548 	RF_AutoConfig_t *ac_list;
   2549 	RF_AutoConfig_t *ac;
   2550 
   2551 
   2552 	/* initialize the AutoConfig list */
   2553 	ac_list = NULL;
   2554 
   2555 if (raidautoconfig) {
   2556 
   2557 	/* we begin by trolling through *all* the devices on the system */
   2558 
   2559 	for (dv = alldevs.tqh_first; dv != NULL;
   2560 	     dv = dv->dv_list.tqe_next) {
   2561 
   2562 		/* we are only interested in disks... */
   2563 		if (dv->dv_class != DV_DISK)
   2564 			continue;
   2565 
   2566 		/* we don't care about floppies... */
   2567 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2568 			continue;
   2569 		}
   2570 
   2571 		/* need to find the device_name_to_block_device_major stuff */
   2572 		cd_name = dv->dv_cfdata->cf_driver->cd_name;
   2573 		dtobdm = dev_name2blk;
   2574 		while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
   2575 			dtobdm++;
   2576 		}
   2577 
   2578 		/* get a vnode for the raw partition of this disk */
   2579 
   2580 		dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
   2581 		if (bdevvp(dev, &vp))
   2582 			panic("RAID can't alloc vnode");
   2583 
   2584 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2585 
   2586 		if (error) {
   2587 			/* "Who cares."  Continue looking
   2588 			   for something that exists*/
   2589 			vput(vp);
   2590 			continue;
   2591 		}
   2592 
   2593 		/* Ok, the disk exists.  Go get the disklabel. */
   2594 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2595 				  FREAD, NOCRED, 0);
   2596 		if (error) {
   2597 			/*
   2598 			 * XXX can't happen - open() would
   2599 			 * have errored out (or faked up one)
   2600 			 */
   2601 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2602 			       dv->dv_xname, 'a' + RAW_PART, error);
   2603 		}
   2604 
   2605 		/* don't need this any more.  We'll allocate it again
   2606 		   a little later if we really do... */
   2607 		VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2608 		vput(vp);
   2609 
   2610 		for (i=0; i < label.d_npartitions; i++) {
   2611 			/* We only support partitions marked as RAID */
   2612 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2613 				continue;
   2614 
   2615 			dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
   2616 			if (bdevvp(dev, &vp))
   2617 				panic("RAID can't alloc vnode");
   2618 
   2619 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2620 			if (error) {
   2621 				/* Whatever... */
   2622 				vput(vp);
   2623 				continue;
   2624 			}
   2625 
   2626 			good_one = 0;
   2627 
   2628 			clabel = (RF_ComponentLabel_t *)
   2629 				malloc(sizeof(RF_ComponentLabel_t),
   2630 				       M_RAIDFRAME, M_NOWAIT);
   2631 			if (clabel == NULL) {
   2632 				/* XXX CLEANUP HERE */
   2633 				printf("RAID auto config: out of memory!\n");
   2634 				return(NULL); /* XXX probably should panic? */
   2635 			}
   2636 
   2637 			if (!raidread_component_label(dev, vp, clabel)) {
   2638 				/* Got the label.  Does it look reasonable? */
   2639 				if (rf_reasonable_label(clabel) &&
   2640 				    (clabel->partitionSize <=
   2641 				     label.d_partitions[i].p_size)) {
   2642 #if DEBUG
   2643 					printf("Component on: %s%c: %d\n",
   2644 					       dv->dv_xname, 'a'+i,
   2645 					       label.d_partitions[i].p_size);
   2646 					print_component_label(clabel);
   2647 #endif
   2648 					/* if it's reasonable, add it,
   2649 					   else ignore it. */
   2650 					ac = (RF_AutoConfig_t *)
   2651 						malloc(sizeof(RF_AutoConfig_t),
   2652 						       M_RAIDFRAME,
   2653 						       M_NOWAIT);
   2654 					if (ac == NULL) {
   2655 						/* XXX should panic?? */
   2656 						return(NULL);
   2657 					}
   2658 
   2659 					sprintf(ac->devname, "%s%c",
   2660 						dv->dv_xname, 'a'+i);
   2661 					ac->dev = dev;
   2662 					ac->vp = vp;
   2663 					ac->clabel = clabel;
   2664 					ac->next = ac_list;
   2665 					ac_list = ac;
   2666 					good_one = 1;
   2667 				}
   2668 			}
   2669 			if (!good_one) {
   2670 				/* cleanup */
   2671 				free(clabel, M_RAIDFRAME);
   2672 				VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2673 				vput(vp);
   2674 			}
   2675 		}
   2676 	}
   2677 }
   2678 return(ac_list);
   2679 }
   2680 
   2681 static int
   2682 rf_reasonable_label(clabel)
   2683 	RF_ComponentLabel_t *clabel;
   2684 {
   2685 
   2686 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2687 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2688 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2689 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2690 	    clabel->row >=0 &&
   2691 	    clabel->column >= 0 &&
   2692 	    clabel->num_rows > 0 &&
   2693 	    clabel->num_columns > 0 &&
   2694 	    clabel->row < clabel->num_rows &&
   2695 	    clabel->column < clabel->num_columns &&
   2696 	    clabel->blockSize > 0 &&
   2697 	    clabel->numBlocks > 0) {
   2698 		/* label looks reasonable enough... */
   2699 		return(1);
   2700 	}
   2701 	return(0);
   2702 }
   2703 
   2704 
   2705 void
   2706 print_component_label(clabel)
   2707 	RF_ComponentLabel_t *clabel;
   2708 {
   2709 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2710 	       clabel->row, clabel->column,
   2711 	       clabel->num_rows, clabel->num_columns);
   2712 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2713 	       clabel->version, clabel->serial_number,
   2714 	       clabel->mod_counter);
   2715 	printf("   Clean: %s Status: %d\n",
   2716 	       clabel->clean ? "Yes" : "No", clabel->status );
   2717 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2718 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2719 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2720 	       (char) clabel->parityConfig, clabel->blockSize,
   2721 	       clabel->numBlocks);
   2722 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2723 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2724 #if 0
   2725 	   printf("   Config order: %d\n", clabel->config_order);
   2726 #endif
   2727 
   2728 }
   2729 
   2730 RF_ConfigSet_t *
   2731 rf_create_auto_sets(ac_list)
   2732 	RF_AutoConfig_t *ac_list;
   2733 {
   2734 	RF_AutoConfig_t *ac;
   2735 	RF_ConfigSet_t *config_sets;
   2736 	RF_ConfigSet_t *cset;
   2737 	RF_AutoConfig_t *ac_next;
   2738 
   2739 
   2740 	config_sets = NULL;
   2741 
   2742 	/* Go through the AutoConfig list, and figure out which components
   2743 	   belong to what sets.  */
   2744 	ac = ac_list;
   2745 	while(ac!=NULL) {
   2746 		/* we're going to putz with ac->next, so save it here
   2747 		   for use at the end of the loop */
   2748 		ac_next = ac->next;
   2749 
   2750 		if (config_sets == NULL) {
   2751 			/* will need at least this one... */
   2752 			config_sets = (RF_ConfigSet_t *)
   2753 				malloc(sizeof(RF_ConfigSet_t),
   2754 				       M_RAIDFRAME, M_NOWAIT);
   2755 			if (config_sets == NULL) {
   2756 				panic("rf_create_auto_sets: No memory!\n");
   2757 			}
   2758 			/* this one is easy :) */
   2759 			config_sets->ac = ac;
   2760 			config_sets->next = NULL;
   2761 			config_sets->rootable = 0;
   2762 			ac->next = NULL;
   2763 		} else {
   2764 			/* which set does this component fit into? */
   2765 			cset = config_sets;
   2766 			while(cset!=NULL) {
   2767 				if (rf_does_it_fit(cset, ac)) {
   2768 					/* looks like it matches */
   2769 					ac->next = cset->ac;
   2770 					cset->ac = ac;
   2771 					break;
   2772 				}
   2773 				cset = cset->next;
   2774 			}
   2775 			if (cset==NULL) {
   2776 				/* didn't find a match above... new set..*/
   2777 				cset = (RF_ConfigSet_t *)
   2778 					malloc(sizeof(RF_ConfigSet_t),
   2779 					       M_RAIDFRAME, M_NOWAIT);
   2780 				if (cset == NULL) {
   2781 					panic("rf_create_auto_sets: No memory!\n");
   2782 				}
   2783 				cset->ac = ac;
   2784 				ac->next = NULL;
   2785 				cset->next = config_sets;
   2786 				cset->rootable = 0;
   2787 				config_sets = cset;
   2788 			}
   2789 		}
   2790 		ac = ac_next;
   2791 	}
   2792 
   2793 
   2794 	return(config_sets);
   2795 }
   2796 
   2797 static int
   2798 rf_does_it_fit(cset, ac)
   2799 	RF_ConfigSet_t *cset;
   2800 	RF_AutoConfig_t *ac;
   2801 {
   2802 	RF_ComponentLabel_t *clabel1, *clabel2;
   2803 
   2804 	/* If this one matches the *first* one in the set, that's good
   2805 	   enough, since the other members of the set would have been
   2806 	   through here too... */
   2807 	/* note that we are not checking partitionSize here..
   2808 
   2809 	   Note that we are also not checking the mod_counters here.
   2810 	   If everything else matches execpt the mod_counter, that's
   2811 	   good enough for this test.  We will deal with the mod_counters
   2812 	   a little later in the autoconfiguration process.
   2813 
   2814 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2815 
   2816 	*/
   2817 
   2818 	clabel1 = cset->ac->clabel;
   2819 	clabel2 = ac->clabel;
   2820 	if ((clabel1->version == clabel2->version) &&
   2821 	    (clabel1->serial_number == clabel2->serial_number) &&
   2822 	    (clabel1->num_rows == clabel2->num_rows) &&
   2823 	    (clabel1->num_columns == clabel2->num_columns) &&
   2824 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2825 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2826 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2827 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2828 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2829 	    (clabel1->blockSize == clabel2->blockSize) &&
   2830 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2831 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2832 	    (clabel1->root_partition == clabel2->root_partition) &&
   2833 	    (clabel1->last_unit == clabel2->last_unit) &&
   2834 	    (clabel1->config_order == clabel2->config_order)) {
   2835 		/* if it get's here, it almost *has* to be a match */
   2836 	} else {
   2837 		/* it's not consistent with somebody in the set..
   2838 		   punt */
   2839 		return(0);
   2840 	}
   2841 	/* all was fine.. it must fit... */
   2842 	return(1);
   2843 }
   2844 
   2845 int
   2846 rf_have_enough_components(cset)
   2847 	RF_ConfigSet_t *cset;
   2848 {
   2849 	RF_AutoConfig_t *ac;
   2850 	RF_AutoConfig_t *auto_config;
   2851 	RF_ComponentLabel_t *clabel;
   2852 	int r,c;
   2853 	int num_rows;
   2854 	int num_cols;
   2855 	int num_missing;
   2856 
   2857 	/* check to see that we have enough 'live' components
   2858 	   of this set.  If so, we can configure it if necessary */
   2859 
   2860 	num_rows = cset->ac->clabel->num_rows;
   2861 	num_cols = cset->ac->clabel->num_columns;
   2862 
   2863 	/* XXX Check for duplicate components!?!?!? */
   2864 
   2865 	num_missing = 0;
   2866 	auto_config = cset->ac;
   2867 
   2868 	for(r=0; r<num_rows; r++) {
   2869 		for(c=0; c<num_cols; c++) {
   2870 			ac = auto_config;
   2871 			while(ac!=NULL) {
   2872 				if (ac->clabel==NULL) {
   2873 					/* big-time bad news. */
   2874 					goto fail;
   2875 				}
   2876 				if ((ac->clabel->row == r) &&
   2877 				    (ac->clabel->column == c)) {
   2878 					/* it's this one... */
   2879 #if DEBUG
   2880 					printf("Found: %s at %d,%d\n",
   2881 					       ac->devname,r,c);
   2882 #endif
   2883 					break;
   2884 				}
   2885 				ac=ac->next;
   2886 			}
   2887 			if (ac==NULL) {
   2888 				/* Didn't find one here! */
   2889 				num_missing++;
   2890 			}
   2891 		}
   2892 	}
   2893 
   2894 	clabel = cset->ac->clabel;
   2895 
   2896 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   2897 	    ((clabel->parityConfig == '1') && (num_missing > 1)) ||
   2898 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   2899 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   2900 		/* XXX this needs to be made *much* more general */
   2901 		/* Too many failures */
   2902 		return(0);
   2903 	}
   2904 	/* otherwise, all is well, and we've got enough to take a kick
   2905 	   at autoconfiguring this set */
   2906 	return(1);
   2907 fail:
   2908 	return(0);
   2909 
   2910 }
   2911 
   2912 void
   2913 rf_create_configuration(ac,config,raidPtr)
   2914 	RF_AutoConfig_t *ac;
   2915 	RF_Config_t *config;
   2916 	RF_Raid_t *raidPtr;
   2917 {
   2918 	RF_ComponentLabel_t *clabel;
   2919 
   2920 	clabel = ac->clabel;
   2921 
   2922 	/* 1. Fill in the common stuff */
   2923 	config->numRow = clabel->num_rows;
   2924 	config->numCol = clabel->num_columns;
   2925 	config->numSpare = 0; /* XXX should this be set here? */
   2926 	config->sectPerSU = clabel->sectPerSU;
   2927 	config->SUsPerPU = clabel->SUsPerPU;
   2928 	config->SUsPerRU = clabel->SUsPerRU;
   2929 	config->parityConfig = clabel->parityConfig;
   2930 	/* XXX... */
   2931 	strcpy(config->diskQueueType,"fifo");
   2932 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   2933 	config->layoutSpecificSize = 0; /* XXX ?? */
   2934 
   2935 	while(ac!=NULL) {
   2936 		/* row/col values will be in range due to the checks
   2937 		   in reasonable_label() */
   2938 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   2939 		       ac->devname);
   2940 		ac = ac->next;
   2941 	}
   2942 
   2943 }
   2944 
   2945 int
   2946 rf_set_autoconfig(raidPtr, new_value)
   2947 	RF_Raid_t *raidPtr;
   2948 	int new_value;
   2949 {
   2950 	RF_ComponentLabel_t clabel;
   2951 	struct vnode *vp;
   2952 	dev_t dev;
   2953 	int row, column;
   2954 
   2955 	raidPtr->autoconfigure = new_value;
   2956 	for(row=0; row<raidPtr->numRow; row++) {
   2957 		for(column=0; column<raidPtr->numCol; column++) {
   2958 			dev = raidPtr->Disks[row][column].dev;
   2959 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2960 			raidread_component_label(dev, vp, &clabel);
   2961 			clabel.autoconfigure = new_value;
   2962 			raidwrite_component_label(dev, vp, &clabel);
   2963 		}
   2964 	}
   2965 	return(new_value);
   2966 }
   2967 
   2968 int
   2969 rf_set_rootpartition(raidPtr, new_value)
   2970 	RF_Raid_t *raidPtr;
   2971 	int new_value;
   2972 {
   2973 	RF_ComponentLabel_t clabel;
   2974 	struct vnode *vp;
   2975 	dev_t dev;
   2976 	int row, column;
   2977 
   2978 	raidPtr->root_partition = new_value;
   2979 	for(row=0; row<raidPtr->numRow; row++) {
   2980 		for(column=0; column<raidPtr->numCol; column++) {
   2981 			dev = raidPtr->Disks[row][column].dev;
   2982 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2983 			raidread_component_label(dev, vp, &clabel);
   2984 			clabel.root_partition = new_value;
   2985 			raidwrite_component_label(dev, vp, &clabel);
   2986 		}
   2987 	}
   2988 	return(new_value);
   2989 }
   2990 
   2991 void
   2992 rf_release_all_vps(cset)
   2993 	RF_ConfigSet_t *cset;
   2994 {
   2995 	RF_AutoConfig_t *ac;
   2996 
   2997 	ac = cset->ac;
   2998 	while(ac!=NULL) {
   2999 		/* Close the vp, and give it back */
   3000 		if (ac->vp) {
   3001 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3002 			vput(ac->vp);
   3003 		}
   3004 		ac = ac->next;
   3005 	}
   3006 }
   3007 
   3008 
   3009 void
   3010 rf_cleanup_config_set(cset)
   3011 	RF_ConfigSet_t *cset;
   3012 {
   3013 	RF_AutoConfig_t *ac;
   3014 	RF_AutoConfig_t *next_ac;
   3015 
   3016 	ac = cset->ac;
   3017 	while(ac!=NULL) {
   3018 		next_ac = ac->next;
   3019 		/* nuke the label */
   3020 		free(ac->clabel, M_RAIDFRAME);
   3021 		/* cleanup the config structure */
   3022 		free(ac, M_RAIDFRAME);
   3023 		/* "next.." */
   3024 		ac = next_ac;
   3025 	}
   3026 	/* and, finally, nuke the config set */
   3027 	free(cset, M_RAIDFRAME);
   3028 }
   3029 
   3030 
   3031 void
   3032 raid_init_component_label(raidPtr, clabel)
   3033 	RF_Raid_t *raidPtr;
   3034 	RF_ComponentLabel_t *clabel;
   3035 {
   3036 	/* current version number */
   3037 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3038 	clabel->serial_number = raidPtr->serial_number;
   3039 	clabel->mod_counter = raidPtr->mod_counter;
   3040 	clabel->num_rows = raidPtr->numRow;
   3041 	clabel->num_columns = raidPtr->numCol;
   3042 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3043 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3044 
   3045 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3046 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3047 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3048 
   3049 	clabel->blockSize = raidPtr->bytesPerSector;
   3050 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3051 
   3052 	/* XXX not portable */
   3053 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3054 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3055 	clabel->autoconfigure = raidPtr->autoconfigure;
   3056 	clabel->root_partition = raidPtr->root_partition;
   3057 	clabel->last_unit = raidPtr->raidid;
   3058 	clabel->config_order = raidPtr->config_order;
   3059 }
   3060 
   3061 int
   3062 rf_auto_config_set(cset,unit)
   3063 	RF_ConfigSet_t *cset;
   3064 	int *unit;
   3065 {
   3066 	RF_Raid_t *raidPtr;
   3067 	RF_Config_t *config;
   3068 	int raidID;
   3069 	int retcode;
   3070 
   3071 	printf("Starting autoconfigure on raid%d\n",raidID);
   3072 
   3073 	retcode = 0;
   3074 	*unit = -1;
   3075 
   3076 	/* 1. Create a config structure */
   3077 
   3078 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3079 				       M_RAIDFRAME,
   3080 				       M_NOWAIT);
   3081 	if (config==NULL) {
   3082 		printf("Out of mem!?!?\n");
   3083 				/* XXX do something more intelligent here. */
   3084 		return(1);
   3085 	}
   3086 	/* XXX raidID needs to be set correctly.. */
   3087 
   3088 	/*
   3089 	   2. Figure out what RAID ID this one is supposed to live at
   3090 	   See if we can get the same RAID dev that it was configured
   3091 	   on last time..
   3092 	*/
   3093 
   3094 	raidID = cset->ac->clabel->last_unit;
   3095 	if ((raidID < 0) || (raidID >= numraid)) {
   3096 		/* let's not wander off into lala land. */
   3097 		raidID = numraid - 1;
   3098 	}
   3099 	if (raidPtrs[raidID]->valid != 0) {
   3100 
   3101 		/*
   3102 		   Nope... Go looking for an alternative...
   3103 		   Start high so we don't immediately use raid0 if that's
   3104 		   not taken.
   3105 		*/
   3106 
   3107 		for(raidID = numraid; raidID >= 0; raidID--) {
   3108 			if (raidPtrs[raidID]->valid == 0) {
   3109 				/* can use this one! */
   3110 				break;
   3111 			}
   3112 		}
   3113 	}
   3114 
   3115 	if (raidID < 0) {
   3116 		/* punt... */
   3117 		printf("Unable to auto configure this set!\n");
   3118 		printf("(Out of RAID devs!)\n");
   3119 		return(1);
   3120 	}
   3121 
   3122 	raidPtr = raidPtrs[raidID];
   3123 
   3124 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3125 	raidPtr->raidid = raidID;
   3126 	raidPtr->openings = RAIDOUTSTANDING;
   3127 
   3128 	/* 3. Build the configuration structure */
   3129 	rf_create_configuration(cset->ac, config, raidPtr);
   3130 
   3131 	/* 4. Do the configuration */
   3132 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3133 
   3134 	if (retcode == 0) {
   3135 
   3136 		raidinit(raidPtrs[raidID]);
   3137 
   3138 		rf_markalldirty(raidPtrs[raidID]);
   3139 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3140 		if (cset->ac->clabel->root_partition==1) {
   3141 			/* everything configured just fine.  Make a note
   3142 			   that this set is eligible to be root. */
   3143 			cset->rootable = 1;
   3144 			/* XXX do this here? */
   3145 			raidPtrs[raidID]->root_partition = 1;
   3146 		}
   3147 	}
   3148 
   3149 	/* 5. Cleanup */
   3150 	free(config, M_RAIDFRAME);
   3151 
   3152 	*unit = raidID;
   3153 	return(retcode);
   3154 }
   3155