Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.65
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.65 2000/03/03 01:46:36 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 #include <sys/reboot.h>
    137 
    138 #include "raid.h"
    139 #include "opt_raid_autoconfig.h"
    140 #include "rf_raid.h"
    141 #include "rf_raidframe.h"
    142 #include "rf_copyback.h"
    143 #include "rf_dag.h"
    144 #include "rf_dagflags.h"
    145 #include "rf_diskqueue.h"
    146 #include "rf_acctrace.h"
    147 #include "rf_etimer.h"
    148 #include "rf_general.h"
    149 #include "rf_debugMem.h"
    150 #include "rf_kintf.h"
    151 #include "rf_options.h"
    152 #include "rf_driver.h"
    153 #include "rf_parityscan.h"
    154 #include "rf_debugprint.h"
    155 #include "rf_threadstuff.h"
    156 #include "rf_configure.h"
    157 
/*
 * Verbosity of the kernel-interface debug output.  db1_printf() only
 * prints when the kernel is built with DEBUG and this is greater than 0.
 */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- level-1 debug printf.
 *
 * The printf argument list must be wrapped in its own parentheses.
 * Both variants expand to a single do { } while (0) statement so the
 * macro behaves like an ordinary function call: it requires a trailing
 * semicolon and can be used as the body of an unbraced if/else without
 * capturing the `else' (DEBUG) or producing a stray `;' (non-DEBUG).
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    165 
/*
 * Table of per-unit RAID descriptors, indexed by unit number.  Both the
 * table and the descriptors it points to are allocated in raidattach().
 */
static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/*
 * Initialised with rf_mutex_init() in raidattach().
 * NOTE(review): presumably guards the two spare-table queues below --
 * confirm against the code that manipulates them.
 */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

/* Both list heads are reset to NULL in raidattach(). */
static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
    174 
/* prototypes */

/*
 * File-local helpers.  Their definitions are outside this part of the
 * file; raidinit() is called after a successful rf_Configure() in the
 * RAIDFRAME_CONFIGURE ioctl.
 */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);
static void raidinit __P((RF_Raid_t *));

/*
 * Driver entry points: attach, partition size query, open/close, ioctl,
 * raw read/write, block strategy and crash dump (not implemented).
 */
void raidattach __P((int));
int raidsize __P((dev_t));
int raidopen __P((dev_t, int, int, struct proc *));
int raidclose __P((dev_t, int, int, struct proc *));
int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int raidwrite __P((dev_t, struct uio *, int));
int raidread __P((dev_t, struct uio *, int));
void raidstrategy __P((struct buf *));
int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    193 
/*
 * Pilfered from ccd.c
 */

/*
 * Per-component-I/O wrapper.  The embedded struct buf is the first
 * member, so a pointer to rf_buf has the same address as the enclosing
 * raidbuf.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


/*
 * Get / release a buffer from the unit's component buffer pool
 * (sc_cbufpool in struct raid_softc).  RAIDGETBUF passes PR_NOWAIT.
 * NOTE(review): with PR_NOWAIT the allocation presumably can fail and
 * return NULL -- confirm that callers check the result.
 */
#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    208 
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
*/

/*
 * Per-unit driver state.  One entry per unit lives in the raid_softc[]
 * array allocated by raidattach(); it is indexed by raidunit(dev), in
 * parallel with raidPtrs[].
 */
struct raid_softc {
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;        /* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf_queue buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Unit number encoded in a dev_t. */
#define	raidunit(x)	DISKUNIT(x)
/*
 * Number of usable units.  Set by raidattach(); every entry point
 * rejects unit numbers >= numraid.
 */
int numraid = 0;
    231 
/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is re-used immediately, that's still 128K, which when
 * multiplied by say 10 requests, is 1280K, *on top* of the 640K of
 * incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 *
 * The value can be overridden at build time by defining RAIDOUTSTANDING
 * before this point; it is copied into raidPtr->openings when a set is
 * configured through the RAIDFRAME_CONFIGURE ioctl.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/*
 * Build the dev_t of the raw partition (RAW_PART) of the unit that
 * `dev' belongs to, keeping the same major number.
 */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    255 
/* declared here, and made public, for the benefit of KVM stuff.. */
/* Array of numraid per-unit softc structures; allocated in raidattach(). */
struct raid_softc *raid_softc;

/* Disklabel helpers; raidgetdisklabel() is called on the first open. */
static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
				     struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

/*
 * Per-unit lock used by open/close and the shutdown ioctl.  raidlock()
 * returns non-zero on failure, in which case callers bail out with that
 * value.
 */
static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));

static void rf_markalldirty __P((RF_Raid_t *));
void rf_mountroot_hook __P((struct device *));

/*
 * One struct device per unit, filled in by raidattach() and used by
 * rf_buildroothack() as the value for booted_device.
 */
struct device *raidrootdev;

void rf_ReconThread __P((struct rf_recon_req *));
/* XXX what I want is: */
/*void rf_ReconThread __P((RF_Raid_t *raidPtr));  */
void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
void rf_CopybackThread __P((RF_Raid_t *raidPtr));
void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
void rf_buildroothack __P((void *));

/* Autoconfiguration support. */
RF_AutoConfig_t *rf_find_raid_components __P((void));
void print_component_label __P((RF_ComponentLabel_t *));
RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
static int rf_reasonable_label __P((RF_ComponentLabel_t *));
void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
				  RF_Raid_t *));
int rf_set_autoconfig __P((RF_Raid_t *, int));
int rf_set_rootpartition __P((RF_Raid_t *, int));
void rf_release_all_vps __P((RF_ConfigSet_t *));
void rf_cleanup_config_set __P((RF_ConfigSet_t *));
int rf_have_enough_components __P((RF_ConfigSet_t *));
int rf_auto_config_set __P((RF_ConfigSet_t *, int *));

/*
 * Non-zero enables the autoconfiguration pass at the end of
 * raidattach(); raidattach() forces it to 1 when RAID_AUTOCONFIG is set.
 */
static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
				  allow autoconfig to take place.
			          Note that this is overridden by having
			          RAID_AUTOCONFIG as an option in the
			          kernel config file.  */
/* Defined outside this file; set by rf_buildroothack(). */
extern struct device *booted_device;
    300 
    301 void
    302 raidattach(num)
    303 	int     num;
    304 {
    305 	int raidID;
    306 	int i, rc;
    307 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    308 	RF_ConfigSet_t *config_sets;
    309 
    310 #ifdef DEBUG
    311 	printf("raidattach: Asked for %d units\n", num);
    312 #endif
    313 
    314 	if (num <= 0) {
    315 #ifdef DIAGNOSTIC
    316 		panic("raidattach: count <= 0");
    317 #endif
    318 		return;
    319 	}
    320 	/* This is where all the initialization stuff gets done. */
    321 
    322 	numraid = num;
    323 
    324 	/* Make some space for requested number of units... */
    325 
    326 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    327 	if (raidPtrs == NULL) {
    328 		panic("raidPtrs is NULL!!\n");
    329 	}
    330 
    331 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    332 	if (rc) {
    333 		RF_PANIC();
    334 	}
    335 
    336 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    337 
    338 	for (i = 0; i < num; i++)
    339 		raidPtrs[i] = NULL;
    340 	rc = rf_BootRaidframe();
    341 	if (rc == 0)
    342 		printf("Kernelized RAIDframe activated\n");
    343 	else
    344 		panic("Serious error booting RAID!!\n");
    345 
    346 	/* put together some datastructures like the CCD device does.. This
    347 	 * lets us lock the device and what-not when it gets opened. */
    348 
    349 	raid_softc = (struct raid_softc *)
    350 		malloc(num * sizeof(struct raid_softc),
    351 		       M_RAIDFRAME, M_NOWAIT);
    352 	if (raid_softc == NULL) {
    353 		printf("WARNING: no memory for RAIDframe driver\n");
    354 		return;
    355 	}
    356 
    357 	bzero(raid_softc, num * sizeof(struct raid_softc));
    358 
    359 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    360 					      M_RAIDFRAME, M_NOWAIT);
    361 	if (raidrootdev == NULL) {
    362 		panic("No memory for RAIDframe driver!!?!?!\n");
    363 	}
    364 
    365 	for (raidID = 0; raidID < num; raidID++) {
    366 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    367 
    368 		raidrootdev[raidID].dv_class  = DV_DISK;
    369 		raidrootdev[raidID].dv_cfdata = NULL;
    370 		raidrootdev[raidID].dv_unit   = raidID;
    371 		raidrootdev[raidID].dv_parent = NULL;
    372 		raidrootdev[raidID].dv_flags  = 0;
    373 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    374 
    375 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    376 			  (RF_Raid_t *));
    377 		if (raidPtrs[raidID] == NULL) {
    378 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    379 			numraid = raidID;
    380 			return;
    381 		}
    382 	}
    383 
    384 #if RAID_AUTOCONFIG
    385 	raidautoconfig = 1;
    386 #endif
    387 
    388 if (raidautoconfig) {
    389 	/* 1. locate all RAID components on the system */
    390 
    391 #if DEBUG
    392 	printf("Searching for raid components...\n");
    393 #endif
    394 	ac_list = rf_find_raid_components();
    395 
    396 	/* 2. sort them into their respective sets */
    397 
    398 	config_sets = rf_create_auto_sets(ac_list);
    399 
    400 	/* 3. evaluate each set and configure the valid ones
    401 	   This gets done in rf_buildroothack() */
    402 
    403 	/* schedule the creation of the thread to do the
    404 	   "/ on RAID" stuff */
    405 
    406 	kthread_create(rf_buildroothack,config_sets);
    407 
    408 #if 0
    409 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    410 #endif
    411 }
    412 
    413 }
    414 
    415 void
    416 rf_buildroothack(arg)
    417 	void *arg;
    418 {
    419 	RF_ConfigSet_t *config_sets = arg;
    420 	RF_ConfigSet_t *cset;
    421 	RF_ConfigSet_t *next_cset;
    422 	int retcode;
    423 	int raidID;
    424 	int rootID;
    425 	int num_root;
    426 
    427 	num_root = 0;
    428 	cset = config_sets;
    429 	while(cset != NULL ) {
    430 		next_cset = cset->next;
    431 		if (rf_have_enough_components(cset) &&
    432 		    cset->ac->clabel->autoconfigure==1) {
    433 			retcode = rf_auto_config_set(cset,&raidID);
    434 			if (!retcode) {
    435 				if (cset->rootable) {
    436 					rootID = raidID;
    437 					num_root++;
    438 				}
    439 			} else {
    440 				/* The autoconfig didn't work :( */
    441 #if DEBUG
    442 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    443 #endif
    444 				rf_release_all_vps(cset);
    445 			}
    446 		} else {
    447 			/* we're not autoconfiguring this set...
    448 			   release the associated resources */
    449 			rf_release_all_vps(cset);
    450 		}
    451 		/* cleanup */
    452 		rf_cleanup_config_set(cset);
    453 		cset = next_cset;
    454 	}
    455 	if (boothowto & RB_ASKNAME) {
    456 		/* We don't auto-config... */
    457 	} else {
    458 		/* They didn't ask, and we found something bootable... */
    459 
    460 		if (num_root == 1) {
    461 			booted_device = &raidrootdev[rootID];
    462 		} else if (num_root > 1) {
    463 			/* we can't guess.. require the user to answer... */
    464 			boothowto |= RB_ASKNAME;
    465 		}
    466 	}
    467 }
    468 
    469 
    470 int
    471 raidsize(dev)
    472 	dev_t   dev;
    473 {
    474 	struct raid_softc *rs;
    475 	struct disklabel *lp;
    476 	int     part, unit, omask, size;
    477 
    478 	unit = raidunit(dev);
    479 	if (unit >= numraid)
    480 		return (-1);
    481 	rs = &raid_softc[unit];
    482 
    483 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    484 		return (-1);
    485 
    486 	part = DISKPART(dev);
    487 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    488 	lp = rs->sc_dkdev.dk_label;
    489 
    490 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    491 		return (-1);
    492 
    493 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    494 		size = -1;
    495 	else
    496 		size = lp->d_partitions[part].p_size *
    497 		    (lp->d_secsize / DEV_BSIZE);
    498 
    499 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    500 		return (-1);
    501 
    502 	return (size);
    503 
    504 }
    505 
    506 int
    507 raiddump(dev, blkno, va, size)
    508 	dev_t   dev;
    509 	daddr_t blkno;
    510 	caddr_t va;
    511 	size_t  size;
    512 {
    513 	/* Not implemented. */
    514 	return ENXIO;
    515 }
    516 /* ARGSUSED */
    517 int
    518 raidopen(dev, flags, fmt, p)
    519 	dev_t   dev;
    520 	int     flags, fmt;
    521 	struct proc *p;
    522 {
    523 	int     unit = raidunit(dev);
    524 	struct raid_softc *rs;
    525 	struct disklabel *lp;
    526 	int     part, pmask;
    527 	int     error = 0;
    528 
    529 	if (unit >= numraid)
    530 		return (ENXIO);
    531 	rs = &raid_softc[unit];
    532 
    533 	if ((error = raidlock(rs)) != 0)
    534 		return (error);
    535 	lp = rs->sc_dkdev.dk_label;
    536 
    537 	part = DISKPART(dev);
    538 	pmask = (1 << part);
    539 
    540 	db1_printf(("Opening raid device number: %d partition: %d\n",
    541 		unit, part));
    542 
    543 
    544 	if ((rs->sc_flags & RAIDF_INITED) &&
    545 	    (rs->sc_dkdev.dk_openmask == 0))
    546 		raidgetdisklabel(dev);
    547 
    548 	/* make sure that this partition exists */
    549 
    550 	if (part != RAW_PART) {
    551 		db1_printf(("Not a raw partition..\n"));
    552 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    553 		    ((part >= lp->d_npartitions) ||
    554 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    555 			error = ENXIO;
    556 			raidunlock(rs);
    557 			db1_printf(("Bailing out...\n"));
    558 			return (error);
    559 		}
    560 	}
    561 	/* Prevent this unit from being unconfigured while open. */
    562 	switch (fmt) {
    563 	case S_IFCHR:
    564 		rs->sc_dkdev.dk_copenmask |= pmask;
    565 		break;
    566 
    567 	case S_IFBLK:
    568 		rs->sc_dkdev.dk_bopenmask |= pmask;
    569 		break;
    570 	}
    571 
    572 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    573 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    574 		/* First one... mark things as dirty... Note that we *MUST*
    575 		 have done a configure before this.  I DO NOT WANT TO BE
    576 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    577 		 THAT THEY BELONG TOGETHER!!!!! */
    578 		/* XXX should check to see if we're only open for reading
    579 		   here... If so, we needn't do this, but then need some
    580 		   other way of keeping track of what's happened.. */
    581 
    582 		rf_markalldirty( raidPtrs[unit] );
    583 	}
    584 
    585 
    586 	rs->sc_dkdev.dk_openmask =
    587 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    588 
    589 	raidunlock(rs);
    590 
    591 	return (error);
    592 
    593 
    594 }
    595 /* ARGSUSED */
    596 int
    597 raidclose(dev, flags, fmt, p)
    598 	dev_t   dev;
    599 	int     flags, fmt;
    600 	struct proc *p;
    601 {
    602 	int     unit = raidunit(dev);
    603 	struct raid_softc *rs;
    604 	int     error = 0;
    605 	int     part;
    606 
    607 	if (unit >= numraid)
    608 		return (ENXIO);
    609 	rs = &raid_softc[unit];
    610 
    611 	if ((error = raidlock(rs)) != 0)
    612 		return (error);
    613 
    614 	part = DISKPART(dev);
    615 
    616 	/* ...that much closer to allowing unconfiguration... */
    617 	switch (fmt) {
    618 	case S_IFCHR:
    619 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    620 		break;
    621 
    622 	case S_IFBLK:
    623 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    624 		break;
    625 	}
    626 	rs->sc_dkdev.dk_openmask =
    627 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    628 
    629 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    630 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    631 		/* Last one... device is not unconfigured yet.
    632 		   Device shutdown has taken care of setting the
    633 		   clean bits if RAIDF_INITED is not set
    634 		   mark things as clean... */
    635 #if 0
    636 		printf("Last one on raid%d.  Updating status.\n",unit);
    637 #endif
    638 		rf_final_update_component_labels( raidPtrs[unit] );
    639 	}
    640 
    641 	raidunlock(rs);
    642 	return (0);
    643 
    644 }
    645 
    646 void
    647 raidstrategy(bp)
    648 	register struct buf *bp;
    649 {
    650 	register int s;
    651 
    652 	unsigned int raidID = raidunit(bp->b_dev);
    653 	RF_Raid_t *raidPtr;
    654 	struct raid_softc *rs = &raid_softc[raidID];
    655 	struct disklabel *lp;
    656 	int     wlabel;
    657 
    658 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    659 		bp->b_error = ENXIO;
    660 		bp->b_flags = B_ERROR;
    661 		bp->b_resid = bp->b_bcount;
    662 		biodone(bp);
    663 		return;
    664 	}
    665 	if (raidID >= numraid || !raidPtrs[raidID]) {
    666 		bp->b_error = ENODEV;
    667 		bp->b_flags |= B_ERROR;
    668 		bp->b_resid = bp->b_bcount;
    669 		biodone(bp);
    670 		return;
    671 	}
    672 	raidPtr = raidPtrs[raidID];
    673 	if (!raidPtr->valid) {
    674 		bp->b_error = ENODEV;
    675 		bp->b_flags |= B_ERROR;
    676 		bp->b_resid = bp->b_bcount;
    677 		biodone(bp);
    678 		return;
    679 	}
    680 	if (bp->b_bcount == 0) {
    681 		db1_printf(("b_bcount is zero..\n"));
    682 		biodone(bp);
    683 		return;
    684 	}
    685 	lp = rs->sc_dkdev.dk_label;
    686 
    687 	/*
    688 	 * Do bounds checking and adjust transfer.  If there's an
    689 	 * error, the bounds check will flag that for us.
    690 	 */
    691 
    692 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    693 	if (DISKPART(bp->b_dev) != RAW_PART)
    694 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    695 			db1_printf(("Bounds check failed!!:%d %d\n",
    696 				(int) bp->b_blkno, (int) wlabel));
    697 			biodone(bp);
    698 			return;
    699 		}
    700 	s = splbio();
    701 
    702 	bp->b_resid = 0;
    703 
    704 	/* stuff it onto our queue */
    705 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    706 
    707 	raidstart(raidPtrs[raidID]);
    708 
    709 	splx(s);
    710 }
    711 /* ARGSUSED */
    712 int
    713 raidread(dev, uio, flags)
    714 	dev_t   dev;
    715 	struct uio *uio;
    716 	int     flags;
    717 {
    718 	int     unit = raidunit(dev);
    719 	struct raid_softc *rs;
    720 	int     part;
    721 
    722 	if (unit >= numraid)
    723 		return (ENXIO);
    724 	rs = &raid_softc[unit];
    725 
    726 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    727 		return (ENXIO);
    728 	part = DISKPART(dev);
    729 
    730 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    731 
    732 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    733 
    734 }
    735 /* ARGSUSED */
    736 int
    737 raidwrite(dev, uio, flags)
    738 	dev_t   dev;
    739 	struct uio *uio;
    740 	int     flags;
    741 {
    742 	int     unit = raidunit(dev);
    743 	struct raid_softc *rs;
    744 
    745 	if (unit >= numraid)
    746 		return (ENXIO);
    747 	rs = &raid_softc[unit];
    748 
    749 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    750 		return (ENXIO);
    751 	db1_printf(("raidwrite\n"));
    752 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    753 
    754 }
    755 
    756 int
    757 raidioctl(dev, cmd, data, flag, p)
    758 	dev_t   dev;
    759 	u_long  cmd;
    760 	caddr_t data;
    761 	int     flag;
    762 	struct proc *p;
    763 {
    764 	int     unit = raidunit(dev);
    765 	int     error = 0;
    766 	int     part, pmask;
    767 	struct raid_softc *rs;
    768 	RF_Config_t *k_cfg, *u_cfg;
    769 	RF_Raid_t *raidPtr;
    770 	RF_RaidDisk_t *diskPtr;
    771 	RF_AccTotals_t *totals;
    772 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    773 	u_char *specific_buf;
    774 	int retcode = 0;
    775 	int row;
    776 	int column;
    777 	struct rf_recon_req *rrcopy, *rr;
    778 	RF_ComponentLabel_t *clabel;
    779 	RF_ComponentLabel_t ci_label;
    780 	RF_ComponentLabel_t **clabel_ptr;
    781 	RF_SingleComponent_t *sparePtr,*componentPtr;
    782 	RF_SingleComponent_t hot_spare;
    783 	RF_SingleComponent_t component;
    784 	int i, j, d;
    785 
    786 	if (unit >= numraid)
    787 		return (ENXIO);
    788 	rs = &raid_softc[unit];
    789 	raidPtr = raidPtrs[unit];
    790 
    791 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    792 		(int) DISKPART(dev), (int) unit, (int) cmd));
    793 
    794 	/* Must be open for writes for these commands... */
    795 	switch (cmd) {
    796 	case DIOCSDINFO:
    797 	case DIOCWDINFO:
    798 	case DIOCWLABEL:
    799 		if ((flag & FWRITE) == 0)
    800 			return (EBADF);
    801 	}
    802 
    803 	/* Must be initialized for these... */
    804 	switch (cmd) {
    805 	case DIOCGDINFO:
    806 	case DIOCSDINFO:
    807 	case DIOCWDINFO:
    808 	case DIOCGPART:
    809 	case DIOCWLABEL:
    810 	case DIOCGDEFLABEL:
    811 	case RAIDFRAME_SHUTDOWN:
    812 	case RAIDFRAME_REWRITEPARITY:
    813 	case RAIDFRAME_GET_INFO:
    814 	case RAIDFRAME_RESET_ACCTOTALS:
    815 	case RAIDFRAME_GET_ACCTOTALS:
    816 	case RAIDFRAME_KEEP_ACCTOTALS:
    817 	case RAIDFRAME_GET_SIZE:
    818 	case RAIDFRAME_FAIL_DISK:
    819 	case RAIDFRAME_COPYBACK:
    820 	case RAIDFRAME_CHECK_RECON_STATUS:
    821 	case RAIDFRAME_GET_COMPONENT_LABEL:
    822 	case RAIDFRAME_SET_COMPONENT_LABEL:
    823 	case RAIDFRAME_ADD_HOT_SPARE:
    824 	case RAIDFRAME_REMOVE_HOT_SPARE:
    825 	case RAIDFRAME_INIT_LABELS:
    826 	case RAIDFRAME_REBUILD_IN_PLACE:
    827 	case RAIDFRAME_CHECK_PARITY:
    828 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    829 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    830 	case RAIDFRAME_SET_AUTOCONFIG:
    831 	case RAIDFRAME_SET_ROOT:
    832 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    833 			return (ENXIO);
    834 	}
    835 
    836 	switch (cmd) {
    837 
    838 		/* configure the system */
    839 	case RAIDFRAME_CONFIGURE:
    840 
    841 		if (raidPtr->valid) {
    842 			/* There is a valid RAID set running on this unit! */
    843 			printf("raid%d: Device already configured!\n",unit);
    844 		}
    845 
    846 		/* copy-in the configuration information */
    847 		/* data points to a pointer to the configuration structure */
    848 
    849 		u_cfg = *((RF_Config_t **) data);
    850 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    851 		if (k_cfg == NULL) {
    852 			return (ENOMEM);
    853 		}
    854 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    855 		    sizeof(RF_Config_t));
    856 		if (retcode) {
    857 			RF_Free(k_cfg, sizeof(RF_Config_t));
    858 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    859 				retcode));
    860 			return (retcode);
    861 		}
    862 		/* allocate a buffer for the layout-specific data, and copy it
    863 		 * in */
    864 		if (k_cfg->layoutSpecificSize) {
    865 			if (k_cfg->layoutSpecificSize > 10000) {
    866 				/* sanity check */
    867 				RF_Free(k_cfg, sizeof(RF_Config_t));
    868 				return (EINVAL);
    869 			}
    870 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    871 			    (u_char *));
    872 			if (specific_buf == NULL) {
    873 				RF_Free(k_cfg, sizeof(RF_Config_t));
    874 				return (ENOMEM);
    875 			}
    876 			retcode = copyin(k_cfg->layoutSpecific,
    877 			    (caddr_t) specific_buf,
    878 			    k_cfg->layoutSpecificSize);
    879 			if (retcode) {
    880 				RF_Free(k_cfg, sizeof(RF_Config_t));
    881 				RF_Free(specific_buf,
    882 					k_cfg->layoutSpecificSize);
    883 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    884 					retcode));
    885 				return (retcode);
    886 			}
    887 		} else
    888 			specific_buf = NULL;
    889 		k_cfg->layoutSpecific = specific_buf;
    890 
    891 		/* should do some kind of sanity check on the configuration.
    892 		 * Store the sum of all the bytes in the last byte? */
    893 
    894 		/* configure the system */
    895 
    896 		/*
    897 		 * Clear the entire RAID descriptor, just to make sure
    898 		 *  there is no stale data left in the case of a
    899 		 *  reconfiguration
    900 		 */
    901 		bzero((char *) raidPtr, sizeof(RF_Raid_t));
    902 		raidPtr->raidid = unit;
    903 
    904 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    905 
    906 		if (retcode == 0) {
    907 
    908 			/* allow this many simultaneous IO's to
    909 			   this RAID device */
    910 			raidPtr->openings = RAIDOUTSTANDING;
    911 
    912 			raidinit(raidPtr);
    913 			rf_markalldirty(raidPtr);
    914 		}
    915 		/* free the buffers.  No return code here. */
    916 		if (k_cfg->layoutSpecificSize) {
    917 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    918 		}
    919 		RF_Free(k_cfg, sizeof(RF_Config_t));
    920 
    921 		return (retcode);
    922 
    923 		/* shutdown the system */
    924 	case RAIDFRAME_SHUTDOWN:
    925 
    926 		if ((error = raidlock(rs)) != 0)
    927 			return (error);
    928 
    929 		/*
    930 		 * If somebody has a partition mounted, we shouldn't
    931 		 * shutdown.
    932 		 */
    933 
    934 		part = DISKPART(dev);
    935 		pmask = (1 << part);
    936 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    937 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    938 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    939 			raidunlock(rs);
    940 			return (EBUSY);
    941 		}
    942 
    943 		retcode = rf_Shutdown(raidPtr);
    944 
    945 		pool_destroy(&rs->sc_cbufpool);
    946 
    947 		/* It's no longer initialized... */
    948 		rs->sc_flags &= ~RAIDF_INITED;
    949 
    950 		/* Detach the disk. */
    951 		disk_detach(&rs->sc_dkdev);
    952 
    953 		raidunlock(rs);
    954 
    955 		return (retcode);
    956 	case RAIDFRAME_GET_COMPONENT_LABEL:
    957 		clabel_ptr = (RF_ComponentLabel_t **) data;
    958 		/* need to read the component label for the disk indicated
    959 		   by row,column in clabel */
    960 
    961 		/* For practice, let's get it directly fromdisk, rather
    962 		   than from the in-core copy */
    963 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    964 			   (RF_ComponentLabel_t *));
    965 		if (clabel == NULL)
    966 			return (ENOMEM);
    967 
    968 		bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
    969 
    970 		retcode = copyin( *clabel_ptr, clabel,
    971 				  sizeof(RF_ComponentLabel_t));
    972 
    973 		if (retcode) {
    974 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
    975 			return(retcode);
    976 		}
    977 
    978 		row = clabel->row;
    979 		column = clabel->column;
    980 
    981 		if ((row < 0) || (row >= raidPtr->numRow) ||
    982 		    (column < 0) || (column >= raidPtr->numCol)) {
    983 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
    984 			return(EINVAL);
    985 		}
    986 
    987 		raidread_component_label(raidPtr->Disks[row][column].dev,
    988 				raidPtr->raid_cinfo[row][column].ci_vp,
    989 				clabel );
    990 
    991 		retcode = copyout((caddr_t) clabel,
    992 				  (caddr_t) *clabel_ptr,
    993 				  sizeof(RF_ComponentLabel_t));
    994 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
    995 		return (retcode);
    996 
    997 	case RAIDFRAME_SET_COMPONENT_LABEL:
    998 		clabel = (RF_ComponentLabel_t *) data;
    999 
   1000 		/* XXX check the label for valid stuff... */
   1001 		/* Note that some things *should not* get modified --
   1002 		   the user should be re-initing the labels instead of
   1003 		   trying to patch things.
   1004 		   */
   1005 
   1006 		printf("Got component label:\n");
   1007 		printf("Version: %d\n",clabel->version);
   1008 		printf("Serial Number: %d\n",clabel->serial_number);
   1009 		printf("Mod counter: %d\n",clabel->mod_counter);
   1010 		printf("Row: %d\n", clabel->row);
   1011 		printf("Column: %d\n", clabel->column);
   1012 		printf("Num Rows: %d\n", clabel->num_rows);
   1013 		printf("Num Columns: %d\n", clabel->num_columns);
   1014 		printf("Clean: %d\n", clabel->clean);
   1015 		printf("Status: %d\n", clabel->status);
   1016 
   1017 		row = clabel->row;
   1018 		column = clabel->column;
   1019 
   1020 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1021 		    (column < 0) || (column >= raidPtr->numCol)) {
   1022 			return(EINVAL);
   1023 		}
   1024 
   1025 		/* XXX this isn't allowed to do anything for now :-) */
   1026 
   1027 		/* XXX and before it is, we need to fill in the rest
   1028 		   of the fields!?!?!?! */
   1029 #if 0
   1030 		raidwrite_component_label(
   1031                             raidPtr->Disks[row][column].dev,
   1032 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1033 			    clabel );
   1034 #endif
   1035 		return (0);
   1036 
   1037 	case RAIDFRAME_INIT_LABELS:
   1038 		clabel = (RF_ComponentLabel_t *) data;
   1039 		/*
   1040 		   we only want the serial number from
   1041 		   the above.  We get all the rest of the information
   1042 		   from the config that was used to create this RAID
   1043 		   set.
   1044 		   */
   1045 
   1046 		raidPtr->serial_number = clabel->serial_number;
   1047 
   1048 		raid_init_component_label(raidPtr, &ci_label);
   1049 		ci_label.serial_number = clabel->serial_number;
   1050 
   1051 		for(row=0;row<raidPtr->numRow;row++) {
   1052 			ci_label.row = row;
   1053 			for(column=0;column<raidPtr->numCol;column++) {
   1054 				diskPtr = &raidPtr->Disks[row][column];
   1055 				ci_label.partitionSize = diskPtr->partitionSize;
   1056 				ci_label.column = column;
   1057 				raidwrite_component_label(
   1058 				  raidPtr->Disks[row][column].dev,
   1059 				  raidPtr->raid_cinfo[row][column].ci_vp,
   1060 				  &ci_label );
   1061 			}
   1062 		}
   1063 
   1064 		return (retcode);
   1065 	case RAIDFRAME_SET_AUTOCONFIG:
   1066 		d = rf_set_autoconfig(raidPtr, *data);
   1067 		printf("New autoconfig value is: %d\n", d);
   1068 		*data = d;
   1069 		return (retcode);
   1070 
   1071 	case RAIDFRAME_SET_ROOT:
   1072 		d = rf_set_rootpartition(raidPtr, *data);
   1073 		printf("New rootpartition value is: %d\n", d);
   1074 		*data = d;
   1075 		return (retcode);
   1076 
   1077 		/* initialize all parity */
   1078 	case RAIDFRAME_REWRITEPARITY:
   1079 
   1080 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1081 			/* Parity for RAID 0 is trivially correct */
   1082 			raidPtr->parity_good = RF_RAID_CLEAN;
   1083 			return(0);
   1084 		}
   1085 
   1086 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1087 			/* Re-write is already in progress! */
   1088 			return(EINVAL);
   1089 		}
   1090 
   1091 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1092 					   rf_RewriteParityThread,
   1093 					   raidPtr,"raid_parity");
   1094 		return (retcode);
   1095 
   1096 
   1097 	case RAIDFRAME_ADD_HOT_SPARE:
   1098 		sparePtr = (RF_SingleComponent_t *) data;
   1099 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1100 		printf("Adding spare\n");
   1101 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1102 		return(retcode);
   1103 
   1104 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1105 		return(retcode);
   1106 
   1107 	case RAIDFRAME_REBUILD_IN_PLACE:
   1108 
   1109 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1110 			/* Can't do this on a RAID 0!! */
   1111 			return(EINVAL);
   1112 		}
   1113 
   1114 		if (raidPtr->recon_in_progress == 1) {
   1115 			/* a reconstruct is already in progress! */
   1116 			return(EINVAL);
   1117 		}
   1118 
   1119 		componentPtr = (RF_SingleComponent_t *) data;
   1120 		memcpy( &component, componentPtr,
   1121 			sizeof(RF_SingleComponent_t));
   1122 		row = component.row;
   1123 		column = component.column;
   1124 		printf("Rebuild: %d %d\n",row, column);
   1125 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1126 		    (column < 0) || (column >= raidPtr->numCol)) {
   1127 			return(EINVAL);
   1128 		}
   1129 
   1130 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1131 		if (rrcopy == NULL)
   1132 			return(ENOMEM);
   1133 
   1134 		rrcopy->raidPtr = (void *) raidPtr;
   1135 		rrcopy->row = row;
   1136 		rrcopy->col = column;
   1137 
   1138 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1139 					   rf_ReconstructInPlaceThread,
   1140 					   rrcopy,"raid_reconip");
   1141 		return(retcode);
   1142 
   1143 	case RAIDFRAME_GET_INFO:
   1144 		if (!raidPtr->valid)
   1145 			return (ENODEV);
   1146 		ucfgp = (RF_DeviceConfig_t **) data;
   1147 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1148 			  (RF_DeviceConfig_t *));
   1149 		if (d_cfg == NULL)
   1150 			return (ENOMEM);
   1151 		bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
   1152 		d_cfg->rows = raidPtr->numRow;
   1153 		d_cfg->cols = raidPtr->numCol;
   1154 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1155 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1156 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1157 			return (ENOMEM);
   1158 		}
   1159 		d_cfg->nspares = raidPtr->numSpare;
   1160 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1161 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1162 			return (ENOMEM);
   1163 		}
   1164 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1165 		d = 0;
   1166 		for (i = 0; i < d_cfg->rows; i++) {
   1167 			for (j = 0; j < d_cfg->cols; j++) {
   1168 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1169 				d++;
   1170 			}
   1171 		}
   1172 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1173 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1174 		}
   1175 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1176 				  sizeof(RF_DeviceConfig_t));
   1177 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1178 
   1179 		return (retcode);
   1180 
   1181 	case RAIDFRAME_CHECK_PARITY:
   1182 		*(int *) data = raidPtr->parity_good;
   1183 		return (0);
   1184 
   1185 	case RAIDFRAME_RESET_ACCTOTALS:
   1186 		bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
   1187 		return (0);
   1188 
   1189 	case RAIDFRAME_GET_ACCTOTALS:
   1190 		totals = (RF_AccTotals_t *) data;
   1191 		*totals = raidPtr->acc_totals;
   1192 		return (0);
   1193 
   1194 	case RAIDFRAME_KEEP_ACCTOTALS:
   1195 		raidPtr->keep_acc_totals = *(int *)data;
   1196 		return (0);
   1197 
   1198 	case RAIDFRAME_GET_SIZE:
   1199 		*(int *) data = raidPtr->totalSectors;
   1200 		return (0);
   1201 
   1202 		/* fail a disk & optionally start reconstruction */
   1203 	case RAIDFRAME_FAIL_DISK:
   1204 
   1205 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1206 			/* Can't do this on a RAID 0!! */
   1207 			return(EINVAL);
   1208 		}
   1209 
   1210 		rr = (struct rf_recon_req *) data;
   1211 
   1212 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1213 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1214 			return (EINVAL);
   1215 
   1216 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1217 		       unit, rr->row, rr->col);
   1218 
   1219 		/* make a copy of the recon request so that we don't rely on
   1220 		 * the user's buffer */
   1221 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1222 		if (rrcopy == NULL)
   1223 			return(ENOMEM);
   1224 		bcopy(rr, rrcopy, sizeof(*rr));
   1225 		rrcopy->raidPtr = (void *) raidPtr;
   1226 
   1227 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1228 					   rf_ReconThread,
   1229 					   rrcopy,"raid_recon");
   1230 		return (0);
   1231 
   1232 		/* invoke a copyback operation after recon on whatever disk
   1233 		 * needs it, if any */
   1234 	case RAIDFRAME_COPYBACK:
   1235 
   1236 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1237 			/* This makes no sense on a RAID 0!! */
   1238 			return(EINVAL);
   1239 		}
   1240 
   1241 		if (raidPtr->copyback_in_progress == 1) {
   1242 			/* Copyback is already in progress! */
   1243 			return(EINVAL);
   1244 		}
   1245 
   1246 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1247 					   rf_CopybackThread,
   1248 					   raidPtr,"raid_copyback");
   1249 		return (retcode);
   1250 
   1251 		/* return the percentage completion of reconstruction */
   1252 	case RAIDFRAME_CHECK_RECON_STATUS:
   1253 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1254 			/* This makes no sense on a RAID 0 */
   1255 			return(EINVAL);
   1256 		}
   1257 		row = 0; /* XXX we only consider a single row... */
   1258 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1259 			*(int *) data = 100;
   1260 		else
   1261 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1262 		return (0);
   1263 
   1264 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1265 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1266 			/* This makes no sense on a RAID 0 */
   1267 			return(EINVAL);
   1268 		}
   1269 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1270 			*(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
   1271 		} else {
   1272 			*(int *) data = 100;
   1273 		}
   1274 		return (0);
   1275 
   1276 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1277 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1278 			/* This makes no sense on a RAID 0 */
   1279 			return(EINVAL);
   1280 		}
   1281 		if (raidPtr->copyback_in_progress == 1) {
   1282 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1283 				raidPtr->Layout.numStripe;
   1284 		} else {
   1285 			*(int *) data = 100;
   1286 		}
   1287 		return (0);
   1288 
   1289 
   1290 		/* the sparetable daemon calls this to wait for the kernel to
   1291 		 * need a spare table. this ioctl does not return until a
   1292 		 * spare table is needed. XXX -- calling mpsleep here in the
   1293 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1294 		 * -- I should either compute the spare table in the kernel,
   1295 		 * or have a different -- XXX XXX -- interface (a different
   1296 		 * character device) for delivering the table     -- XXX */
   1297 #if 0
   1298 	case RAIDFRAME_SPARET_WAIT:
   1299 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1300 		while (!rf_sparet_wait_queue)
   1301 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1302 		waitreq = rf_sparet_wait_queue;
   1303 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1304 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1305 
   1306 		/* structure assignment */
   1307 		*((RF_SparetWait_t *) data) = *waitreq;
   1308 
   1309 		RF_Free(waitreq, sizeof(*waitreq));
   1310 		return (0);
   1311 
   1312 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1313 		 * code in it that will cause the dameon to exit */
   1314 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1315 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1316 		waitreq->fcol = -1;
   1317 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1318 		waitreq->next = rf_sparet_wait_queue;
   1319 		rf_sparet_wait_queue = waitreq;
   1320 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1321 		wakeup(&rf_sparet_wait_queue);
   1322 		return (0);
   1323 
   1324 		/* used by the spare table daemon to deliver a spare table
   1325 		 * into the kernel */
   1326 	case RAIDFRAME_SEND_SPARET:
   1327 
   1328 		/* install the spare table */
   1329 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1330 
   1331 		/* respond to the requestor.  the return status of the spare
   1332 		 * table installation is passed in the "fcol" field */
   1333 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1334 		waitreq->fcol = retcode;
   1335 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1336 		waitreq->next = rf_sparet_resp_queue;
   1337 		rf_sparet_resp_queue = waitreq;
   1338 		wakeup(&rf_sparet_resp_queue);
   1339 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1340 
   1341 		return (retcode);
   1342 #endif
   1343 
   1344 	default:
   1345 		break; /* fall through to the os-specific code below */
   1346 
   1347 	}
   1348 
   1349 	if (!raidPtr->valid)
   1350 		return (EINVAL);
   1351 
   1352 	/*
   1353 	 * Add support for "regular" device ioctls here.
   1354 	 */
   1355 
   1356 	switch (cmd) {
   1357 	case DIOCGDINFO:
   1358 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1359 		break;
   1360 
   1361 	case DIOCGPART:
   1362 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1363 		((struct partinfo *) data)->part =
   1364 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1365 		break;
   1366 
   1367 	case DIOCWDINFO:
   1368 	case DIOCSDINFO:
   1369 		if ((error = raidlock(rs)) != 0)
   1370 			return (error);
   1371 
   1372 		rs->sc_flags |= RAIDF_LABELLING;
   1373 
   1374 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1375 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1376 		if (error == 0) {
   1377 			if (cmd == DIOCWDINFO)
   1378 				error = writedisklabel(RAIDLABELDEV(dev),
   1379 				    raidstrategy, rs->sc_dkdev.dk_label,
   1380 				    rs->sc_dkdev.dk_cpulabel);
   1381 		}
   1382 		rs->sc_flags &= ~RAIDF_LABELLING;
   1383 
   1384 		raidunlock(rs);
   1385 
   1386 		if (error)
   1387 			return (error);
   1388 		break;
   1389 
   1390 	case DIOCWLABEL:
   1391 		if (*(int *) data != 0)
   1392 			rs->sc_flags |= RAIDF_WLABEL;
   1393 		else
   1394 			rs->sc_flags &= ~RAIDF_WLABEL;
   1395 		break;
   1396 
   1397 	case DIOCGDEFLABEL:
   1398 		raidgetdefaultlabel(raidPtr, rs,
   1399 		    (struct disklabel *) data);
   1400 		break;
   1401 
   1402 	default:
   1403 		retcode = ENOTTY;
   1404 	}
   1405 	return (retcode);
   1406 
   1407 }
   1408 
   1409 
   1410 /* raidinit -- complete the rest of the initialization for the
   1411    RAIDframe device.
        
           Looks up the softc for unit raidPtr->raidid, sets up that unit's
           pool of struct raidbuf, flags the unit RAIDF_INITED, names it
           "raid<unit>", attaches it as a disk and records the array size
           (raidPtr->totalSectors) in sc_size.
        
           raidPtr: RAID descriptor; its raidid is used to index raid_softc[]
                    and is not range-checked here.
        
           Returns nothing.  The result of pool_init() is not examined.  */
   1412 
   1413 
   1414 static void
   1415 raidinit(raidPtr)
   1416 	RF_Raid_t *raidPtr;
   1417 {
   1418 	struct raid_softc *rs;
   1419 	int     unit;
   1420 
   1421 	unit = raidPtr->raidid;
   1422 
   1423 	rs = &raid_softc[unit];
        	/* per-unit pool whose items are sizeof(struct raidbuf) */
   1424 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1425 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1426 
   1427 
   1428 	/* XXX should check return code first... */
   1429 	rs->sc_flags |= RAIDF_INITED;
   1430 
        	/* NOTE(review): size of sc_xname is not visible here; confirm it
        	 * holds "raid" plus the longest possible unit number. */
   1431 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1432 
        	/* dk_name aliases sc_xname; it is not a separate copy */
   1433 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1434 
   1435 	/* disk_attach actually creates space for the CPU disklabel, among
   1436 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1437 	 * with disklabels. */
   1438 
   1439 	disk_attach(&rs->sc_dkdev);
   1440 
   1441 	/* XXX There may be a weird interaction here between this, and
   1442 	 * protectedSectors, as used in RAIDframe.  */
   1443 
   1444 	rs->sc_size = raidPtr->totalSectors;
   1445 
   1446 }
   1447 
   1448 /* wake up the daemon & tell it to get us a spare table
   1449  * XXX
   1450  * the entries in the queues should be tagged with the raidPtr
   1451  * so that in the extremely rare case that two recons happen at once,
   1452  * we know for which device we're requesting a spare table
   1453  * XXX
   1454  *
   1455  * XXX This code is not currently used. GO
         *
         * What the code does: pushes req onto the head of
         * rf_sparet_wait_queue, wakes anything sleeping on that queue, then
         * sleeps until rf_sparet_resp_queue is non-empty.  The head response
         * is popped, its fcol field becomes the return value, and the
         * response entry is freed.
         *
         * req:     request to hand to the daemon.  The parameter is reused to
         *          hold the popped response, so the pointer freed at the end
         *          is the response entry, not the caller's request.
         * returns: the fcol value of the response entry.
   1456  */
   1457 int
   1458 rf_GetSpareTableFromDaemon(req)
   1459 	RF_SparetWait_t *req;
   1460 {
   1461 	int     retcode;
   1462 
   1463 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1464 	req->next = rf_sparet_wait_queue;
   1465 	rf_sparet_wait_queue = req;
   1466 	wakeup(&rf_sparet_wait_queue);
   1467 
   1468 	/* mpsleep unlocks the mutex */
        	/* NOTE(review): the call below is tsleep(), not mpsleep(), and it
        	 * is not passed rf_sparet_wait_mutex -- it looks like the mutex
        	 * stays held across the sleep.  Confirm before enabling this
        	 * code. */
   1469 	while (!rf_sparet_resp_queue) {
   1470 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1471 		    "raidframe getsparetable", 0);
   1472 	}
        	/* take the first response off the response queue */
   1473 	req = rf_sparet_resp_queue;
   1474 	rf_sparet_resp_queue = req->next;
   1475 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1476 
   1477 	retcode = req->fcol;
   1478 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1479 					 * alloc'd */
   1480 	return (retcode);
   1481 }
   1482 
   1483 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1484  * bp & passes it down.
   1485  * any calls originating in the kernel must use non-blocking I/O
   1486  * do some extra sanity checking to return "appropriate" error values for
   1487  * certain conditions (to make some standard utilities work)
   1488  *
   1489  * Formerly known as: rf_DoAccessKernel
         *
         * Outline:
         *  - if raidPtr->numNewFailures is non-zero, update the component
         *    labels and consume one pending failure;
         *  - while raidPtr->openings > 0, take the first buf off
         *    rs->buf_queue (return when the queue is empty), translate its
         *    block number to an array address, validate the range and size,
         *    consume one opening and hand the request to rf_DoAccess().
         *
         * Rejected bufs are completed with biodone() and B_ERROR set:
         * ENOSPC when the range ends past totalSectors or the sum wraps,
         * EINVAL when b_bcount has bits set in raidPtr->sectorMask.
         *
         * raidPtr->mutex is held only around tests/updates of the counters;
         * it is released before any buf is examined or issued.
         *
         * raidPtr: RAID descriptor whose queued I/O should be started.
         * Returns nothing; the value returned by rf_DoAccess() is stored in
         * retcode but not examined.
   1490  */
   1491 void
   1492 raidstart(raidPtr)
   1493 	RF_Raid_t *raidPtr;
   1494 {
   1495 	RF_SectorCount_t num_blocks, pb, sum;
   1496 	RF_RaidAddr_t raid_addr;
   1497 	int     retcode;
   1498 	struct partition *pp;
   1499 	daddr_t blocknum;
   1500 	int     unit;
   1501 	struct raid_softc *rs;
   1502 	int     do_async;
   1503 	struct buf *bp;
   1504 
   1505 	unit = raidPtr->raidid;
   1506 	rs = &raid_softc[unit];
   1507 
   1508 	/* quick check to see if anything has died recently */
   1509 	RF_LOCK_MUTEX(raidPtr->mutex);
   1510 	if (raidPtr->numNewFailures > 0) {
   1511 		rf_update_component_labels(raidPtr);
   1512 		raidPtr->numNewFailures--;
   1513 	}
   1514 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1515 
   1516 	/* Check to see if we're at the limit... */
        	/* The loop condition is evaluated with the mutex held; every path
        	 * that reaches the bottom of the loop or a `continue' re-takes the
        	 * mutex first. */
   1517 	RF_LOCK_MUTEX(raidPtr->mutex);
   1518 	while (raidPtr->openings > 0) {
   1519 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1520 
   1521 		/* get the next item, if any, from the queue */
   1522 		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
   1523 			/* nothing more to do */
        			/* (mutex was already dropped above) */
   1524 			return;
   1525 		}
   1526 		BUFQ_REMOVE(&rs->buf_queue, bp);
   1527 
   1528 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1529 		 * partition.. Need to make it absolute to the underlying
   1530 		 * device.. */
   1531 
   1532 		blocknum = bp->b_blkno;
   1533 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1534 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1535 			blocknum += pp->p_offset;
   1536 		}
   1537 
   1538 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1539 			    (int) blocknum));
   1540 
   1541 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1542 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1543 
   1544 		/* *THIS* is where we adjust what block we're going to...
   1545 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1546 		raid_addr = blocknum;
   1547 
        		/* num_blocks: whole sectors in the request; pb: 1 if b_bcount
        		 * has bits set in sectorMask, else 0; sum: first sector past
        		 * the end of the request. */
   1548 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1549 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1550 		sum = raid_addr + num_blocks + pb;
        		/* the `1 ||' makes this debug print unconditional (subject to
        		 * db1_printf itself) */
   1551 		if (1 || rf_debugKernelAccess) {
   1552 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1553 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1554 				    (int) pb, (int) bp->b_resid));
   1555 		}
        		/* past the end of the array, or the addition wrapped */
   1556 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1557 		    || (sum < num_blocks) || (sum < pb)) {
   1558 			bp->b_error = ENOSPC;
   1559 			bp->b_flags |= B_ERROR;
   1560 			bp->b_resid = bp->b_bcount;
   1561 			biodone(bp);
   1562 			RF_LOCK_MUTEX(raidPtr->mutex);
   1563 			continue;
   1564 		}
   1565 		/*
   1566 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1567 		 */
   1568 
        		/* same test that set pb above: reject the request */
   1569 		if (bp->b_bcount & raidPtr->sectorMask) {
   1570 			bp->b_error = EINVAL;
   1571 			bp->b_flags |= B_ERROR;
   1572 			bp->b_resid = bp->b_bcount;
   1573 			biodone(bp);
   1574 			RF_LOCK_MUTEX(raidPtr->mutex);
   1575 			continue;
   1576 
   1577 		}
   1578 		db1_printf(("Calling DoAccess..\n"));
   1579 
   1580 
        		/* consume one opening for the request about to be issued */
   1581 		RF_LOCK_MUTEX(raidPtr->mutex);
   1582 		raidPtr->openings--;
   1583 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1584 
   1585 		/*
   1586 		 * Everything is async.
   1587 		 */
   1588 		do_async = 1;
   1589 
   1590 		/* don't ever condition on bp->b_flags & B_WRITE.
   1591 		 * always condition on B_READ instead */
   1592 
   1593 		/* XXX we're still at splbio() here... do we *really*
   1594 		   need to be? */
   1595 
   1596 
        		/* retcode is assigned but never read afterwards */
   1597 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1598 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1599 				      do_async, raid_addr, num_blocks,
   1600 				      bp->b_un.b_addr, bp, NULL, NULL,
   1601 				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
   1602 
   1603 
   1604 		RF_LOCK_MUTEX(raidPtr->mutex);
   1605 	}
   1606 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1607 }
   1608 
   1609 
   1610 
   1611 
   1612 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
        /*
         * queue: disk queue the request is issued on; also stored in
         *        req->queue so the completion callback can find it.
         * req:   request to dispatch.  req->type selects the action:
         *          RF_IO_TYPE_NOP         - print a message, bump
         *                                   queue->numOutstanding and call
         *                                   KernelWakeupFunc() directly;
         *          RF_IO_TYPE_READ/WRITE  - fill in the raidbuf's struct buf
         *                                   with InitBP() and pass it to
         *                                   VOP_STRATEGY(), with
         *                                   KernelWakeupFunc as b_iodone;
         *          anything else          - panic.
         *
         * Panics if the unit number is >= numraid.  Always returns 0.
         */
   1613 
   1614 int
   1615 rf_DispatchKernelIO(queue, req)
   1616 	RF_DiskQueue_t *queue;
   1617 	RF_DiskQueueData_t *req;
   1618 {
   1619 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1620 	struct buf *bp;
   1621 	struct raidbuf *raidbp = NULL;
   1622 	struct raid_softc *rs;
   1623 	int     unit;
   1624 	int s;
   1625 
        	/* s is only consumed by the commented-out splbio()/splx() pair */
   1626 	s=0;
   1627 	/* s = splbio();*/ /* want to test this */
   1628 	/* XXX along with the vnode, we also need the softc associated with
   1629 	 * this device.. */
   1630 
   1631 	req->queue = queue;
   1632 
   1633 	unit = queue->raidPtr->raidid;
   1634 
   1635 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1636 
   1637 	if (unit >= numraid) {
   1638 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1639 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1640 	}
   1641 	rs = &raid_softc[unit];
   1642 
   1643 	/* XXX is this the right place? */
   1644 	disk_busy(&rs->sc_dkdev);
   1645 
   1646 	bp = req->bp;
   1647 #if 1
   1648 	/* XXX when there is a physical disk failure, someone is passing us a
   1649 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1650 	 * without taking a performance hit... (not sure where the real bug
   1651 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1652 
   1653 	if (bp->b_flags & B_ERROR) {
   1654 		bp->b_flags &= ~B_ERROR;
   1655 	}
   1656 	if (bp->b_error != 0) {
   1657 		bp->b_error = 0;
   1658 	}
   1659 #endif
        	/* NOTE(review): raidbp is dereferenced below without a NULL
        	 * check; whether RAIDGETBUF can fail is not visible here. */
   1660 	raidbp = RAIDGETBUF(rs);
   1661 
   1662 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1663 
   1664 	/*
   1665 	 * context for raidiodone
   1666 	 */
   1667 	raidbp->rf_obp = bp;
   1668 	raidbp->req = req;
   1669 
   1670 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1671 
   1672 	switch (req->type) {
   1673 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1674 		/* XXX need to do something extra here.. */
   1675 		/* I'm leaving this in, as I've never actually seen it used,
   1676 		 * and I'd like folks to report it... GO */
   1677 		printf(("WAKEUP CALLED\n"));
   1678 		queue->numOutstanding++;
   1679 
   1680 		/* XXX need to glue the original buffer into this??  */
   1681 
        		/* no I/O is issued: the completion path is run synchronously */
   1682 		KernelWakeupFunc(&raidbp->rf_buf);
   1683 		break;
   1684 
   1685 	case RF_IO_TYPE_READ:
   1686 	case RF_IO_TYPE_WRITE:
   1687 
   1688 		if (req->tracerec) {
   1689 			RF_ETIMER_START(req->tracerec->timer);
   1690 		}
        		/* the component buf inherits the original buf's flags, OR'd
        		 * with the read/write bit chosen from req->type */
   1691 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1692 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1693 		    req->sectorOffset, req->numSector,
   1694 		    req->buf, KernelWakeupFunc, (void *) req,
   1695 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1696 
   1697 		if (rf_debugKernelAccess) {
   1698 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1699 				(long) bp->b_blkno));
   1700 		}
   1701 		queue->numOutstanding++;
   1702 		queue->last_deq_sector = req->sectorOffset;
   1703 		/* acc wouldn't have been let in if there were any pending
   1704 		 * reqs at any other priority */
   1705 		queue->curPriority = req->priority;
   1706 
   1707 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1708 			req->type, unit, queue->row, queue->col));
   1709 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1710 			(int) req->sectorOffset, (int) req->numSector,
   1711 			(int) (req->numSector <<
   1712 			    queue->raidPtr->logBytesPerSector),
   1713 			(int) queue->raidPtr->logBytesPerSector));
        		/* writes are counted against the vnode before being issued */
   1714 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1715 			raidbp->rf_buf.b_vp->v_numoutput++;
   1716 		}
   1717 		VOP_STRATEGY(&raidbp->rf_buf);
   1718 
   1719 		break;
   1720 
   1721 	default:
   1722 		panic("bad req->type in rf_DispatchKernelIO");
   1723 	}
   1724 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1725 	/* splx(s); */ /* want to test this */
   1726 	return (0);
   1727 }
   1728 /* this is the callback function associated with a I/O invoked from
   1729    kernel code.
        
           vbp: the struct buf that completed.  It is cast straight to a
                struct raidbuf * (presumably rf_buf is that structure's first
                member -- confirm against the struct raidbuf definition).
        
           Runs between splbio() and splx().  In order it:
            - recovers the request, the original buf and the disk queue from
              the raidbuf;
            - copies the error state, b_resid and b_bcount to the original
              buf;
            - updates the trace record, if the request has one;
            - on error, marks the component failed (only if it was still
              rf_ds_optimal) and bumps the failure counters;
            - returns the raidbuf with RAIDPUTBUF;
            - calls rf_DiskIOComplete() and the request's CompleteFunc.
   1730  */
   1731 static void
   1732 KernelWakeupFunc(vbp)
   1733 	struct buf *vbp;
   1734 {
   1735 	RF_DiskQueueData_t *req = NULL;
   1736 	RF_DiskQueue_t *queue;
   1737 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1738 	struct buf *bp;
   1739 	struct raid_softc *rs;
   1740 	int     unit;
   1741 	register int s;
   1742 
   1743 	s = splbio();
   1744 	db1_printf(("recovering the request queue:\n"));
   1745 	req = raidbp->req;
   1746 
        	/* bp is the original buf recorded by rf_DispatchKernelIO */
   1747 	bp = raidbp->rf_obp;
   1748 
   1749 	queue = (RF_DiskQueue_t *) req->queue;
   1750 
        	/* propagate an error; EIO is used when B_ERROR is set but
        	 * b_error is 0 */
   1751 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1752 		bp->b_flags |= B_ERROR;
   1753 		bp->b_error = raidbp->rf_buf.b_error ?
   1754 		    raidbp->rf_buf.b_error : EIO;
   1755 	}
   1756 
   1757 	/* XXX methinks this could be wrong... */
   1758 #if 1
   1759 	bp->b_resid = raidbp->rf_buf.b_resid;
   1760 #endif
   1761 
        	/* the same timer value is added to both diskwait_us and
        	 * phys_io_us */
   1762 	if (req->tracerec) {
   1763 		RF_ETIMER_STOP(req->tracerec->timer);
   1764 		RF_ETIMER_EVAL(req->tracerec->timer);
   1765 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1766 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1767 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1768 		req->tracerec->num_phys_ios++;
   1769 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1770 	}
   1771 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1772 
   1773 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1774 
   1775 
   1776 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1777 	 * ballistic, and mark the component as hosed... */
   1778 
   1779 	if (bp->b_flags & B_ERROR) {
   1780 		/* Mark the disk as dead */
   1781 		/* but only mark it once... */
   1782 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1783 		    rf_ds_optimal) {
   1784 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1785 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1786 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1787 			    rf_ds_failed;
   1788 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1789 			queue->raidPtr->numFailures++;
        			/* numNewFailures is what raidstart() tests before
        			 * updating the component labels */
   1790 			queue->raidPtr->numNewFailures++;
   1791 			/* XXX here we should bump the version number for each component, and write that data out */
   1792 		} else {	/* Disk is already dead... */
   1793 			/* printf("Disk already marked as dead!\n"); */
   1794 		}
   1795 
   1796 	}
   1797 
   1798 	rs = &raid_softc[unit];
        	/* raidbp is handed back here and is not touched again below */
   1799 	RAIDPUTBUF(rs, raidbp);
   1800 
   1801 
        	/* NOTE(review): rf_DispatchKernelIO calls disk_busy() for every
        	 * request, but disk_unbusy() here only runs when b_resid == 0 --
        	 * confirm the busy accounting stays balanced on short or failed
        	 * transfers. */
   1802 	if (bp->b_resid == 0) {
   1803 		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
   1804 		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
   1805 	}
   1806 
        	/* both completion hooks receive 1 on error, 0 otherwise */
   1807 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1808 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1809 
   1810 	splx(s);
   1811 }
   1812 
   1813 
   1814 
   1815 /*
   1816  * initialize a buf structure for doing an I/O in the kernel.
         *
         * bp:                buf to fill in.
         * b_vp:              vnode stored in bp->b_vp.
         * rw_flag:           flag bits OR'd with B_CALL to form bp->b_flags.
         * dev:               device stored in bp->b_dev.
         * startSect:         starting sector, stored unscaled in bp->b_blkno.
         * numSect:           sector count; the byte count is
         *                    numSect << logBytesPerSector.
         * buf:               data address stored in bp->b_un.b_addr.
         * cbFunc:            completion callback stored in bp->b_iodone.
         * cbArg:             accepted but not referenced in this function.
         * logBytesPerSector: shift used to convert sectors to bytes.
         * b_proc:            process stored in bp->b_proc.
         *
         * Panics if the resulting byte count is zero.  No return value.
   1817  */
   1818 static void
   1819 InitBP(
   1820     struct buf * bp,
   1821     struct vnode * b_vp,
   1822     unsigned rw_flag,
   1823     dev_t dev,
   1824     RF_SectorNum_t startSect,
   1825     RF_SectorCount_t numSect,
   1826     caddr_t buf,
   1827     void (*cbFunc) (struct buf *),
   1828     void *cbArg,
   1829     int logBytesPerSector,
   1830     struct proc * b_proc)
   1831 {
   1832 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1833 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1834 	bp->b_bcount = numSect << logBytesPerSector;
   1835 	bp->b_bufsize = bp->b_bcount;
   1836 	bp->b_error = 0;
   1837 	bp->b_dev = dev;
   1838 	bp->b_un.b_addr = buf;
   1839 	bp->b_blkno = startSect;
        	/* the whole transfer starts out as outstanding */
   1840 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
        	/* checked only after the fields above have been written */
   1841 	if (bp->b_bcount == 0) {
   1842 		panic("bp->b_bcount is zero in InitBP!!\n");
   1843 	}
   1844 	bp->b_proc = b_proc;
   1845 	bp->b_iodone = cbFunc;
   1846 	bp->b_vp = b_vp;
   1847 
   1848 }
   1849 
   1850 static void
   1851 raidgetdefaultlabel(raidPtr, rs, lp)
   1852 	RF_Raid_t *raidPtr;
   1853 	struct raid_softc *rs;
   1854 	struct disklabel *lp;
   1855 {
   1856 	db1_printf(("Building a default label...\n"));
   1857 	bzero(lp, sizeof(*lp));
   1858 
   1859 	/* fabricate a label... */
   1860 	lp->d_secperunit = raidPtr->totalSectors;
   1861 	lp->d_secsize = raidPtr->bytesPerSector;
   1862 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1863 	lp->d_ntracks = 1;
   1864 	lp->d_ncylinders = raidPtr->totalSectors /
   1865 		(lp->d_nsectors * lp->d_ntracks);
   1866 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1867 
   1868 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1869 	lp->d_type = DTYPE_RAID;
   1870 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1871 	lp->d_rpm = 3600;
   1872 	lp->d_interleave = 1;
   1873 	lp->d_flags = 0;
   1874 
   1875 	lp->d_partitions[RAW_PART].p_offset = 0;
   1876 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1877 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1878 	lp->d_npartitions = RAW_PART + 1;
   1879 
   1880 	lp->d_magic = DISKMAGIC;
   1881 	lp->d_magic2 = DISKMAGIC;
   1882 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1883 
   1884 }
   1885 /*
   1886  * Read the disklabel from the raid device.  If one is not present, fake one
   1887  * up.
   1888  */
   1889 static void
   1890 raidgetdisklabel(dev)
   1891 	dev_t   dev;
   1892 {
   1893 	int     unit = raidunit(dev);
   1894 	struct raid_softc *rs = &raid_softc[unit];
   1895 	char   *errstring;
   1896 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1897 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1898 	RF_Raid_t *raidPtr;
   1899 
   1900 	db1_printf(("Getting the disklabel...\n"));
   1901 
   1902 	bzero(clp, sizeof(*clp));
   1903 
   1904 	raidPtr = raidPtrs[unit];
   1905 
   1906 	raidgetdefaultlabel(raidPtr, rs, lp);
   1907 
   1908 	/*
   1909 	 * Call the generic disklabel extraction routine.
   1910 	 */
   1911 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1912 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1913 	if (errstring)
   1914 		raidmakedisklabel(rs);
   1915 	else {
   1916 		int     i;
   1917 		struct partition *pp;
   1918 
   1919 		/*
   1920 		 * Sanity check whether the found disklabel is valid.
   1921 		 *
   1922 		 * This is necessary since total size of the raid device
   1923 		 * may vary when an interleave is changed even though exactly
   1924 		 * same componets are used, and old disklabel may used
   1925 		 * if that is found.
   1926 		 */
   1927 		if (lp->d_secperunit != rs->sc_size)
   1928 			printf("WARNING: %s: "
   1929 			    "total sector size in disklabel (%d) != "
   1930 			    "the size of raid (%ld)\n", rs->sc_xname,
   1931 			    lp->d_secperunit, (long) rs->sc_size);
   1932 		for (i = 0; i < lp->d_npartitions; i++) {
   1933 			pp = &lp->d_partitions[i];
   1934 			if (pp->p_offset + pp->p_size > rs->sc_size)
   1935 				printf("WARNING: %s: end of partition `%c' "
   1936 				    "exceeds the size of raid (%ld)\n",
   1937 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   1938 		}
   1939 	}
   1940 
   1941 }
   1942 /*
   1943  * Take care of things one might want to take care of in the event
   1944  * that a disklabel isn't present.
   1945  */
   1946 static void
   1947 raidmakedisklabel(rs)
   1948 	struct raid_softc *rs;
   1949 {
   1950 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1951 	db1_printf(("Making a label..\n"));
   1952 
   1953 	/*
   1954 	 * For historical reasons, if there's no disklabel present
   1955 	 * the raw partition must be marked FS_BSDFFS.
   1956 	 */
   1957 
   1958 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1959 
   1960 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1961 
   1962 	lp->d_checksum = dkcksum(lp);
   1963 }
   1964 /*
   1965  * Lookup the provided name in the filesystem.  If the file exists,
   1966  * is a valid block device, and isn't being used by anyone else,
   1967  * set *vpp to the file's vnode.
   1968  * You'll find the original of this in ccd.c
   1969  */
   1970 int
   1971 raidlookup(path, p, vpp)
   1972 	char   *path;
   1973 	struct proc *p;
   1974 	struct vnode **vpp;	/* result */
   1975 {
   1976 	struct nameidata nd;
   1977 	struct vnode *vp;
   1978 	struct vattr va;
   1979 	int     error;
   1980 
   1981 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   1982 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   1983 #ifdef DEBUG
   1984 		printf("RAIDframe: vn_open returned %d\n", error);
   1985 #endif
   1986 		return (error);
   1987 	}
   1988 	vp = nd.ni_vp;
   1989 	if (vp->v_usecount > 1) {
   1990 		VOP_UNLOCK(vp, 0);
   1991 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1992 		return (EBUSY);
   1993 	}
   1994 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   1995 		VOP_UNLOCK(vp, 0);
   1996 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1997 		return (error);
   1998 	}
   1999 	/* XXX: eventually we should handle VREG, too. */
   2000 	if (va.va_type != VBLK) {
   2001 		VOP_UNLOCK(vp, 0);
   2002 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2003 		return (ENOTBLK);
   2004 	}
   2005 	VOP_UNLOCK(vp, 0);
   2006 	*vpp = vp;
   2007 	return (0);
   2008 }
   2009 /*
   2010  * Wait interruptibly for an exclusive lock.
   2011  *
   2012  * XXX
   2013  * Several drivers do this; it should be abstracted and made MP-safe.
   2014  * (Hmm... where have we seen this warning before :->  GO )
   2015  */
   2016 static int
   2017 raidlock(rs)
   2018 	struct raid_softc *rs;
   2019 {
   2020 	int     error;
   2021 
   2022 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2023 		rs->sc_flags |= RAIDF_WANTED;
   2024 		if ((error =
   2025 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2026 			return (error);
   2027 	}
   2028 	rs->sc_flags |= RAIDF_LOCKED;
   2029 	return (0);
   2030 }
   2031 /*
   2032  * Unlock and wake up any waiters.
   2033  */
   2034 static void
   2035 raidunlock(rs)
   2036 	struct raid_softc *rs;
   2037 {
   2038 
   2039 	rs->sc_flags &= ~RAIDF_LOCKED;
   2040 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2041 		rs->sc_flags &= ~RAIDF_WANTED;
   2042 		wakeup(rs);
   2043 	}
   2044 }
   2045 
   2046 
   2047 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2048 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2049 
   2050 int
   2051 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2052 {
   2053 	RF_ComponentLabel_t clabel;
   2054 	raidread_component_label(dev, b_vp, &clabel);
   2055 	clabel.mod_counter = mod_counter;
   2056 	clabel.clean = RF_RAID_CLEAN;
   2057 	raidwrite_component_label(dev, b_vp, &clabel);
   2058 	return(0);
   2059 }
   2060 
   2061 
   2062 int
   2063 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2064 {
   2065 	RF_ComponentLabel_t clabel;
   2066 	raidread_component_label(dev, b_vp, &clabel);
   2067 	clabel.mod_counter = mod_counter;
   2068 	clabel.clean = RF_RAID_DIRTY;
   2069 	raidwrite_component_label(dev, b_vp, &clabel);
   2070 	return(0);
   2071 }
   2072 
   2073 /* ARGSUSED */
   2074 int
   2075 raidread_component_label(dev, b_vp, clabel)
   2076 	dev_t dev;
   2077 	struct vnode *b_vp;
   2078 	RF_ComponentLabel_t *clabel;
   2079 {
   2080 	struct buf *bp;
   2081 	int error;
   2082 
   2083 	/* XXX should probably ensure that we don't try to do this if
   2084 	   someone has changed rf_protected_sectors. */
   2085 
   2086 	/* get a block of the appropriate size... */
   2087 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2088 	bp->b_dev = dev;
   2089 
   2090 	/* get our ducks in a row for the read */
   2091 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2092 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2093 	bp->b_flags = B_BUSY | B_READ;
   2094  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2095 
   2096 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2097 
   2098 	error = biowait(bp);
   2099 
   2100 	if (!error) {
   2101 		memcpy(clabel, bp->b_un.b_addr,
   2102 		       sizeof(RF_ComponentLabel_t));
   2103 #if 0
   2104 		print_component_label( clabel );
   2105 #endif
   2106         } else {
   2107 #if 0
   2108 		printf("Failed to read RAID component label!\n");
   2109 #endif
   2110 	}
   2111 
   2112         bp->b_flags = B_INVAL | B_AGE;
   2113 	brelse(bp);
   2114 	return(error);
   2115 }
   2116 /* ARGSUSED */
   2117 int
   2118 raidwrite_component_label(dev, b_vp, clabel)
   2119 	dev_t dev;
   2120 	struct vnode *b_vp;
   2121 	RF_ComponentLabel_t *clabel;
   2122 {
   2123 	struct buf *bp;
   2124 	int error;
   2125 
   2126 	/* get a block of the appropriate size... */
   2127 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2128 	bp->b_dev = dev;
   2129 
   2130 	/* get our ducks in a row for the write */
   2131 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2132 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2133 	bp->b_flags = B_BUSY | B_WRITE;
   2134  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2135 
   2136 	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
   2137 
   2138 	memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
   2139 
   2140 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2141 	error = biowait(bp);
   2142         bp->b_flags = B_INVAL | B_AGE;
   2143 	brelse(bp);
   2144 	if (error) {
   2145 #if 1
   2146 		printf("Failed to write RAID component info!\n");
   2147 #endif
   2148 	}
   2149 
   2150 	return(error);
   2151 }
   2152 
   2153 void
   2154 rf_markalldirty( raidPtr )
   2155 	RF_Raid_t *raidPtr;
   2156 {
   2157 	RF_ComponentLabel_t clabel;
   2158 	int r,c;
   2159 
   2160 	raidPtr->mod_counter++;
   2161 	for (r = 0; r < raidPtr->numRow; r++) {
   2162 		for (c = 0; c < raidPtr->numCol; c++) {
   2163 			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
   2164 				raidread_component_label(
   2165 					raidPtr->Disks[r][c].dev,
   2166 					raidPtr->raid_cinfo[r][c].ci_vp,
   2167 					&clabel);
   2168 				if (clabel.status == rf_ds_spared) {
   2169 					/* XXX do something special...
   2170 					 but whatever you do, don't
   2171 					 try to access it!! */
   2172 				} else {
   2173 #if 0
   2174 				clabel.status =
   2175 					raidPtr->Disks[r][c].status;
   2176 				raidwrite_component_label(
   2177 					raidPtr->Disks[r][c].dev,
   2178 					raidPtr->raid_cinfo[r][c].ci_vp,
   2179 					&clabel);
   2180 #endif
   2181 				raidmarkdirty(
   2182 				       raidPtr->Disks[r][c].dev,
   2183 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2184 				       raidPtr->mod_counter);
   2185 				}
   2186 			}
   2187 		}
   2188 	}
   2189 	/* printf("Component labels marked dirty.\n"); */
   2190 #if 0
   2191 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2192 		sparecol = raidPtr->numCol + c;
   2193 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2194 			/*
   2195 
   2196 			   XXX this is where we get fancy and map this spare
   2197 			   into it's correct spot in the array.
   2198 
   2199 			 */
   2200 			/*
   2201 
   2202 			   we claim this disk is "optimal" if it's
   2203 			   rf_ds_used_spare, as that means it should be
   2204 			   directly substitutable for the disk it replaced.
   2205 			   We note that too...
   2206 
   2207 			 */
   2208 
   2209 			for(i=0;i<raidPtr->numRow;i++) {
   2210 				for(j=0;j<raidPtr->numCol;j++) {
   2211 					if ((raidPtr->Disks[i][j].spareRow ==
   2212 					     r) &&
   2213 					    (raidPtr->Disks[i][j].spareCol ==
   2214 					     sparecol)) {
   2215 						srow = r;
   2216 						scol = sparecol;
   2217 						break;
   2218 					}
   2219 				}
   2220 			}
   2221 
   2222 			raidread_component_label(
   2223 				      raidPtr->Disks[r][sparecol].dev,
   2224 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2225 				      &clabel);
   2226 			/* make sure status is noted */
   2227 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2228 			clabel.mod_counter = raidPtr->mod_counter;
   2229 			clabel.serial_number = raidPtr->serial_number;
   2230 			clabel.row = srow;
   2231 			clabel.column = scol;
   2232 			clabel.num_rows = raidPtr->numRow;
   2233 			clabel.num_columns = raidPtr->numCol;
   2234 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2235 			clabel.status = rf_ds_optimal;
   2236 			raidwrite_component_label(
   2237 				      raidPtr->Disks[r][sparecol].dev,
   2238 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2239 				      &clabel);
   2240 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2241 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2242 		}
   2243 	}
   2244 
   2245 #endif
   2246 }
   2247 
   2248 
   2249 void
   2250 rf_update_component_labels( raidPtr )
   2251 	RF_Raid_t *raidPtr;
   2252 {
   2253 	RF_ComponentLabel_t clabel;
   2254 	int sparecol;
   2255 	int r,c;
   2256 	int i,j;
   2257 	int srow, scol;
   2258 
   2259 	srow = -1;
   2260 	scol = -1;
   2261 
   2262 	/* XXX should do extra checks to make sure things really are clean,
   2263 	   rather than blindly setting the clean bit... */
   2264 
   2265 	raidPtr->mod_counter++;
   2266 
   2267 	for (r = 0; r < raidPtr->numRow; r++) {
   2268 		for (c = 0; c < raidPtr->numCol; c++) {
   2269 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2270 				raidread_component_label(
   2271 					raidPtr->Disks[r][c].dev,
   2272 					raidPtr->raid_cinfo[r][c].ci_vp,
   2273 					&clabel);
   2274 				/* make sure status is noted */
   2275 				clabel.status = rf_ds_optimal;
   2276 				/* bump the counter */
   2277 				clabel.mod_counter = raidPtr->mod_counter;
   2278 
   2279 				raidwrite_component_label(
   2280 					raidPtr->Disks[r][c].dev,
   2281 					raidPtr->raid_cinfo[r][c].ci_vp,
   2282 					&clabel);
   2283 			}
   2284 			/* else we don't touch it.. */
   2285 		}
   2286 	}
   2287 
   2288 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2289 		sparecol = raidPtr->numCol + c;
   2290 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2291 			/*
   2292 
   2293 			   we claim this disk is "optimal" if it's
   2294 			   rf_ds_used_spare, as that means it should be
   2295 			   directly substitutable for the disk it replaced.
   2296 			   We note that too...
   2297 
   2298 			 */
   2299 
   2300 			for(i=0;i<raidPtr->numRow;i++) {
   2301 				for(j=0;j<raidPtr->numCol;j++) {
   2302 					if ((raidPtr->Disks[i][j].spareRow ==
   2303 					     0) &&
   2304 					    (raidPtr->Disks[i][j].spareCol ==
   2305 					     sparecol)) {
   2306 						srow = i;
   2307 						scol = j;
   2308 						break;
   2309 					}
   2310 				}
   2311 			}
   2312 
   2313 			/* XXX shouldn't *really* need this... */
   2314 			raidread_component_label(
   2315 				      raidPtr->Disks[0][sparecol].dev,
   2316 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2317 				      &clabel);
   2318 			/* make sure status is noted */
   2319 
   2320 			raid_init_component_label(raidPtr, &clabel);
   2321 
   2322 			clabel.mod_counter = raidPtr->mod_counter;
   2323 			clabel.row = srow;
   2324 			clabel.column = scol;
   2325 			clabel.status = rf_ds_optimal;
   2326 
   2327 			raidwrite_component_label(
   2328 				      raidPtr->Disks[0][sparecol].dev,
   2329 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2330 				      &clabel);
   2331 		}
   2332 	}
   2333 	/* 	printf("Component labels updated\n"); */
   2334 }
   2335 
   2336 
   2337 void
   2338 rf_final_update_component_labels( raidPtr )
   2339 	RF_Raid_t *raidPtr;
   2340 {
   2341 	RF_ComponentLabel_t clabel;
   2342 	int sparecol;
   2343 	int r,c;
   2344 	int i,j;
   2345 	int srow, scol;
   2346 
   2347 	srow = -1;
   2348 	scol = -1;
   2349 
   2350 	/* XXX should do extra checks to make sure things really are clean,
   2351 	   rather than blindly setting the clean bit... */
   2352 
   2353 	raidPtr->mod_counter++;
   2354 
   2355 	for (r = 0; r < raidPtr->numRow; r++) {
   2356 		for (c = 0; c < raidPtr->numCol; c++) {
   2357 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2358 				raidread_component_label(
   2359 					raidPtr->Disks[r][c].dev,
   2360 					raidPtr->raid_cinfo[r][c].ci_vp,
   2361 					&clabel);
   2362 				/* make sure status is noted */
   2363 				clabel.status = rf_ds_optimal;
   2364 				/* bump the counter */
   2365 				clabel.mod_counter = raidPtr->mod_counter;
   2366 
   2367 				raidwrite_component_label(
   2368 					raidPtr->Disks[r][c].dev,
   2369 					raidPtr->raid_cinfo[r][c].ci_vp,
   2370 					&clabel);
   2371 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2372 					raidmarkclean(
   2373 					      raidPtr->Disks[r][c].dev,
   2374 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2375 					      raidPtr->mod_counter);
   2376 				}
   2377 			}
   2378 			/* else we don't touch it.. */
   2379 		}
   2380 	}
   2381 
   2382 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2383 		sparecol = raidPtr->numCol + c;
   2384 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2385 			/*
   2386 
   2387 			   we claim this disk is "optimal" if it's
   2388 			   rf_ds_used_spare, as that means it should be
   2389 			   directly substitutable for the disk it replaced.
   2390 			   We note that too...
   2391 
   2392 			 */
   2393 
   2394 			for(i=0;i<raidPtr->numRow;i++) {
   2395 				for(j=0;j<raidPtr->numCol;j++) {
   2396 					if ((raidPtr->Disks[i][j].spareRow ==
   2397 					     0) &&
   2398 					    (raidPtr->Disks[i][j].spareCol ==
   2399 					     sparecol)) {
   2400 						srow = i;
   2401 						scol = j;
   2402 						break;
   2403 					}
   2404 				}
   2405 			}
   2406 
   2407 			/* XXX shouldn't *really* need this... */
   2408 			raidread_component_label(
   2409 				      raidPtr->Disks[0][sparecol].dev,
   2410 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2411 				      &clabel);
   2412 			/* make sure status is noted */
   2413 
   2414 			raid_init_component_label(raidPtr, &clabel);
   2415 
   2416 			clabel.mod_counter = raidPtr->mod_counter;
   2417 			clabel.row = srow;
   2418 			clabel.column = scol;
   2419 			clabel.status = rf_ds_optimal;
   2420 
   2421 			raidwrite_component_label(
   2422 				      raidPtr->Disks[0][sparecol].dev,
   2423 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2424 				      &clabel);
   2425 			if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2426 				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2427 			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2428 					       raidPtr->mod_counter);
   2429 			}
   2430 		}
   2431 	}
   2432 	/* 	printf("Component labels updated\n"); */
   2433 }
   2434 
   2435 
   2436 void
   2437 rf_ReconThread(req)
   2438 	struct rf_recon_req *req;
   2439 {
   2440 	int     s;
   2441 	RF_Raid_t *raidPtr;
   2442 
   2443 	s = splbio();
   2444 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2445 	raidPtr->recon_in_progress = 1;
   2446 
   2447 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2448 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2449 
   2450 	/* XXX get rid of this! we don't need it at all.. */
   2451 	RF_Free(req, sizeof(*req));
   2452 
   2453 	raidPtr->recon_in_progress = 0;
   2454 	splx(s);
   2455 
   2456 	/* That's all... */
   2457 	kthread_exit(0);        /* does not return */
   2458 }
   2459 
   2460 void
   2461 rf_RewriteParityThread(raidPtr)
   2462 	RF_Raid_t *raidPtr;
   2463 {
   2464 	int retcode;
   2465 	int s;
   2466 
   2467 	raidPtr->parity_rewrite_in_progress = 1;
   2468 	s = splbio();
   2469 	retcode = rf_RewriteParity(raidPtr);
   2470 	splx(s);
   2471 	if (retcode) {
   2472 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2473 	} else {
   2474 		/* set the clean bit!  If we shutdown correctly,
   2475 		   the clean bit on each component label will get
   2476 		   set */
   2477 		raidPtr->parity_good = RF_RAID_CLEAN;
   2478 	}
   2479 	raidPtr->parity_rewrite_in_progress = 0;
   2480 
   2481 	/* That's all... */
   2482 	kthread_exit(0);        /* does not return */
   2483 }
   2484 
   2485 
   2486 void
   2487 rf_CopybackThread(raidPtr)
   2488 	RF_Raid_t *raidPtr;
   2489 {
   2490 	int s;
   2491 
   2492 	raidPtr->copyback_in_progress = 1;
   2493 	s = splbio();
   2494 	rf_CopybackReconstructedData(raidPtr);
   2495 	splx(s);
   2496 	raidPtr->copyback_in_progress = 0;
   2497 
   2498 	/* That's all... */
   2499 	kthread_exit(0);        /* does not return */
   2500 }
   2501 
   2502 
   2503 void
   2504 rf_ReconstructInPlaceThread(req)
   2505 	struct rf_recon_req *req;
   2506 {
   2507 	int retcode;
   2508 	int s;
   2509 	RF_Raid_t *raidPtr;
   2510 
   2511 	s = splbio();
   2512 	raidPtr = req->raidPtr;
   2513 	raidPtr->recon_in_progress = 1;
   2514 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2515 	RF_Free(req, sizeof(*req));
   2516 	raidPtr->recon_in_progress = 0;
   2517 	splx(s);
   2518 
   2519 	/* That's all... */
   2520 	kthread_exit(0);        /* does not return */
   2521 }
   2522 
/*
 * Mount-root hook for RAID devices.  Currently a placeholder: it does
 * nothing with `dev'.
 */
/* ARGSUSED */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* nothing to do */
}
   2529 
   2530 
   2531 RF_AutoConfig_t *
   2532 rf_find_raid_components()
   2533 {
   2534 	struct devnametobdevmaj *dtobdm;
   2535 	struct vnode *vp;
   2536 	struct disklabel label;
   2537 	struct device *dv;
   2538 	char *cd_name;
   2539 	dev_t dev;
   2540 	int error;
   2541 	int i;
   2542 	int good_one;
   2543 	RF_ComponentLabel_t *clabel;
   2544 	RF_AutoConfig_t *ac_list;
   2545 	RF_AutoConfig_t *ac;
   2546 
   2547 
   2548 	/* initialize the AutoConfig list */
   2549 	ac_list = NULL;
   2550 
   2551 if (raidautoconfig) {
   2552 
   2553 	/* we begin by trolling through *all* the devices on the system */
   2554 
   2555 	for (dv = alldevs.tqh_first; dv != NULL;
   2556 	     dv = dv->dv_list.tqe_next) {
   2557 
   2558 		/* we are only interested in disks... */
   2559 		if (dv->dv_class != DV_DISK)
   2560 			continue;
   2561 
   2562 		/* we don't care about floppies... */
   2563 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2564 			continue;
   2565 		}
   2566 
   2567 		/* need to find the device_name_to_block_device_major stuff */
   2568 		cd_name = dv->dv_cfdata->cf_driver->cd_name;
   2569 		dtobdm = dev_name2blk;
   2570 		while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
   2571 			dtobdm++;
   2572 		}
   2573 
   2574 		/* get a vnode for the raw partition of this disk */
   2575 
   2576 		dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
   2577 		if (bdevvp(dev, &vp))
   2578 			panic("RAID can't alloc vnode");
   2579 
   2580 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2581 
   2582 		if (error) {
   2583 			/* "Who cares."  Continue looking
   2584 			   for something that exists*/
   2585 			vput(vp);
   2586 			continue;
   2587 		}
   2588 
   2589 		/* Ok, the disk exists.  Go get the disklabel. */
   2590 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2591 				  FREAD, NOCRED, 0);
   2592 		if (error) {
   2593 			/*
   2594 			 * XXX can't happen - open() would
   2595 			 * have errored out (or faked up one)
   2596 			 */
   2597 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2598 			       dv->dv_xname, 'a' + RAW_PART, error);
   2599 		}
   2600 
   2601 		/* don't need this any more.  We'll allocate it again
   2602 		   a little later if we really do... */
   2603 		VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2604 		vput(vp);
   2605 
   2606 		for (i=0; i < label.d_npartitions; i++) {
   2607 			/* We only support partitions marked as RAID */
   2608 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2609 				continue;
   2610 
   2611 			dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
   2612 			if (bdevvp(dev, &vp))
   2613 				panic("RAID can't alloc vnode");
   2614 
   2615 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2616 			if (error) {
   2617 				/* Whatever... */
   2618 				vput(vp);
   2619 				continue;
   2620 			}
   2621 
   2622 			good_one = 0;
   2623 
   2624 			clabel = (RF_ComponentLabel_t *)
   2625 				malloc(sizeof(RF_ComponentLabel_t),
   2626 				       M_RAIDFRAME, M_NOWAIT);
   2627 			if (clabel == NULL) {
   2628 				/* XXX CLEANUP HERE */
   2629 				printf("RAID auto config: out of memory!\n");
   2630 				return(NULL); /* XXX probably should panic? */
   2631 			}
   2632 
   2633 			if (!raidread_component_label(dev, vp, clabel)) {
   2634 				/* Got the label.  Does it look reasonable? */
   2635 				if (rf_reasonable_label(clabel) &&
   2636 				    (clabel->partitionSize <=
   2637 				     label.d_partitions[i].p_size)) {
   2638 #if DEBUG
   2639 					printf("Component on: %s%c: %d\n",
   2640 					       dv->dv_xname, 'a'+i,
   2641 					       label.d_partitions[i].p_size);
   2642 					print_component_label(clabel);
   2643 #endif
   2644 					/* if it's reasonable, add it,
   2645 					   else ignore it. */
   2646 					ac = (RF_AutoConfig_t *)
   2647 						malloc(sizeof(RF_AutoConfig_t),
   2648 						       M_RAIDFRAME,
   2649 						       M_NOWAIT);
   2650 					if (ac == NULL) {
   2651 						/* XXX should panic?? */
   2652 						return(NULL);
   2653 					}
   2654 
   2655 					sprintf(ac->devname, "%s%c",
   2656 						dv->dv_xname, 'a'+i);
   2657 					ac->dev = dev;
   2658 					ac->vp = vp;
   2659 					ac->clabel = clabel;
   2660 					ac->next = ac_list;
   2661 					ac_list = ac;
   2662 					good_one = 1;
   2663 				}
   2664 			}
   2665 			if (!good_one) {
   2666 				/* cleanup */
   2667 				free(clabel, M_RAIDFRAME);
   2668 				VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2669 				vput(vp);
   2670 			}
   2671 		}
   2672 	}
   2673 }
   2674 return(ac_list);
   2675 }
   2676 
   2677 static int
   2678 rf_reasonable_label(clabel)
   2679 	RF_ComponentLabel_t *clabel;
   2680 {
   2681 
   2682 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2683 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2684 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2685 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2686 	    clabel->row >=0 &&
   2687 	    clabel->column >= 0 &&
   2688 	    clabel->num_rows > 0 &&
   2689 	    clabel->num_columns > 0 &&
   2690 	    clabel->row < clabel->num_rows &&
   2691 	    clabel->column < clabel->num_columns &&
   2692 	    clabel->blockSize > 0 &&
   2693 	    clabel->numBlocks > 0) {
   2694 		/* label looks reasonable enough... */
   2695 		return(1);
   2696 	}
   2697 	return(0);
   2698 }
   2699 
   2700 
   2701 void
   2702 print_component_label(clabel)
   2703 	RF_ComponentLabel_t *clabel;
   2704 {
   2705 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2706 	       clabel->row, clabel->column,
   2707 	       clabel->num_rows, clabel->num_columns);
   2708 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2709 	       clabel->version, clabel->serial_number,
   2710 	       clabel->mod_counter);
   2711 	printf("   Clean: %s Status: %d\n",
   2712 	       clabel->clean ? "Yes" : "No", clabel->status );
   2713 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2714 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2715 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2716 	       (char) clabel->parityConfig, clabel->blockSize,
   2717 	       clabel->numBlocks);
   2718 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2719 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2720 #if 0
   2721 	   printf("   Config order: %d\n", clabel->config_order);
   2722 #endif
   2723 
   2724 }
   2725 
   2726 RF_ConfigSet_t *
   2727 rf_create_auto_sets(ac_list)
   2728 	RF_AutoConfig_t *ac_list;
   2729 {
   2730 	RF_AutoConfig_t *ac;
   2731 	RF_ConfigSet_t *config_sets;
   2732 	RF_ConfigSet_t *cset;
   2733 	RF_AutoConfig_t *ac_next;
   2734 
   2735 
   2736 	config_sets = NULL;
   2737 
   2738 	/* Go through the AutoConfig list, and figure out which components
   2739 	   belong to what sets.  */
   2740 	ac = ac_list;
   2741 	while(ac!=NULL) {
   2742 		/* we're going to putz with ac->next, so save it here
   2743 		   for use at the end of the loop */
   2744 		ac_next = ac->next;
   2745 
   2746 		if (config_sets == NULL) {
   2747 			/* will need at least this one... */
   2748 			config_sets = (RF_ConfigSet_t *)
   2749 				malloc(sizeof(RF_ConfigSet_t),
   2750 				       M_RAIDFRAME, M_NOWAIT);
   2751 			if (config_sets == NULL) {
   2752 				panic("rf_create_auto_sets: No memory!\n");
   2753 			}
   2754 			/* this one is easy :) */
   2755 			config_sets->ac = ac;
   2756 			config_sets->next = NULL;
   2757 			config_sets->rootable = 0;
   2758 			ac->next = NULL;
   2759 		} else {
   2760 			/* which set does this component fit into? */
   2761 			cset = config_sets;
   2762 			while(cset!=NULL) {
   2763 				if (rf_does_it_fit(cset, ac)) {
   2764 					/* looks like it matches */
   2765 					ac->next = cset->ac;
   2766 					cset->ac = ac;
   2767 					break;
   2768 				}
   2769 				cset = cset->next;
   2770 			}
   2771 			if (cset==NULL) {
   2772 				/* didn't find a match above... new set..*/
   2773 				cset = (RF_ConfigSet_t *)
   2774 					malloc(sizeof(RF_ConfigSet_t),
   2775 					       M_RAIDFRAME, M_NOWAIT);
   2776 				if (cset == NULL) {
   2777 					panic("rf_create_auto_sets: No memory!\n");
   2778 				}
   2779 				cset->ac = ac;
   2780 				ac->next = NULL;
   2781 				cset->next = config_sets;
   2782 				cset->rootable = 0;
   2783 				config_sets = cset;
   2784 			}
   2785 		}
   2786 		ac = ac_next;
   2787 	}
   2788 
   2789 
   2790 	return(config_sets);
   2791 }
   2792 
   2793 static int
   2794 rf_does_it_fit(cset, ac)
   2795 	RF_ConfigSet_t *cset;
   2796 	RF_AutoConfig_t *ac;
   2797 {
   2798 	RF_ComponentLabel_t *clabel1, *clabel2;
   2799 
   2800 	/* If this one matches the *first* one in the set, that's good
   2801 	   enough, since the other members of the set would have been
   2802 	   through here too... */
   2803 	/* note that we are not checking partitionSize here..
   2804 
   2805 	   Note that we are also not checking the mod_counters here.
   2806 	   If everything else matches execpt the mod_counter, that's
   2807 	   good enough for this test.  We will deal with the mod_counters
   2808 	   a little later in the autoconfiguration process.
   2809 
   2810 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2811 
   2812 	*/
   2813 
   2814 	clabel1 = cset->ac->clabel;
   2815 	clabel2 = ac->clabel;
   2816 	if ((clabel1->version == clabel2->version) &&
   2817 	    (clabel1->serial_number == clabel2->serial_number) &&
   2818 	    (clabel1->num_rows == clabel2->num_rows) &&
   2819 	    (clabel1->num_columns == clabel2->num_columns) &&
   2820 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2821 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2822 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2823 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2824 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2825 	    (clabel1->blockSize == clabel2->blockSize) &&
   2826 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2827 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2828 	    (clabel1->root_partition == clabel2->root_partition) &&
   2829 	    (clabel1->last_unit == clabel2->last_unit) &&
   2830 	    (clabel1->config_order == clabel2->config_order)) {
   2831 		/* if it get's here, it almost *has* to be a match */
   2832 	} else {
   2833 		/* it's not consistent with somebody in the set..
   2834 		   punt */
   2835 		return(0);
   2836 	}
   2837 	/* all was fine.. it must fit... */
   2838 	return(1);
   2839 }
   2840 
   2841 int
   2842 rf_have_enough_components(cset)
   2843 	RF_ConfigSet_t *cset;
   2844 {
   2845 	RF_AutoConfig_t *ac;
   2846 	RF_AutoConfig_t *auto_config;
   2847 	RF_ComponentLabel_t *clabel;
   2848 	int r,c;
   2849 	int num_rows;
   2850 	int num_cols;
   2851 	int num_missing;
   2852 
   2853 	/* check to see that we have enough 'live' components
   2854 	   of this set.  If so, we can configure it if necessary */
   2855 
   2856 	num_rows = cset->ac->clabel->num_rows;
   2857 	num_cols = cset->ac->clabel->num_columns;
   2858 
   2859 	/* XXX Check for duplicate components!?!?!? */
   2860 
   2861 	num_missing = 0;
   2862 	auto_config = cset->ac;
   2863 
   2864 	for(r=0; r<num_rows; r++) {
   2865 		for(c=0; c<num_cols; c++) {
   2866 			ac = auto_config;
   2867 			while(ac!=NULL) {
   2868 				if (ac->clabel==NULL) {
   2869 					/* big-time bad news. */
   2870 					goto fail;
   2871 				}
   2872 				if ((ac->clabel->row == r) &&
   2873 				    (ac->clabel->column == c)) {
   2874 					/* it's this one... */
   2875 #if DEBUG
   2876 					printf("Found: %s at %d,%d\n",
   2877 					       ac->devname,r,c);
   2878 #endif
   2879 					break;
   2880 				}
   2881 				ac=ac->next;
   2882 			}
   2883 			if (ac==NULL) {
   2884 				/* Didn't find one here! */
   2885 				num_missing++;
   2886 			}
   2887 		}
   2888 	}
   2889 
   2890 	clabel = cset->ac->clabel;
   2891 
   2892 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   2893 	    ((clabel->parityConfig == '1') && (num_missing > 1)) ||
   2894 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   2895 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   2896 		/* XXX this needs to be made *much* more general */
   2897 		/* Too many failures */
   2898 		return(0);
   2899 	}
   2900 	/* otherwise, all is well, and we've got enough to take a kick
   2901 	   at autoconfiguring this set */
   2902 	return(1);
   2903 fail:
   2904 	return(0);
   2905 
   2906 }
   2907 
   2908 void
   2909 rf_create_configuration(ac,config,raidPtr)
   2910 	RF_AutoConfig_t *ac;
   2911 	RF_Config_t *config;
   2912 	RF_Raid_t *raidPtr;
   2913 {
   2914 	RF_ComponentLabel_t *clabel;
   2915 
   2916 	clabel = ac->clabel;
   2917 
   2918 	/* 1. Fill in the common stuff */
   2919 	config->numRow = clabel->num_rows;
   2920 	config->numCol = clabel->num_columns;
   2921 	config->numSpare = 0; /* XXX should this be set here? */
   2922 	config->sectPerSU = clabel->sectPerSU;
   2923 	config->SUsPerPU = clabel->SUsPerPU;
   2924 	config->SUsPerRU = clabel->SUsPerRU;
   2925 	config->parityConfig = clabel->parityConfig;
   2926 	/* XXX... */
   2927 	strcpy(config->diskQueueType,"fifo");
   2928 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   2929 	config->layoutSpecificSize = 0; /* XXX ?? */
   2930 
   2931 	while(ac!=NULL) {
   2932 		/* row/col values will be in range due to the checks
   2933 		   in reasonable_label() */
   2934 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   2935 		       ac->devname);
   2936 		ac = ac->next;
   2937 	}
   2938 
   2939 }
   2940 
   2941 int
   2942 rf_set_autoconfig(raidPtr, new_value)
   2943 	RF_Raid_t *raidPtr;
   2944 	int new_value;
   2945 {
   2946 	RF_ComponentLabel_t clabel;
   2947 	struct vnode *vp;
   2948 	dev_t dev;
   2949 	int row, column;
   2950 
   2951 	raidPtr->autoconfigure = new_value;
   2952 	for(row=0; row<raidPtr->numRow; row++) {
   2953 		for(column=0; column<raidPtr->numCol; column++) {
   2954 			dev = raidPtr->Disks[row][column].dev;
   2955 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2956 			raidread_component_label(dev, vp, &clabel);
   2957 			clabel.autoconfigure = new_value;
   2958 			raidwrite_component_label(dev, vp, &clabel);
   2959 		}
   2960 	}
   2961 	return(new_value);
   2962 }
   2963 
   2964 int
   2965 rf_set_rootpartition(raidPtr, new_value)
   2966 	RF_Raid_t *raidPtr;
   2967 	int new_value;
   2968 {
   2969 	RF_ComponentLabel_t clabel;
   2970 	struct vnode *vp;
   2971 	dev_t dev;
   2972 	int row, column;
   2973 
   2974 	raidPtr->root_partition = new_value;
   2975 	for(row=0; row<raidPtr->numRow; row++) {
   2976 		for(column=0; column<raidPtr->numCol; column++) {
   2977 			dev = raidPtr->Disks[row][column].dev;
   2978 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2979 			raidread_component_label(dev, vp, &clabel);
   2980 			clabel.root_partition = new_value;
   2981 			raidwrite_component_label(dev, vp, &clabel);
   2982 		}
   2983 	}
   2984 	return(new_value);
   2985 }
   2986 
   2987 void
   2988 rf_release_all_vps(cset)
   2989 	RF_ConfigSet_t *cset;
   2990 {
   2991 	RF_AutoConfig_t *ac;
   2992 
   2993 	ac = cset->ac;
   2994 	while(ac!=NULL) {
   2995 		/* Close the vp, and give it back */
   2996 		if (ac->vp) {
   2997 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   2998 			vput(ac->vp);
   2999 		}
   3000 		ac = ac->next;
   3001 	}
   3002 }
   3003 
   3004 
   3005 void
   3006 rf_cleanup_config_set(cset)
   3007 	RF_ConfigSet_t *cset;
   3008 {
   3009 	RF_AutoConfig_t *ac;
   3010 	RF_AutoConfig_t *next_ac;
   3011 
   3012 	ac = cset->ac;
   3013 	while(ac!=NULL) {
   3014 		next_ac = ac->next;
   3015 		/* nuke the label */
   3016 		free(ac->clabel, M_RAIDFRAME);
   3017 		/* cleanup the config structure */
   3018 		free(ac, M_RAIDFRAME);
   3019 		/* "next.." */
   3020 		ac = next_ac;
   3021 	}
   3022 	/* and, finally, nuke the config set */
   3023 	free(cset, M_RAIDFRAME);
   3024 }
   3025 
   3026 
   3027 void
   3028 raid_init_component_label(raidPtr, clabel)
   3029 	RF_Raid_t *raidPtr;
   3030 	RF_ComponentLabel_t *clabel;
   3031 {
   3032 	/* current version number */
   3033 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3034 	clabel->serial_number = raidPtr->serial_number;
   3035 	clabel->mod_counter = raidPtr->mod_counter;
   3036 	clabel->num_rows = raidPtr->numRow;
   3037 	clabel->num_columns = raidPtr->numCol;
   3038 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3039 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3040 
   3041 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3042 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3043 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3044 
   3045 	clabel->blockSize = raidPtr->bytesPerSector;
   3046 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3047 
   3048 	/* XXX not portable */
   3049 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3050 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3051 	clabel->autoconfigure = raidPtr->autoconfigure;
   3052 	clabel->root_partition = raidPtr->root_partition;
   3053 	clabel->last_unit = raidPtr->raidid;
   3054 	clabel->config_order = raidPtr->config_order;
   3055 }
   3056 
   3057 int
   3058 rf_auto_config_set(cset,unit)
   3059 	RF_ConfigSet_t *cset;
   3060 	int *unit;
   3061 {
   3062 	RF_Raid_t *raidPtr;
   3063 	RF_Config_t *config;
   3064 	int raidID;
   3065 	int retcode;
   3066 
   3067 	printf("Starting autoconfigure on raid%d\n",raidID);
   3068 
   3069 	retcode = 0;
   3070 	*unit = -1;
   3071 
   3072 	/* 1. Create a config structure */
   3073 
   3074 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3075 				       M_RAIDFRAME,
   3076 				       M_NOWAIT);
   3077 	if (config==NULL) {
   3078 		printf("Out of mem!?!?\n");
   3079 				/* XXX do something more intelligent here. */
   3080 		return(1);
   3081 	}
   3082 	/* XXX raidID needs to be set correctly.. */
   3083 
   3084 	/*
   3085 	   2. Figure out what RAID ID this one is supposed to live at
   3086 	   See if we can get the same RAID dev that it was configured
   3087 	   on last time..
   3088 	*/
   3089 
   3090 	raidID = cset->ac->clabel->last_unit;
   3091 	if ((raidID < 0) || (raidID >= numraid)) {
   3092 		/* let's not wander off into lala land. */
   3093 		raidID = numraid - 1;
   3094 	}
   3095 	if (raidPtrs[raidID]->valid != 0) {
   3096 
   3097 		/*
   3098 		   Nope... Go looking for an alternative...
   3099 		   Start high so we don't immediately use raid0 if that's
   3100 		   not taken.
   3101 		*/
   3102 
   3103 		for(raidID = numraid; raidID >= 0; raidID--) {
   3104 			if (raidPtrs[raidID]->valid == 0) {
   3105 				/* can use this one! */
   3106 				break;
   3107 			}
   3108 		}
   3109 	}
   3110 
   3111 	if (raidID < 0) {
   3112 		/* punt... */
   3113 		printf("Unable to auto configure this set!\n");
   3114 		printf("(Out of RAID devs!)\n");
   3115 		return(1);
   3116 	}
   3117 
   3118 	raidPtr = raidPtrs[raidID];
   3119 
   3120 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3121 	raidPtr->raidid = raidID;
   3122 	raidPtr->openings = RAIDOUTSTANDING;
   3123 
   3124 	/* 3. Build the configuration structure */
   3125 	rf_create_configuration(cset->ac, config, raidPtr);
   3126 
   3127 	/* 4. Do the configuration */
   3128 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3129 
   3130 	if (retcode == 0) {
   3131 
   3132 		raidinit(raidPtrs[raidID]);
   3133 
   3134 		rf_markalldirty(raidPtrs[raidID]);
   3135 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3136 		if (cset->ac->clabel->root_partition==1) {
   3137 			/* everything configured just fine.  Make a note
   3138 			   that this set is eligible to be root. */
   3139 			cset->rootable = 1;
   3140 			/* XXX do this here? */
   3141 			raidPtrs[raidID]->root_partition = 1;
   3142 		}
   3143 	}
   3144 
   3145 	/* 5. Cleanup */
   3146 	free(config, M_RAIDFRAME);
   3147 
   3148 	*unit = raidID;
   3149 	return(retcode);
   3150 }
   3151