Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.166
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.166 2003/12/29 02:38:18 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1990, 1993
     40  *      The Regents of the University of California.  All rights reserved.
     41  *
     42  * This code is derived from software contributed to Berkeley by
     43  * the Systems Programming Group of the University of Utah Computer
     44  * Science Department.
     45  *
     46  * Redistribution and use in source and binary forms, with or without
     47  * modification, are permitted provided that the following conditions
     48  * are met:
     49  * 1. Redistributions of source code must retain the above copyright
     50  *    notice, this list of conditions and the following disclaimer.
     51  * 2. Redistributions in binary form must reproduce the above copyright
     52  *    notice, this list of conditions and the following disclaimer in the
     53  *    documentation and/or other materials provided with the distribution.
     54  * 3. Neither the name of the University nor the names of its contributors
     55  *    may be used to endorse or promote products derived from this software
     56  *    without specific prior written permission.
     57  *
     58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     68  * SUCH DAMAGE.
     69  *
     70  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     71  *
     72  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     73  */
     74 
     75 /*
     76  * Copyright (c) 1988 University of Utah.
     77  *
     78  * This code is derived from software contributed to Berkeley by
     79  * the Systems Programming Group of the University of Utah Computer
     80  * Science Department.
     81  *
     82  * Redistribution and use in source and binary forms, with or without
     83  * modification, are permitted provided that the following conditions
     84  * are met:
     85  * 1. Redistributions of source code must retain the above copyright
     86  *    notice, this list of conditions and the following disclaimer.
     87  * 2. Redistributions in binary form must reproduce the above copyright
     88  *    notice, this list of conditions and the following disclaimer in the
     89  *    documentation and/or other materials provided with the distribution.
     90  * 3. All advertising materials mentioning features or use of this software
     91  *    must display the following acknowledgement:
     92  *      This product includes software developed by the University of
     93  *      California, Berkeley and its contributors.
     94  * 4. Neither the name of the University nor the names of its contributors
     95  *    may be used to endorse or promote products derived from this software
     96  *    without specific prior written permission.
     97  *
     98  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     99  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    100  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    101  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
    102  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    103  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    104  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    105  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    106  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    107  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    108  * SUCH DAMAGE.
    109  *
    110  * from: Utah $Hdr: cd.c 1.6 90/11/28$
    111  *
    112  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
    113  */
    114 
    115 /*
    116  * Copyright (c) 1995 Carnegie-Mellon University.
    117  * All rights reserved.
    118  *
    119  * Authors: Mark Holland, Jim Zelenka
    120  *
    121  * Permission to use, copy, modify and distribute this software and
    122  * its documentation is hereby granted, provided that both the copyright
    123  * notice and this permission notice appear in all copies of the
    124  * software, derivative works or modified versions, and any portions
    125  * thereof, and that both notices appear in supporting documentation.
    126  *
    127  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
    128  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
    129  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
    130  *
    131  * Carnegie Mellon requests users of this software to return to
    132  *
    133  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    134  *  School of Computer Science
    135  *  Carnegie Mellon University
    136  *  Pittsburgh PA 15213-3890
    137  *
    138  * any improvements or extensions that they make and grant Carnegie the
    139  * rights to redistribute these changes.
    140  */
    141 
    142 /***********************************************************
    143  *
    144  * rf_kintf.c -- the kernel interface routines for RAIDframe
    145  *
    146  ***********************************************************/
    147 
    148 #include <sys/cdefs.h>
    149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.166 2003/12/29 02:38:18 oster Exp $");
    150 
    151 #include <sys/param.h>
    152 #include <sys/errno.h>
    153 #include <sys/pool.h>
    154 #include <sys/proc.h>
    155 #include <sys/queue.h>
    156 #include <sys/disk.h>
    157 #include <sys/device.h>
    158 #include <sys/stat.h>
    159 #include <sys/ioctl.h>
    160 #include <sys/fcntl.h>
    161 #include <sys/systm.h>
    162 #include <sys/namei.h>
    163 #include <sys/vnode.h>
    164 #include <sys/disklabel.h>
    165 #include <sys/conf.h>
    166 #include <sys/lock.h>
    167 #include <sys/buf.h>
    168 #include <sys/user.h>
    169 #include <sys/reboot.h>
    170 
    171 #include <dev/raidframe/raidframevar.h>
    172 #include <dev/raidframe/raidframeio.h>
    173 #include "raid.h"
    174 #include "opt_raid_autoconfig.h"
    175 #include "rf_raid.h"
    176 #include "rf_copyback.h"
    177 #include "rf_dag.h"
    178 #include "rf_dagflags.h"
    179 #include "rf_desc.h"
    180 #include "rf_diskqueue.h"
    181 #include "rf_etimer.h"
    182 #include "rf_general.h"
    183 #include "rf_kintf.h"
    184 #include "rf_options.h"
    185 #include "rf_driver.h"
    186 #include "rf_parityscan.h"
    187 #include "rf_threadstuff.h"
    188 
/*
 * db1_printf((fmt, ...)) -- level-1 debug trace.
 *
 * The single macro argument is a complete, parenthesised printf argument
 * list, which is why call sites use double parentheses.  With DEBUG defined
 * the message is printed only while rf_kdebug_level is greater than zero;
 * without DEBUG the macro expands to an empty statement.
 *
 * Both variants are wrapped in do { } while (0) so the macro behaves as one
 * ordinary statement: it takes its trailing semicolon and can be used as an
 * unbraced if/else arm without swallowing the else or ending the if early.
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    195 
    196 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    197 
    198 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    199 
    200 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    201 						 * spare table */
    202 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    203 						 * installation process */
    204 
    205 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
    206 
    207 /* prototypes */
    208 static void KernelWakeupFunc(struct buf * bp);
    209 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    210 		   dev_t dev, RF_SectorNum_t startSect,
    211 		   RF_SectorCount_t numSect, caddr_t buf,
    212 		   void (*cbFunc) (struct buf *), void *cbArg,
    213 		   int logBytesPerSector, struct proc * b_proc);
    214 static void raidinit(RF_Raid_t *);
    215 
    216 void raidattach(int);
    217 
    218 dev_type_open(raidopen);
    219 dev_type_close(raidclose);
    220 dev_type_read(raidread);
    221 dev_type_write(raidwrite);
    222 dev_type_ioctl(raidioctl);
    223 dev_type_strategy(raidstrategy);
    224 dev_type_dump(raiddump);
    225 dev_type_size(raidsize);
    226 
    227 const struct bdevsw raid_bdevsw = {
    228 	raidopen, raidclose, raidstrategy, raidioctl,
    229 	raiddump, raidsize, D_DISK
    230 };
    231 
    232 const struct cdevsw raid_cdevsw = {
    233 	raidopen, raidclose, raidread, raidwrite, raidioctl,
    234 	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
    235 };
    236 
    237 /*
    238  * Pilfered from ccd.c
    239  */
    240 
    241 struct raidbuf {
    242 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
    243 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    244 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
    245 };
    246 
    247 /* component buffer pool */
    248 struct pool raidframe_cbufpool;
    249 
    250 /* XXX Not sure if the following should be replacing the raidPtrs above,
    251    or if it should be used in conjunction with that...
    252 */
    253 
    254 struct raid_softc {
    255 	int     sc_flags;	/* flags */
    256 	int     sc_cflags;	/* configuration flags */
    257 	size_t  sc_size;        /* size of the raid device */
    258 	char    sc_xname[20];	/* XXX external name */
    259 	struct disk sc_dkdev;	/* generic disk device info */
    260 	struct bufq_state buf_queue;	/* used for the device queue */
    261 };
    262 /* sc_flags */
    263 #define RAIDF_INITED	0x01	/* unit has been initialized */
    264 #define RAIDF_WLABEL	0x02	/* label area is writable */
    265 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    266 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    267 #define RAIDF_LOCKED	0x80	/* unit is locked */
    268 
    269 #define	raidunit(x)	DISKUNIT(x)
    270 int numraid = 0;
    271 
    272 /*
    273  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    274  * Be aware that large numbers can allow the driver to consume a lot of
    275  * kernel memory, especially on writes, and in degraded mode reads.
    276  *
    277  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    278  * a single 64K write will typically require 64K for the old data,
    279  * 64K for the old parity, and 64K for the new parity, for a total
    280  * of 192K (if the parity buffer is not re-used immediately).
    281  * Even if it is used immediately, that's still 128K, which when multiplied
    282  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    283  *
    284  * Now in degraded mode, for example, a 64K read on the above setup may
    285  * require data reconstruction, which will require *all* of the 4 remaining
    286  * disks to participate -- 4 * 32K/disk == 128K again.
    287  */
    288 
    289 #ifndef RAIDOUTSTANDING
    290 #define RAIDOUTSTANDING   6
    291 #endif
    292 
    293 #define RAIDLABELDEV(dev)	\
    294 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    295 
    296 /* declared here, and made public, for the benefit of KVM stuff.. */
    297 struct raid_softc *raid_softc;
    298 
    299 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    300 				     struct disklabel *);
    301 static void raidgetdisklabel(dev_t);
    302 static void raidmakedisklabel(struct raid_softc *);
    303 
    304 static int raidlock(struct raid_softc *);
    305 static void raidunlock(struct raid_softc *);
    306 
    307 static void rf_markalldirty(RF_Raid_t *);
    308 
    309 struct device *raidrootdev;
    310 
    311 void rf_ReconThread(struct rf_recon_req *);
    312 /* XXX what I want is: */
    313 /*void rf_ReconThread(RF_Raid_t *raidPtr);  */
    314 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    315 void rf_CopybackThread(RF_Raid_t *raidPtr);
    316 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    317 int rf_autoconfig(struct device *self);
    318 void rf_buildroothack(RF_ConfigSet_t *);
    319 
    320 RF_AutoConfig_t *rf_find_raid_components(void);
    321 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    322 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    323 static int rf_reasonable_label(RF_ComponentLabel_t *);
    324 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    325 int rf_set_autoconfig(RF_Raid_t *, int);
    326 int rf_set_rootpartition(RF_Raid_t *, int);
    327 void rf_release_all_vps(RF_ConfigSet_t *);
    328 void rf_cleanup_config_set(RF_ConfigSet_t *);
    329 int rf_have_enough_components(RF_ConfigSet_t *);
    330 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    331 
    332 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    333 				  allow autoconfig to take place.
    334 			          Note that this is overridden by having
    335 			          RAID_AUTOCONFIG as an option in the
    336 			          kernel config file.  */
    337 
    338 void
    339 raidattach(num)
    340 	int     num;
    341 {
    342 	int raidID;
    343 	int i, rc;
    344 
    345 #ifdef DEBUG
    346 	printf("raidattach: Asked for %d units\n", num);
    347 #endif
    348 
    349 	if (num <= 0) {
    350 #ifdef DIAGNOSTIC
    351 		panic("raidattach: count <= 0");
    352 #endif
    353 		return;
    354 	}
    355 	/* This is where all the initialization stuff gets done. */
    356 
    357 	numraid = num;
    358 
    359 	/* Make some space for requested number of units... */
    360 
    361 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    362 	if (raidPtrs == NULL) {
    363 		panic("raidPtrs is NULL!!");
    364 	}
    365 
    366 	/* Initialize the component buffer pool. */
    367 	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
    368 	    0, 0, "raidpl", NULL);
    369 
    370 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    371 	if (rc) {
    372 		RF_PANIC();
    373 	}
    374 
    375 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    376 
    377 	for (i = 0; i < num; i++)
    378 		raidPtrs[i] = NULL;
    379 	rc = rf_BootRaidframe();
    380 	if (rc == 0)
    381 		printf("Kernelized RAIDframe activated\n");
    382 	else
    383 		panic("Serious error booting RAID!!");
    384 
    385 	/* put together some datastructures like the CCD device does.. This
    386 	 * lets us lock the device and what-not when it gets opened. */
    387 
    388 	raid_softc = (struct raid_softc *)
    389 		malloc(num * sizeof(struct raid_softc),
    390 		       M_RAIDFRAME, M_NOWAIT);
    391 	if (raid_softc == NULL) {
    392 		printf("WARNING: no memory for RAIDframe driver\n");
    393 		return;
    394 	}
    395 
    396 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    397 
    398 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    399 					      M_RAIDFRAME, M_NOWAIT);
    400 	if (raidrootdev == NULL) {
    401 		panic("No memory for RAIDframe driver!!?!?!");
    402 	}
    403 
    404 	for (raidID = 0; raidID < num; raidID++) {
    405 		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
    406 
    407 		raidrootdev[raidID].dv_class  = DV_DISK;
    408 		raidrootdev[raidID].dv_cfdata = NULL;
    409 		raidrootdev[raidID].dv_unit   = raidID;
    410 		raidrootdev[raidID].dv_parent = NULL;
    411 		raidrootdev[raidID].dv_flags  = 0;
    412 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    413 
    414 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    415 			  (RF_Raid_t *));
    416 		if (raidPtrs[raidID] == NULL) {
    417 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    418 			numraid = raidID;
    419 			return;
    420 		}
    421 	}
    422 
    423 #ifdef RAID_AUTOCONFIG
    424 	raidautoconfig = 1;
    425 #endif
    426 
    427 	/*
    428 	 * Register a finalizer which will be used to auto-config RAID
    429 	 * sets once all real hardware devices have been found.
    430 	 */
    431 	if (config_finalize_register(NULL, rf_autoconfig) != 0)
    432 		printf("WARNING: unable to register RAIDframe finalizer\n");
    433 }
    434 
    435 int
    436 rf_autoconfig(struct device *self)
    437 {
    438 	RF_AutoConfig_t *ac_list;
    439 	RF_ConfigSet_t *config_sets;
    440 
    441 	if (raidautoconfig == 0)
    442 		return (0);
    443 
    444 	/* XXX This code can only be run once. */
    445 	raidautoconfig = 0;
    446 
    447 	/* 1. locate all RAID components on the system */
    448 #ifdef DEBUG
    449 	printf("Searching for RAID components...\n");
    450 #endif
    451 	ac_list = rf_find_raid_components();
    452 
    453 	/* 2. Sort them into their respective sets. */
    454 	config_sets = rf_create_auto_sets(ac_list);
    455 
    456 	/*
    457 	 * 3. Evaluate each set andconfigure the valid ones.
    458 	 * This gets done in rf_buildroothack().
    459 	 */
    460 	rf_buildroothack(config_sets);
    461 
    462 	return (1);
    463 }
    464 
    465 void
    466 rf_buildroothack(RF_ConfigSet_t *config_sets)
    467 {
    468 	RF_ConfigSet_t *cset;
    469 	RF_ConfigSet_t *next_cset;
    470 	int retcode;
    471 	int raidID;
    472 	int rootID;
    473 	int num_root;
    474 
    475 	rootID = 0;
    476 	num_root = 0;
    477 	cset = config_sets;
    478 	while(cset != NULL ) {
    479 		next_cset = cset->next;
    480 		if (rf_have_enough_components(cset) &&
    481 		    cset->ac->clabel->autoconfigure==1) {
    482 			retcode = rf_auto_config_set(cset,&raidID);
    483 			if (!retcode) {
    484 				if (cset->rootable) {
    485 					rootID = raidID;
    486 					num_root++;
    487 				}
    488 			} else {
    489 				/* The autoconfig didn't work :( */
    490 #if DEBUG
    491 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    492 #endif
    493 				rf_release_all_vps(cset);
    494 			}
    495 		} else {
    496 			/* we're not autoconfiguring this set...
    497 			   release the associated resources */
    498 			rf_release_all_vps(cset);
    499 		}
    500 		/* cleanup */
    501 		rf_cleanup_config_set(cset);
    502 		cset = next_cset;
    503 	}
    504 
    505 	/* we found something bootable... */
    506 
    507 	if (num_root == 1) {
    508 		booted_device = &raidrootdev[rootID];
    509 	} else if (num_root > 1) {
    510 		/* we can't guess.. require the user to answer... */
    511 		boothowto |= RB_ASKNAME;
    512 	}
    513 }
    514 
    515 
    516 int
    517 raidsize(dev)
    518 	dev_t   dev;
    519 {
    520 	struct raid_softc *rs;
    521 	struct disklabel *lp;
    522 	int     part, unit, omask, size;
    523 
    524 	unit = raidunit(dev);
    525 	if (unit >= numraid)
    526 		return (-1);
    527 	rs = &raid_softc[unit];
    528 
    529 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    530 		return (-1);
    531 
    532 	part = DISKPART(dev);
    533 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    534 	lp = rs->sc_dkdev.dk_label;
    535 
    536 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    537 		return (-1);
    538 
    539 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    540 		size = -1;
    541 	else
    542 		size = lp->d_partitions[part].p_size *
    543 		    (lp->d_secsize / DEV_BSIZE);
    544 
    545 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    546 		return (-1);
    547 
    548 	return (size);
    549 
    550 }
    551 
    552 int
    553 raiddump(dev, blkno, va, size)
    554 	dev_t   dev;
    555 	daddr_t blkno;
    556 	caddr_t va;
    557 	size_t  size;
    558 {
    559 	/* Not implemented. */
    560 	return ENXIO;
    561 }
    562 /* ARGSUSED */
    563 int
    564 raidopen(dev, flags, fmt, p)
    565 	dev_t   dev;
    566 	int     flags, fmt;
    567 	struct proc *p;
    568 {
    569 	int     unit = raidunit(dev);
    570 	struct raid_softc *rs;
    571 	struct disklabel *lp;
    572 	int     part, pmask;
    573 	int     error = 0;
    574 
    575 	if (unit >= numraid)
    576 		return (ENXIO);
    577 	rs = &raid_softc[unit];
    578 
    579 	if ((error = raidlock(rs)) != 0)
    580 		return (error);
    581 	lp = rs->sc_dkdev.dk_label;
    582 
    583 	part = DISKPART(dev);
    584 	pmask = (1 << part);
    585 
    586 	if ((rs->sc_flags & RAIDF_INITED) &&
    587 	    (rs->sc_dkdev.dk_openmask == 0))
    588 		raidgetdisklabel(dev);
    589 
    590 	/* make sure that this partition exists */
    591 
    592 	if (part != RAW_PART) {
    593 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    594 		    ((part >= lp->d_npartitions) ||
    595 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    596 			error = ENXIO;
    597 			raidunlock(rs);
    598 			return (error);
    599 		}
    600 	}
    601 	/* Prevent this unit from being unconfigured while open. */
    602 	switch (fmt) {
    603 	case S_IFCHR:
    604 		rs->sc_dkdev.dk_copenmask |= pmask;
    605 		break;
    606 
    607 	case S_IFBLK:
    608 		rs->sc_dkdev.dk_bopenmask |= pmask;
    609 		break;
    610 	}
    611 
    612 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    613 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    614 		/* First one... mark things as dirty... Note that we *MUST*
    615 		 have done a configure before this.  I DO NOT WANT TO BE
    616 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    617 		 THAT THEY BELONG TOGETHER!!!!! */
    618 		/* XXX should check to see if we're only open for reading
    619 		   here... If so, we needn't do this, but then need some
    620 		   other way of keeping track of what's happened.. */
    621 
    622 		rf_markalldirty( raidPtrs[unit] );
    623 	}
    624 
    625 
    626 	rs->sc_dkdev.dk_openmask =
    627 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    628 
    629 	raidunlock(rs);
    630 
    631 	return (error);
    632 
    633 
    634 }
    635 /* ARGSUSED */
    636 int
    637 raidclose(dev, flags, fmt, p)
    638 	dev_t   dev;
    639 	int     flags, fmt;
    640 	struct proc *p;
    641 {
    642 	int     unit = raidunit(dev);
    643 	struct raid_softc *rs;
    644 	int     error = 0;
    645 	int     part;
    646 
    647 	if (unit >= numraid)
    648 		return (ENXIO);
    649 	rs = &raid_softc[unit];
    650 
    651 	if ((error = raidlock(rs)) != 0)
    652 		return (error);
    653 
    654 	part = DISKPART(dev);
    655 
    656 	/* ...that much closer to allowing unconfiguration... */
    657 	switch (fmt) {
    658 	case S_IFCHR:
    659 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    660 		break;
    661 
    662 	case S_IFBLK:
    663 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    664 		break;
    665 	}
    666 	rs->sc_dkdev.dk_openmask =
    667 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    668 
    669 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    670 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    671 		/* Last one... device is not unconfigured yet.
    672 		   Device shutdown has taken care of setting the
    673 		   clean bits if RAIDF_INITED is not set
    674 		   mark things as clean... */
    675 
    676 		rf_update_component_labels(raidPtrs[unit],
    677 						 RF_FINAL_COMPONENT_UPDATE);
    678 		if (doing_shutdown) {
    679 			/* last one, and we're going down, so
    680 			   lights out for this RAID set too. */
    681 			error = rf_Shutdown(raidPtrs[unit]);
    682 
    683 			/* It's no longer initialized... */
    684 			rs->sc_flags &= ~RAIDF_INITED;
    685 
    686 			/* Detach the disk. */
    687 			disk_detach(&rs->sc_dkdev);
    688 		}
    689 	}
    690 
    691 	raidunlock(rs);
    692 	return (0);
    693 
    694 }
    695 
    696 void
    697 raidstrategy(bp)
    698 	struct buf *bp;
    699 {
    700 	int s;
    701 
    702 	unsigned int raidID = raidunit(bp->b_dev);
    703 	RF_Raid_t *raidPtr;
    704 	struct raid_softc *rs = &raid_softc[raidID];
    705 	int     wlabel;
    706 
    707 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    708 		bp->b_error = ENXIO;
    709 		bp->b_flags |= B_ERROR;
    710 		bp->b_resid = bp->b_bcount;
    711 		biodone(bp);
    712 		return;
    713 	}
    714 	if (raidID >= numraid || !raidPtrs[raidID]) {
    715 		bp->b_error = ENODEV;
    716 		bp->b_flags |= B_ERROR;
    717 		bp->b_resid = bp->b_bcount;
    718 		biodone(bp);
    719 		return;
    720 	}
    721 	raidPtr = raidPtrs[raidID];
    722 	if (!raidPtr->valid) {
    723 		bp->b_error = ENODEV;
    724 		bp->b_flags |= B_ERROR;
    725 		bp->b_resid = bp->b_bcount;
    726 		biodone(bp);
    727 		return;
    728 	}
    729 	if (bp->b_bcount == 0) {
    730 		db1_printf(("b_bcount is zero..\n"));
    731 		biodone(bp);
    732 		return;
    733 	}
    734 
    735 	/*
    736 	 * Do bounds checking and adjust transfer.  If there's an
    737 	 * error, the bounds check will flag that for us.
    738 	 */
    739 
    740 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    741 	if (DISKPART(bp->b_dev) != RAW_PART)
    742 		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
    743 			db1_printf(("Bounds check failed!!:%d %d\n",
    744 				(int) bp->b_blkno, (int) wlabel));
    745 			biodone(bp);
    746 			return;
    747 		}
    748 	s = splbio();
    749 
    750 	bp->b_resid = 0;
    751 
    752 	/* stuff it onto our queue */
    753 	BUFQ_PUT(&rs->buf_queue, bp);
    754 
    755 	raidstart(raidPtrs[raidID]);
    756 
    757 	splx(s);
    758 }
    759 /* ARGSUSED */
    760 int
    761 raidread(dev, uio, flags)
    762 	dev_t   dev;
    763 	struct uio *uio;
    764 	int     flags;
    765 {
    766 	int     unit = raidunit(dev);
    767 	struct raid_softc *rs;
    768 
    769 	if (unit >= numraid)
    770 		return (ENXIO);
    771 	rs = &raid_softc[unit];
    772 
    773 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    774 		return (ENXIO);
    775 
    776 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    777 
    778 }
    779 /* ARGSUSED */
    780 int
    781 raidwrite(dev, uio, flags)
    782 	dev_t   dev;
    783 	struct uio *uio;
    784 	int     flags;
    785 {
    786 	int     unit = raidunit(dev);
    787 	struct raid_softc *rs;
    788 
    789 	if (unit >= numraid)
    790 		return (ENXIO);
    791 	rs = &raid_softc[unit];
    792 
    793 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    794 		return (ENXIO);
    795 
    796 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    797 
    798 }
    799 
    800 int
    801 raidioctl(dev, cmd, data, flag, p)
    802 	dev_t   dev;
    803 	u_long  cmd;
    804 	caddr_t data;
    805 	int     flag;
    806 	struct proc *p;
    807 {
    808 	int     unit = raidunit(dev);
    809 	int     error = 0;
    810 	int     part, pmask;
    811 	struct raid_softc *rs;
    812 	RF_Config_t *k_cfg, *u_cfg;
    813 	RF_Raid_t *raidPtr;
    814 	RF_RaidDisk_t *diskPtr;
    815 	RF_AccTotals_t *totals;
    816 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    817 	u_char *specific_buf;
    818 	int retcode = 0;
    819 	int column;
    820 	int raidid;
    821 	struct rf_recon_req *rrcopy, *rr;
    822 	RF_ComponentLabel_t *clabel;
    823 	RF_ComponentLabel_t ci_label;
    824 	RF_ComponentLabel_t **clabel_ptr;
    825 	RF_SingleComponent_t *sparePtr,*componentPtr;
    826 	RF_SingleComponent_t hot_spare;
    827 	RF_SingleComponent_t component;
    828 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    829 	int i, j, d;
    830 #ifdef __HAVE_OLD_DISKLABEL
    831 	struct disklabel newlabel;
    832 #endif
    833 
    834 	if (unit >= numraid)
    835 		return (ENXIO);
    836 	rs = &raid_softc[unit];
    837 	raidPtr = raidPtrs[unit];
    838 
    839 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    840 		(int) DISKPART(dev), (int) unit, (int) cmd));
    841 
    842 	/* Must be open for writes for these commands... */
    843 	switch (cmd) {
    844 	case DIOCSDINFO:
    845 	case DIOCWDINFO:
    846 #ifdef __HAVE_OLD_DISKLABEL
    847 	case ODIOCWDINFO:
    848 	case ODIOCSDINFO:
    849 #endif
    850 	case DIOCWLABEL:
    851 		if ((flag & FWRITE) == 0)
    852 			return (EBADF);
    853 	}
    854 
    855 	/* Must be initialized for these... */
    856 	switch (cmd) {
    857 	case DIOCGDINFO:
    858 	case DIOCSDINFO:
    859 	case DIOCWDINFO:
    860 #ifdef __HAVE_OLD_DISKLABEL
    861 	case ODIOCGDINFO:
    862 	case ODIOCWDINFO:
    863 	case ODIOCSDINFO:
    864 	case ODIOCGDEFLABEL:
    865 #endif
    866 	case DIOCGPART:
    867 	case DIOCWLABEL:
    868 	case DIOCGDEFLABEL:
    869 	case RAIDFRAME_SHUTDOWN:
    870 	case RAIDFRAME_REWRITEPARITY:
    871 	case RAIDFRAME_GET_INFO:
    872 	case RAIDFRAME_RESET_ACCTOTALS:
    873 	case RAIDFRAME_GET_ACCTOTALS:
    874 	case RAIDFRAME_KEEP_ACCTOTALS:
    875 	case RAIDFRAME_GET_SIZE:
    876 	case RAIDFRAME_FAIL_DISK:
    877 	case RAIDFRAME_COPYBACK:
    878 	case RAIDFRAME_CHECK_RECON_STATUS:
    879 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
    880 	case RAIDFRAME_GET_COMPONENT_LABEL:
    881 	case RAIDFRAME_SET_COMPONENT_LABEL:
    882 	case RAIDFRAME_ADD_HOT_SPARE:
    883 	case RAIDFRAME_REMOVE_HOT_SPARE:
    884 	case RAIDFRAME_INIT_LABELS:
    885 	case RAIDFRAME_REBUILD_IN_PLACE:
    886 	case RAIDFRAME_CHECK_PARITY:
    887 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    888 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
    889 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    890 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
    891 	case RAIDFRAME_SET_AUTOCONFIG:
    892 	case RAIDFRAME_SET_ROOT:
    893 	case RAIDFRAME_DELETE_COMPONENT:
    894 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
    895 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    896 			return (ENXIO);
    897 	}
    898 
    899 	switch (cmd) {
    900 
    901 		/* configure the system */
    902 	case RAIDFRAME_CONFIGURE:
    903 
    904 		if (raidPtr->valid) {
    905 			/* There is a valid RAID set running on this unit! */
    906 			printf("raid%d: Device already configured!\n",unit);
    907 			return(EINVAL);
    908 		}
    909 
    910 		/* copy-in the configuration information */
    911 		/* data points to a pointer to the configuration structure */
    912 
    913 		u_cfg = *((RF_Config_t **) data);
    914 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    915 		if (k_cfg == NULL) {
    916 			return (ENOMEM);
    917 		}
    918 		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
    919 		if (retcode) {
    920 			RF_Free(k_cfg, sizeof(RF_Config_t));
    921 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    922 				retcode));
    923 			return (retcode);
    924 		}
    925 		/* allocate a buffer for the layout-specific data, and copy it
    926 		 * in */
    927 		if (k_cfg->layoutSpecificSize) {
    928 			if (k_cfg->layoutSpecificSize > 10000) {
    929 				/* sanity check */
    930 				RF_Free(k_cfg, sizeof(RF_Config_t));
    931 				return (EINVAL);
    932 			}
    933 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    934 			    (u_char *));
    935 			if (specific_buf == NULL) {
    936 				RF_Free(k_cfg, sizeof(RF_Config_t));
    937 				return (ENOMEM);
    938 			}
    939 			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
    940 			    k_cfg->layoutSpecificSize);
    941 			if (retcode) {
    942 				RF_Free(k_cfg, sizeof(RF_Config_t));
    943 				RF_Free(specific_buf,
    944 					k_cfg->layoutSpecificSize);
    945 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    946 					retcode));
    947 				return (retcode);
    948 			}
    949 		} else
    950 			specific_buf = NULL;
    951 		k_cfg->layoutSpecific = specific_buf;
    952 
    953 		/* should do some kind of sanity check on the configuration.
    954 		 * Store the sum of all the bytes in the last byte? */
    955 
    956 		/* configure the system */
    957 
    958 		/*
    959 		 * Clear the entire RAID descriptor, just to make sure
    960 		 *  there is no stale data left in the case of a
    961 		 *  reconfiguration
    962 		 */
    963 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
    964 		raidPtr->raidid = unit;
    965 
    966 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    967 
    968 		if (retcode == 0) {
    969 
    970 			/* allow this many simultaneous IO's to
    971 			   this RAID device */
    972 			raidPtr->openings = RAIDOUTSTANDING;
    973 
    974 			raidinit(raidPtr);
    975 			rf_markalldirty(raidPtr);
    976 		}
    977 		/* free the buffers.  No return code here. */
    978 		if (k_cfg->layoutSpecificSize) {
    979 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    980 		}
    981 		RF_Free(k_cfg, sizeof(RF_Config_t));
    982 
    983 		return (retcode);
    984 
    985 		/* shutdown the system */
    986 	case RAIDFRAME_SHUTDOWN:
    987 
    988 		if ((error = raidlock(rs)) != 0)
    989 			return (error);
    990 
    991 		/*
    992 		 * If somebody has a partition mounted, we shouldn't
    993 		 * shutdown.
    994 		 */
    995 
    996 		part = DISKPART(dev);
    997 		pmask = (1 << part);
    998 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    999 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
   1000 			(rs->sc_dkdev.dk_copenmask & pmask))) {
   1001 			raidunlock(rs);
   1002 			return (EBUSY);
   1003 		}
   1004 
   1005 		retcode = rf_Shutdown(raidPtr);
   1006 
   1007 		/* It's no longer initialized... */
   1008 		rs->sc_flags &= ~RAIDF_INITED;
   1009 
   1010 		/* Detach the disk. */
   1011 		disk_detach(&rs->sc_dkdev);
   1012 
   1013 		raidunlock(rs);
   1014 
   1015 		return (retcode);
   1016 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1017 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1018 		/* need to read the component label for the disk indicated
   1019 		   by row,column in clabel */
   1020 
   1021 		/* For practice, let's get it directly fromdisk, rather
   1022 		   than from the in-core copy */
   1023 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
   1024 			   (RF_ComponentLabel_t *));
   1025 		if (clabel == NULL)
   1026 			return (ENOMEM);
   1027 
   1028 		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
   1029 
   1030 		retcode = copyin( *clabel_ptr, clabel,
   1031 				  sizeof(RF_ComponentLabel_t));
   1032 
   1033 		if (retcode) {
   1034 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1035 			return(retcode);
   1036 		}
   1037 
   1038 		clabel->row = 0; /* Don't allow looking at anything else.*/
   1039 
   1040 		column = clabel->column;
   1041 
   1042 		if ((column < 0) || (column >= raidPtr->numCol +
   1043 				     raidPtr->numSpare)) {
   1044 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1045 			return(EINVAL);
   1046 		}
   1047 
   1048 		raidread_component_label(raidPtr->Disks[column].dev,
   1049 				raidPtr->raid_cinfo[column].ci_vp,
   1050 				clabel );
   1051 
   1052 		retcode = copyout(clabel, *clabel_ptr,
   1053 				  sizeof(RF_ComponentLabel_t));
   1054 		RF_Free(clabel, sizeof(RF_ComponentLabel_t));
   1055 		return (retcode);
   1056 
   1057 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1058 		clabel = (RF_ComponentLabel_t *) data;
   1059 
   1060 		/* XXX check the label for valid stuff... */
   1061 		/* Note that some things *should not* get modified --
   1062 		   the user should be re-initing the labels instead of
   1063 		   trying to patch things.
   1064 		   */
   1065 
   1066 		raidid = raidPtr->raidid;
   1067 		printf("raid%d: Got component label:\n", raidid);
   1068 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1069 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1070 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1071 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1072 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1073 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1074 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1075 
   1076 		clabel->row = 0;
   1077 		column = clabel->column;
   1078 
   1079 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1080 			return(EINVAL);
   1081 		}
   1082 
   1083 		/* XXX this isn't allowed to do anything for now :-) */
   1084 
   1085 		/* XXX and before it is, we need to fill in the rest
   1086 		   of the fields!?!?!?! */
   1087 #if 0
   1088 		raidwrite_component_label(
   1089                             raidPtr->Disks[column].dev,
   1090 			    raidPtr->raid_cinfo[column].ci_vp,
   1091 			    clabel );
   1092 #endif
   1093 		return (0);
   1094 
   1095 	case RAIDFRAME_INIT_LABELS:
   1096 		clabel = (RF_ComponentLabel_t *) data;
   1097 		/*
   1098 		   we only want the serial number from
   1099 		   the above.  We get all the rest of the information
   1100 		   from the config that was used to create this RAID
   1101 		   set.
   1102 		   */
   1103 
   1104 		raidPtr->serial_number = clabel->serial_number;
   1105 
   1106 		raid_init_component_label(raidPtr, &ci_label);
   1107 		ci_label.serial_number = clabel->serial_number;
   1108 		ci_label.row = 0; /* we dont' pretend to support more */
   1109 
   1110 		for(column=0;column<raidPtr->numCol;column++) {
   1111 			diskPtr = &raidPtr->Disks[column];
   1112 			if (!RF_DEAD_DISK(diskPtr->status)) {
   1113 				ci_label.partitionSize = diskPtr->partitionSize;
   1114 				ci_label.column = column;
   1115 				raidwrite_component_label(
   1116 							  raidPtr->Disks[column].dev,
   1117 							  raidPtr->raid_cinfo[column].ci_vp,
   1118 							  &ci_label );
   1119 			}
   1120 		}
   1121 
   1122 		return (retcode);
   1123 	case RAIDFRAME_SET_AUTOCONFIG:
   1124 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1125 		printf("raid%d: New autoconfig value is: %d\n",
   1126 		       raidPtr->raidid, d);
   1127 		*(int *) data = d;
   1128 		return (retcode);
   1129 
   1130 	case RAIDFRAME_SET_ROOT:
   1131 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1132 		printf("raid%d: New rootpartition value is: %d\n",
   1133 		       raidPtr->raidid, d);
   1134 		*(int *) data = d;
   1135 		return (retcode);
   1136 
   1137 		/* initialize all parity */
   1138 	case RAIDFRAME_REWRITEPARITY:
   1139 
   1140 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1141 			/* Parity for RAID 0 is trivially correct */
   1142 			raidPtr->parity_good = RF_RAID_CLEAN;
   1143 			return(0);
   1144 		}
   1145 
   1146 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1147 			/* Re-write is already in progress! */
   1148 			return(EINVAL);
   1149 		}
   1150 
   1151 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1152 					   rf_RewriteParityThread,
   1153 					   raidPtr,"raid_parity");
   1154 		return (retcode);
   1155 
   1156 
   1157 	case RAIDFRAME_ADD_HOT_SPARE:
   1158 		sparePtr = (RF_SingleComponent_t *) data;
   1159 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1160 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1161 		return(retcode);
   1162 
   1163 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1164 		return(retcode);
   1165 
   1166 	case RAIDFRAME_DELETE_COMPONENT:
   1167 		componentPtr = (RF_SingleComponent_t *)data;
   1168 		memcpy( &component, componentPtr,
   1169 			sizeof(RF_SingleComponent_t));
   1170 		retcode = rf_delete_component(raidPtr, &component);
   1171 		return(retcode);
   1172 
   1173 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1174 		componentPtr = (RF_SingleComponent_t *)data;
   1175 		memcpy( &component, componentPtr,
   1176 			sizeof(RF_SingleComponent_t));
   1177 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1178 		return(retcode);
   1179 
   1180 	case RAIDFRAME_REBUILD_IN_PLACE:
   1181 
   1182 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1183 			/* Can't do this on a RAID 0!! */
   1184 			return(EINVAL);
   1185 		}
   1186 
   1187 		if (raidPtr->recon_in_progress == 1) {
   1188 			/* a reconstruct is already in progress! */
   1189 			return(EINVAL);
   1190 		}
   1191 
   1192 		componentPtr = (RF_SingleComponent_t *) data;
   1193 		memcpy( &component, componentPtr,
   1194 			sizeof(RF_SingleComponent_t));
   1195 		component.row = 0; /* we don't support any more */
   1196 		column = component.column;
   1197 
   1198 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1199 			return(EINVAL);
   1200 		}
   1201 
   1202 		RF_LOCK_MUTEX(raidPtr->mutex);
   1203 		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
   1204 		    (raidPtr->numFailures > 0)) {
   1205 			/* XXX 0 above shouldn't be constant!!! */
   1206 			/* some component other than this has failed.
   1207 			   Let's not make things worse than they already
   1208 			   are... */
   1209 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1210 			       raidPtr->raidid);
   1211 			printf("raid%d:     Col: %d   Too many failures.\n",
   1212 			       raidPtr->raidid, column);
   1213 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1214 			return (EINVAL);
   1215 		}
   1216 		if (raidPtr->Disks[column].status ==
   1217 		    rf_ds_reconstructing) {
   1218 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1219 			       raidPtr->raidid);
   1220 			printf("raid%d:    Col: %d   Reconstruction already occuring!\n", raidPtr->raidid, column);
   1221 
   1222 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1223 			return (EINVAL);
   1224 		}
   1225 		if (raidPtr->Disks[column].status == rf_ds_spared) {
   1226 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1227 			return (EINVAL);
   1228 		}
   1229 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1230 
   1231 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1232 		if (rrcopy == NULL)
   1233 			return(ENOMEM);
   1234 
   1235 		rrcopy->raidPtr = (void *) raidPtr;
   1236 		rrcopy->col = column;
   1237 
   1238 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1239 					   rf_ReconstructInPlaceThread,
   1240 					   rrcopy,"raid_reconip");
   1241 		return(retcode);
   1242 
   1243 	case RAIDFRAME_GET_INFO:
   1244 		if (!raidPtr->valid)
   1245 			return (ENODEV);
   1246 		ucfgp = (RF_DeviceConfig_t **) data;
   1247 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1248 			  (RF_DeviceConfig_t *));
   1249 		if (d_cfg == NULL)
   1250 			return (ENOMEM);
   1251 		memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
   1252 		d_cfg->rows = 1; /* there is only 1 row now */
   1253 		d_cfg->cols = raidPtr->numCol;
   1254 		d_cfg->ndevs = raidPtr->numCol;
   1255 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1256 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1257 			return (ENOMEM);
   1258 		}
   1259 		d_cfg->nspares = raidPtr->numSpare;
   1260 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1261 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1262 			return (ENOMEM);
   1263 		}
   1264 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1265 		d = 0;
   1266 		for (j = 0; j < d_cfg->cols; j++) {
   1267 			d_cfg->devs[d] = raidPtr->Disks[j];
   1268 			d++;
   1269 		}
   1270 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1271 			d_cfg->spares[i] = raidPtr->Disks[j];
   1272 		}
   1273 		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
   1274 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1275 
   1276 		return (retcode);
   1277 
   1278 	case RAIDFRAME_CHECK_PARITY:
   1279 		*(int *) data = raidPtr->parity_good;
   1280 		return (0);
   1281 
   1282 	case RAIDFRAME_RESET_ACCTOTALS:
   1283 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1284 		return (0);
   1285 
   1286 	case RAIDFRAME_GET_ACCTOTALS:
   1287 		totals = (RF_AccTotals_t *) data;
   1288 		*totals = raidPtr->acc_totals;
   1289 		return (0);
   1290 
   1291 	case RAIDFRAME_KEEP_ACCTOTALS:
   1292 		raidPtr->keep_acc_totals = *(int *)data;
   1293 		return (0);
   1294 
   1295 	case RAIDFRAME_GET_SIZE:
   1296 		*(int *) data = raidPtr->totalSectors;
   1297 		return (0);
   1298 
   1299 		/* fail a disk & optionally start reconstruction */
   1300 	case RAIDFRAME_FAIL_DISK:
   1301 
   1302 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1303 			/* Can't do this on a RAID 0!! */
   1304 			return(EINVAL);
   1305 		}
   1306 
   1307 		rr = (struct rf_recon_req *) data;
   1308 		rr->row = 0;
   1309 		if (rr->col < 0 || rr->col >= raidPtr->numCol)
   1310 			return (EINVAL);
   1311 
   1312 
   1313 		RF_LOCK_MUTEX(raidPtr->mutex);
   1314 		if ((raidPtr->Disks[rr->col].status ==
   1315 		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
   1316 			/* some other component has failed.  Let's not make
   1317 			   things worse. XXX wrong for RAID6 */
   1318 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1319 			return (EINVAL);
   1320 		}
   1321 		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
   1322 			/* Can't fail a spared disk! */
   1323 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1324 			return (EINVAL);
   1325 		}
   1326 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1327 
   1328 		/* make a copy of the recon request so that we don't rely on
   1329 		 * the user's buffer */
   1330 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1331 		if (rrcopy == NULL)
   1332 			return(ENOMEM);
   1333 		memcpy(rrcopy, rr, sizeof(*rr));
   1334 		rrcopy->raidPtr = (void *) raidPtr;
   1335 
   1336 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1337 					   rf_ReconThread,
   1338 					   rrcopy,"raid_recon");
   1339 		return (0);
   1340 
   1341 		/* invoke a copyback operation after recon on whatever disk
   1342 		 * needs it, if any */
   1343 	case RAIDFRAME_COPYBACK:
   1344 
   1345 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1346 			/* This makes no sense on a RAID 0!! */
   1347 			return(EINVAL);
   1348 		}
   1349 
   1350 		if (raidPtr->copyback_in_progress == 1) {
   1351 			/* Copyback is already in progress! */
   1352 			return(EINVAL);
   1353 		}
   1354 
   1355 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1356 					   rf_CopybackThread,
   1357 					   raidPtr,"raid_copyback");
   1358 		return (retcode);
   1359 
   1360 		/* return the percentage completion of reconstruction */
   1361 	case RAIDFRAME_CHECK_RECON_STATUS:
   1362 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1363 			/* This makes no sense on a RAID 0, so tell the
   1364 			   user it's done. */
   1365 			*(int *) data = 100;
   1366 			return(0);
   1367 		}
   1368 		if (raidPtr->status != rf_rs_reconstructing)
   1369 			*(int *) data = 100;
   1370 		else
   1371 			*(int *) data = raidPtr->reconControl->percentComplete;
   1372 		return (0);
   1373 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1374 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1375 		if (raidPtr->status != rf_rs_reconstructing) {
   1376 			progressInfo.remaining = 0;
   1377 			progressInfo.completed = 100;
   1378 			progressInfo.total = 100;
   1379 		} else {
   1380 			progressInfo.total =
   1381 				raidPtr->reconControl->numRUsTotal;
   1382 			progressInfo.completed =
   1383 				raidPtr->reconControl->numRUsComplete;
   1384 			progressInfo.remaining = progressInfo.total -
   1385 				progressInfo.completed;
   1386 		}
   1387 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1388 				  sizeof(RF_ProgressInfo_t));
   1389 		return (retcode);
   1390 
   1391 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1392 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1393 			/* This makes no sense on a RAID 0, so tell the
   1394 			   user it's done. */
   1395 			*(int *) data = 100;
   1396 			return(0);
   1397 		}
   1398 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1399 			*(int *) data = 100 *
   1400 				raidPtr->parity_rewrite_stripes_done /
   1401 				raidPtr->Layout.numStripe;
   1402 		} else {
   1403 			*(int *) data = 100;
   1404 		}
   1405 		return (0);
   1406 
   1407 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1408 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1409 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1410 			progressInfo.total = raidPtr->Layout.numStripe;
   1411 			progressInfo.completed =
   1412 				raidPtr->parity_rewrite_stripes_done;
   1413 			progressInfo.remaining = progressInfo.total -
   1414 				progressInfo.completed;
   1415 		} else {
   1416 			progressInfo.remaining = 0;
   1417 			progressInfo.completed = 100;
   1418 			progressInfo.total = 100;
   1419 		}
   1420 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1421 				  sizeof(RF_ProgressInfo_t));
   1422 		return (retcode);
   1423 
   1424 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1425 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1426 			/* This makes no sense on a RAID 0 */
   1427 			*(int *) data = 100;
   1428 			return(0);
   1429 		}
   1430 		if (raidPtr->copyback_in_progress == 1) {
   1431 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1432 				raidPtr->Layout.numStripe;
   1433 		} else {
   1434 			*(int *) data = 100;
   1435 		}
   1436 		return (0);
   1437 
   1438 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1439 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1440 		if (raidPtr->copyback_in_progress == 1) {
   1441 			progressInfo.total = raidPtr->Layout.numStripe;
   1442 			progressInfo.completed =
   1443 				raidPtr->copyback_stripes_done;
   1444 			progressInfo.remaining = progressInfo.total -
   1445 				progressInfo.completed;
   1446 		} else {
   1447 			progressInfo.remaining = 0;
   1448 			progressInfo.completed = 100;
   1449 			progressInfo.total = 100;
   1450 		}
   1451 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1452 				  sizeof(RF_ProgressInfo_t));
   1453 		return (retcode);
   1454 
   1455 		/* the sparetable daemon calls this to wait for the kernel to
   1456 		 * need a spare table. this ioctl does not return until a
   1457 		 * spare table is needed. XXX -- calling mpsleep here in the
   1458 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1459 		 * -- I should either compute the spare table in the kernel,
   1460 		 * or have a different -- XXX XXX -- interface (a different
   1461 		 * character device) for delivering the table     -- XXX */
   1462 #if 0
   1463 	case RAIDFRAME_SPARET_WAIT:
   1464 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1465 		while (!rf_sparet_wait_queue)
   1466 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1467 		waitreq = rf_sparet_wait_queue;
   1468 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1469 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1470 
   1471 		/* structure assignment */
   1472 		*((RF_SparetWait_t *) data) = *waitreq;
   1473 
   1474 		RF_Free(waitreq, sizeof(*waitreq));
   1475 		return (0);
   1476 
   1477 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1478 		 * code in it that will cause the dameon to exit */
   1479 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1480 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1481 		waitreq->fcol = -1;
   1482 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1483 		waitreq->next = rf_sparet_wait_queue;
   1484 		rf_sparet_wait_queue = waitreq;
   1485 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1486 		wakeup(&rf_sparet_wait_queue);
   1487 		return (0);
   1488 
   1489 		/* used by the spare table daemon to deliver a spare table
   1490 		 * into the kernel */
   1491 	case RAIDFRAME_SEND_SPARET:
   1492 
   1493 		/* install the spare table */
   1494 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1495 
   1496 		/* respond to the requestor.  the return status of the spare
   1497 		 * table installation is passed in the "fcol" field */
   1498 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1499 		waitreq->fcol = retcode;
   1500 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1501 		waitreq->next = rf_sparet_resp_queue;
   1502 		rf_sparet_resp_queue = waitreq;
   1503 		wakeup(&rf_sparet_resp_queue);
   1504 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1505 
   1506 		return (retcode);
   1507 #endif
   1508 
   1509 	default:
   1510 		break; /* fall through to the os-specific code below */
   1511 
   1512 	}
   1513 
   1514 	if (!raidPtr->valid)
   1515 		return (EINVAL);
   1516 
   1517 	/*
   1518 	 * Add support for "regular" device ioctls here.
   1519 	 */
   1520 
   1521 	switch (cmd) {
   1522 	case DIOCGDINFO:
   1523 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1524 		break;
   1525 #ifdef __HAVE_OLD_DISKLABEL
   1526 	case ODIOCGDINFO:
   1527 		newlabel = *(rs->sc_dkdev.dk_label);
   1528 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1529 			return ENOTTY;
   1530 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1531 		break;
   1532 #endif
   1533 
   1534 	case DIOCGPART:
   1535 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1536 		((struct partinfo *) data)->part =
   1537 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1538 		break;
   1539 
   1540 	case DIOCWDINFO:
   1541 	case DIOCSDINFO:
   1542 #ifdef __HAVE_OLD_DISKLABEL
   1543 	case ODIOCWDINFO:
   1544 	case ODIOCSDINFO:
   1545 #endif
   1546 	{
   1547 		struct disklabel *lp;
   1548 #ifdef __HAVE_OLD_DISKLABEL
   1549 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1550 			memset(&newlabel, 0, sizeof newlabel);
   1551 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1552 			lp = &newlabel;
   1553 		} else
   1554 #endif
   1555 		lp = (struct disklabel *)data;
   1556 
   1557 		if ((error = raidlock(rs)) != 0)
   1558 			return (error);
   1559 
   1560 		rs->sc_flags |= RAIDF_LABELLING;
   1561 
   1562 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1563 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1564 		if (error == 0) {
   1565 			if (cmd == DIOCWDINFO
   1566 #ifdef __HAVE_OLD_DISKLABEL
   1567 			    || cmd == ODIOCWDINFO
   1568 #endif
   1569 			   )
   1570 				error = writedisklabel(RAIDLABELDEV(dev),
   1571 				    raidstrategy, rs->sc_dkdev.dk_label,
   1572 				    rs->sc_dkdev.dk_cpulabel);
   1573 		}
   1574 		rs->sc_flags &= ~RAIDF_LABELLING;
   1575 
   1576 		raidunlock(rs);
   1577 
   1578 		if (error)
   1579 			return (error);
   1580 		break;
   1581 	}
   1582 
   1583 	case DIOCWLABEL:
   1584 		if (*(int *) data != 0)
   1585 			rs->sc_flags |= RAIDF_WLABEL;
   1586 		else
   1587 			rs->sc_flags &= ~RAIDF_WLABEL;
   1588 		break;
   1589 
   1590 	case DIOCGDEFLABEL:
   1591 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1592 		break;
   1593 
   1594 #ifdef __HAVE_OLD_DISKLABEL
   1595 	case ODIOCGDEFLABEL:
   1596 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1597 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1598 			return ENOTTY;
   1599 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1600 		break;
   1601 #endif
   1602 
   1603 	default:
   1604 		retcode = ENOTTY;
   1605 	}
   1606 	return (retcode);
   1607 
   1608 }
   1609 
   1610 
   1611 /* raidinit -- complete the rest of the initialization for the
   1612    RAIDframe device.  */
   1613 
   1614 
   1615 static void
   1616 raidinit(raidPtr)
   1617 	RF_Raid_t *raidPtr;
   1618 {
   1619 	struct raid_softc *rs;
   1620 	int     unit;
   1621 
   1622 	unit = raidPtr->raidid;
   1623 
   1624 	rs = &raid_softc[unit];
   1625 
   1626 	/* XXX should check return code first... */
   1627 	rs->sc_flags |= RAIDF_INITED;
   1628 
   1629 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1630 
   1631 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1632 
   1633 	/* disk_attach actually creates space for the CPU disklabel, among
   1634 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1635 	 * with disklabels. */
   1636 
   1637 	disk_attach(&rs->sc_dkdev);
   1638 
   1639 	/* XXX There may be a weird interaction here between this, and
   1640 	 * protectedSectors, as used in RAIDframe.  */
   1641 
   1642 	rs->sc_size = raidPtr->totalSectors;
   1643 
   1644 }
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/*
 * rf_GetSpareTableFromDaemon -- wake up the daemon & tell it to get us a
 * spare table.
 *
 * The request is pushed onto the head of rf_sparet_wait_queue, then the
 * caller sleeps until an entry shows up on rf_sparet_resp_queue.  The
 * head of the response queue is removed, its "fcol" field is taken as the
 * return value, and the response entry is freed.
 *
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	RF_SparetWait_t *resp;
	int     status;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);

	/* queue our request at the head of the wait list and signal it */
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * Wait for a response to be queued.
	 * NOTE(review): an earlier comment here said "mpsleep unlocks the
	 * mutex", but tsleep() is called without the mutex -- confirm
	 * whether sleeping with rf_sparet_wait_mutex held is intended.
	 */
	while (rf_sparet_resp_queue == NULL)
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);

	/* pop the response off the head of the response list */
	resp = rf_sparet_resp_queue;
	rf_sparet_resp_queue = resp->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* the response is not the same entry as the request passed in */
	status = resp->fcol;
	RF_Free(resp, sizeof(*resp));
	return (status);
}
#endif
   1681 
   1682 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1683  * bp & passes it down.
   1684  * any calls originating in the kernel must use non-blocking I/O
   1685  * do some extra sanity checking to return "appropriate" error values for
   1686  * certain conditions (to make some standard utilities work)
   1687  *
   1688  * Formerly known as: rf_DoAccessKernel
   1689  */
   1690 void
   1691 raidstart(raidPtr)
   1692 	RF_Raid_t *raidPtr;
   1693 {
   1694 	RF_SectorCount_t num_blocks, pb, sum;
   1695 	RF_RaidAddr_t raid_addr;
   1696 	struct partition *pp;
   1697 	daddr_t blocknum;
   1698 	int     unit;
   1699 	struct raid_softc *rs;
   1700 	int     do_async;
   1701 	struct buf *bp;
   1702 
   1703 	unit = raidPtr->raidid;
   1704 	rs = &raid_softc[unit];
   1705 
   1706 	/* quick check to see if anything has died recently */
   1707 	RF_LOCK_MUTEX(raidPtr->mutex);
   1708 	if (raidPtr->numNewFailures > 0) {
   1709 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1710 		rf_update_component_labels(raidPtr,
   1711 					   RF_NORMAL_COMPONENT_UPDATE);
   1712 		RF_LOCK_MUTEX(raidPtr->mutex);
   1713 		raidPtr->numNewFailures--;
   1714 	}
   1715 
   1716 	/* Check to see if we're at the limit... */
   1717 	while (raidPtr->openings > 0) {
   1718 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1719 
   1720 		/* get the next item, if any, from the queue */
   1721 		if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
   1722 			/* nothing more to do */
   1723 			return;
   1724 		}
   1725 
   1726 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1727 		 * partition.. Need to make it absolute to the underlying
   1728 		 * device.. */
   1729 
   1730 		blocknum = bp->b_blkno;
   1731 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1732 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1733 			blocknum += pp->p_offset;
   1734 		}
   1735 
   1736 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1737 			    (int) blocknum));
   1738 
   1739 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1740 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1741 
   1742 		/* *THIS* is where we adjust what block we're going to...
   1743 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1744 		raid_addr = blocknum;
   1745 
   1746 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1747 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1748 		sum = raid_addr + num_blocks + pb;
   1749 		if (1 || rf_debugKernelAccess) {
   1750 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1751 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1752 				    (int) pb, (int) bp->b_resid));
   1753 		}
   1754 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1755 		    || (sum < num_blocks) || (sum < pb)) {
   1756 			bp->b_error = ENOSPC;
   1757 			bp->b_flags |= B_ERROR;
   1758 			bp->b_resid = bp->b_bcount;
   1759 			biodone(bp);
   1760 			RF_LOCK_MUTEX(raidPtr->mutex);
   1761 			continue;
   1762 		}
   1763 		/*
   1764 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1765 		 */
   1766 
   1767 		if (bp->b_bcount & raidPtr->sectorMask) {
   1768 			bp->b_error = EINVAL;
   1769 			bp->b_flags |= B_ERROR;
   1770 			bp->b_resid = bp->b_bcount;
   1771 			biodone(bp);
   1772 			RF_LOCK_MUTEX(raidPtr->mutex);
   1773 			continue;
   1774 
   1775 		}
   1776 		db1_printf(("Calling DoAccess..\n"));
   1777 
   1778 
   1779 		RF_LOCK_MUTEX(raidPtr->mutex);
   1780 		raidPtr->openings--;
   1781 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1782 
   1783 		/*
   1784 		 * Everything is async.
   1785 		 */
   1786 		do_async = 1;
   1787 
   1788 		disk_busy(&rs->sc_dkdev);
   1789 
   1790 		/* XXX we're still at splbio() here... do we *really*
   1791 		   need to be? */
   1792 
   1793 		/* don't ever condition on bp->b_flags & B_WRITE.
   1794 		 * always condition on B_READ instead */
   1795 
   1796 		bp->b_error = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1797 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1798 				      do_async, raid_addr, num_blocks,
   1799 				      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   1800 
   1801 		if (bp->b_error) {
   1802 			bp->b_flags |= B_ERROR;
   1803 		}
   1804 
   1805 		RF_LOCK_MUTEX(raidPtr->mutex);
   1806 	}
   1807 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1808 }
   1809 
   1810 
   1811 
   1812 
   1813 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1814 
   1815 int
   1816 rf_DispatchKernelIO(queue, req)
   1817 	RF_DiskQueue_t *queue;
   1818 	RF_DiskQueueData_t *req;
   1819 {
   1820 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1821 	struct buf *bp;
   1822 	struct raidbuf *raidbp = NULL;
   1823 
   1824 	req->queue = queue;
   1825 
   1826 #if DIAGNOSTIC
   1827 	if (queue->raidPtr->raidid >= numraid) {
   1828 		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
   1829 		    numraid);
   1830 		panic("Invalid Unit number in rf_DispatchKernelIO");
   1831 	}
   1832 #endif
   1833 
   1834 	bp = req->bp;
   1835 #if 1
   1836 	/* XXX when there is a physical disk failure, someone is passing us a
   1837 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1838 	 * without taking a performance hit... (not sure where the real bug
   1839 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1840 
   1841 	if (bp->b_flags & B_ERROR) {
   1842 		bp->b_flags &= ~B_ERROR;
   1843 	}
   1844 	if (bp->b_error != 0) {
   1845 		bp->b_error = 0;
   1846 	}
   1847 #endif
   1848 	raidbp = pool_get(&raidframe_cbufpool, PR_NOWAIT);
   1849 	if (raidbp == NULL) {
   1850 		bp->b_flags |= B_ERROR;
   1851 		bp->b_error = ENOMEM;
   1852 		return (ENOMEM);
   1853 	}
   1854 	BUF_INIT(&raidbp->rf_buf);
   1855 
   1856 	/*
   1857 	 * context for raidiodone
   1858 	 */
   1859 	raidbp->rf_obp = bp;
   1860 	raidbp->req = req;
   1861 
   1862 	switch (req->type) {
   1863 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1864 		/* XXX need to do something extra here.. */
   1865 		/* I'm leaving this in, as I've never actually seen it used,
   1866 		 * and I'd like folks to report it... GO */
   1867 		printf(("WAKEUP CALLED\n"));
   1868 		queue->numOutstanding++;
   1869 
   1870 		/* XXX need to glue the original buffer into this??  */
   1871 
   1872 		KernelWakeupFunc(&raidbp->rf_buf);
   1873 		break;
   1874 
   1875 	case RF_IO_TYPE_READ:
   1876 	case RF_IO_TYPE_WRITE:
   1877 
   1878 		if (req->tracerec) {
   1879 			RF_ETIMER_START(req->tracerec->timer);
   1880 		}
   1881 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1882 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1883 		    req->sectorOffset, req->numSector,
   1884 		    req->buf, KernelWakeupFunc, (void *) req,
   1885 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1886 
   1887 		if (rf_debugKernelAccess) {
   1888 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1889 				(long) bp->b_blkno));
   1890 		}
   1891 		queue->numOutstanding++;
   1892 		queue->last_deq_sector = req->sectorOffset;
   1893 		/* acc wouldn't have been let in if there were any pending
   1894 		 * reqs at any other priority */
   1895 		queue->curPriority = req->priority;
   1896 
   1897 		db1_printf(("Going for %c to unit %d col %d\n",
   1898 			    req->type, queue->raidPtr->raidid,
   1899 			    queue->col));
   1900 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1901 			(int) req->sectorOffset, (int) req->numSector,
   1902 			(int) (req->numSector <<
   1903 			    queue->raidPtr->logBytesPerSector),
   1904 			(int) queue->raidPtr->logBytesPerSector));
   1905 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1906 			raidbp->rf_buf.b_vp->v_numoutput++;
   1907 		}
   1908 		VOP_STRATEGY(&raidbp->rf_buf);
   1909 
   1910 		break;
   1911 
   1912 	default:
   1913 		panic("bad req->type in rf_DispatchKernelIO");
   1914 	}
   1915 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1916 
   1917 	return (0);
   1918 }
   1919 /* this is the callback function associated with a I/O invoked from
   1920    kernel code.
   1921  */
   1922 static void
   1923 KernelWakeupFunc(vbp)
   1924 	struct buf *vbp;
   1925 {
   1926 	RF_DiskQueueData_t *req = NULL;
   1927 	RF_DiskQueue_t *queue;
   1928 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1929 	struct buf *bp;
   1930 	int s;
   1931 
   1932 	s = splbio();
   1933 	db1_printf(("recovering the request queue:\n"));
   1934 	req = raidbp->req;
   1935 
   1936 	bp = raidbp->rf_obp;
   1937 
   1938 	queue = (RF_DiskQueue_t *) req->queue;
   1939 
   1940 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1941 		bp->b_flags |= B_ERROR;
   1942 		bp->b_error = raidbp->rf_buf.b_error ?
   1943 		    raidbp->rf_buf.b_error : EIO;
   1944 	}
   1945 
   1946 	/* XXX methinks this could be wrong... */
   1947 #if 1
   1948 	bp->b_resid = raidbp->rf_buf.b_resid;
   1949 #endif
   1950 
   1951 	if (req->tracerec) {
   1952 		RF_ETIMER_STOP(req->tracerec->timer);
   1953 		RF_ETIMER_EVAL(req->tracerec->timer);
   1954 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1955 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1956 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1957 		req->tracerec->num_phys_ios++;
   1958 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1959 	}
   1960 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1961 
   1962 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1963 	 * ballistic, and mark the component as hosed... */
   1964 
   1965 	if (bp->b_flags & B_ERROR) {
   1966 		/* Mark the disk as dead */
   1967 		/* but only mark it once... */
   1968 		if (queue->raidPtr->Disks[queue->col].status ==
   1969 		    rf_ds_optimal) {
   1970 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1971 			       queue->raidPtr->raidid,
   1972 			       queue->raidPtr->Disks[queue->col].devname);
   1973 			queue->raidPtr->Disks[queue->col].status =
   1974 			    rf_ds_failed;
   1975 			queue->raidPtr->status = rf_rs_degraded;
   1976 			queue->raidPtr->numFailures++;
   1977 			queue->raidPtr->numNewFailures++;
   1978 		} else {	/* Disk is already dead... */
   1979 			/* printf("Disk already marked as dead!\n"); */
   1980 		}
   1981 
   1982 	}
   1983 
   1984 	pool_put(&raidframe_cbufpool, raidbp);
   1985 
   1986 	/* Fill in the error value */
   1987 
   1988 	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
   1989 
   1990 	simple_lock(&queue->raidPtr->iodone_lock);
   1991 
   1992 	/* Drop this one on the "finished" queue... */
   1993 	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
   1994 
   1995 	/* Let the raidio thread know there is work to be done. */
   1996 	wakeup(&(queue->raidPtr->iodone));
   1997 
   1998 	simple_unlock(&queue->raidPtr->iodone_lock);
   1999 
   2000 	splx(s);
   2001 }
   2002 
   2003 
   2004 
   2005 /*
   2006  * initialize a buf structure for doing an I/O in the kernel.
   2007  */
   2008 static void
   2009 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
   2010        logBytesPerSector, b_proc)
   2011 	struct buf *bp;
   2012 	struct vnode *b_vp;
   2013 	unsigned rw_flag;
   2014 	dev_t dev;
   2015 	RF_SectorNum_t startSect;
   2016 	RF_SectorCount_t numSect;
   2017 	caddr_t buf;
   2018 	void (*cbFunc) (struct buf *);
   2019 	void *cbArg;
   2020 	int logBytesPerSector;
   2021 	struct proc *b_proc;
   2022 {
   2023 	/* bp->b_flags       = B_PHYS | rw_flag; */
   2024 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   2025 	bp->b_bcount = numSect << logBytesPerSector;
   2026 	bp->b_bufsize = bp->b_bcount;
   2027 	bp->b_error = 0;
   2028 	bp->b_dev = dev;
   2029 	bp->b_data = buf;
   2030 	bp->b_blkno = startSect;
   2031 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   2032 	if (bp->b_bcount == 0) {
   2033 		panic("bp->b_bcount is zero in InitBP!!");
   2034 	}
   2035 	bp->b_proc = b_proc;
   2036 	bp->b_iodone = cbFunc;
   2037 	bp->b_vp = b_vp;
   2038 
   2039 }
   2040 
   2041 static void
   2042 raidgetdefaultlabel(raidPtr, rs, lp)
   2043 	RF_Raid_t *raidPtr;
   2044 	struct raid_softc *rs;
   2045 	struct disklabel *lp;
   2046 {
   2047 	memset(lp, 0, sizeof(*lp));
   2048 
   2049 	/* fabricate a label... */
   2050 	lp->d_secperunit = raidPtr->totalSectors;
   2051 	lp->d_secsize = raidPtr->bytesPerSector;
   2052 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2053 	lp->d_ntracks = 4 * raidPtr->numCol;
   2054 	lp->d_ncylinders = raidPtr->totalSectors /
   2055 		(lp->d_nsectors * lp->d_ntracks);
   2056 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   2057 
   2058 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2059 	lp->d_type = DTYPE_RAID;
   2060 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2061 	lp->d_rpm = 3600;
   2062 	lp->d_interleave = 1;
   2063 	lp->d_flags = 0;
   2064 
   2065 	lp->d_partitions[RAW_PART].p_offset = 0;
   2066 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2067 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2068 	lp->d_npartitions = RAW_PART + 1;
   2069 
   2070 	lp->d_magic = DISKMAGIC;
   2071 	lp->d_magic2 = DISKMAGIC;
   2072 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2073 
   2074 }
   2075 /*
   2076  * Read the disklabel from the raid device.  If one is not present, fake one
   2077  * up.
   2078  */
   2079 static void
   2080 raidgetdisklabel(dev)
   2081 	dev_t   dev;
   2082 {
   2083 	int     unit = raidunit(dev);
   2084 	struct raid_softc *rs = &raid_softc[unit];
   2085 	const char   *errstring;
   2086 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2087 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2088 	RF_Raid_t *raidPtr;
   2089 
   2090 	db1_printf(("Getting the disklabel...\n"));
   2091 
   2092 	memset(clp, 0, sizeof(*clp));
   2093 
   2094 	raidPtr = raidPtrs[unit];
   2095 
   2096 	raidgetdefaultlabel(raidPtr, rs, lp);
   2097 
   2098 	/*
   2099 	 * Call the generic disklabel extraction routine.
   2100 	 */
   2101 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2102 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2103 	if (errstring)
   2104 		raidmakedisklabel(rs);
   2105 	else {
   2106 		int     i;
   2107 		struct partition *pp;
   2108 
   2109 		/*
   2110 		 * Sanity check whether the found disklabel is valid.
   2111 		 *
   2112 		 * This is necessary since total size of the raid device
   2113 		 * may vary when an interleave is changed even though exactly
   2114 		 * same componets are used, and old disklabel may used
   2115 		 * if that is found.
   2116 		 */
   2117 		if (lp->d_secperunit != rs->sc_size)
   2118 			printf("raid%d: WARNING: %s: "
   2119 			    "total sector size in disklabel (%d) != "
   2120 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
   2121 			    lp->d_secperunit, (long) rs->sc_size);
   2122 		for (i = 0; i < lp->d_npartitions; i++) {
   2123 			pp = &lp->d_partitions[i];
   2124 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2125 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2126 				       "exceeds the size of raid (%ld)\n",
   2127 				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2128 		}
   2129 	}
   2130 
   2131 }
   2132 /*
   2133  * Take care of things one might want to take care of in the event
   2134  * that a disklabel isn't present.
   2135  */
   2136 static void
   2137 raidmakedisklabel(rs)
   2138 	struct raid_softc *rs;
   2139 {
   2140 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2141 	db1_printf(("Making a label..\n"));
   2142 
   2143 	/*
   2144 	 * For historical reasons, if there's no disklabel present
   2145 	 * the raw partition must be marked FS_BSDFFS.
   2146 	 */
   2147 
   2148 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2149 
   2150 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2151 
   2152 	lp->d_checksum = dkcksum(lp);
   2153 }
   2154 /*
   2155  * Lookup the provided name in the filesystem.  If the file exists,
   2156  * is a valid block device, and isn't being used by anyone else,
   2157  * set *vpp to the file's vnode.
   2158  * You'll find the original of this in ccd.c
   2159  */
   2160 int
   2161 raidlookup(path, p, vpp)
   2162 	char   *path;
   2163 	struct proc *p;
   2164 	struct vnode **vpp;	/* result */
   2165 {
   2166 	struct nameidata nd;
   2167 	struct vnode *vp;
   2168 	struct vattr va;
   2169 	int     error;
   2170 
   2171 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2172 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2173 		return (error);
   2174 	}
   2175 	vp = nd.ni_vp;
   2176 	if (vp->v_usecount > 1) {
   2177 		VOP_UNLOCK(vp, 0);
   2178 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2179 		return (EBUSY);
   2180 	}
   2181 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2182 		VOP_UNLOCK(vp, 0);
   2183 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2184 		return (error);
   2185 	}
   2186 	/* XXX: eventually we should handle VREG, too. */
   2187 	if (va.va_type != VBLK) {
   2188 		VOP_UNLOCK(vp, 0);
   2189 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2190 		return (ENOTBLK);
   2191 	}
   2192 	VOP_UNLOCK(vp, 0);
   2193 	*vpp = vp;
   2194 	return (0);
   2195 }
   2196 /*
   2197  * Wait interruptibly for an exclusive lock.
   2198  *
   2199  * XXX
   2200  * Several drivers do this; it should be abstracted and made MP-safe.
   2201  * (Hmm... where have we seen this warning before :->  GO )
   2202  */
   2203 static int
   2204 raidlock(rs)
   2205 	struct raid_softc *rs;
   2206 {
   2207 	int     error;
   2208 
   2209 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2210 		rs->sc_flags |= RAIDF_WANTED;
   2211 		if ((error =
   2212 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2213 			return (error);
   2214 	}
   2215 	rs->sc_flags |= RAIDF_LOCKED;
   2216 	return (0);
   2217 }
   2218 /*
   2219  * Unlock and wake up any waiters.
   2220  */
   2221 static void
   2222 raidunlock(rs)
   2223 	struct raid_softc *rs;
   2224 {
   2225 
   2226 	rs->sc_flags &= ~RAIDF_LOCKED;
   2227 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2228 		rs->sc_flags &= ~RAIDF_WANTED;
   2229 		wakeup(rs);
   2230 	}
   2231 }
   2232 
   2233 
   2234 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2235 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2236 
   2237 int
   2238 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2239 {
   2240 	RF_ComponentLabel_t clabel;
   2241 	raidread_component_label(dev, b_vp, &clabel);
   2242 	clabel.mod_counter = mod_counter;
   2243 	clabel.clean = RF_RAID_CLEAN;
   2244 	raidwrite_component_label(dev, b_vp, &clabel);
   2245 	return(0);
   2246 }
   2247 
   2248 
   2249 int
   2250 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2251 {
   2252 	RF_ComponentLabel_t clabel;
   2253 	raidread_component_label(dev, b_vp, &clabel);
   2254 	clabel.mod_counter = mod_counter;
   2255 	clabel.clean = RF_RAID_DIRTY;
   2256 	raidwrite_component_label(dev, b_vp, &clabel);
   2257 	return(0);
   2258 }
   2259 
   2260 /* ARGSUSED */
   2261 int
   2262 raidread_component_label(dev, b_vp, clabel)
   2263 	dev_t dev;
   2264 	struct vnode *b_vp;
   2265 	RF_ComponentLabel_t *clabel;
   2266 {
   2267 	struct buf *bp;
   2268 	const struct bdevsw *bdev;
   2269 	int error;
   2270 
   2271 	/* XXX should probably ensure that we don't try to do this if
   2272 	   someone has changed rf_protected_sectors. */
   2273 
   2274 	if (b_vp == NULL) {
   2275 		/* For whatever reason, this component is not valid.
   2276 		   Don't try to read a component label from it. */
   2277 		return(EINVAL);
   2278 	}
   2279 
   2280 	/* get a block of the appropriate size... */
   2281 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2282 	bp->b_dev = dev;
   2283 
   2284 	/* get our ducks in a row for the read */
   2285 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2286 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2287 	bp->b_flags |= B_READ;
   2288  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2289 
   2290 	bdev = bdevsw_lookup(bp->b_dev);
   2291 	if (bdev == NULL)
   2292 		return (ENXIO);
   2293 	(*bdev->d_strategy)(bp);
   2294 
   2295 	error = biowait(bp);
   2296 
   2297 	if (!error) {
   2298 		memcpy(clabel, bp->b_data,
   2299 		       sizeof(RF_ComponentLabel_t));
   2300         }
   2301 
   2302 	brelse(bp);
   2303 	return(error);
   2304 }
   2305 /* ARGSUSED */
   2306 int
   2307 raidwrite_component_label(dev, b_vp, clabel)
   2308 	dev_t dev;
   2309 	struct vnode *b_vp;
   2310 	RF_ComponentLabel_t *clabel;
   2311 {
   2312 	struct buf *bp;
   2313 	const struct bdevsw *bdev;
   2314 	int error;
   2315 
   2316 	/* get a block of the appropriate size... */
   2317 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2318 	bp->b_dev = dev;
   2319 
   2320 	/* get our ducks in a row for the write */
   2321 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2322 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2323 	bp->b_flags |= B_WRITE;
   2324  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2325 
   2326 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2327 
   2328 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2329 
   2330 	bdev = bdevsw_lookup(bp->b_dev);
   2331 	if (bdev == NULL)
   2332 		return (ENXIO);
   2333 	(*bdev->d_strategy)(bp);
   2334 	error = biowait(bp);
   2335 	brelse(bp);
   2336 	if (error) {
   2337 #if 1
   2338 		printf("Failed to write RAID component info!\n");
   2339 #endif
   2340 	}
   2341 
   2342 	return(error);
   2343 }
   2344 
   2345 void
   2346 rf_markalldirty(raidPtr)
   2347 	RF_Raid_t *raidPtr;
   2348 {
   2349 	RF_ComponentLabel_t clabel;
   2350 	int sparecol;
   2351 	int c;
   2352 	int j;
   2353 	int scol = -1;
   2354 
   2355 	raidPtr->mod_counter++;
   2356 	for (c = 0; c < raidPtr->numCol; c++) {
   2357 		/* we don't want to touch (at all) a disk that has
   2358 		   failed */
   2359 		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
   2360 			raidread_component_label(
   2361 						 raidPtr->Disks[c].dev,
   2362 						 raidPtr->raid_cinfo[c].ci_vp,
   2363 						 &clabel);
   2364 			if (clabel.status == rf_ds_spared) {
   2365 				/* XXX do something special...
   2366 				   but whatever you do, don't
   2367 				   try to access it!! */
   2368 			} else {
   2369 				raidmarkdirty(
   2370 					      raidPtr->Disks[c].dev,
   2371 					      raidPtr->raid_cinfo[c].ci_vp,
   2372 					      raidPtr->mod_counter);
   2373 			}
   2374 		}
   2375 	}
   2376 
   2377 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2378 		sparecol = raidPtr->numCol + c;
   2379 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2380 			/*
   2381 
   2382 			   we claim this disk is "optimal" if it's
   2383 			   rf_ds_used_spare, as that means it should be
   2384 			   directly substitutable for the disk it replaced.
   2385 			   We note that too...
   2386 
   2387 			 */
   2388 
   2389 			for(j=0;j<raidPtr->numCol;j++) {
   2390 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2391 					scol = j;
   2392 					break;
   2393 				}
   2394 			}
   2395 
   2396 			raidread_component_label(
   2397 				 raidPtr->Disks[sparecol].dev,
   2398 				 raidPtr->raid_cinfo[sparecol].ci_vp,
   2399 				 &clabel);
   2400 			/* make sure status is noted */
   2401 
   2402 			raid_init_component_label(raidPtr, &clabel);
   2403 
   2404 			clabel.row = 0;
   2405 			clabel.column = scol;
   2406 			/* Note: we *don't* change status from rf_ds_used_spare
   2407 			   to rf_ds_optimal */
   2408 			/* clabel.status = rf_ds_optimal; */
   2409 
   2410 			raidmarkdirty(raidPtr->Disks[sparecol].dev,
   2411 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2412 				      raidPtr->mod_counter);
   2413 		}
   2414 	}
   2415 }
   2416 
   2417 
   2418 void
   2419 rf_update_component_labels(raidPtr, final)
   2420 	RF_Raid_t *raidPtr;
   2421 	int final;
   2422 {
   2423 	RF_ComponentLabel_t clabel;
   2424 	int sparecol;
   2425 	int c;
   2426 	int j;
   2427 	int scol;
   2428 
   2429 	scol = -1;
   2430 
   2431 	/* XXX should do extra checks to make sure things really are clean,
   2432 	   rather than blindly setting the clean bit... */
   2433 
   2434 	raidPtr->mod_counter++;
   2435 
   2436 	for (c = 0; c < raidPtr->numCol; c++) {
   2437 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   2438 			raidread_component_label(
   2439 						 raidPtr->Disks[c].dev,
   2440 						 raidPtr->raid_cinfo[c].ci_vp,
   2441 						 &clabel);
   2442 				/* make sure status is noted */
   2443 			clabel.status = rf_ds_optimal;
   2444 				/* bump the counter */
   2445 			clabel.mod_counter = raidPtr->mod_counter;
   2446 
   2447 			raidwrite_component_label(
   2448 						  raidPtr->Disks[c].dev,
   2449 						  raidPtr->raid_cinfo[c].ci_vp,
   2450 						  &clabel);
   2451 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2452 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2453 					raidmarkclean(
   2454 						      raidPtr->Disks[c].dev,
   2455 						      raidPtr->raid_cinfo[c].ci_vp,
   2456 						      raidPtr->mod_counter);
   2457 				}
   2458 			}
   2459 		}
   2460 		/* else we don't touch it.. */
   2461 	}
   2462 
   2463 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2464 		sparecol = raidPtr->numCol + c;
   2465 		/* Need to ensure that the reconstruct actually completed! */
   2466 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2467 			/*
   2468 
   2469 			   we claim this disk is "optimal" if it's
   2470 			   rf_ds_used_spare, as that means it should be
   2471 			   directly substitutable for the disk it replaced.
   2472 			   We note that too...
   2473 
   2474 			 */
   2475 
   2476 			for(j=0;j<raidPtr->numCol;j++) {
   2477 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2478 					scol = j;
   2479 					break;
   2480 				}
   2481 			}
   2482 
   2483 			/* XXX shouldn't *really* need this... */
   2484 			raidread_component_label(
   2485 				      raidPtr->Disks[sparecol].dev,
   2486 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2487 				      &clabel);
   2488 			/* make sure status is noted */
   2489 
   2490 			raid_init_component_label(raidPtr, &clabel);
   2491 
   2492 			clabel.mod_counter = raidPtr->mod_counter;
   2493 			clabel.column = scol;
   2494 			clabel.status = rf_ds_optimal;
   2495 
   2496 			raidwrite_component_label(
   2497 				      raidPtr->Disks[sparecol].dev,
   2498 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2499 				      &clabel);
   2500 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2501 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2502 					raidmarkclean( raidPtr->Disks[sparecol].dev,
   2503 						       raidPtr->raid_cinfo[sparecol].ci_vp,
   2504 						       raidPtr->mod_counter);
   2505 				}
   2506 			}
   2507 		}
   2508 	}
   2509 }
   2510 
   2511 void
   2512 rf_close_component(raidPtr, vp, auto_configured)
   2513 	RF_Raid_t *raidPtr;
   2514 	struct vnode *vp;
   2515 	int auto_configured;
   2516 {
   2517 	struct proc *p;
   2518 
   2519 	p = raidPtr->engine_thread;
   2520 
   2521 	if (vp != NULL) {
   2522 		if (auto_configured == 1) {
   2523 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2524 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2525 			vput(vp);
   2526 
   2527 		} else {
   2528 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2529 		}
   2530 	}
   2531 }
   2532 
   2533 
   2534 void
   2535 rf_UnconfigureVnodes(raidPtr)
   2536 	RF_Raid_t *raidPtr;
   2537 {
   2538 	int r,c;
   2539 	struct vnode *vp;
   2540 	int acd;
   2541 
   2542 
   2543 	/* We take this opportunity to close the vnodes like we should.. */
   2544 
   2545 	for (c = 0; c < raidPtr->numCol; c++) {
   2546 		vp = raidPtr->raid_cinfo[c].ci_vp;
   2547 		acd = raidPtr->Disks[c].auto_configured;
   2548 		rf_close_component(raidPtr, vp, acd);
   2549 		raidPtr->raid_cinfo[c].ci_vp = NULL;
   2550 		raidPtr->Disks[c].auto_configured = 0;
   2551 	}
   2552 
   2553 	for (r = 0; r < raidPtr->numSpare; r++) {
   2554 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
   2555 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
   2556 		rf_close_component(raidPtr, vp, acd);
   2557 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
   2558 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
   2559 	}
   2560 }
   2561 
   2562 
   2563 void
   2564 rf_ReconThread(req)
   2565 	struct rf_recon_req *req;
   2566 {
   2567 	int     s;
   2568 	RF_Raid_t *raidPtr;
   2569 
   2570 	s = splbio();
   2571 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2572 	raidPtr->recon_in_progress = 1;
   2573 
   2574 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
   2575 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2576 
   2577 	/* XXX get rid of this! we don't need it at all.. */
   2578 	RF_Free(req, sizeof(*req));
   2579 
   2580 	raidPtr->recon_in_progress = 0;
   2581 	splx(s);
   2582 
   2583 	/* That's all... */
   2584 	kthread_exit(0);        /* does not return */
   2585 }
   2586 
   2587 void
   2588 rf_RewriteParityThread(raidPtr)
   2589 	RF_Raid_t *raidPtr;
   2590 {
   2591 	int retcode;
   2592 	int s;
   2593 
   2594 	raidPtr->parity_rewrite_in_progress = 1;
   2595 	s = splbio();
   2596 	retcode = rf_RewriteParity(raidPtr);
   2597 	splx(s);
   2598 	if (retcode) {
   2599 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2600 	} else {
   2601 		/* set the clean bit!  If we shutdown correctly,
   2602 		   the clean bit on each component label will get
   2603 		   set */
   2604 		raidPtr->parity_good = RF_RAID_CLEAN;
   2605 	}
   2606 	raidPtr->parity_rewrite_in_progress = 0;
   2607 
   2608 	/* Anyone waiting for us to stop?  If so, inform them... */
   2609 	if (raidPtr->waitShutdown) {
   2610 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2611 	}
   2612 
   2613 	/* That's all... */
   2614 	kthread_exit(0);        /* does not return */
   2615 }
   2616 
   2617 
   2618 void
   2619 rf_CopybackThread(raidPtr)
   2620 	RF_Raid_t *raidPtr;
   2621 {
   2622 	int s;
   2623 
   2624 	raidPtr->copyback_in_progress = 1;
   2625 	s = splbio();
   2626 	rf_CopybackReconstructedData(raidPtr);
   2627 	splx(s);
   2628 	raidPtr->copyback_in_progress = 0;
   2629 
   2630 	/* That's all... */
   2631 	kthread_exit(0);        /* does not return */
   2632 }
   2633 
   2634 
   2635 void
   2636 rf_ReconstructInPlaceThread(req)
   2637 	struct rf_recon_req *req;
   2638 {
   2639 	int s;
   2640 	RF_Raid_t *raidPtr;
   2641 
   2642 	s = splbio();
   2643 	raidPtr = req->raidPtr;
   2644 	raidPtr->recon_in_progress = 1;
   2645 	rf_ReconstructInPlace(raidPtr, req->col);
   2646 	RF_Free(req, sizeof(*req));
   2647 	raidPtr->recon_in_progress = 0;
   2648 	splx(s);
   2649 
   2650 	/* That's all... */
   2651 	kthread_exit(0);        /* does not return */
   2652 }
   2653 
   2654 RF_AutoConfig_t *
   2655 rf_find_raid_components()
   2656 {
   2657 	struct vnode *vp;
   2658 	struct disklabel label;
   2659 	struct device *dv;
   2660 	dev_t dev;
   2661 	int bmajor;
   2662 	int error;
   2663 	int i;
   2664 	int good_one;
   2665 	RF_ComponentLabel_t *clabel;
   2666 	RF_AutoConfig_t *ac_list;
   2667 	RF_AutoConfig_t *ac;
   2668 
   2669 
   2670 	/* initialize the AutoConfig list */
   2671 	ac_list = NULL;
   2672 
   2673 	/* we begin by trolling through *all* the devices on the system */
   2674 
   2675 	for (dv = alldevs.tqh_first; dv != NULL;
   2676 	     dv = dv->dv_list.tqe_next) {
   2677 
   2678 		/* we are only interested in disks... */
   2679 		if (dv->dv_class != DV_DISK)
   2680 			continue;
   2681 
   2682 		/* we don't care about floppies... */
   2683 		if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
   2684 			continue;
   2685 		}
   2686 
   2687 		/* we don't care about CD's... */
   2688 		if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
   2689 			continue;
   2690 		}
   2691 
   2692 		/* hdfd is the Atari/Hades floppy driver */
   2693 		if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
   2694 			continue;
   2695 		}
   2696 		/* fdisa is the Atari/Milan floppy driver */
   2697 		if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
   2698 			continue;
   2699 		}
   2700 
   2701 		/* need to find the device_name_to_block_device_major stuff */
   2702 		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
   2703 
   2704 		/* get a vnode for the raw partition of this disk */
   2705 
   2706 		dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
   2707 		if (bdevvp(dev, &vp))
   2708 			panic("RAID can't alloc vnode");
   2709 
   2710 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2711 
   2712 		if (error) {
   2713 			/* "Who cares."  Continue looking
   2714 			   for something that exists*/
   2715 			vput(vp);
   2716 			continue;
   2717 		}
   2718 
   2719 		/* Ok, the disk exists.  Go get the disklabel. */
   2720 		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
   2721 		if (error) {
   2722 			/*
   2723 			 * XXX can't happen - open() would
   2724 			 * have errored out (or faked up one)
   2725 			 */
   2726 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2727 			       dv->dv_xname, 'a' + RAW_PART, error);
   2728 		}
   2729 
   2730 		/* don't need this any more.  We'll allocate it again
   2731 		   a little later if we really do... */
   2732 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2733 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2734 		vput(vp);
   2735 
   2736 		for (i=0; i < label.d_npartitions; i++) {
   2737 			/* We only support partitions marked as RAID */
   2738 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2739 				continue;
   2740 
   2741 			dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
   2742 			if (bdevvp(dev, &vp))
   2743 				panic("RAID can't alloc vnode");
   2744 
   2745 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2746 			if (error) {
   2747 				/* Whatever... */
   2748 				vput(vp);
   2749 				continue;
   2750 			}
   2751 
   2752 			good_one = 0;
   2753 
   2754 			clabel = (RF_ComponentLabel_t *)
   2755 				malloc(sizeof(RF_ComponentLabel_t),
   2756 				       M_RAIDFRAME, M_NOWAIT);
   2757 			if (clabel == NULL) {
   2758 				/* XXX CLEANUP HERE */
   2759 				printf("RAID auto config: out of memory!\n");
   2760 				return(NULL); /* XXX probably should panic? */
   2761 			}
   2762 
   2763 			if (!raidread_component_label(dev, vp, clabel)) {
   2764 				/* Got the label.  Does it look reasonable? */
   2765 				if (rf_reasonable_label(clabel) &&
   2766 				    (clabel->partitionSize <=
   2767 				     label.d_partitions[i].p_size)) {
   2768 #if DEBUG
   2769 					printf("Component on: %s%c: %d\n",
   2770 					       dv->dv_xname, 'a'+i,
   2771 					       label.d_partitions[i].p_size);
   2772 					rf_print_component_label(clabel);
   2773 #endif
   2774 					/* if it's reasonable, add it,
   2775 					   else ignore it. */
   2776 					ac = (RF_AutoConfig_t *)
   2777 						malloc(sizeof(RF_AutoConfig_t),
   2778 						       M_RAIDFRAME,
   2779 						       M_NOWAIT);
   2780 					if (ac == NULL) {
   2781 						/* XXX should panic?? */
   2782 						return(NULL);
   2783 					}
   2784 
   2785 					sprintf(ac->devname, "%s%c",
   2786 						dv->dv_xname, 'a'+i);
   2787 					ac->dev = dev;
   2788 					ac->vp = vp;
   2789 					ac->clabel = clabel;
   2790 					ac->next = ac_list;
   2791 					ac_list = ac;
   2792 					good_one = 1;
   2793 				}
   2794 			}
   2795 			if (!good_one) {
   2796 				/* cleanup */
   2797 				free(clabel, M_RAIDFRAME);
   2798 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2799 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2800 				vput(vp);
   2801 			}
   2802 		}
   2803 	}
   2804 	return(ac_list);
   2805 }
   2806 
   2807 static int
   2808 rf_reasonable_label(clabel)
   2809 	RF_ComponentLabel_t *clabel;
   2810 {
   2811 
   2812 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2813 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2814 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2815 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2816 	    clabel->row >=0 &&
   2817 	    clabel->column >= 0 &&
   2818 	    clabel->num_rows > 0 &&
   2819 	    clabel->num_columns > 0 &&
   2820 	    clabel->row < clabel->num_rows &&
   2821 	    clabel->column < clabel->num_columns &&
   2822 	    clabel->blockSize > 0 &&
   2823 	    clabel->numBlocks > 0) {
   2824 		/* label looks reasonable enough... */
   2825 		return(1);
   2826 	}
   2827 	return(0);
   2828 }
   2829 
   2830 
#if DEBUG
/*
 * Print the contents of a component label, one group of related
 * fields per line.  Only compiled when DEBUG is set.
 */
void
rf_print_component_label(clabel)
	RF_ComponentLabel_t *clabel;
{
	/* Yes/No renderings of the flag-like fields. */
	const char *clean_str = clabel->clean ? "Yes" : "No";
	const char *auto_str = clabel->autoconfigure ? "Yes" : "No";
	const char *root_str = clabel->root_partition ? "Yes" : "No";

	/* Position within the set, and the set's geometry. */
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);

	/* Identity of the set and how recently it was updated. */
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number, clabel->mod_counter);
	printf("   Clean: %s Status: %d\n", clean_str, clabel->status);

	/* Layout parameters. */
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);

	/* Autoconfiguration hints. */
	printf("   Autoconfig: %s\n", auto_str);
	printf("   Contains root partition: %s\n", root_str);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif
}
#endif
   2859 
   2860 RF_ConfigSet_t *
   2861 rf_create_auto_sets(ac_list)
   2862 	RF_AutoConfig_t *ac_list;
   2863 {
   2864 	RF_AutoConfig_t *ac;
   2865 	RF_ConfigSet_t *config_sets;
   2866 	RF_ConfigSet_t *cset;
   2867 	RF_AutoConfig_t *ac_next;
   2868 
   2869 
   2870 	config_sets = NULL;
   2871 
   2872 	/* Go through the AutoConfig list, and figure out which components
   2873 	   belong to what sets.  */
   2874 	ac = ac_list;
   2875 	while(ac!=NULL) {
   2876 		/* we're going to putz with ac->next, so save it here
   2877 		   for use at the end of the loop */
   2878 		ac_next = ac->next;
   2879 
   2880 		if (config_sets == NULL) {
   2881 			/* will need at least this one... */
   2882 			config_sets = (RF_ConfigSet_t *)
   2883 				malloc(sizeof(RF_ConfigSet_t),
   2884 				       M_RAIDFRAME, M_NOWAIT);
   2885 			if (config_sets == NULL) {
   2886 				panic("rf_create_auto_sets: No memory!");
   2887 			}
   2888 			/* this one is easy :) */
   2889 			config_sets->ac = ac;
   2890 			config_sets->next = NULL;
   2891 			config_sets->rootable = 0;
   2892 			ac->next = NULL;
   2893 		} else {
   2894 			/* which set does this component fit into? */
   2895 			cset = config_sets;
   2896 			while(cset!=NULL) {
   2897 				if (rf_does_it_fit(cset, ac)) {
   2898 					/* looks like it matches... */
   2899 					ac->next = cset->ac;
   2900 					cset->ac = ac;
   2901 					break;
   2902 				}
   2903 				cset = cset->next;
   2904 			}
   2905 			if (cset==NULL) {
   2906 				/* didn't find a match above... new set..*/
   2907 				cset = (RF_ConfigSet_t *)
   2908 					malloc(sizeof(RF_ConfigSet_t),
   2909 					       M_RAIDFRAME, M_NOWAIT);
   2910 				if (cset == NULL) {
   2911 					panic("rf_create_auto_sets: No memory!");
   2912 				}
   2913 				cset->ac = ac;
   2914 				ac->next = NULL;
   2915 				cset->next = config_sets;
   2916 				cset->rootable = 0;
   2917 				config_sets = cset;
   2918 			}
   2919 		}
   2920 		ac = ac_next;
   2921 	}
   2922 
   2923 
   2924 	return(config_sets);
   2925 }
   2926 
   2927 static int
   2928 rf_does_it_fit(cset, ac)
   2929 	RF_ConfigSet_t *cset;
   2930 	RF_AutoConfig_t *ac;
   2931 {
   2932 	RF_ComponentLabel_t *clabel1, *clabel2;
   2933 
   2934 	/* If this one matches the *first* one in the set, that's good
   2935 	   enough, since the other members of the set would have been
   2936 	   through here too... */
   2937 	/* note that we are not checking partitionSize here..
   2938 
   2939 	   Note that we are also not checking the mod_counters here.
   2940 	   If everything else matches execpt the mod_counter, that's
   2941 	   good enough for this test.  We will deal with the mod_counters
   2942 	   a little later in the autoconfiguration process.
   2943 
   2944 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2945 
   2946 	   The reason we don't check for this is that failed disks
   2947 	   will have lower modification counts.  If those disks are
   2948 	   not added to the set they used to belong to, then they will
   2949 	   form their own set, which may result in 2 different sets,
   2950 	   for example, competing to be configured at raid0, and
   2951 	   perhaps competing to be the root filesystem set.  If the
   2952 	   wrong ones get configured, or both attempt to become /,
   2953 	   weird behaviour and or serious lossage will occur.  Thus we
   2954 	   need to bring them into the fold here, and kick them out at
   2955 	   a later point.
   2956 
   2957 	*/
   2958 
   2959 	clabel1 = cset->ac->clabel;
   2960 	clabel2 = ac->clabel;
   2961 	if ((clabel1->version == clabel2->version) &&
   2962 	    (clabel1->serial_number == clabel2->serial_number) &&
   2963 	    (clabel1->num_rows == clabel2->num_rows) &&
   2964 	    (clabel1->num_columns == clabel2->num_columns) &&
   2965 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2966 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2967 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2968 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2969 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2970 	    (clabel1->blockSize == clabel2->blockSize) &&
   2971 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2972 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2973 	    (clabel1->root_partition == clabel2->root_partition) &&
   2974 	    (clabel1->last_unit == clabel2->last_unit) &&
   2975 	    (clabel1->config_order == clabel2->config_order)) {
   2976 		/* if it get's here, it almost *has* to be a match */
   2977 	} else {
   2978 		/* it's not consistent with somebody in the set..
   2979 		   punt */
   2980 		return(0);
   2981 	}
   2982 	/* all was fine.. it must fit... */
   2983 	return(1);
   2984 }
   2985 
   2986 int
   2987 rf_have_enough_components(cset)
   2988 	RF_ConfigSet_t *cset;
   2989 {
   2990 	RF_AutoConfig_t *ac;
   2991 	RF_AutoConfig_t *auto_config;
   2992 	RF_ComponentLabel_t *clabel;
   2993 	int c;
   2994 	int num_cols;
   2995 	int num_missing;
   2996 	int mod_counter;
   2997 	int mod_counter_found;
   2998 	int even_pair_failed;
   2999 	char parity_type;
   3000 
   3001 
   3002 	/* check to see that we have enough 'live' components
   3003 	   of this set.  If so, we can configure it if necessary */
   3004 
   3005 	num_cols = cset->ac->clabel->num_columns;
   3006 	parity_type = cset->ac->clabel->parityConfig;
   3007 
   3008 	/* XXX Check for duplicate components!?!?!? */
   3009 
   3010 	/* Determine what the mod_counter is supposed to be for this set. */
   3011 
   3012 	mod_counter_found = 0;
   3013 	mod_counter = 0;
   3014 	ac = cset->ac;
   3015 	while(ac!=NULL) {
   3016 		if (mod_counter_found==0) {
   3017 			mod_counter = ac->clabel->mod_counter;
   3018 			mod_counter_found = 1;
   3019 		} else {
   3020 			if (ac->clabel->mod_counter > mod_counter) {
   3021 				mod_counter = ac->clabel->mod_counter;
   3022 			}
   3023 		}
   3024 		ac = ac->next;
   3025 	}
   3026 
   3027 	num_missing = 0;
   3028 	auto_config = cset->ac;
   3029 
   3030 	even_pair_failed = 0;
   3031 	for(c=0; c<num_cols; c++) {
   3032 		ac = auto_config;
   3033 		while(ac!=NULL) {
   3034 			if ((ac->clabel->column == c) &&
   3035 			    (ac->clabel->mod_counter == mod_counter)) {
   3036 				/* it's this one... */
   3037 #if DEBUG
   3038 				printf("Found: %s at %d\n",
   3039 				       ac->devname,c);
   3040 #endif
   3041 				break;
   3042 			}
   3043 			ac=ac->next;
   3044 		}
   3045 		if (ac==NULL) {
   3046 				/* Didn't find one here! */
   3047 				/* special case for RAID 1, especially
   3048 				   where there are more than 2
   3049 				   components (where RAIDframe treats
   3050 				   things a little differently :( ) */
   3051 			if (parity_type == '1') {
   3052 				if (c%2 == 0) { /* even component */
   3053 					even_pair_failed = 1;
   3054 				} else { /* odd component.  If
   3055 					    we're failed, and
   3056 					    so is the even
   3057 					    component, it's
   3058 					    "Good Night, Charlie" */
   3059 					if (even_pair_failed == 1) {
   3060 						return(0);
   3061 					}
   3062 				}
   3063 			} else {
   3064 				/* normal accounting */
   3065 				num_missing++;
   3066 			}
   3067 		}
   3068 		if ((parity_type == '1') && (c%2 == 1)) {
   3069 				/* Just did an even component, and we didn't
   3070 				   bail.. reset the even_pair_failed flag,
   3071 				   and go on to the next component.... */
   3072 			even_pair_failed = 0;
   3073 		}
   3074 	}
   3075 
   3076 	clabel = cset->ac->clabel;
   3077 
   3078 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3079 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3080 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3081 		/* XXX this needs to be made *much* more general */
   3082 		/* Too many failures */
   3083 		return(0);
   3084 	}
   3085 	/* otherwise, all is well, and we've got enough to take a kick
   3086 	   at autoconfiguring this set */
   3087 	return(1);
   3088 }
   3089 
   3090 void
   3091 rf_create_configuration(ac,config,raidPtr)
   3092 	RF_AutoConfig_t *ac;
   3093 	RF_Config_t *config;
   3094 	RF_Raid_t *raidPtr;
   3095 {
   3096 	RF_ComponentLabel_t *clabel;
   3097 	int i;
   3098 
   3099 	clabel = ac->clabel;
   3100 
   3101 	/* 1. Fill in the common stuff */
   3102 	config->numRow = clabel->num_rows = 1;
   3103 	config->numCol = clabel->num_columns;
   3104 	config->numSpare = 0; /* XXX should this be set here? */
   3105 	config->sectPerSU = clabel->sectPerSU;
   3106 	config->SUsPerPU = clabel->SUsPerPU;
   3107 	config->SUsPerRU = clabel->SUsPerRU;
   3108 	config->parityConfig = clabel->parityConfig;
   3109 	/* XXX... */
   3110 	strcpy(config->diskQueueType,"fifo");
   3111 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3112 	config->layoutSpecificSize = 0; /* XXX ?? */
   3113 
   3114 	while(ac!=NULL) {
   3115 		/* row/col values will be in range due to the checks
   3116 		   in reasonable_label() */
   3117 		strcpy(config->devnames[0][ac->clabel->column],
   3118 		       ac->devname);
   3119 		ac = ac->next;
   3120 	}
   3121 
   3122 	for(i=0;i<RF_MAXDBGV;i++) {
   3123 		config->debugVars[i][0] = 0;
   3124 	}
   3125 }
   3126 
   3127 int
   3128 rf_set_autoconfig(raidPtr, new_value)
   3129 	RF_Raid_t *raidPtr;
   3130 	int new_value;
   3131 {
   3132 	RF_ComponentLabel_t clabel;
   3133 	struct vnode *vp;
   3134 	dev_t dev;
   3135 	int column;
   3136 	int sparecol;
   3137 
   3138 	raidPtr->autoconfigure = new_value;
   3139 
   3140 	for(column=0; column<raidPtr->numCol; column++) {
   3141 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3142 			dev = raidPtr->Disks[column].dev;
   3143 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3144 			raidread_component_label(dev, vp, &clabel);
   3145 			clabel.autoconfigure = new_value;
   3146 			raidwrite_component_label(dev, vp, &clabel);
   3147 		}
   3148 	}
   3149 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3150 		sparecol = raidPtr->numCol + column;
   3151 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3152 			dev = raidPtr->Disks[sparecol].dev;
   3153 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3154 			raidread_component_label(dev, vp, &clabel);
   3155 			clabel.autoconfigure = new_value;
   3156 			raidwrite_component_label(dev, vp, &clabel);
   3157 		}
   3158 	}
   3159 	return(new_value);
   3160 }
   3161 
   3162 int
   3163 rf_set_rootpartition(raidPtr, new_value)
   3164 	RF_Raid_t *raidPtr;
   3165 	int new_value;
   3166 {
   3167 	RF_ComponentLabel_t clabel;
   3168 	struct vnode *vp;
   3169 	dev_t dev;
   3170 	int column;
   3171 	int sparecol;
   3172 
   3173 	raidPtr->root_partition = new_value;
   3174 	for(column=0; column<raidPtr->numCol; column++) {
   3175 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3176 			dev = raidPtr->Disks[column].dev;
   3177 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3178 			raidread_component_label(dev, vp, &clabel);
   3179 			clabel.root_partition = new_value;
   3180 			raidwrite_component_label(dev, vp, &clabel);
   3181 		}
   3182 	}
   3183 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3184 		sparecol = raidPtr->numCol + column;
   3185 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3186 			dev = raidPtr->Disks[sparecol].dev;
   3187 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3188 			raidread_component_label(dev, vp, &clabel);
   3189 			clabel.root_partition = new_value;
   3190 			raidwrite_component_label(dev, vp, &clabel);
   3191 		}
   3192 	}
   3193 	return(new_value);
   3194 }
   3195 
   3196 void
   3197 rf_release_all_vps(cset)
   3198 	RF_ConfigSet_t *cset;
   3199 {
   3200 	RF_AutoConfig_t *ac;
   3201 
   3202 	ac = cset->ac;
   3203 	while(ac!=NULL) {
   3204 		/* Close the vp, and give it back */
   3205 		if (ac->vp) {
   3206 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3207 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3208 			vput(ac->vp);
   3209 			ac->vp = NULL;
   3210 		}
   3211 		ac = ac->next;
   3212 	}
   3213 }
   3214 
   3215 
   3216 void
   3217 rf_cleanup_config_set(cset)
   3218 	RF_ConfigSet_t *cset;
   3219 {
   3220 	RF_AutoConfig_t *ac;
   3221 	RF_AutoConfig_t *next_ac;
   3222 
   3223 	ac = cset->ac;
   3224 	while(ac!=NULL) {
   3225 		next_ac = ac->next;
   3226 		/* nuke the label */
   3227 		free(ac->clabel, M_RAIDFRAME);
   3228 		/* cleanup the config structure */
   3229 		free(ac, M_RAIDFRAME);
   3230 		/* "next.." */
   3231 		ac = next_ac;
   3232 	}
   3233 	/* and, finally, nuke the config set */
   3234 	free(cset, M_RAIDFRAME);
   3235 }
   3236 
   3237 
   3238 void
   3239 raid_init_component_label(raidPtr, clabel)
   3240 	RF_Raid_t *raidPtr;
   3241 	RF_ComponentLabel_t *clabel;
   3242 {
   3243 	/* current version number */
   3244 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3245 	clabel->serial_number = raidPtr->serial_number;
   3246 	clabel->mod_counter = raidPtr->mod_counter;
   3247 	clabel->num_rows = 1;
   3248 	clabel->num_columns = raidPtr->numCol;
   3249 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3250 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3251 
   3252 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3253 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3254 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3255 
   3256 	clabel->blockSize = raidPtr->bytesPerSector;
   3257 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3258 
   3259 	/* XXX not portable */
   3260 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3261 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3262 	clabel->autoconfigure = raidPtr->autoconfigure;
   3263 	clabel->root_partition = raidPtr->root_partition;
   3264 	clabel->last_unit = raidPtr->raidid;
   3265 	clabel->config_order = raidPtr->config_order;
   3266 }
   3267 
   3268 int
   3269 rf_auto_config_set(cset,unit)
   3270 	RF_ConfigSet_t *cset;
   3271 	int *unit;
   3272 {
   3273 	RF_Raid_t *raidPtr;
   3274 	RF_Config_t *config;
   3275 	int raidID;
   3276 	int retcode;
   3277 
   3278 #if DEBUG
   3279 	printf("RAID autoconfigure\n");
   3280 #endif
   3281 
   3282 	retcode = 0;
   3283 	*unit = -1;
   3284 
   3285 	/* 1. Create a config structure */
   3286 
   3287 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3288 				       M_RAIDFRAME,
   3289 				       M_NOWAIT);
   3290 	if (config==NULL) {
   3291 		printf("Out of mem!?!?\n");
   3292 				/* XXX do something more intelligent here. */
   3293 		return(1);
   3294 	}
   3295 
   3296 	memset(config, 0, sizeof(RF_Config_t));
   3297 
   3298 	/*
   3299 	   2. Figure out what RAID ID this one is supposed to live at
   3300 	   See if we can get the same RAID dev that it was configured
   3301 	   on last time..
   3302 	*/
   3303 
   3304 	raidID = cset->ac->clabel->last_unit;
   3305 	if ((raidID < 0) || (raidID >= numraid)) {
   3306 		/* let's not wander off into lala land. */
   3307 		raidID = numraid - 1;
   3308 	}
   3309 	if (raidPtrs[raidID]->valid != 0) {
   3310 
   3311 		/*
   3312 		   Nope... Go looking for an alternative...
   3313 		   Start high so we don't immediately use raid0 if that's
   3314 		   not taken.
   3315 		*/
   3316 
   3317 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3318 			if (raidPtrs[raidID]->valid == 0) {
   3319 				/* can use this one! */
   3320 				break;
   3321 			}
   3322 		}
   3323 	}
   3324 
   3325 	if (raidID < 0) {
   3326 		/* punt... */
   3327 		printf("Unable to auto configure this set!\n");
   3328 		printf("(Out of RAID devs!)\n");
   3329 		return(1);
   3330 	}
   3331 
   3332 #if DEBUG
   3333 	printf("Configuring raid%d:\n",raidID);
   3334 #endif
   3335 
   3336 	raidPtr = raidPtrs[raidID];
   3337 
   3338 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3339 	raidPtr->raidid = raidID;
   3340 	raidPtr->openings = RAIDOUTSTANDING;
   3341 
   3342 	/* 3. Build the configuration structure */
   3343 	rf_create_configuration(cset->ac, config, raidPtr);
   3344 
   3345 	/* 4. Do the configuration */
   3346 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3347 
   3348 	if (retcode == 0) {
   3349 
   3350 		raidinit(raidPtrs[raidID]);
   3351 
   3352 		rf_markalldirty(raidPtrs[raidID]);
   3353 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3354 		if (cset->ac->clabel->root_partition==1) {
   3355 			/* everything configured just fine.  Make a note
   3356 			   that this set is eligible to be root. */
   3357 			cset->rootable = 1;
   3358 			/* XXX do this here? */
   3359 			raidPtrs[raidID]->root_partition = 1;
   3360 		}
   3361 	}
   3362 
   3363 	/* 5. Cleanup */
   3364 	free(config, M_RAIDFRAME);
   3365 
   3366 	*unit = raidID;
   3367 	return(retcode);
   3368 }
   3369 
   3370 void
   3371 rf_disk_unbusy(desc)
   3372 	RF_RaidAccessDesc_t *desc;
   3373 {
   3374 	struct buf *bp;
   3375 
   3376 	bp = (struct buf *)desc->bp;
   3377 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3378 	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
   3379 }
   3380