Home | History | Annotate | Line # | Download | only in raidframe
rf_driver.c revision 1.97
      1  1.97     oster /*	$NetBSD: rf_driver.c,v 1.97 2004/03/20 04:22:05 oster Exp $	*/
      2   1.9     oster /*-
      3   1.9     oster  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      4   1.9     oster  * All rights reserved.
      5   1.9     oster  *
      6   1.9     oster  * This code is derived from software contributed to The NetBSD Foundation
      7   1.9     oster  * by Greg Oster
      8   1.9     oster  *
      9   1.9     oster  * Redistribution and use in source and binary forms, with or without
     10   1.9     oster  * modification, are permitted provided that the following conditions
     11   1.9     oster  * are met:
     12   1.9     oster  * 1. Redistributions of source code must retain the above copyright
     13   1.9     oster  *    notice, this list of conditions and the following disclaimer.
     14   1.9     oster  * 2. Redistributions in binary form must reproduce the above copyright
     15   1.9     oster  *    notice, this list of conditions and the following disclaimer in the
     16   1.9     oster  *    documentation and/or other materials provided with the distribution.
     17   1.9     oster  * 3. All advertising materials mentioning features or use of this software
     18   1.9     oster  *    must display the following acknowledgement:
     19   1.9     oster  *        This product includes software developed by the NetBSD
     20   1.9     oster  *        Foundation, Inc. and its contributors.
     21   1.9     oster  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22   1.9     oster  *    contributors may be used to endorse or promote products derived
     23   1.9     oster  *    from this software without specific prior written permission.
     24   1.9     oster  *
     25   1.9     oster  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26   1.9     oster  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27   1.9     oster  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28   1.9     oster  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29   1.9     oster  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30   1.9     oster  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31   1.9     oster  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32   1.9     oster  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33   1.9     oster  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34   1.9     oster  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35   1.9     oster  * POSSIBILITY OF SUCH DAMAGE.
     36   1.9     oster  */
     37   1.9     oster 
     38   1.1     oster /*
     39   1.1     oster  * Copyright (c) 1995 Carnegie-Mellon University.
     40   1.1     oster  * All rights reserved.
     41   1.1     oster  *
     42   1.1     oster  * Author: Mark Holland, Khalil Amiri, Claudson Bornstein, William V. Courtright II,
     43   1.1     oster  *         Robby Findler, Daniel Stodolsky, Rachad Youssef, Jim Zelenka
     44   1.1     oster  *
     45   1.1     oster  * Permission to use, copy, modify and distribute this software and
     46   1.1     oster  * its documentation is hereby granted, provided that both the copyright
     47   1.1     oster  * notice and this permission notice appear in all copies of the
     48   1.1     oster  * software, derivative works or modified versions, and any portions
     49   1.1     oster  * thereof, and that both notices appear in supporting documentation.
     50   1.1     oster  *
     51   1.1     oster  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     52   1.1     oster  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     53   1.1     oster  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     54   1.1     oster  *
     55   1.1     oster  * Carnegie Mellon requests users of this software to return to
     56   1.1     oster  *
     57   1.1     oster  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     58   1.1     oster  *  School of Computer Science
     59   1.1     oster  *  Carnegie Mellon University
     60   1.1     oster  *  Pittsburgh PA 15213-3890
     61   1.1     oster  *
     62   1.1     oster  * any improvements or extensions that they make and grant Carnegie the
     63   1.1     oster  * rights to redistribute these changes.
     64   1.1     oster  */
     65   1.1     oster 
     66   1.1     oster /******************************************************************************
     67   1.1     oster  *
     68   1.1     oster  * rf_driver.c -- main setup, teardown, and access routines for the RAID driver
     69   1.1     oster  *
     70   1.1     oster  * All routines are prefixed with rf_ (raidframe), to avoid conflicts.
     71   1.1     oster  *
     72   1.1     oster  ******************************************************************************/
     73   1.1     oster 
     74  1.44     lukem 
     75  1.44     lukem #include <sys/cdefs.h>
     76  1.97     oster __KERNEL_RCSID(0, "$NetBSD: rf_driver.c,v 1.97 2004/03/20 04:22:05 oster Exp $");
     77  1.71    martin 
     78  1.71    martin #include "opt_raid_diagnostic.h"
     79   1.1     oster 
     80   1.1     oster #include <sys/param.h>
     81   1.1     oster #include <sys/systm.h>
     82   1.1     oster #include <sys/ioctl.h>
     83   1.1     oster #include <sys/fcntl.h>
     84   1.1     oster #include <sys/vnode.h>
     85   1.1     oster 
     86   1.1     oster 
     87   1.1     oster #include "rf_archs.h"
     88   1.1     oster #include "rf_threadstuff.h"
     89   1.1     oster 
     90   1.1     oster #include <sys/errno.h>
     91   1.1     oster 
     92   1.1     oster #include "rf_raid.h"
     93   1.1     oster #include "rf_dag.h"
     94   1.1     oster #include "rf_aselect.h"
     95   1.1     oster #include "rf_diskqueue.h"
     96   1.1     oster #include "rf_parityscan.h"
     97   1.1     oster #include "rf_alloclist.h"
     98   1.1     oster #include "rf_dagutils.h"
     99   1.1     oster #include "rf_utils.h"
    100   1.1     oster #include "rf_etimer.h"
    101   1.1     oster #include "rf_acctrace.h"
    102   1.1     oster #include "rf_general.h"
    103   1.1     oster #include "rf_desc.h"
    104   1.1     oster #include "rf_states.h"
    105   1.1     oster #include "rf_decluster.h"
    106   1.1     oster #include "rf_map.h"
    107   1.1     oster #include "rf_revent.h"
    108   1.1     oster #include "rf_callback.h"
    109   1.1     oster #include "rf_engine.h"
    110   1.1     oster #include "rf_mcpair.h"
    111   1.1     oster #include "rf_nwayxor.h"
    112   1.1     oster #include "rf_copyback.h"
    113   1.1     oster #include "rf_driver.h"
    114   1.1     oster #include "rf_options.h"
    115   1.1     oster #include "rf_shutdown.h"
    116  1.24     oster #include "rf_kintf.h"
    117   1.1     oster 
    118   1.1     oster #include <sys/buf.h>
    119   1.1     oster 
    120  1.61     oster #ifndef RF_ACCESS_DEBUG
    121  1.61     oster #define RF_ACCESS_DEBUG 0
    122  1.61     oster #endif
    123  1.61     oster 
    124   1.1     oster /* rad == RF_RaidAccessDesc_t */
    125  1.91     oster RF_DECLARE_MUTEX(rf_rad_lock)
    126   1.1     oster #define RF_MAX_FREE_RAD 128
    127  1.88     oster #define RF_MIN_FREE_RAD  32
    128   1.1     oster 
    129   1.1     oster /* debug variables */
    130   1.6     oster char    rf_panicbuf[2048];	/* a buffer to hold an error msg when we panic */
    131   1.1     oster 
    132   1.1     oster /* main configuration routines */
    133   1.1     oster static int raidframe_booted = 0;
    134   1.1     oster 
    135   1.6     oster static void rf_ConfigureDebug(RF_Config_t * cfgPtr);
    136   1.1     oster static void set_debug_option(char *name, long val);
    137   1.1     oster static void rf_UnconfigureArray(void);
    138   1.1     oster static void rf_ShutdownRDFreeList(void *);
    139   1.1     oster static int rf_ConfigureRDFreeList(RF_ShutdownList_t **);
    140   1.1     oster 
    141   1.6     oster RF_DECLARE_MUTEX(rf_printf_mutex)	/* debug only:  avoids interleaved
    142   1.6     oster 					 * printfs by different stripes */
    143   1.1     oster 
    144   1.1     oster #define SIGNAL_QUIESCENT_COND(_raid_)  wakeup(&((_raid_)->accesses_suspended))
    145   1.1     oster #define WAIT_FOR_QUIESCENCE(_raid_) \
    146  1.38     oster 	ltsleep(&((_raid_)->accesses_suspended), PRIBIO, \
    147  1.38     oster 		"raidframe quiesce", 0, &((_raid_)->access_suspend_mutex))
    148   1.1     oster 
    149   1.9     oster static int configureCount = 0;	/* number of active configurations */
    150   1.9     oster static int isconfigged = 0;	/* is basic raidframe (non per-array)
    151   1.9     oster 				 * stuff configged */
    152  1.55     oster RF_DECLARE_LKMGR_STATIC_MUTEX(configureMutex)	/* used to lock the configuration
    153   1.6     oster 					 * stuff */
    154   1.9     oster static RF_ShutdownList_t *globalShutdown;	/* non array-specific
    155   1.9     oster 						 * stuff */
    156   1.1     oster 
    157   1.9     oster static int rf_ConfigureRDFreeList(RF_ShutdownList_t ** listp);
    158   1.1     oster 
    159   1.1     oster /* called at system boot time */
    160   1.7     oster int
    161   1.7     oster rf_BootRaidframe()
    162   1.1     oster {
    163   1.1     oster 
    164   1.6     oster 	if (raidframe_booted)
    165   1.6     oster 		return (EBUSY);
    166   1.6     oster 	raidframe_booted = 1;
    167  1.79     oster 	lockinit(&configureMutex, PRIBIO, "RAIDframe lock", 0, 0);
    168  1.79     oster  	configureCount = 0;
    169   1.6     oster 	isconfigged = 0;
    170   1.6     oster 	globalShutdown = NULL;
    171   1.6     oster 	return (0);
    172   1.1     oster }
    173   1.1     oster 
    174   1.1     oster /*
    175   1.1     oster  * Called whenever an array is shutdown
    176   1.1     oster  */
    177   1.6     oster static void
    178   1.6     oster rf_UnconfigureArray()
    179   1.1     oster {
    180   1.1     oster 
    181  1.55     oster 	RF_LOCK_LKMGR_MUTEX(configureMutex);
    182   1.6     oster 	if (--configureCount == 0) {	/* if no active configurations, shut
    183   1.6     oster 					 * everything down */
    184   1.6     oster 		isconfigged = 0;
    185  1.92     oster 		rf_ShutdownList(&globalShutdown);
    186   1.6     oster 
    187   1.6     oster 		/*
    188   1.6     oster 	         * We must wait until now, because the AllocList module
    189   1.6     oster 	         * uses the DebugMem module.
    190   1.6     oster 	         */
    191  1.60     oster #if RF_DEBUG_MEM
    192   1.6     oster 		if (rf_memDebug)
    193   1.6     oster 			rf_print_unfreed();
    194  1.60     oster #endif
    195   1.6     oster 	}
    196  1.55     oster 	RF_UNLOCK_LKMGR_MUTEX(configureMutex);
    197   1.9     oster }
    198   1.9     oster 
    199   1.1     oster /*
    200   1.1     oster  * Called to shut down an array.
    201   1.1     oster  */
    202   1.6     oster int
    203  1.80     oster rf_Shutdown(RF_Raid_t *raidPtr)
    204   1.1     oster {
    205   1.6     oster 	if (!raidPtr->valid) {
    206   1.6     oster 		RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe driver.  Aborting shutdown\n");
    207   1.6     oster 		return (EINVAL);
    208   1.6     oster 	}
    209   1.6     oster 	/*
    210   1.6     oster          * wait for outstanding IOs to land
    211   1.6     oster          * As described in rf_raid.h, we use the rad_freelist lock
    212   1.6     oster          * to protect the per-array info about outstanding descs
    213   1.6     oster          * since we need to do freelist locking anyway, and this
    214   1.6     oster          * cuts down on the amount of serialization we've got going
    215   1.6     oster          * on.
    216   1.6     oster          */
    217  1.91     oster 	RF_LOCK_MUTEX(rf_rad_lock);
    218   1.6     oster 	if (raidPtr->waitShutdown) {
    219  1.91     oster 		RF_UNLOCK_MUTEX(rf_rad_lock);
    220   1.6     oster 		return (EBUSY);
    221   1.6     oster 	}
    222   1.6     oster 	raidPtr->waitShutdown = 1;
    223   1.6     oster 	while (raidPtr->nAccOutstanding) {
    224  1.91     oster 		RF_WAIT_COND(raidPtr->outstandingCond, rf_rad_lock);
    225   1.6     oster 	}
    226  1.91     oster 	RF_UNLOCK_MUTEX(rf_rad_lock);
    227  1.35     oster 
    228  1.35     oster 	/* Wait for any parity re-writes to stop... */
    229  1.35     oster 	while (raidPtr->parity_rewrite_in_progress) {
    230  1.35     oster 		printf("Waiting for parity re-write to exit...\n");
    231  1.35     oster 		tsleep(&raidPtr->parity_rewrite_in_progress, PRIBIO,
    232  1.35     oster 		       "rfprwshutdown", 0);
    233  1.35     oster 	}
    234   1.6     oster 
    235   1.6     oster 	raidPtr->valid = 0;
    236   1.6     oster 
    237  1.37     oster 	rf_update_component_labels(raidPtr, RF_FINAL_COMPONENT_UPDATE);
    238   1.6     oster 
    239   1.7     oster 	rf_UnconfigureVnodes(raidPtr);
    240   1.7     oster 
    241   1.7     oster 	rf_ShutdownList(&raidPtr->shutdownList);
    242   1.7     oster 
    243   1.7     oster 	rf_UnconfigureArray();
    244   1.7     oster 
    245   1.7     oster 	return (0);
    246   1.7     oster }
    247   1.1     oster 
    248   1.6     oster 
/*
 * Helper macros for rf_Configure().  They expect `rc', `raidPtr' and
 * `cfgPtr' to be in scope and may return from the enclosing function.
 * Each one is wrapped in do { } while (0) so that it behaves as a
 * single statement and must be followed by a semicolon.
 */

/*
 * Run a global configuration routine.  On failure: undo everything on
 * globalShutdown, drop the configuration count taken by the caller,
 * release configureMutex and return the error.
 */
#define DO_INIT_CONFIGURE(f) do { \
	rc = f (&globalShutdown); \
	if (rc) { \
		RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \
		rf_ShutdownList(&globalShutdown); \
		configureCount--; \
		RF_UNLOCK_LKMGR_MUTEX(configureMutex); \
		return(rc); \
	} \
} while (0)

/* Tear down a partially configured array. */
#define DO_RAID_FAIL() do { \
	rf_UnconfigureVnodes(raidPtr); \
	rf_ShutdownList(&raidPtr->shutdownList); \
	rf_UnconfigureArray(); \
} while (0)

/*
 * Run a per-array configuration routine; on failure tear the array
 * down with DO_RAID_FAIL() and return the error.
 */
#define DO_RAID_INIT_CONFIGURE(f) do { \
	rc = f (&raidPtr->shutdownList, raidPtr, cfgPtr); \
	if (rc) { \
		RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \
		DO_RAID_FAIL(); \
		return(rc); \
	} \
} while (0)

/* Initialize one per-array mutex. */
#define DO_RAID_MUTEX(_m_) do { \
	rf_mutex_init((_m_)); \
} while (0)
    278   1.1     oster 
    279   1.6     oster int
    280  1.80     oster rf_Configure(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, RF_AutoConfig_t *ac)
    281   1.6     oster {
    282  1.72     oster 	RF_RowCol_t col;
    283  1.97     oster 	RF_IOBufHeader_t *tmpbuf;
    284  1.97     oster 	int rc, i;
    285   1.6     oster 
    286  1.55     oster 	RF_LOCK_LKMGR_MUTEX(configureMutex);
    287   1.6     oster 	configureCount++;
    288   1.6     oster 	if (isconfigged == 0) {
    289  1.75     oster 		rf_mutex_init(&rf_printf_mutex);
    290  1.75     oster 
    291   1.6     oster 		/* initialize globals */
    292   1.6     oster 
    293   1.6     oster 		DO_INIT_CONFIGURE(rf_ConfigureAllocList);
    294  1.28     oster 
    295   1.6     oster 		/*
    296  1.28     oster 	         * Yes, this does make debugging general to the whole
    297  1.28     oster 	         * system instead of being array specific. Bummer, drag.
    298  1.28     oster 		 */
    299   1.6     oster 		rf_ConfigureDebug(cfgPtr);
    300   1.6     oster 		DO_INIT_CONFIGURE(rf_ConfigureDebugMem);
    301  1.87     oster #if RF_ACC_TRACE > 0
    302   1.6     oster 		DO_INIT_CONFIGURE(rf_ConfigureAccessTrace);
    303  1.87     oster #endif
    304   1.6     oster 		DO_INIT_CONFIGURE(rf_ConfigureMapModule);
    305   1.6     oster 		DO_INIT_CONFIGURE(rf_ConfigureReconEvent);
    306   1.6     oster 		DO_INIT_CONFIGURE(rf_ConfigureCallback);
    307   1.6     oster 		DO_INIT_CONFIGURE(rf_ConfigureRDFreeList);
    308   1.6     oster 		DO_INIT_CONFIGURE(rf_ConfigureNWayXor);
    309   1.6     oster 		DO_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList);
    310   1.6     oster 		DO_INIT_CONFIGURE(rf_ConfigureMCPair);
    311   1.6     oster 		DO_INIT_CONFIGURE(rf_ConfigureDAGs);
    312   1.6     oster 		DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs);
    313   1.6     oster 		DO_INIT_CONFIGURE(rf_ConfigureReconstruction);
    314   1.6     oster 		DO_INIT_CONFIGURE(rf_ConfigureCopyback);
    315   1.6     oster 		DO_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem);
    316   1.6     oster 		isconfigged = 1;
    317   1.6     oster 	}
    318  1.55     oster 	RF_UNLOCK_LKMGR_MUTEX(configureMutex);
    319   1.6     oster 
    320   1.6     oster 	DO_RAID_MUTEX(&raidPtr->mutex);
    321   1.6     oster 	/* set up the cleanup list.  Do this after ConfigureDebug so that
    322   1.6     oster 	 * value of memDebug will be set */
    323   1.6     oster 
    324   1.6     oster 	rf_MakeAllocList(raidPtr->cleanupList);
    325   1.6     oster 	if (raidPtr->cleanupList == NULL) {
    326   1.6     oster 		DO_RAID_FAIL();
    327   1.6     oster 		return (ENOMEM);
    328   1.6     oster 	}
    329  1.86     oster 	rf_ShutdownCreate(&raidPtr->shutdownList,
    330  1.86     oster 			  (void (*) (void *)) rf_FreeAllocList,
    331  1.86     oster 			  raidPtr->cleanupList);
    332  1.86     oster 
    333   1.6     oster 	raidPtr->numCol = cfgPtr->numCol;
    334   1.6     oster 	raidPtr->numSpare = cfgPtr->numSpare;
    335   1.6     oster 
    336  1.72     oster 	raidPtr->status = rf_rs_optimal;
    337  1.72     oster 	raidPtr->reconControl = NULL;
    338  1.72     oster 
    339  1.64     oster 	TAILQ_INIT(&(raidPtr->iodone));
    340  1.64     oster 	simple_lock_init(&(raidPtr->iodone_lock));
    341   1.6     oster 
    342   1.6     oster 	DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine);
    343   1.6     oster 	DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks);
    344   1.6     oster 
    345  1.76     oster 	raidPtr->outstandingCond = 0;
    346   1.6     oster 
    347   1.6     oster 	raidPtr->nAccOutstanding = 0;
    348   1.6     oster 	raidPtr->waitShutdown = 0;
    349   1.6     oster 
    350   1.6     oster 	DO_RAID_MUTEX(&raidPtr->access_suspend_mutex);
    351   1.6     oster 
    352  1.76     oster 	raidPtr->waitForReconCond = 0;
    353   1.6     oster 
    354  1.28     oster 	if (ac!=NULL) {
    355  1.28     oster 		/* We have an AutoConfig structure..  Don't do the
    356  1.28     oster 		   normal disk configuration... call the auto config
    357  1.28     oster 		   stuff */
    358  1.28     oster 		rf_AutoConfigureDisks(raidPtr, cfgPtr, ac);
    359  1.28     oster 	} else {
    360  1.28     oster 		DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks);
    361  1.28     oster 		DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks);
    362  1.28     oster 	}
    363   1.6     oster 	/* do this after ConfigureDisks & ConfigureSpareDisks to be sure dev
    364   1.6     oster 	 * no. is set */
    365   1.6     oster 	DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues);
    366   1.6     oster 
    367   1.6     oster 	DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout);
    368   1.6     oster 
    369   1.6     oster 	DO_RAID_INIT_CONFIGURE(rf_ConfigurePSStatus);
    370   1.6     oster 
    371  1.82     oster #if RF_INCLUDE_CHAINDECLUSTER > 0
    372  1.72     oster 	for (col = 0; col < raidPtr->numCol; col++) {
    373  1.72     oster 		/*
    374  1.72     oster 		 * XXX better distribution
    375  1.72     oster 		 */
    376  1.72     oster 		raidPtr->hist_diskreq[col] = 0;
    377   1.6     oster 	}
    378  1.82     oster #endif
    379  1.30     oster 	raidPtr->numNewFailures = 0;
    380  1.28     oster 	raidPtr->copyback_in_progress = 0;
    381  1.28     oster 	raidPtr->parity_rewrite_in_progress = 0;
    382  1.66     oster 	raidPtr->adding_hot_spare = 0;
    383  1.28     oster 	raidPtr->recon_in_progress = 0;
    384  1.29     oster 	raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs;
    385  1.29     oster 
    386  1.29     oster 	/* autoconfigure and root_partition will actually get filled in
    387  1.29     oster 	   after the config is done */
    388  1.29     oster 	raidPtr->autoconfigure = 0;
    389  1.29     oster 	raidPtr->root_partition = 0;
    390  1.29     oster 	raidPtr->last_unit = raidPtr->raidid;
    391  1.29     oster 	raidPtr->config_order = 0;
    392   1.6     oster 
    393   1.6     oster 	if (rf_keepAccTotals) {
    394   1.6     oster 		raidPtr->keep_acc_totals = 1;
    395   1.6     oster 	}
    396   1.1     oster 
    397  1.97     oster 	/* Allocate a bunch of buffers to be used in low-memory conditions */
    398  1.97     oster 	raidPtr->iobuf = NULL;
    399  1.97     oster 	/* XXX next line needs tuning... */
    400  1.97     oster 	raidPtr->numEmergencyBuffers = 10 * raidPtr->numCol;
    401  1.97     oster #if DEBUG
    402  1.97     oster 	printf("raid%d: allocating %d buffers of %d bytes.\n",
    403  1.97     oster 	       raidPtr->raidid,
    404  1.97     oster 	       raidPtr->numEmergencyBuffers,
    405  1.97     oster 	       (int)(raidPtr->Layout.sectorsPerStripeUnit <<
    406  1.97     oster 	       raidPtr->logBytesPerSector));
    407  1.97     oster #endif
    408  1.97     oster 	for (i = 0; i < raidPtr->numEmergencyBuffers; i++) {
    409  1.97     oster 		tmpbuf = malloc( raidPtr->Layout.sectorsPerStripeUnit <<
    410  1.97     oster 				 raidPtr->logBytesPerSector,
    411  1.97     oster 				 M_RAIDFRAME, M_NOWAIT);
    412  1.97     oster 		if (tmpbuf) {
    413  1.97     oster 			tmpbuf->next = raidPtr->iobuf;
    414  1.97     oster 			raidPtr->iobuf = tmpbuf;
    415  1.97     oster 			raidPtr->iobuf_count++;
    416  1.97     oster 		} else {
    417  1.97     oster 			printf("raid%d: failed to allocate emergency buffer!\n",
    418  1.97     oster 			       raidPtr->raidid);
    419  1.97     oster 		}
    420  1.97     oster 	}
    421  1.97     oster 
    422   1.6     oster 	raidPtr->valid = 1;
    423  1.52     oster 
    424  1.52     oster 	printf("raid%d: %s\n", raidPtr->raidid,
    425  1.52     oster 	       raidPtr->Layout.map->configName);
    426  1.52     oster 	printf("raid%d: Components:", raidPtr->raidid);
    427  1.72     oster 
    428  1.72     oster 	for (col = 0; col < raidPtr->numCol; col++) {
    429  1.72     oster 		printf(" %s", raidPtr->Disks[col].devname);
    430  1.72     oster 		if (RF_DEAD_DISK(raidPtr->Disks[col].status)) {
    431  1.72     oster 			printf("[**FAILED**]");
    432  1.52     oster 		}
    433  1.52     oster 	}
    434  1.52     oster 	printf("\n");
    435  1.52     oster 	printf("raid%d: Total Sectors: %lu (%lu MB)\n",
    436  1.52     oster 	       raidPtr->raidid,
    437  1.52     oster 	       (unsigned long) raidPtr->totalSectors,
    438  1.52     oster 	       (unsigned long) (raidPtr->totalSectors / 1024 *
    439  1.52     oster 				(1 << raidPtr->logBytesPerSector) / 1024));
    440  1.50     oster 
    441   1.6     oster 	return (0);
    442   1.1     oster }
    443   1.1     oster 
    444   1.6     oster static void
    445  1.80     oster rf_ShutdownRDFreeList(void *ignored)
    446   1.1     oster {
    447  1.89     oster 	pool_destroy(&rf_pools.rad);
    448   1.1     oster }
    449   1.1     oster 
    450   1.6     oster static int
    451  1.80     oster rf_ConfigureRDFreeList(RF_ShutdownList_t **listp)
    452   1.1     oster {
    453   1.1     oster 
    454  1.89     oster 	rf_pool_init(&rf_pools.rad, sizeof(RF_RaidAccessDesc_t),
    455  1.89     oster 		     "rf_rad_pl", RF_MIN_FREE_RAD, RF_MAX_FREE_RAD);
    456  1.86     oster 	rf_ShutdownCreate(listp, rf_ShutdownRDFreeList, NULL);
    457  1.91     oster 	simple_lock_init(&rf_rad_lock);
    458   1.6     oster 	return (0);
    459   1.6     oster }
    460   1.6     oster 
    461   1.6     oster RF_RaidAccessDesc_t *
    462  1.80     oster rf_AllocRaidAccDesc(RF_Raid_t *raidPtr, RF_IoType_t type,
    463  1.80     oster 		    RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks,
    464  1.80     oster 		    caddr_t bufPtr, void *bp, RF_RaidAccessFlags_t flags,
    465  1.80     oster 		    RF_AccessState_t *states)
    466   1.6     oster {
    467   1.6     oster 	RF_RaidAccessDesc_t *desc;
    468   1.6     oster 
    469  1.89     oster 	desc = pool_get(&rf_pools.rad, PR_WAITOK);
    470  1.73     oster 
    471  1.91     oster 	RF_LOCK_MUTEX(rf_rad_lock);
    472   1.6     oster 	if (raidPtr->waitShutdown) {
    473   1.6     oster 		/*
    474   1.6     oster 	         * Actually, we're shutting the array down. Free the desc
    475   1.6     oster 	         * and return NULL.
    476   1.6     oster 	         */
    477  1.73     oster 
    478  1.91     oster 		RF_UNLOCK_MUTEX(rf_rad_lock);
    479  1.89     oster 		pool_put(&rf_pools.rad, desc);
    480   1.6     oster 		return (NULL);
    481   1.6     oster 	}
    482   1.6     oster 	raidPtr->nAccOutstanding++;
    483  1.73     oster 
    484  1.91     oster 	RF_UNLOCK_MUTEX(rf_rad_lock);
    485   1.6     oster 
    486   1.6     oster 	desc->raidPtr = (void *) raidPtr;
    487   1.6     oster 	desc->type = type;
    488   1.6     oster 	desc->raidAddress = raidAddress;
    489   1.6     oster 	desc->numBlocks = numBlocks;
    490   1.6     oster 	desc->bufPtr = bufPtr;
    491   1.6     oster 	desc->bp = bp;
    492   1.6     oster 	desc->flags = flags;
    493   1.6     oster 	desc->states = states;
    494   1.6     oster 	desc->state = 0;
    495   1.6     oster 
    496   1.6     oster 	desc->status = 0;
    497  1.87     oster #if RF_ACC_TRACE > 0
    498  1.40   thorpej 	memset((char *) &desc->tracerec, 0, sizeof(RF_AccTraceEntry_t));
    499  1.87     oster #endif
    500  1.41     oster 	desc->callbackFunc = NULL;
    501  1.41     oster 	desc->callbackArg = NULL;
    502   1.6     oster 	desc->next = NULL;
    503   1.6     oster 	desc->cleanupList = NULL;
    504   1.6     oster 	rf_MakeAllocList(desc->cleanupList);
    505   1.6     oster 	return (desc);
    506   1.6     oster }
    507   1.6     oster 
    508   1.6     oster void
    509  1.80     oster rf_FreeRaidAccDesc(RF_RaidAccessDesc_t *desc)
    510   1.6     oster {
    511   1.6     oster 	RF_Raid_t *raidPtr = desc->raidPtr;
    512  1.85     oster 	RF_DagList_t *dagList, *temp;
    513   1.6     oster 
    514   1.6     oster 	RF_ASSERT(desc);
    515   1.6     oster 
    516  1.85     oster 	/* Cleanup the dagList(s) */
    517  1.85     oster 	dagList = desc->dagList;
    518  1.85     oster 	while(dagList != NULL) {
    519  1.85     oster 		temp = dagList;
    520  1.85     oster 		dagList = dagList->next;
    521  1.85     oster 		rf_FreeDAGList(temp);
    522  1.85     oster 	}
    523  1.85     oster 
    524   1.6     oster 	rf_FreeAllocList(desc->cleanupList);
    525  1.89     oster 	pool_put(&rf_pools.rad, desc);
    526  1.91     oster 	RF_LOCK_MUTEX(rf_rad_lock);
    527   1.6     oster 	raidPtr->nAccOutstanding--;
    528   1.6     oster 	if (raidPtr->waitShutdown) {
    529   1.6     oster 		RF_SIGNAL_COND(raidPtr->outstandingCond);
    530   1.6     oster 	}
    531  1.91     oster 	RF_UNLOCK_MUTEX(rf_rad_lock);
    532   1.1     oster }
    533   1.1     oster /*********************************************************************
    534   1.1     oster  * Main routine for performing an access.
    535   1.1     oster  * Accesses are retried until a DAG can not be selected.  This occurs
    536   1.1     oster  * when either the DAG library is incomplete or there are too many
    537   1.1     oster  * failures in a parity group.
    538  1.80     oster  *
    539  1.80     oster  * type should be read or write async_flag should be RF_TRUE or
    540  1.80     oster  * RF_FALSE bp_in is a buf pointer.  void * to facilitate ignoring it
    541  1.80     oster  * outside the kernel
    542   1.1     oster  ********************************************************************/
    543   1.6     oster int
    544  1.80     oster rf_DoAccess(RF_Raid_t * raidPtr, RF_IoType_t type, int async_flag,
    545  1.80     oster 	    RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks,
    546  1.80     oster 	    caddr_t bufPtr, void *bp_in, RF_RaidAccessFlags_t flags)
    547   1.1     oster {
    548   1.6     oster 	RF_RaidAccessDesc_t *desc;
    549   1.6     oster 	caddr_t lbufPtr = bufPtr;
    550   1.6     oster 	struct buf *bp = (struct buf *) bp_in;
    551   1.6     oster 
    552   1.6     oster 	raidAddress += rf_raidSectorOffset;
    553   1.6     oster 
    554  1.61     oster #if RF_ACCESS_DEBUG
    555   1.6     oster 	if (rf_accessDebug) {
    556   1.1     oster 
    557   1.6     oster 		printf("logBytes is: %d %d %d\n", raidPtr->raidid,
    558   1.6     oster 		    raidPtr->logBytesPerSector,
    559   1.6     oster 		    (int) rf_RaidAddressToByte(raidPtr, numBlocks));
    560  1.22     oster 		printf("raid%d: %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx\n", raidPtr->raidid,
    561   1.6     oster 		    (type == RF_IO_TYPE_READ) ? "READ" : "WRITE", (int) raidAddress,
    562   1.6     oster 		    (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress),
    563   1.6     oster 		    (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress + numBlocks - 1),
    564   1.6     oster 		    (int) numBlocks,
    565   1.6     oster 		    (int) rf_RaidAddressToByte(raidPtr, numBlocks),
    566   1.6     oster 		    (long) bufPtr);
    567   1.6     oster 	}
    568  1.61     oster #endif
    569   1.6     oster 	if (raidAddress + numBlocks > raidPtr->totalSectors) {
    570   1.1     oster 
    571   1.6     oster 		printf("DoAccess: raid addr %lu too large to access %lu sectors.  Max legal addr is %lu\n",
    572   1.6     oster 		    (u_long) raidAddress, (u_long) numBlocks, (u_long) raidPtr->totalSectors);
    573   1.1     oster 
    574  1.77     oster 
    575  1.77     oster 		bp->b_flags |= B_ERROR;
    576  1.77     oster 		bp->b_resid = bp->b_bcount;
    577  1.77     oster 		bp->b_error = ENOSPC;
    578  1.77     oster 		biodone(bp);
    579  1.16     oster 		return (ENOSPC);
    580   1.6     oster 	}
    581   1.6     oster 	desc = rf_AllocRaidAccDesc(raidPtr, type, raidAddress,
    582  1.41     oster 	    numBlocks, lbufPtr, bp, flags, raidPtr->Layout.map->states);
    583   1.1     oster 
    584   1.6     oster 	if (desc == NULL) {
    585   1.6     oster 		return (ENOMEM);
    586   1.6     oster 	}
    587  1.87     oster #if RF_ACC_TRACE > 0
    588   1.6     oster 	RF_ETIMER_START(desc->tracerec.tot_timer);
    589  1.87     oster #endif
    590   1.6     oster 	desc->async_flag = async_flag;
    591   1.3  explorer 
    592   1.6     oster 	rf_ContinueRaidAccess(desc);
    593   1.1     oster 
    594   1.6     oster 	return (0);
    595   1.1     oster }
#if 0
/*
 * Force the array into reconfigured mode without doing reconstruction.
 * Currently compiled out.
 *
 * Panics if the layout does not have RF_DISTRIBUTE_SPARE set;
 * otherwise marks column `col' as rf_ds_dist_spared, updates the
 * component labels and returns 0.
 */
int
rf_SetReconfiguredMode(RF_Raid_t *raidPtr, int col)
{
	if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) == 0) {
		printf("Can't set reconfigured mode in dedicated-spare array\n");
		RF_PANIC();
	}

	RF_LOCK_MUTEX(raidPtr->mutex);
	raidPtr->numFailures++;
	raidPtr->Disks[col].status = rf_ds_dist_spared;
	raidPtr->status = rf_rs_reconfigured;
	rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
	/*
	 * The spare table is only installed for the declustering +
	 * distributed sparing architecture.
	 */
	if ((raidPtr->Layout.map->flags & RF_BD_DECLUSTERED) != 0) {
		rf_InstallSpareTable(raidPtr, col);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);

	return (0);
}
#endif
    618   1.1     oster 
    619   1.6     oster int
    620  1.80     oster rf_FailDisk(RF_Raid_t *raidPtr, int fcol, int initRecon)
    621   1.6     oster {
    622   1.6     oster 	RF_LOCK_MUTEX(raidPtr->mutex);
    623  1.72     oster 	if (raidPtr->Disks[fcol].status != rf_ds_failed) {
    624  1.68     oster 		/* must be failing something that is valid, or else it's
    625  1.68     oster 		   already marked as failed (in which case we don't
    626  1.68     oster 		   want to mark it failed again!) */
    627  1.68     oster 		raidPtr->numFailures++;
    628  1.72     oster 		raidPtr->Disks[fcol].status = rf_ds_failed;
    629  1.72     oster 		raidPtr->status = rf_rs_degraded;
    630  1.68     oster 	}
    631  1.65     oster 	RF_UNLOCK_MUTEX(raidPtr->mutex);
    632  1.68     oster 
    633  1.37     oster 	rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
    634  1.68     oster 
    635  1.56     oster 	/* Close the component, so that it's not "locked" if someone
    636  1.56     oster 	   else want's to use it! */
    637  1.56     oster 
    638  1.72     oster 	rf_close_component(raidPtr, raidPtr->raid_cinfo[fcol].ci_vp,
    639  1.72     oster 			   raidPtr->Disks[fcol].auto_configured);
    640  1.65     oster 
    641  1.65     oster 	RF_LOCK_MUTEX(raidPtr->mutex);
    642  1.72     oster 	raidPtr->raid_cinfo[fcol].ci_vp = NULL;
    643  1.56     oster 
    644  1.56     oster 	/* Need to mark the component as not being auto_configured
    645  1.56     oster 	   (in case it was previously). */
    646  1.56     oster 
    647  1.72     oster 	raidPtr->Disks[fcol].auto_configured = 0;
    648  1.65     oster 	RF_UNLOCK_MUTEX(raidPtr->mutex);
    649  1.56     oster 
    650   1.6     oster 	if (initRecon)
    651  1.72     oster 		rf_ReconstructFailedDisk(raidPtr, fcol);
    652   1.6     oster 	return (0);
    653   1.1     oster }
    654   1.1     oster /* releases a thread that is waiting for the array to become quiesced.
    655   1.1     oster  * access_suspend_mutex should be locked upon calling this
    656   1.1     oster  */
    657   1.6     oster void
    658  1.80     oster rf_SignalQuiescenceLock(RF_Raid_t *raidPtr)
    659   1.6     oster {
    660  1.61     oster #if RF_DEBUG_QUIESCE
    661   1.6     oster 	if (rf_quiesceDebug) {
    662  1.22     oster 		printf("raid%d: Signalling quiescence lock\n",
    663  1.22     oster 		       raidPtr->raidid);
    664   1.6     oster 	}
    665  1.61     oster #endif
    666   1.6     oster 	raidPtr->access_suspend_release = 1;
    667   1.6     oster 
    668   1.6     oster 	if (raidPtr->waiting_for_quiescence) {
    669   1.6     oster 		SIGNAL_QUIESCENT_COND(raidPtr);
    670   1.6     oster 	}
    671   1.1     oster }
    672   1.1     oster /* suspends all new requests to the array.  No effect on accesses that are in flight.  */
    673   1.6     oster int
    674  1.80     oster rf_SuspendNewRequestsAndWait(RF_Raid_t *raidPtr)
    675   1.6     oster {
    676  1.61     oster #if RF_DEBUG_QUIESCE
    677   1.6     oster 	if (rf_quiesceDebug)
    678  1.53     oster 		printf("raid%d: Suspending new reqs\n", raidPtr->raidid);
    679  1.61     oster #endif
    680   1.6     oster 	RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
    681   1.6     oster 	raidPtr->accesses_suspended++;
    682   1.6     oster 	raidPtr->waiting_for_quiescence = (raidPtr->accs_in_flight == 0) ? 0 : 1;
    683   1.6     oster 
    684   1.6     oster 	if (raidPtr->waiting_for_quiescence) {
    685   1.6     oster 		raidPtr->access_suspend_release = 0;
    686   1.6     oster 		while (!raidPtr->access_suspend_release) {
    687  1.93     oster #if RF_DEBUG_QUIESCE
    688  1.53     oster 			printf("raid%d: Suspending: Waiting for Quiescence\n",
    689  1.53     oster 			       raidPtr->raidid);
    690  1.93     oster #endif
    691   1.6     oster 			WAIT_FOR_QUIESCENCE(raidPtr);
    692   1.6     oster 			raidPtr->waiting_for_quiescence = 0;
    693   1.6     oster 		}
    694   1.6     oster 	}
    695  1.93     oster #if RF_DEBUG_QUIESCE
    696  1.53     oster 	printf("raid%d: Quiescence reached..\n", raidPtr->raidid);
    697  1.93     oster #endif
    698   1.1     oster 
    699   1.6     oster 	RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
    700   1.6     oster 	return (raidPtr->waiting_for_quiescence);
    701   1.1     oster }
    702   1.1     oster /* wake up everyone waiting for quiescence to be released */
    703   1.6     oster void
    704  1.80     oster rf_ResumeNewRequests(RF_Raid_t *raidPtr)
    705   1.6     oster {
    706   1.6     oster 	RF_CallbackDesc_t *t, *cb;
    707   1.6     oster 
    708  1.61     oster #if RF_DEBUG_QUIESCE
    709   1.6     oster 	if (rf_quiesceDebug)
    710   1.6     oster 		printf("Resuming new reqs\n");
    711  1.61     oster #endif
    712   1.6     oster 
    713   1.6     oster 	RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
    714   1.6     oster 	raidPtr->accesses_suspended--;
    715   1.6     oster 	if (raidPtr->accesses_suspended == 0)
    716   1.6     oster 		cb = raidPtr->quiesce_wait_list;
    717   1.6     oster 	else
    718   1.6     oster 		cb = NULL;
    719   1.6     oster 	raidPtr->quiesce_wait_list = NULL;
    720   1.6     oster 	RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
    721   1.6     oster 
    722   1.6     oster 	while (cb) {
    723   1.6     oster 		t = cb;
    724   1.6     oster 		cb = cb->next;
    725   1.6     oster 		(t->callbackFunc) (t->callbackArg);
    726   1.6     oster 		rf_FreeCallbackDesc(t);
    727   1.6     oster 	}
    728   1.1     oster }
    729   1.1     oster /*****************************************************************************************
    730   1.1     oster  *
    731   1.1     oster  * debug routines
    732   1.1     oster  *
    733   1.1     oster  ****************************************************************************************/
    734   1.1     oster 
    735   1.6     oster static void
    736  1.80     oster set_debug_option(char *name, long val)
    737   1.6     oster {
    738   1.6     oster 	RF_DebugName_t *p;
    739   1.6     oster 
    740   1.6     oster 	for (p = rf_debugNames; p->name; p++) {
    741   1.6     oster 		if (!strcmp(p->name, name)) {
    742   1.6     oster 			*(p->ptr) = val;
    743   1.6     oster 			printf("[Set debug variable %s to %ld]\n", name, val);
    744   1.6     oster 			return;
    745   1.6     oster 		}
    746   1.6     oster 	}
    747   1.6     oster 	RF_ERRORMSG1("Unknown debug string \"%s\"\n", name);
    748   1.1     oster }
    749   1.1     oster 
    750   1.1     oster 
    751   1.1     oster /* would like to use sscanf here, but apparently not available in kernel */
    752   1.1     oster /*ARGSUSED*/
    753   1.6     oster static void
    754  1.80     oster rf_ConfigureDebug(RF_Config_t *cfgPtr)
    755   1.6     oster {
    756   1.6     oster 	char   *val_p, *name_p, *white_p;
    757   1.6     oster 	long    val;
    758   1.6     oster 	int     i;
    759   1.6     oster 
    760   1.6     oster 	rf_ResetDebugOptions();
    761   1.6     oster 	for (i = 0; cfgPtr->debugVars[i][0] && i < RF_MAXDBGV; i++) {
    762   1.6     oster 		name_p = rf_find_non_white(&cfgPtr->debugVars[i][0]);
    763   1.6     oster 		white_p = rf_find_white(name_p);	/* skip to start of 2nd
    764   1.6     oster 							 * word */
    765   1.6     oster 		val_p = rf_find_non_white(white_p);
    766   1.6     oster 		if (*val_p == '0' && *(val_p + 1) == 'x')
    767   1.6     oster 			val = rf_htoi(val_p + 2);
    768   1.6     oster 		else
    769   1.6     oster 			val = rf_atoi(val_p);
    770   1.6     oster 		*white_p = '\0';
    771   1.6     oster 		set_debug_option(name_p, val);
    772   1.6     oster 	}
    773   1.1     oster }
    774  1.39     oster 
    775  1.39     oster void
    776  1.80     oster rf_print_panic_message(int line, char *file)
    777  1.39     oster {
    778  1.39     oster 	sprintf(rf_panicbuf,"raidframe error at line %d file %s",
    779  1.39     oster 		line, file);
    780  1.39     oster }
    781  1.39     oster 
    782  1.62     oster #ifdef RAID_DIAGNOSTIC
    783  1.39     oster void
    784  1.80     oster rf_print_assert_panic_message(int line,	char *file, char *condition)
    785  1.39     oster {
    786  1.39     oster 	sprintf(rf_panicbuf,
    787  1.39     oster 		"raidframe error at line %d file %s (failed asserting %s)\n",
    788  1.39     oster 		line, file, condition);
    789  1.58     oster }
    790  1.62     oster #endif
    791  1.58     oster 
/*
 * Report, via RF_ERRORMSG3, that a mutex could not be initialised at
 * the given file/line, together with the failing return code.
 */
void
rf_print_unable_to_init_mutex(char *file, int line, int rc)
{
	RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
	    file,
	    line,
	    rc);
}
    798  1.58     oster 
/*
 * Report, via RF_ERRORMSG3, that an entry could not be added to the
 * shutdown list at the given file/line, together with the failing
 * return code.
 */
void
rf_print_unable_to_add_shutdown(char *file, int line, int rc)
{
	RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
	    file,
	    line,
	    rc);
}
    805