rf_driver.c revision 1.103 1 1.103 oster /* $NetBSD: rf_driver.c,v 1.103 2004/06/27 03:15:18 oster Exp $ */
2 1.9 oster /*-
3 1.9 oster * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 1.9 oster * All rights reserved.
5 1.9 oster *
6 1.9 oster * This code is derived from software contributed to The NetBSD Foundation
7 1.9 oster * by Greg Oster
8 1.9 oster *
9 1.9 oster * Redistribution and use in source and binary forms, with or without
10 1.9 oster * modification, are permitted provided that the following conditions
11 1.9 oster * are met:
12 1.9 oster * 1. Redistributions of source code must retain the above copyright
13 1.9 oster * notice, this list of conditions and the following disclaimer.
14 1.9 oster * 2. Redistributions in binary form must reproduce the above copyright
15 1.9 oster * notice, this list of conditions and the following disclaimer in the
16 1.9 oster * documentation and/or other materials provided with the distribution.
17 1.9 oster * 3. All advertising materials mentioning features or use of this software
18 1.9 oster * must display the following acknowledgement:
19 1.9 oster * This product includes software developed by the NetBSD
20 1.9 oster * Foundation, Inc. and its contributors.
21 1.9 oster * 4. Neither the name of The NetBSD Foundation nor the names of its
22 1.9 oster * contributors may be used to endorse or promote products derived
23 1.9 oster * from this software without specific prior written permission.
24 1.9 oster *
25 1.9 oster * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 1.9 oster * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 1.9 oster * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 1.9 oster * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 1.9 oster * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 1.9 oster * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 1.9 oster * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 1.9 oster * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 1.9 oster * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 1.9 oster * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 1.9 oster * POSSIBILITY OF SUCH DAMAGE.
36 1.9 oster */
37 1.9 oster
38 1.1 oster /*
39 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
40 1.1 oster * All rights reserved.
41 1.1 oster *
42 1.1 oster * Author: Mark Holland, Khalil Amiri, Claudson Bornstein, William V. Courtright II,
43 1.1 oster * Robby Findler, Daniel Stodolsky, Rachad Youssef, Jim Zelenka
44 1.1 oster *
45 1.1 oster * Permission to use, copy, modify and distribute this software and
46 1.1 oster * its documentation is hereby granted, provided that both the copyright
47 1.1 oster * notice and this permission notice appear in all copies of the
48 1.1 oster * software, derivative works or modified versions, and any portions
49 1.1 oster * thereof, and that both notices appear in supporting documentation.
50 1.1 oster *
51 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
52 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
53 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
54 1.1 oster *
55 1.1 oster * Carnegie Mellon requests users of this software to return to
56 1.1 oster *
57 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
58 1.1 oster * School of Computer Science
59 1.1 oster * Carnegie Mellon University
60 1.1 oster * Pittsburgh PA 15213-3890
61 1.1 oster *
62 1.1 oster * any improvements or extensions that they make and grant Carnegie the
63 1.1 oster * rights to redistribute these changes.
64 1.1 oster */
65 1.1 oster
66 1.1 oster /******************************************************************************
67 1.1 oster *
68 1.1 oster * rf_driver.c -- main setup, teardown, and access routines for the RAID driver
69 1.1 oster *
70 1.1 oster * all routines are prefixed with rf_ (raidframe), to avoid conflicts.
71 1.1 oster *
72 1.1 oster ******************************************************************************/
73 1.1 oster
74 1.44 lukem
75 1.44 lukem #include <sys/cdefs.h>
76 1.103 oster __KERNEL_RCSID(0, "$NetBSD: rf_driver.c,v 1.103 2004/06/27 03:15:18 oster Exp $");
77 1.71 martin
78 1.71 martin #include "opt_raid_diagnostic.h"
79 1.1 oster
80 1.1 oster #include <sys/param.h>
81 1.1 oster #include <sys/systm.h>
82 1.1 oster #include <sys/ioctl.h>
83 1.1 oster #include <sys/fcntl.h>
84 1.1 oster #include <sys/vnode.h>
85 1.1 oster
86 1.1 oster
87 1.1 oster #include "rf_archs.h"
88 1.1 oster #include "rf_threadstuff.h"
89 1.1 oster
90 1.1 oster #include <sys/errno.h>
91 1.1 oster
92 1.1 oster #include "rf_raid.h"
93 1.1 oster #include "rf_dag.h"
94 1.1 oster #include "rf_aselect.h"
95 1.1 oster #include "rf_diskqueue.h"
96 1.1 oster #include "rf_parityscan.h"
97 1.1 oster #include "rf_alloclist.h"
98 1.1 oster #include "rf_dagutils.h"
99 1.1 oster #include "rf_utils.h"
100 1.1 oster #include "rf_etimer.h"
101 1.1 oster #include "rf_acctrace.h"
102 1.1 oster #include "rf_general.h"
103 1.1 oster #include "rf_desc.h"
104 1.1 oster #include "rf_states.h"
105 1.1 oster #include "rf_decluster.h"
106 1.1 oster #include "rf_map.h"
107 1.1 oster #include "rf_revent.h"
108 1.1 oster #include "rf_callback.h"
109 1.1 oster #include "rf_engine.h"
110 1.1 oster #include "rf_mcpair.h"
111 1.1 oster #include "rf_nwayxor.h"
112 1.1 oster #include "rf_copyback.h"
113 1.1 oster #include "rf_driver.h"
114 1.1 oster #include "rf_options.h"
115 1.1 oster #include "rf_shutdown.h"
116 1.24 oster #include "rf_kintf.h"
117 1.1 oster
118 1.1 oster #include <sys/buf.h>
119 1.1 oster
120 1.61 oster #ifndef RF_ACCESS_DEBUG
121 1.61 oster #define RF_ACCESS_DEBUG 0
122 1.61 oster #endif
123 1.61 oster
124 1.1 oster /* rad == RF_RaidAccessDesc_t */
125 1.91 oster RF_DECLARE_MUTEX(rf_rad_lock)
126 1.1 oster #define RF_MAX_FREE_RAD 128
127 1.88 oster #define RF_MIN_FREE_RAD 32
128 1.1 oster
129 1.1 oster /* debug variables */
130 1.6 oster char rf_panicbuf[2048]; /* a buffer to hold an error msg when we panic */
131 1.1 oster
132 1.1 oster /* main configuration routines */
133 1.1 oster static int raidframe_booted = 0;
134 1.1 oster
135 1.6 oster static void rf_ConfigureDebug(RF_Config_t * cfgPtr);
136 1.1 oster static void set_debug_option(char *name, long val);
137 1.1 oster static void rf_UnconfigureArray(void);
138 1.1 oster static void rf_ShutdownRDFreeList(void *);
139 1.1 oster static int rf_ConfigureRDFreeList(RF_ShutdownList_t **);
140 1.1 oster
141 1.6 oster RF_DECLARE_MUTEX(rf_printf_mutex) /* debug only: avoids interleaved
142 1.6 oster * printfs by different stripes */
143 1.1 oster
144 1.1 oster #define SIGNAL_QUIESCENT_COND(_raid_) wakeup(&((_raid_)->accesses_suspended))
145 1.1 oster #define WAIT_FOR_QUIESCENCE(_raid_) \
146 1.38 oster ltsleep(&((_raid_)->accesses_suspended), PRIBIO, \
147 1.38 oster "raidframe quiesce", 0, &((_raid_)->access_suspend_mutex))
148 1.1 oster
149 1.9 oster static int configureCount = 0; /* number of active configurations */
150 1.9 oster static int isconfigged = 0; /* is basic raidframe (non per-array)
151 1.9 oster * stuff configged */
152 1.55 oster RF_DECLARE_LKMGR_STATIC_MUTEX(configureMutex) /* used to lock the configuration
153 1.6 oster * stuff */
154 1.9 oster static RF_ShutdownList_t *globalShutdown; /* non array-specific
155 1.9 oster * stuff */
156 1.1 oster
157 1.9 oster static int rf_ConfigureRDFreeList(RF_ShutdownList_t ** listp);
158 1.103 oster static int rf_AllocEmergBuffers(RF_Raid_t *);
159 1.103 oster static void rf_FreeEmergBuffers(RF_Raid_t *);
160 1.1 oster
161 1.1 oster /* called at system boot time */
162 1.7 oster int
163 1.7 oster rf_BootRaidframe()
164 1.1 oster {
165 1.1 oster
166 1.6 oster if (raidframe_booted)
167 1.6 oster return (EBUSY);
168 1.6 oster raidframe_booted = 1;
169 1.79 oster lockinit(&configureMutex, PRIBIO, "RAIDframe lock", 0, 0);
170 1.79 oster configureCount = 0;
171 1.6 oster isconfigged = 0;
172 1.6 oster globalShutdown = NULL;
173 1.6 oster return (0);
174 1.1 oster }
175 1.1 oster
176 1.1 oster /*
177 1.1 oster * Called whenever an array is shutdown
178 1.1 oster */
179 1.6 oster static void
180 1.6 oster rf_UnconfigureArray()
181 1.1 oster {
182 1.1 oster
183 1.55 oster RF_LOCK_LKMGR_MUTEX(configureMutex);
184 1.6 oster if (--configureCount == 0) { /* if no active configurations, shut
185 1.6 oster * everything down */
186 1.6 oster isconfigged = 0;
187 1.92 oster rf_ShutdownList(&globalShutdown);
188 1.6 oster
189 1.6 oster /*
190 1.6 oster * We must wait until now, because the AllocList module
191 1.6 oster * uses the DebugMem module.
192 1.6 oster */
193 1.60 oster #if RF_DEBUG_MEM
194 1.6 oster if (rf_memDebug)
195 1.6 oster rf_print_unfreed();
196 1.60 oster #endif
197 1.6 oster }
198 1.55 oster RF_UNLOCK_LKMGR_MUTEX(configureMutex);
199 1.9 oster }
200 1.9 oster
201 1.1 oster /*
202 1.1 oster * Called to shut down an array.
203 1.1 oster */
204 1.6 oster int
205 1.80 oster rf_Shutdown(RF_Raid_t *raidPtr)
206 1.1 oster {
207 1.100 oster
208 1.6 oster if (!raidPtr->valid) {
209 1.6 oster RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe driver. Aborting shutdown\n");
210 1.6 oster return (EINVAL);
211 1.6 oster }
212 1.6 oster /*
213 1.6 oster * wait for outstanding IOs to land
214 1.6 oster * As described in rf_raid.h, we use the rad_freelist lock
215 1.6 oster * to protect the per-array info about outstanding descs
216 1.6 oster * since we need to do freelist locking anyway, and this
217 1.6 oster * cuts down on the amount of serialization we've got going
218 1.6 oster * on.
219 1.6 oster */
220 1.91 oster RF_LOCK_MUTEX(rf_rad_lock);
221 1.6 oster if (raidPtr->waitShutdown) {
222 1.91 oster RF_UNLOCK_MUTEX(rf_rad_lock);
223 1.6 oster return (EBUSY);
224 1.6 oster }
225 1.6 oster raidPtr->waitShutdown = 1;
226 1.6 oster while (raidPtr->nAccOutstanding) {
227 1.91 oster RF_WAIT_COND(raidPtr->outstandingCond, rf_rad_lock);
228 1.6 oster }
229 1.91 oster RF_UNLOCK_MUTEX(rf_rad_lock);
230 1.35 oster
231 1.35 oster /* Wait for any parity re-writes to stop... */
232 1.35 oster while (raidPtr->parity_rewrite_in_progress) {
233 1.35 oster printf("Waiting for parity re-write to exit...\n");
234 1.35 oster tsleep(&raidPtr->parity_rewrite_in_progress, PRIBIO,
235 1.35 oster "rfprwshutdown", 0);
236 1.35 oster }
237 1.6 oster
238 1.6 oster raidPtr->valid = 0;
239 1.6 oster
240 1.37 oster rf_update_component_labels(raidPtr, RF_FINAL_COMPONENT_UPDATE);
241 1.6 oster
242 1.7 oster rf_UnconfigureVnodes(raidPtr);
243 1.7 oster
244 1.103 oster rf_FreeEmergBuffers(raidPtr);
245 1.100 oster
246 1.7 oster rf_ShutdownList(&raidPtr->shutdownList);
247 1.7 oster
248 1.7 oster rf_UnconfigureArray();
249 1.7 oster
250 1.7 oster return (0);
251 1.7 oster }
252 1.1 oster
253 1.6 oster
/*
 * Helper macros for rf_Configure().  They expect `rc', `raidPtr' and
 * `cfgPtr' to be in scope at the point of use, and they may `return'
 * from the enclosing function.  Each one is wrapped in
 * do { ... } while (0) so that `MACRO(...);' is exactly one statement,
 * which keeps it safe inside unbraced if/else bodies.
 */

/*
 * Run a global configuration routine `f'.  On failure: log it, run the
 * global shutdown list, drop the configuration count, release
 * configureMutex (which the caller must hold) and return the error.
 */
#define DO_INIT_CONFIGURE(f) do { \
	rc = f (&globalShutdown); \
	if (rc) { \
		RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \
		rf_ShutdownList(&globalShutdown); \
		configureCount--; \
		RF_UNLOCK_LKMGR_MUTEX(configureMutex); \
		return(rc); \
	} \
} while (0)

/* Undo a partially completed per-array configuration. */
#define DO_RAID_FAIL() do { \
	rf_UnconfigureVnodes(raidPtr); \
	rf_FreeEmergBuffers(raidPtr); \
	rf_ShutdownList(&raidPtr->shutdownList); \
	rf_UnconfigureArray(); \
} while (0)

/*
 * Run a per-array configuration routine `f'.  On failure: log it, undo
 * the partial configuration and return the error.
 */
#define DO_RAID_INIT_CONFIGURE(f) do { \
	rc = f (&raidPtr->shutdownList, raidPtr, cfgPtr); \
	if (rc) { \
		RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \
		DO_RAID_FAIL(); \
		return(rc); \
	} \
} while (0)

/* Initialise one of the array's mutexes. */
#define DO_RAID_MUTEX(_m_) do { \
	rf_mutex_init((_m_)); \
} while (0)
284 1.1 oster
285 1.6 oster int
286 1.80 oster rf_Configure(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, RF_AutoConfig_t *ac)
287 1.6 oster {
288 1.72 oster RF_RowCol_t col;
289 1.103 oster int rc;
290 1.6 oster
291 1.55 oster RF_LOCK_LKMGR_MUTEX(configureMutex);
292 1.6 oster configureCount++;
293 1.6 oster if (isconfigged == 0) {
294 1.75 oster rf_mutex_init(&rf_printf_mutex);
295 1.75 oster
296 1.6 oster /* initialize globals */
297 1.6 oster
298 1.6 oster DO_INIT_CONFIGURE(rf_ConfigureAllocList);
299 1.28 oster
300 1.6 oster /*
301 1.28 oster * Yes, this does make debugging general to the whole
302 1.28 oster * system instead of being array specific. Bummer, drag.
303 1.28 oster */
304 1.6 oster rf_ConfigureDebug(cfgPtr);
305 1.6 oster DO_INIT_CONFIGURE(rf_ConfigureDebugMem);
306 1.87 oster #if RF_ACC_TRACE > 0
307 1.6 oster DO_INIT_CONFIGURE(rf_ConfigureAccessTrace);
308 1.87 oster #endif
309 1.6 oster DO_INIT_CONFIGURE(rf_ConfigureMapModule);
310 1.6 oster DO_INIT_CONFIGURE(rf_ConfigureReconEvent);
311 1.6 oster DO_INIT_CONFIGURE(rf_ConfigureCallback);
312 1.6 oster DO_INIT_CONFIGURE(rf_ConfigureRDFreeList);
313 1.6 oster DO_INIT_CONFIGURE(rf_ConfigureNWayXor);
314 1.6 oster DO_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList);
315 1.6 oster DO_INIT_CONFIGURE(rf_ConfigureMCPair);
316 1.6 oster DO_INIT_CONFIGURE(rf_ConfigureDAGs);
317 1.6 oster DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs);
318 1.6 oster DO_INIT_CONFIGURE(rf_ConfigureReconstruction);
319 1.6 oster DO_INIT_CONFIGURE(rf_ConfigureCopyback);
320 1.6 oster DO_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem);
321 1.6 oster isconfigged = 1;
322 1.6 oster }
323 1.55 oster RF_UNLOCK_LKMGR_MUTEX(configureMutex);
324 1.6 oster
325 1.6 oster DO_RAID_MUTEX(&raidPtr->mutex);
326 1.6 oster /* set up the cleanup list. Do this after ConfigureDebug so that
327 1.6 oster * value of memDebug will be set */
328 1.6 oster
329 1.6 oster rf_MakeAllocList(raidPtr->cleanupList);
330 1.6 oster if (raidPtr->cleanupList == NULL) {
331 1.6 oster DO_RAID_FAIL();
332 1.6 oster return (ENOMEM);
333 1.6 oster }
334 1.86 oster rf_ShutdownCreate(&raidPtr->shutdownList,
335 1.86 oster (void (*) (void *)) rf_FreeAllocList,
336 1.86 oster raidPtr->cleanupList);
337 1.86 oster
338 1.6 oster raidPtr->numCol = cfgPtr->numCol;
339 1.6 oster raidPtr->numSpare = cfgPtr->numSpare;
340 1.6 oster
341 1.72 oster raidPtr->status = rf_rs_optimal;
342 1.72 oster raidPtr->reconControl = NULL;
343 1.72 oster
344 1.64 oster TAILQ_INIT(&(raidPtr->iodone));
345 1.64 oster simple_lock_init(&(raidPtr->iodone_lock));
346 1.6 oster
347 1.6 oster DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine);
348 1.6 oster DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks);
349 1.6 oster
350 1.76 oster raidPtr->outstandingCond = 0;
351 1.6 oster
352 1.6 oster raidPtr->nAccOutstanding = 0;
353 1.6 oster raidPtr->waitShutdown = 0;
354 1.6 oster
355 1.6 oster DO_RAID_MUTEX(&raidPtr->access_suspend_mutex);
356 1.6 oster
357 1.76 oster raidPtr->waitForReconCond = 0;
358 1.6 oster
359 1.28 oster if (ac!=NULL) {
360 1.28 oster /* We have an AutoConfig structure.. Don't do the
361 1.28 oster normal disk configuration... call the auto config
362 1.28 oster stuff */
363 1.28 oster rf_AutoConfigureDisks(raidPtr, cfgPtr, ac);
364 1.28 oster } else {
365 1.28 oster DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks);
366 1.28 oster DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks);
367 1.28 oster }
368 1.6 oster /* do this after ConfigureDisks & ConfigureSpareDisks to be sure dev
369 1.6 oster * no. is set */
370 1.6 oster DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues);
371 1.6 oster
372 1.6 oster DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout);
373 1.6 oster
374 1.6 oster DO_RAID_INIT_CONFIGURE(rf_ConfigurePSStatus);
375 1.6 oster
376 1.82 oster #if RF_INCLUDE_CHAINDECLUSTER > 0
377 1.72 oster for (col = 0; col < raidPtr->numCol; col++) {
378 1.72 oster /*
379 1.72 oster * XXX better distribution
380 1.72 oster */
381 1.72 oster raidPtr->hist_diskreq[col] = 0;
382 1.6 oster }
383 1.82 oster #endif
384 1.30 oster raidPtr->numNewFailures = 0;
385 1.28 oster raidPtr->copyback_in_progress = 0;
386 1.28 oster raidPtr->parity_rewrite_in_progress = 0;
387 1.66 oster raidPtr->adding_hot_spare = 0;
388 1.28 oster raidPtr->recon_in_progress = 0;
389 1.29 oster raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs;
390 1.29 oster
391 1.29 oster /* autoconfigure and root_partition will actually get filled in
392 1.29 oster after the config is done */
393 1.29 oster raidPtr->autoconfigure = 0;
394 1.29 oster raidPtr->root_partition = 0;
395 1.29 oster raidPtr->last_unit = raidPtr->raidid;
396 1.29 oster raidPtr->config_order = 0;
397 1.6 oster
398 1.6 oster if (rf_keepAccTotals) {
399 1.6 oster raidPtr->keep_acc_totals = 1;
400 1.6 oster }
401 1.1 oster
402 1.97 oster /* Allocate a bunch of buffers to be used in low-memory conditions */
403 1.97 oster raidPtr->iobuf = NULL;
404 1.103 oster
405 1.103 oster rc = rf_AllocEmergBuffers(raidPtr);
406 1.103 oster if (rc) {
407 1.103 oster printf("raid%d: Unable to allocate emergency buffers.\n",
408 1.103 oster raidPtr->raidid);
409 1.103 oster DO_RAID_FAIL();
410 1.103 oster return(rc);
411 1.103 oster }
412 1.103 oster
413 1.103 oster raidPtr->valid = 1;
414 1.103 oster
415 1.103 oster printf("raid%d: %s\n", raidPtr->raidid,
416 1.103 oster raidPtr->Layout.map->configName);
417 1.103 oster printf("raid%d: Components:", raidPtr->raidid);
418 1.103 oster
419 1.103 oster for (col = 0; col < raidPtr->numCol; col++) {
420 1.103 oster printf(" %s", raidPtr->Disks[col].devname);
421 1.103 oster if (RF_DEAD_DISK(raidPtr->Disks[col].status)) {
422 1.103 oster printf("[**FAILED**]");
423 1.103 oster }
424 1.103 oster }
425 1.103 oster printf("\n");
426 1.103 oster printf("raid%d: Total Sectors: %lu (%lu MB)\n",
427 1.103 oster raidPtr->raidid,
428 1.103 oster (unsigned long) raidPtr->totalSectors,
429 1.103 oster (unsigned long) (raidPtr->totalSectors / 1024 *
430 1.103 oster (1 << raidPtr->logBytesPerSector) / 1024));
431 1.103 oster
432 1.103 oster return (0);
433 1.103 oster }
434 1.103 oster
435 1.103 oster
436 1.103 oster /*
437 1.103 oster
438 1.103 oster Routines to allocate and free the "emergency buffers" for a given
439 1.103 oster RAID set. These emergency buffers will be used when the kernel runs
440 1.103 oster out of kernel memory.
441 1.103 oster
442 1.103 oster */
443 1.103 oster
444 1.103 oster static int
445 1.103 oster rf_AllocEmergBuffers(RF_Raid_t *raidPtr)
446 1.103 oster {
447 1.103 oster void *tmpbuf;
448 1.103 oster RF_VoidPointerListElem_t *vple;
449 1.103 oster int i;
450 1.103 oster
451 1.97 oster /* XXX next line needs tuning... */
452 1.97 oster raidPtr->numEmergencyBuffers = 10 * raidPtr->numCol;
453 1.97 oster #if DEBUG
454 1.97 oster printf("raid%d: allocating %d buffers of %d bytes.\n",
455 1.97 oster raidPtr->raidid,
456 1.97 oster raidPtr->numEmergencyBuffers,
457 1.97 oster (int)(raidPtr->Layout.sectorsPerStripeUnit <<
458 1.97 oster raidPtr->logBytesPerSector));
459 1.97 oster #endif
460 1.97 oster for (i = 0; i < raidPtr->numEmergencyBuffers; i++) {
461 1.97 oster tmpbuf = malloc( raidPtr->Layout.sectorsPerStripeUnit <<
462 1.97 oster raidPtr->logBytesPerSector,
463 1.97 oster M_RAIDFRAME, M_NOWAIT);
464 1.97 oster if (tmpbuf) {
465 1.99 oster vple = rf_AllocVPListElem();
466 1.99 oster vple->p= tmpbuf;
467 1.99 oster vple->next = raidPtr->iobuf;
468 1.99 oster raidPtr->iobuf = vple;
469 1.97 oster raidPtr->iobuf_count++;
470 1.97 oster } else {
471 1.97 oster printf("raid%d: failed to allocate emergency buffer!\n",
472 1.97 oster raidPtr->raidid);
473 1.103 oster break;
474 1.97 oster }
475 1.97 oster }
476 1.97 oster
477 1.99 oster /* XXX next line needs tuning too... */
478 1.99 oster raidPtr->numEmergencyStripeBuffers = 10;
479 1.99 oster for (i = 0; i < raidPtr->numEmergencyStripeBuffers; i++) {
480 1.99 oster tmpbuf = malloc( raidPtr->numCol * (raidPtr->Layout.sectorsPerStripeUnit <<
481 1.99 oster raidPtr->logBytesPerSector),
482 1.99 oster M_RAIDFRAME, M_NOWAIT);
483 1.99 oster if (tmpbuf) {
484 1.99 oster vple = rf_AllocVPListElem();
485 1.99 oster vple->p= tmpbuf;
486 1.99 oster vple->next = raidPtr->stripebuf;
487 1.99 oster raidPtr->stripebuf = vple;
488 1.99 oster raidPtr->stripebuf_count++;
489 1.99 oster } else {
490 1.99 oster printf("raid%d: failed to allocate emergency stripe buffer!\n",
491 1.99 oster raidPtr->raidid);
492 1.103 oster break;
493 1.99 oster }
494 1.99 oster }
495 1.103 oster
496 1.103 oster return (0);
497 1.103 oster }
498 1.99 oster
499 1.103 oster static void
500 1.103 oster rf_FreeEmergBuffers(RF_Raid_t *raidPtr)
501 1.103 oster {
502 1.103 oster RF_VoidPointerListElem_t *tmp;
503 1.99 oster
504 1.103 oster /* Free the emergency IO buffers */
505 1.103 oster while (raidPtr->iobuf != NULL) {
506 1.103 oster tmp = raidPtr->iobuf;
507 1.103 oster raidPtr->iobuf = raidPtr->iobuf->next;
508 1.103 oster free(tmp->p, M_RAIDFRAME);
509 1.103 oster rf_FreeVPListElem(tmp);
510 1.103 oster }
511 1.52 oster
512 1.103 oster /* Free the emergency stripe buffers */
513 1.103 oster while (raidPtr->stripebuf != NULL) {
514 1.103 oster tmp = raidPtr->stripebuf;
515 1.103 oster raidPtr->stripebuf = raidPtr->stripebuf->next;
516 1.103 oster free(tmp->p, M_RAIDFRAME);
517 1.103 oster rf_FreeVPListElem(tmp);
518 1.52 oster }
519 1.103 oster }
520 1.50 oster
521 1.1 oster
522 1.6 oster static void
523 1.80 oster rf_ShutdownRDFreeList(void *ignored)
524 1.1 oster {
525 1.89 oster pool_destroy(&rf_pools.rad);
526 1.1 oster }
527 1.1 oster
528 1.6 oster static int
529 1.80 oster rf_ConfigureRDFreeList(RF_ShutdownList_t **listp)
530 1.1 oster {
531 1.1 oster
532 1.89 oster rf_pool_init(&rf_pools.rad, sizeof(RF_RaidAccessDesc_t),
533 1.89 oster "rf_rad_pl", RF_MIN_FREE_RAD, RF_MAX_FREE_RAD);
534 1.86 oster rf_ShutdownCreate(listp, rf_ShutdownRDFreeList, NULL);
535 1.91 oster simple_lock_init(&rf_rad_lock);
536 1.6 oster return (0);
537 1.6 oster }
538 1.6 oster
539 1.6 oster RF_RaidAccessDesc_t *
540 1.80 oster rf_AllocRaidAccDesc(RF_Raid_t *raidPtr, RF_IoType_t type,
541 1.80 oster RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks,
542 1.80 oster caddr_t bufPtr, void *bp, RF_RaidAccessFlags_t flags,
543 1.102 drochner const RF_AccessState_t *states)
544 1.6 oster {
545 1.6 oster RF_RaidAccessDesc_t *desc;
546 1.6 oster
547 1.89 oster desc = pool_get(&rf_pools.rad, PR_WAITOK);
548 1.73 oster
549 1.91 oster RF_LOCK_MUTEX(rf_rad_lock);
550 1.6 oster if (raidPtr->waitShutdown) {
551 1.6 oster /*
552 1.6 oster * Actually, we're shutting the array down. Free the desc
553 1.6 oster * and return NULL.
554 1.6 oster */
555 1.73 oster
556 1.91 oster RF_UNLOCK_MUTEX(rf_rad_lock);
557 1.89 oster pool_put(&rf_pools.rad, desc);
558 1.6 oster return (NULL);
559 1.6 oster }
560 1.6 oster raidPtr->nAccOutstanding++;
561 1.73 oster
562 1.91 oster RF_UNLOCK_MUTEX(rf_rad_lock);
563 1.6 oster
564 1.6 oster desc->raidPtr = (void *) raidPtr;
565 1.6 oster desc->type = type;
566 1.6 oster desc->raidAddress = raidAddress;
567 1.6 oster desc->numBlocks = numBlocks;
568 1.6 oster desc->bufPtr = bufPtr;
569 1.6 oster desc->bp = bp;
570 1.6 oster desc->flags = flags;
571 1.6 oster desc->states = states;
572 1.6 oster desc->state = 0;
573 1.99 oster desc->dagList = NULL;
574 1.6 oster
575 1.6 oster desc->status = 0;
576 1.87 oster #if RF_ACC_TRACE > 0
577 1.40 thorpej memset((char *) &desc->tracerec, 0, sizeof(RF_AccTraceEntry_t));
578 1.87 oster #endif
579 1.41 oster desc->callbackFunc = NULL;
580 1.41 oster desc->callbackArg = NULL;
581 1.6 oster desc->next = NULL;
582 1.99 oster desc->iobufs = NULL;
583 1.99 oster desc->stripebufs = NULL;
584 1.99 oster
585 1.6 oster return (desc);
586 1.6 oster }
587 1.6 oster
588 1.6 oster void
589 1.80 oster rf_FreeRaidAccDesc(RF_RaidAccessDesc_t *desc)
590 1.6 oster {
591 1.6 oster RF_Raid_t *raidPtr = desc->raidPtr;
592 1.85 oster RF_DagList_t *dagList, *temp;
593 1.99 oster RF_VoidPointerListElem_t *tmp;
594 1.6 oster
595 1.6 oster RF_ASSERT(desc);
596 1.6 oster
597 1.85 oster /* Cleanup the dagList(s) */
598 1.85 oster dagList = desc->dagList;
599 1.85 oster while(dagList != NULL) {
600 1.85 oster temp = dagList;
601 1.85 oster dagList = dagList->next;
602 1.85 oster rf_FreeDAGList(temp);
603 1.85 oster }
604 1.85 oster
605 1.99 oster while (desc->iobufs) {
606 1.99 oster tmp = desc->iobufs;
607 1.99 oster desc->iobufs = desc->iobufs->next;
608 1.99 oster rf_FreeIOBuffer(raidPtr, tmp);
609 1.99 oster }
610 1.99 oster
611 1.99 oster while (desc->stripebufs) {
612 1.99 oster tmp = desc->stripebufs;
613 1.99 oster desc->stripebufs = desc->stripebufs->next;
614 1.99 oster rf_FreeStripeBuffer(raidPtr, tmp);
615 1.99 oster }
616 1.99 oster
617 1.89 oster pool_put(&rf_pools.rad, desc);
618 1.91 oster RF_LOCK_MUTEX(rf_rad_lock);
619 1.6 oster raidPtr->nAccOutstanding--;
620 1.6 oster if (raidPtr->waitShutdown) {
621 1.6 oster RF_SIGNAL_COND(raidPtr->outstandingCond);
622 1.6 oster }
623 1.91 oster RF_UNLOCK_MUTEX(rf_rad_lock);
624 1.1 oster }
625 1.1 oster /*********************************************************************
626 1.1 oster * Main routine for performing an access.
627 1.1 oster * Accesses are retried until a DAG can not be selected. This occurs
628 1.1 oster * when either the DAG library is incomplete or there are too many
629 1.1 oster * failures in a parity group.
630 1.80 oster *
631 1.80 oster * type should be read or write async_flag should be RF_TRUE or
632 1.80 oster * RF_FALSE bp_in is a buf pointer. void * to facilitate ignoring it
633 1.80 oster * outside the kernel
634 1.1 oster ********************************************************************/
635 1.6 oster int
636 1.80 oster rf_DoAccess(RF_Raid_t * raidPtr, RF_IoType_t type, int async_flag,
637 1.80 oster RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks,
638 1.80 oster caddr_t bufPtr, void *bp_in, RF_RaidAccessFlags_t flags)
639 1.1 oster {
640 1.6 oster RF_RaidAccessDesc_t *desc;
641 1.6 oster caddr_t lbufPtr = bufPtr;
642 1.6 oster struct buf *bp = (struct buf *) bp_in;
643 1.6 oster
644 1.6 oster raidAddress += rf_raidSectorOffset;
645 1.6 oster
646 1.61 oster #if RF_ACCESS_DEBUG
647 1.6 oster if (rf_accessDebug) {
648 1.1 oster
649 1.6 oster printf("logBytes is: %d %d %d\n", raidPtr->raidid,
650 1.6 oster raidPtr->logBytesPerSector,
651 1.6 oster (int) rf_RaidAddressToByte(raidPtr, numBlocks));
652 1.22 oster printf("raid%d: %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx\n", raidPtr->raidid,
653 1.6 oster (type == RF_IO_TYPE_READ) ? "READ" : "WRITE", (int) raidAddress,
654 1.6 oster (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress),
655 1.6 oster (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress + numBlocks - 1),
656 1.6 oster (int) numBlocks,
657 1.6 oster (int) rf_RaidAddressToByte(raidPtr, numBlocks),
658 1.6 oster (long) bufPtr);
659 1.6 oster }
660 1.61 oster #endif
661 1.6 oster if (raidAddress + numBlocks > raidPtr->totalSectors) {
662 1.1 oster
663 1.6 oster printf("DoAccess: raid addr %lu too large to access %lu sectors. Max legal addr is %lu\n",
664 1.6 oster (u_long) raidAddress, (u_long) numBlocks, (u_long) raidPtr->totalSectors);
665 1.1 oster
666 1.77 oster
667 1.77 oster bp->b_flags |= B_ERROR;
668 1.77 oster bp->b_resid = bp->b_bcount;
669 1.77 oster bp->b_error = ENOSPC;
670 1.77 oster biodone(bp);
671 1.16 oster return (ENOSPC);
672 1.6 oster }
673 1.6 oster desc = rf_AllocRaidAccDesc(raidPtr, type, raidAddress,
674 1.41 oster numBlocks, lbufPtr, bp, flags, raidPtr->Layout.map->states);
675 1.1 oster
676 1.6 oster if (desc == NULL) {
677 1.6 oster return (ENOMEM);
678 1.6 oster }
679 1.87 oster #if RF_ACC_TRACE > 0
680 1.6 oster RF_ETIMER_START(desc->tracerec.tot_timer);
681 1.87 oster #endif
682 1.6 oster desc->async_flag = async_flag;
683 1.3 explorer
684 1.6 oster rf_ContinueRaidAccess(desc);
685 1.1 oster
686 1.6 oster return (0);
687 1.1 oster }
#if 0
/*
 * Force the array into reconfigured mode without doing reconstruction.
 * Only meaningful for distributed-spare layouts; panics otherwise.
 * (Currently compiled out.)
 */
int
rf_SetReconfiguredMode(RF_Raid_t *raidPtr, int col)
{
	if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) == 0) {
		printf("Can't set reconfigured mode in dedicated-spare array\n");
		RF_PANIC();
	}

	RF_LOCK_MUTEX(raidPtr->mutex);
	raidPtr->numFailures++;
	raidPtr->Disks[col].status = rf_ds_dist_spared;
	raidPtr->status = rf_rs_reconfigured;
	rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
	/* The spare table is installed only for the declustering +
	 * distributed sparing architecture. */
	if ((raidPtr->Layout.map->flags & RF_BD_DECLUSTERED) != 0) {
		rf_InstallSpareTable(raidPtr, col);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);

	return (0);
}
#endif
710 1.1 oster
711 1.6 oster int
712 1.80 oster rf_FailDisk(RF_Raid_t *raidPtr, int fcol, int initRecon)
713 1.6 oster {
714 1.98 oster
715 1.98 oster /* need to suspend IO's here -- if there are DAGs in flight
716 1.98 oster and we pull the rug out from under ci_vp, Bad Things
717 1.98 oster can happen. */
718 1.98 oster
719 1.98 oster rf_SuspendNewRequestsAndWait(raidPtr);
720 1.98 oster
721 1.6 oster RF_LOCK_MUTEX(raidPtr->mutex);
722 1.72 oster if (raidPtr->Disks[fcol].status != rf_ds_failed) {
723 1.68 oster /* must be failing something that is valid, or else it's
724 1.68 oster already marked as failed (in which case we don't
725 1.68 oster want to mark it failed again!) */
726 1.68 oster raidPtr->numFailures++;
727 1.72 oster raidPtr->Disks[fcol].status = rf_ds_failed;
728 1.72 oster raidPtr->status = rf_rs_degraded;
729 1.68 oster }
730 1.65 oster RF_UNLOCK_MUTEX(raidPtr->mutex);
731 1.68 oster
732 1.37 oster rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
733 1.68 oster
734 1.56 oster /* Close the component, so that it's not "locked" if someone
735 1.56 oster else want's to use it! */
736 1.56 oster
737 1.72 oster rf_close_component(raidPtr, raidPtr->raid_cinfo[fcol].ci_vp,
738 1.72 oster raidPtr->Disks[fcol].auto_configured);
739 1.65 oster
740 1.65 oster RF_LOCK_MUTEX(raidPtr->mutex);
741 1.72 oster raidPtr->raid_cinfo[fcol].ci_vp = NULL;
742 1.56 oster
743 1.56 oster /* Need to mark the component as not being auto_configured
744 1.56 oster (in case it was previously). */
745 1.56 oster
746 1.72 oster raidPtr->Disks[fcol].auto_configured = 0;
747 1.65 oster RF_UNLOCK_MUTEX(raidPtr->mutex);
748 1.98 oster /* now we can allow IO to continue -- we'll be suspending it
749 1.98 oster again in rf_ReconstructFailedDisk() if we have to.. */
750 1.98 oster
751 1.98 oster rf_ResumeNewRequests(raidPtr);
752 1.56 oster
753 1.6 oster if (initRecon)
754 1.72 oster rf_ReconstructFailedDisk(raidPtr, fcol);
755 1.6 oster return (0);
756 1.1 oster }
757 1.1 oster /* releases a thread that is waiting for the array to become quiesced.
758 1.1 oster * access_suspend_mutex should be locked upon calling this
759 1.1 oster */
760 1.6 oster void
761 1.80 oster rf_SignalQuiescenceLock(RF_Raid_t *raidPtr)
762 1.6 oster {
763 1.61 oster #if RF_DEBUG_QUIESCE
764 1.6 oster if (rf_quiesceDebug) {
765 1.22 oster printf("raid%d: Signalling quiescence lock\n",
766 1.22 oster raidPtr->raidid);
767 1.6 oster }
768 1.61 oster #endif
769 1.6 oster raidPtr->access_suspend_release = 1;
770 1.6 oster
771 1.6 oster if (raidPtr->waiting_for_quiescence) {
772 1.6 oster SIGNAL_QUIESCENT_COND(raidPtr);
773 1.6 oster }
774 1.1 oster }
775 1.1 oster /* suspends all new requests to the array. No effect on accesses that are in flight. */
776 1.6 oster int
777 1.80 oster rf_SuspendNewRequestsAndWait(RF_Raid_t *raidPtr)
778 1.6 oster {
779 1.61 oster #if RF_DEBUG_QUIESCE
780 1.6 oster if (rf_quiesceDebug)
781 1.53 oster printf("raid%d: Suspending new reqs\n", raidPtr->raidid);
782 1.61 oster #endif
783 1.6 oster RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
784 1.6 oster raidPtr->accesses_suspended++;
785 1.6 oster raidPtr->waiting_for_quiescence = (raidPtr->accs_in_flight == 0) ? 0 : 1;
786 1.6 oster
787 1.6 oster if (raidPtr->waiting_for_quiescence) {
788 1.6 oster raidPtr->access_suspend_release = 0;
789 1.6 oster while (!raidPtr->access_suspend_release) {
790 1.93 oster #if RF_DEBUG_QUIESCE
791 1.53 oster printf("raid%d: Suspending: Waiting for Quiescence\n",
792 1.53 oster raidPtr->raidid);
793 1.93 oster #endif
794 1.6 oster WAIT_FOR_QUIESCENCE(raidPtr);
795 1.6 oster raidPtr->waiting_for_quiescence = 0;
796 1.6 oster }
797 1.6 oster }
798 1.93 oster #if RF_DEBUG_QUIESCE
799 1.53 oster printf("raid%d: Quiescence reached..\n", raidPtr->raidid);
800 1.93 oster #endif
801 1.1 oster
802 1.6 oster RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
803 1.6 oster return (raidPtr->waiting_for_quiescence);
804 1.1 oster }
805 1.1 oster /* wake up everyone waiting for quiescence to be released */
806 1.6 oster void
807 1.80 oster rf_ResumeNewRequests(RF_Raid_t *raidPtr)
808 1.6 oster {
809 1.6 oster RF_CallbackDesc_t *t, *cb;
810 1.6 oster
811 1.61 oster #if RF_DEBUG_QUIESCE
812 1.6 oster if (rf_quiesceDebug)
813 1.6 oster printf("Resuming new reqs\n");
814 1.61 oster #endif
815 1.6 oster
816 1.6 oster RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
817 1.6 oster raidPtr->accesses_suspended--;
818 1.6 oster if (raidPtr->accesses_suspended == 0)
819 1.6 oster cb = raidPtr->quiesce_wait_list;
820 1.6 oster else
821 1.6 oster cb = NULL;
822 1.6 oster raidPtr->quiesce_wait_list = NULL;
823 1.6 oster RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
824 1.6 oster
825 1.6 oster while (cb) {
826 1.6 oster t = cb;
827 1.6 oster cb = cb->next;
828 1.6 oster (t->callbackFunc) (t->callbackArg);
829 1.6 oster rf_FreeCallbackDesc(t);
830 1.6 oster }
831 1.1 oster }
832 1.1 oster /*****************************************************************************************
833 1.1 oster *
834 1.1 oster * debug routines
835 1.1 oster *
836 1.1 oster ****************************************************************************************/
837 1.1 oster
838 1.6 oster static void
839 1.80 oster set_debug_option(char *name, long val)
840 1.6 oster {
841 1.6 oster RF_DebugName_t *p;
842 1.6 oster
843 1.6 oster for (p = rf_debugNames; p->name; p++) {
844 1.6 oster if (!strcmp(p->name, name)) {
845 1.6 oster *(p->ptr) = val;
846 1.6 oster printf("[Set debug variable %s to %ld]\n", name, val);
847 1.6 oster return;
848 1.6 oster }
849 1.6 oster }
850 1.6 oster RF_ERRORMSG1("Unknown debug string \"%s\"\n", name);
851 1.1 oster }
852 1.1 oster
853 1.1 oster
854 1.1 oster /* would like to use sscanf here, but apparently not available in kernel */
855 1.1 oster /*ARGSUSED*/
856 1.6 oster static void
857 1.80 oster rf_ConfigureDebug(RF_Config_t *cfgPtr)
858 1.6 oster {
859 1.6 oster char *val_p, *name_p, *white_p;
860 1.6 oster long val;
861 1.6 oster int i;
862 1.6 oster
863 1.6 oster rf_ResetDebugOptions();
864 1.6 oster for (i = 0; cfgPtr->debugVars[i][0] && i < RF_MAXDBGV; i++) {
865 1.6 oster name_p = rf_find_non_white(&cfgPtr->debugVars[i][0]);
866 1.6 oster white_p = rf_find_white(name_p); /* skip to start of 2nd
867 1.6 oster * word */
868 1.6 oster val_p = rf_find_non_white(white_p);
869 1.6 oster if (*val_p == '0' && *(val_p + 1) == 'x')
870 1.6 oster val = rf_htoi(val_p + 2);
871 1.6 oster else
872 1.6 oster val = rf_atoi(val_p);
873 1.6 oster *white_p = '\0';
874 1.6 oster set_debug_option(name_p, val);
875 1.6 oster }
876 1.1 oster }
877 1.39 oster
878 1.39 oster void
879 1.80 oster rf_print_panic_message(int line, char *file)
880 1.39 oster {
881 1.101 itojun snprintf(rf_panicbuf, sizeof(rf_panicbuf),
882 1.101 itojun "raidframe error at line %d file %s", line, file);
883 1.39 oster }
884 1.39 oster
#ifdef RAID_DIAGNOSTIC
/*
 * Format a "raidframe error" message for a failed assertion into
 * rf_panicbuf: source line, file, and the text of the condition.
 * Only compiled when RAID_DIAGNOSTIC is defined.
 */
void
rf_print_assert_panic_message(int line, char *file, char *condition)
{
	const size_t cap = sizeof(rf_panicbuf);

	snprintf(rf_panicbuf, cap,
	    "raidframe error at line %d file %s (failed asserting %s)\n",
	    line, file, condition);
}
#endif
894 1.58 oster
/*
 * Report, via RF_ERRORMSG3, that a mutex could not be initialised,
 * giving the source file, line and the return code.
 */
void
rf_print_unable_to_init_mutex(char *file, int line, int rc)
{
	RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", file, line, rc);
}
901 1.58 oster
/*
 * Report, via RF_ERRORMSG3, that an entry could not be added to the
 * shutdown list, giving the source file, line and the return code.
 */
void
rf_print_unable_to_add_shutdown(char *file, int line, int rc)
{
	RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", file, line, rc);
}
908