rf_netbsdkintf.c revision 1.353 1 /* $NetBSD: rf_netbsdkintf.c,v 1.353 2018/01/18 00:32:49 mrg Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.353 2018/01/18 00:32:49 mrg Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_compat_netbsd32.h"
109 #include "opt_raid_autoconfig.h"
110 #endif
111
112 #include <sys/param.h>
113 #include <sys/errno.h>
114 #include <sys/pool.h>
115 #include <sys/proc.h>
116 #include <sys/queue.h>
117 #include <sys/disk.h>
118 #include <sys/device.h>
119 #include <sys/stat.h>
120 #include <sys/ioctl.h>
121 #include <sys/fcntl.h>
122 #include <sys/systm.h>
123 #include <sys/vnode.h>
124 #include <sys/disklabel.h>
125 #include <sys/conf.h>
126 #include <sys/buf.h>
127 #include <sys/bufq.h>
128 #include <sys/reboot.h>
129 #include <sys/kauth.h>
130 #include <sys/module.h>
131
132 #include <prop/proplib.h>
133
134 #include <dev/raidframe/raidframevar.h>
135 #include <dev/raidframe/raidframeio.h>
136 #include <dev/raidframe/rf_paritymap.h>
137
138 #include "rf_raid.h"
139 #include "rf_copyback.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_desc.h"
143 #include "rf_diskqueue.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_kintf.h"
147 #include "rf_options.h"
148 #include "rf_driver.h"
149 #include "rf_parityscan.h"
150 #include "rf_threadstuff.h"
151
152 #ifdef COMPAT_50
153 #include "rf_compat50.h"
154 #endif
155
156 #ifdef COMPAT_80
157 #include "rf_compat80.h"
158 #endif
159
160 #ifdef COMPAT_NETBSD32
161 #include "rf_compat32.h"
162 #endif
163
164 #include "ioconf.h"
165
166 #ifdef DEBUG
167 int rf_kdebug_level = 0;
168 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
169 #else /* DEBUG */
170 #define db1_printf(a) { }
171 #endif /* DEBUG */
172
173 #ifdef DEBUG_ROOT
174 #define DPRINTF(a, ...) printf(a, __VA_ARGS__)
175 #else
176 #define DPRINTF(a, ...)
177 #endif
178
179 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
180 static rf_declare_mutex2(rf_sparet_wait_mutex);
181 static rf_declare_cond2(rf_sparet_wait_cv);
182 static rf_declare_cond2(rf_sparet_resp_cv);
183
184 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
185 * spare table */
186 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
187 * installation process */
188 #endif
189
190 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
191
192 /* prototypes */
193 static void KernelWakeupFunc(struct buf *);
194 static void InitBP(struct buf *, struct vnode *, unsigned,
195 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
196 void *, int, struct proc *);
197 struct raid_softc;
198 static void raidinit(struct raid_softc *);
199 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
200 static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
201
202 static int raid_match(device_t, cfdata_t, void *);
203 static void raid_attach(device_t, device_t, void *);
204 static int raid_detach(device_t, int);
205
206 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
207 daddr_t, daddr_t);
208 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
209 daddr_t, daddr_t, int);
210
211 static int raidwrite_component_label(unsigned,
212 dev_t, struct vnode *, RF_ComponentLabel_t *);
213 static int raidread_component_label(unsigned,
214 dev_t, struct vnode *, RF_ComponentLabel_t *);
215
216 static int raid_diskstart(device_t, struct buf *bp);
217 static int raid_dumpblocks(device_t, void *, daddr_t, int);
218 static int raid_lastclose(device_t);
219
220 static dev_type_open(raidopen);
221 static dev_type_close(raidclose);
222 static dev_type_read(raidread);
223 static dev_type_write(raidwrite);
224 static dev_type_ioctl(raidioctl);
225 static dev_type_strategy(raidstrategy);
226 static dev_type_dump(raiddump);
227 static dev_type_size(raidsize);
228
/* Block device switch: entry points for the block-special raid device. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,		/* crash-dump support (RAID 1 only) */
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
239
/*
 * Character device switch: raw access to the raid device.
 * Operations that make no sense for a disk use the no-op stubs.
 */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
254
/* Hooks the common disk framework (dk_softc) uses to drive this device. */
static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,		/* start one queued buf */
	.d_dumpblocks = raid_dumpblocks,	/* raw block dump for crash dumps */
	.d_lastclose = raid_lastclose,		/* final-close bookkeeping */
	.d_minphys = minphys
};
264
/*
 * Per-unit software state for a RAIDframe device.  One is allocated per
 * configured (or being-configured) raid unit and linked on the global
 * `raids' list under raid_lock.
 */
struct raid_softc {
	struct dk_softc sc_dksc;	/* generic disk framework state */
	int     sc_unit;		/* raid unit number */
	int     sc_flags;		/* flags */
	int     sc_cflags;		/* configuration flags */
	kmutex_t sc_mutex;		/* interlock mutex */
	kcondvar_t sc_cv;		/* and the condvar */
	uint64_t sc_size;		/* size of the raid device */
	char    sc_xname[20];		/* XXX external name */
	RF_Raid_t sc_r;			/* RAIDframe per-array state */
	LIST_ENTRY(raid_softc) sc_link;	/* linkage on the global list */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_SHUTDOWN	0x02	/* unit is being shutdown */
#define RAIDF_DETACH	0x04	/* detach after final close */
#define RAIDF_WANTED	0x08	/* someone waiting to obtain a lock */
#define RAIDF_LOCKED	0x10	/* unit is locked */
#define RAIDF_UNIT_CHANGED	0x20	/* unit is being changed */
287
288 extern struct cfdriver raid_cd;
289 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
290 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
291 DVF_DETACH_SHUTDOWN);
292
/*
 * Internal (kernel-side) representation of a rf_recon_req as handed in
 * from userland; raidPtr is filled in by the driver before use.
 */
struct rf_recon_req_internal {
	RF_RowCol_t col;		/* component column the request targets */
	RF_ReconReqFlags_t flags;	/* reconstruction request flags */
	void   *raidPtr;		/* associated RF_Raid_t */
};
299
300 /*
301 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
302 * Be aware that large numbers can allow the driver to consume a lot of
303 * kernel memory, especially on writes, and in degraded mode reads.
304 *
305 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
306 * a single 64K write will typically require 64K for the old data,
307 * 64K for the old parity, and 64K for the new parity, for a total
308 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
310 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
311 *
312 * Now in degraded mode, for example, a 64K read on the above setup may
313 * require data reconstruction, which will require *all* of the 4 remaining
314 * disks to participate -- 4 * 32K/disk == 128K again.
315 */
316
317 #ifndef RAIDOUTSTANDING
318 #define RAIDOUTSTANDING 6
319 #endif
320
321 #define RAIDLABELDEV(dev) \
322 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
323
324 /* declared here, and made public, for the benefit of KVM stuff.. */
325
326 static int raidlock(struct raid_softc *);
327 static void raidunlock(struct raid_softc *);
328
329 static int raid_detach_unlocked(struct raid_softc *);
330
331 static void rf_markalldirty(RF_Raid_t *);
332 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
333
334 void rf_ReconThread(struct rf_recon_req_internal *);
335 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
336 void rf_CopybackThread(RF_Raid_t *raidPtr);
337 void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
338 int rf_autoconfig(device_t);
339 void rf_buildroothack(RF_ConfigSet_t *);
340
341 RF_AutoConfig_t *rf_find_raid_components(void);
342 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
343 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
344 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
345 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
346 int rf_set_autoconfig(RF_Raid_t *, int);
347 int rf_set_rootpartition(RF_Raid_t *, int);
348 void rf_release_all_vps(RF_ConfigSet_t *);
349 void rf_cleanup_config_set(RF_ConfigSet_t *);
350 int rf_have_enough_components(RF_ConfigSet_t *);
351 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
352 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
353
354 /*
355 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
356 * Note that this is overridden by having RAID_AUTOCONFIG as an option
357 * in the kernel config file.
358 */
359 #ifdef RAID_AUTOCONFIG
360 int raidautoconfig = 1;
361 #else
362 int raidautoconfig = 0;
363 #endif
364 static bool raidautoconfigdone = false;
365
366 struct RF_Pools_s rf_pools;
367
368 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
369 static kmutex_t raid_lock;
370
371 static struct raid_softc *
372 raidcreate(int unit) {
373 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
374 sc->sc_unit = unit;
375 cv_init(&sc->sc_cv, "raidunit");
376 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
377 return sc;
378 }
379
380 static void
381 raiddestroy(struct raid_softc *sc) {
382 cv_destroy(&sc->sc_cv);
383 mutex_destroy(&sc->sc_mutex);
384 kmem_free(sc, sizeof(*sc));
385 }
386
387 static struct raid_softc *
388 raidget(int unit, bool create) {
389 struct raid_softc *sc;
390 if (unit < 0) {
391 #ifdef DIAGNOSTIC
392 panic("%s: unit %d!", __func__, unit);
393 #endif
394 return NULL;
395 }
396 mutex_enter(&raid_lock);
397 LIST_FOREACH(sc, &raids, sc_link) {
398 if (sc->sc_unit == unit) {
399 mutex_exit(&raid_lock);
400 return sc;
401 }
402 }
403 mutex_exit(&raid_lock);
404 if (!create)
405 return NULL;
406 if ((sc = raidcreate(unit)) == NULL)
407 return NULL;
408 mutex_enter(&raid_lock);
409 LIST_INSERT_HEAD(&raids, sc, sc_link);
410 mutex_exit(&raid_lock);
411 return sc;
412 }
413
/*
 * Unlink a softc from the global list and destroy it.  The caller must
 * guarantee no other references to `sc' remain; removal happens under
 * raid_lock before the (lock-free) destruction.
 */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
421
/*
 * Legacy pseudo-device attach entry point; `num' (requested unit
 * count) is ignored.  Intentionally empty, see comment below.
 */
void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}
431
/*
 * Kick off RAID autoconfiguration: locate all RAID components in the
 * system, group them into configuration sets, and configure (and
 * possibly select as root) the valid ones.  Runs at most once per
 * boot; returns 1 if autoconfiguration was attempted, 0 if skipped
 * (disabled or already done).
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}
469
470 static int
471 rf_containsboot(RF_Raid_t *r, device_t bdv) {
472 const char *bootname = device_xname(bdv);
473 size_t len = strlen(bootname);
474
475 for (int col = 0; col < r->numCol; col++) {
476 const char *devname = r->Disks[col].devname;
477 devname += sizeof("/dev/") - 1;
478 if (strncmp(devname, "dk", 2) == 0) {
479 const char *parent =
480 dkwedge_get_parent_name(r->Disks[col].dev);
481 if (parent != NULL)
482 devname = parent;
483 }
484 if (strncmp(devname, bootname, len) == 0) {
485 struct raid_softc *sc = r->softc;
486 aprint_debug("raid%d includes boot device %s\n",
487 sc->sc_unit, devname);
488 return 1;
489 }
490 }
491 return 0;
492 }
493
/*
 * Configure all autoconfigurable sets and, if appropriate, nominate a
 * RAID device as the root.  Each set with enough components and the
 * autoconfigure label flag is configured; all other sets have their
 * resources released.  Then, unless the user hardwired a root device
 * (rootspec), booted_device is pointed at a root-eligible raid unit.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;		/* count of root-eligible configured sets */
	struct raid_softc *sc, *rsc;
	struct dk_softc *dksc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	/* walk the list of config sets, configuring each eligible one */
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}
	/*
	 * NOTE(review): rsc may still be NULL here when no rootable set
	 * was configured; only the address is computed, and the pointer
	 * is dereferenced solely on the num_root == 1 path (which
	 * implies rsc != NULL) -- but moving this assignment below the
	 * rootspec check would be safer.
	 */
	dksc = &rsc->sc_dksc;

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		if (dksc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume partition 'a' first */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dksc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
			DPRINTF("%s: candidate wedge root=%s\n", __func__,
			    cname);
			if (candidate_root == NULL) {
				/*
				 * If that is not found, because we don't use
				 * disklabel, return the first dk child
				 * XXX: we can skip the 'a' check above
				 * and always do this...
				 */
				size_t i = 0;
				candidate_root = dkwedge_find_by_parent(
				    device_xname(dksc->sc_dev), &i);
			}
			DPRINTF("%s: candidate wedge root=%p\n", __func__,
			    candidate_root);
		} else
			candidate_root = dksc->sc_dev;
		DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
		DPRINTF("%s: booted_device=%p root_partition=%d "
		   "contains_boot=%d\n", __func__, booted_device,
		   rsc->sc_r.root_partition,
		   rf_containsboot(&rsc->sc_r, booted_device));
		/*
		 * Take over root if nothing else was booted, if the set
		 * is forced-root (root_partition == 1), or if the set
		 * actually contains the boot device.
		 */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_method = "raidframe/single";
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {
		DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
		    booted_device);

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* disambiguate: pick the set that contains the boot device */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
				dksc = &rsc->sc_dksc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = dksc->sc_dev;
			booted_method = "raidframe/multi";
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
631
632 static int
633 raidsize(dev_t dev)
634 {
635 struct raid_softc *rs;
636 struct dk_softc *dksc;
637 unsigned int unit;
638
639 unit = raidunit(dev);
640 if ((rs = raidget(unit, false)) == NULL)
641 return -1;
642 dksc = &rs->sc_dksc;
643
644 if ((rs->sc_flags & RAIDF_INITED) == 0)
645 return -1;
646
647 return dk_size(dksc, dev);
648 }
649
650 static int
651 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
652 {
653 unsigned int unit;
654 struct raid_softc *rs;
655 struct dk_softc *dksc;
656
657 unit = raidunit(dev);
658 if ((rs = raidget(unit, false)) == NULL)
659 return ENXIO;
660 dksc = &rs->sc_dksc;
661
662 if ((rs->sc_flags & RAIDF_INITED) == 0)
663 return ENODEV;
664
665 /*
666 Note that blkno is relative to this particular partition.
667 By adding adding RF_PROTECTED_SECTORS, we get a value that
668 is relative to the partition used for the underlying component.
669 */
670 blkno += RF_PROTECTED_SECTORS;
671
672 return dk_dump(dksc, dev, blkno, va, size);
673 }
674
/*
 * Disk-framework dumpblocks hook: write `nblk' blocks from `va' at
 * block `blkno' directly to one live component of the set.  Only
 * RAID 1 sets (one data column, one parity column) are supported as
 * dump targets.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			/* find which column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* hand the raw blocks straight to the chosen component's driver */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
780
781 /* ARGSUSED */
/*
 * Open the raid device.  The softc is created on first reference (so a
 * not-yet-configured unit can still be opened for configuration
 * ioctls); component labels are marked dirty on the first busy open of
 * a configured unit, and the open is passed to the disk framework only
 * when the unit is actually configured.
 */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int error = 0;
	int part, pmask;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	/* refuse new opens while the unit is being shut down */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		 here... If so, we needn't do this, but then need some
		 other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}

	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return (error);


}
830
831 static int
832 raid_lastclose(device_t self)
833 {
834 struct raid_softc *rs = raidsoftc(self);
835
836 /* Last one... device is not unconfigured yet.
837 Device shutdown has taken care of setting the
838 clean bits if RAIDF_INITED is not set
839 mark things as clean... */
840
841 rf_update_component_labels(&rs->sc_r,
842 RF_FINAL_COMPONENT_UPDATE);
843
844 /* pass to unlocked code */
845 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
846 rs->sc_flags |= RAIDF_DETACH;
847
848 return 0;
849 }
850
851 /* ARGSUSED */
/*
 * Close the raid device.  For a configured unit the close is passed to
 * the disk framework; if raid_lastclose() flagged a deferred detach,
 * perform it here (outside the unit lock).  A never-configured unit
 * that is shutting down simply has its softc released.
 */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		/* raid_lastclose() sets RAIDF_DETACH on the final close */
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		/* never configured: just drop the softc */
		raidput(rs);
	}

	return (error);

}
890
/*
 * Poke the RAIDframe I/O thread for this array: signal the iodone
 * condition variable (under its lock) so queued work gets serviced.
 */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}
898
899 static void
900 raidstrategy(struct buf *bp)
901 {
902 unsigned int unit;
903 struct raid_softc *rs;
904 struct dk_softc *dksc;
905 RF_Raid_t *raidPtr;
906
907 unit = raidunit(bp->b_dev);
908 if ((rs = raidget(unit, false)) == NULL) {
909 bp->b_error = ENXIO;
910 goto fail;
911 }
912 if ((rs->sc_flags & RAIDF_INITED) == 0) {
913 bp->b_error = ENXIO;
914 goto fail;
915 }
916 dksc = &rs->sc_dksc;
917 raidPtr = &rs->sc_r;
918
919 /* Queue IO only */
920 if (dk_strategy_defer(dksc, bp))
921 goto done;
922
923 /* schedule the IO to happen at the next convenient time */
924 raid_wakeup(raidPtr);
925
926 done:
927 return;
928
929 fail:
930 bp->b_resid = bp->b_bcount;
931 biodone(bp);
932 }
933
934 static int
935 raid_diskstart(device_t dev, struct buf *bp)
936 {
937 struct raid_softc *rs = raidsoftc(dev);
938 RF_Raid_t *raidPtr;
939
940 raidPtr = &rs->sc_r;
941 if (!raidPtr->valid) {
942 db1_printf(("raid is not valid..\n"));
943 return ENODEV;
944 }
945
946 /* XXX */
947 bp->b_resid = 0;
948
949 return raiddoaccess(raidPtr, bp);
950 }
951
952 void
953 raiddone(RF_Raid_t *raidPtr, struct buf *bp)
954 {
955 struct raid_softc *rs;
956 struct dk_softc *dksc;
957
958 rs = raidPtr->softc;
959 dksc = &rs->sc_dksc;
960
961 dk_done(dksc, bp);
962
963 rf_lock_mutex2(raidPtr->mutex);
964 raidPtr->openings++;
965 rf_unlock_mutex2(raidPtr->mutex);
966
967 /* schedule more IO */
968 raid_wakeup(raidPtr);
969 }
970
971 /* ARGSUSED */
972 static int
973 raidread(dev_t dev, struct uio *uio, int flags)
974 {
975 int unit = raidunit(dev);
976 struct raid_softc *rs;
977
978 if ((rs = raidget(unit, false)) == NULL)
979 return ENXIO;
980
981 if ((rs->sc_flags & RAIDF_INITED) == 0)
982 return (ENXIO);
983
984 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
985
986 }
987
988 /* ARGSUSED */
989 static int
990 raidwrite(dev_t dev, struct uio *uio, int flags)
991 {
992 int unit = raidunit(dev);
993 struct raid_softc *rs;
994
995 if ((rs = raidget(unit, false)) == NULL)
996 return ENXIO;
997
998 if ((rs->sc_flags & RAIDF_INITED) == 0)
999 return (ENXIO);
1000
1001 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1002
1003 }
1004
/*
 * Tear down a configured raid unit: shut down the RAIDframe engine,
 * drain and free the buffer queue, and detach the disk from the
 * system.  Caller holds the unit lock (raidlock).  Returns EBUSY while
 * the device is open or a reconstruction/parity-rewrite/copyback is in
 * progress; returns 0 immediately for a unit that was never configured.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
1042
1043 static int
1044 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1045 {
1046 int unit = raidunit(dev);
1047 int error = 0;
1048 int part, pmask;
1049 struct raid_softc *rs;
1050 struct dk_softc *dksc;
1051 RF_Config_t *k_cfg, *u_cfg;
1052 RF_Raid_t *raidPtr;
1053 RF_RaidDisk_t *diskPtr;
1054 RF_AccTotals_t *totals;
1055 RF_DeviceConfig_t *d_cfg, *ucfgp;
1056 u_char *specific_buf;
1057 int retcode = 0;
1058 int column;
1059 /* int raidid; */
1060 struct rf_recon_req *rr;
1061 struct rf_recon_req_internal *rrint;
1062 RF_ComponentLabel_t *clabel;
1063 RF_ComponentLabel_t *ci_label;
1064 RF_SingleComponent_t *sparePtr,*componentPtr;
1065 RF_SingleComponent_t component;
1066 int d;
1067
1068 if ((rs = raidget(unit, false)) == NULL)
1069 return ENXIO;
1070 dksc = &rs->sc_dksc;
1071 raidPtr = &rs->sc_r;
1072
1073 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1074 (int) DISKPART(dev), (int) unit, cmd));
1075
1076 /* Must be initialized for these... */
1077 switch (cmd) {
1078 case RAIDFRAME_REWRITEPARITY:
1079 case RAIDFRAME_GET_INFO:
1080 case RAIDFRAME_RESET_ACCTOTALS:
1081 case RAIDFRAME_GET_ACCTOTALS:
1082 case RAIDFRAME_KEEP_ACCTOTALS:
1083 case RAIDFRAME_GET_SIZE:
1084 case RAIDFRAME_FAIL_DISK:
1085 case RAIDFRAME_COPYBACK:
1086 case RAIDFRAME_CHECK_RECON_STATUS:
1087 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1088 case RAIDFRAME_GET_COMPONENT_LABEL:
1089 case RAIDFRAME_SET_COMPONENT_LABEL:
1090 case RAIDFRAME_ADD_HOT_SPARE:
1091 case RAIDFRAME_REMOVE_HOT_SPARE:
1092 case RAIDFRAME_INIT_LABELS:
1093 case RAIDFRAME_REBUILD_IN_PLACE:
1094 case RAIDFRAME_CHECK_PARITY:
1095 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1096 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1097 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1098 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1099 case RAIDFRAME_SET_AUTOCONFIG:
1100 case RAIDFRAME_SET_ROOT:
1101 case RAIDFRAME_DELETE_COMPONENT:
1102 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1103 case RAIDFRAME_PARITYMAP_STATUS:
1104 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1105 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1106 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1107 #ifdef COMPAT_50
1108 case RAIDFRAME_GET_INFO50:
1109 #endif
1110 #ifdef COMPAT_80
1111 case RAIDFRAME_CHECK_RECON_STATUS_EXT80:
1112 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT80:
1113 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT80:
1114 case RAIDFRAME_GET_INFO80:
1115 case RAIDFRAME_GET_COMPONENT_LABEL80:
1116 #endif
1117 #ifdef COMPAT_NETBSD32
1118 case RAIDFRAME_GET_INFO32:
1119 #endif
1120 if ((rs->sc_flags & RAIDF_INITED) == 0)
1121 return (ENXIO);
1122 }
1123
1124 switch (cmd) {
1125 #ifdef COMPAT_50
1126 case RAIDFRAME_GET_INFO50:
1127 return rf_get_info50(raidPtr, data);
1128
1129 case RAIDFRAME_CONFIGURE50:
1130 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1131 return retcode;
1132 goto config;
1133 #endif
1134
1135 #ifdef COMPAT_80
1136 case RAIDFRAME_CHECK_RECON_STATUS_EXT80:
1137 return rf_check_recon_status_ext80(raidPtr, data);
1138
1139 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT80:
1140 return rf_check_parityrewrite_status_ext80(raidPtr, data);
1141
1142 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT80:
1143 return rf_check_copyback_status_ext80(raidPtr, data);
1144
1145 case RAIDFRAME_GET_INFO80:
1146 return rf_get_info80(raidPtr, data);
1147
1148 case RAIDFRAME_GET_COMPONENT_LABEL80:
1149 return rf_get_component_label80(raidPtr, data);
1150 #endif
1151
1152 /* configure the system */
1153 case RAIDFRAME_CONFIGURE:
1154 #ifdef COMPAT_NETBSD32
1155 case RAIDFRAME_CONFIGURE32:
1156 #endif
1157
1158 if (raidPtr->valid) {
1159 /* There is a valid RAID set running on this unit! */
1160 printf("raid%d: Device already configured!\n",unit);
1161 return(EINVAL);
1162 }
1163
1164 /* copy-in the configuration information */
1165 /* data points to a pointer to the configuration structure */
1166
1167 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1168 if (k_cfg == NULL) {
1169 return (ENOMEM);
1170 }
1171 #ifdef COMPAT_NETBSD32
1172 if (cmd == RAIDFRAME_CONFIGURE32 &&
1173 (l->l_proc->p_flag & PK_32) != 0)
1174 retcode = rf_config_netbsd32(data, k_cfg);
1175 else
1176 #endif
1177 {
1178 u_cfg = *((RF_Config_t **) data);
1179 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1180 }
1181 if (retcode) {
1182 RF_Free(k_cfg, sizeof(RF_Config_t));
1183 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1184 retcode));
1185 goto no_config;
1186 }
1187 goto config;
1188 config:
1189 rs->sc_flags &= ~RAIDF_SHUTDOWN;
1190
1191 /* allocate a buffer for the layout-specific data, and copy it
1192 * in */
1193 if (k_cfg->layoutSpecificSize) {
1194 if (k_cfg->layoutSpecificSize > 10000) {
1195 /* sanity check */
1196 RF_Free(k_cfg, sizeof(RF_Config_t));
1197 retcode = EINVAL;
1198 goto no_config;
1199 }
1200 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1201 (u_char *));
1202 if (specific_buf == NULL) {
1203 RF_Free(k_cfg, sizeof(RF_Config_t));
1204 retcode = ENOMEM;
1205 goto no_config;
1206 }
1207 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1208 k_cfg->layoutSpecificSize);
1209 if (retcode) {
1210 RF_Free(k_cfg, sizeof(RF_Config_t));
1211 RF_Free(specific_buf,
1212 k_cfg->layoutSpecificSize);
1213 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1214 retcode));
1215 goto no_config;
1216 }
1217 } else
1218 specific_buf = NULL;
1219 k_cfg->layoutSpecific = specific_buf;
1220
1221 /* should do some kind of sanity check on the configuration.
1222 * Store the sum of all the bytes in the last byte? */
1223
1224 /* configure the system */
1225
1226 /*
1227 * Clear the entire RAID descriptor, just to make sure
1228 * there is no stale data left in the case of a
1229 * reconfiguration
1230 */
1231 memset(raidPtr, 0, sizeof(*raidPtr));
1232 raidPtr->softc = rs;
1233 raidPtr->raidid = unit;
1234
1235 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1236
1237 if (retcode == 0) {
1238
1239 /* allow this many simultaneous IO's to
1240 this RAID device */
1241 raidPtr->openings = RAIDOUTSTANDING;
1242
1243 raidinit(rs);
1244 raid_wakeup(raidPtr);
1245 rf_markalldirty(raidPtr);
1246 }
1247 /* free the buffers. No return code here. */
1248 if (k_cfg->layoutSpecificSize) {
1249 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1250 }
1251 RF_Free(k_cfg, sizeof(RF_Config_t));
1252
1253 no_config:
1254 /*
1255 * If configuration failed, set sc_flags so that we
1256 * will detach the device when we close it.
1257 */
1258 if (retcode != 0)
1259 rs->sc_flags |= RAIDF_SHUTDOWN;
1260 return (retcode);
1261
1262 /* shutdown the system */
1263 case RAIDFRAME_SHUTDOWN:
1264
1265 part = DISKPART(dev);
1266 pmask = (1 << part);
1267
1268 if ((error = raidlock(rs)) != 0)
1269 return (error);
1270
1271 if (DK_BUSY(dksc, pmask) ||
1272 raidPtr->recon_in_progress != 0 ||
1273 raidPtr->parity_rewrite_in_progress != 0 ||
1274 raidPtr->copyback_in_progress != 0)
1275 retcode = EBUSY;
1276 else {
1277 /* detach and free on close */
1278 rs->sc_flags |= RAIDF_SHUTDOWN;
1279 retcode = 0;
1280 }
1281
1282 raidunlock(rs);
1283
1284 return (retcode);
1285 case RAIDFRAME_GET_COMPONENT_LABEL:
1286 return rf_get_component_label(raidPtr, data);
1287
1288 #if 0
1289 case RAIDFRAME_SET_COMPONENT_LABEL:
1290 clabel = (RF_ComponentLabel_t *) data;
1291
1292 /* XXX check the label for valid stuff... */
1293 /* Note that some things *should not* get modified --
1294 the user should be re-initing the labels instead of
1295 trying to patch things.
1296 */
1297
1298 raidid = raidPtr->raidid;
1299 #ifdef DEBUG
1300 printf("raid%d: Got component label:\n", raidid);
1301 printf("raid%d: Version: %d\n", raidid, clabel->version);
1302 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1303 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1304 printf("raid%d: Column: %d\n", raidid, clabel->column);
1305 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1306 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1307 printf("raid%d: Status: %d\n", raidid, clabel->status);
1308 #endif
1309 clabel->row = 0;
1310 column = clabel->column;
1311
1312 if ((column < 0) || (column >= raidPtr->numCol)) {
1313 return(EINVAL);
1314 }
1315
1316 /* XXX this isn't allowed to do anything for now :-) */
1317
1318 /* XXX and before it is, we need to fill in the rest
1319 of the fields!?!?!?! */
1320 memcpy(raidget_component_label(raidPtr, column),
1321 clabel, sizeof(*clabel));
1322 raidflush_component_label(raidPtr, column);
1323 return (0);
1324 #endif
1325
1326 case RAIDFRAME_INIT_LABELS:
1327 clabel = (RF_ComponentLabel_t *) data;
1328 /*
1329 we only want the serial number from
1330 the above. We get all the rest of the information
1331 from the config that was used to create this RAID
1332 set.
1333 */
1334
1335 raidPtr->serial_number = clabel->serial_number;
1336
1337 for(column=0;column<raidPtr->numCol;column++) {
1338 diskPtr = &raidPtr->Disks[column];
1339 if (!RF_DEAD_DISK(diskPtr->status)) {
1340 ci_label = raidget_component_label(raidPtr,
1341 column);
1342 /* Zeroing this is important. */
1343 memset(ci_label, 0, sizeof(*ci_label));
1344 raid_init_component_label(raidPtr, ci_label);
1345 ci_label->serial_number =
1346 raidPtr->serial_number;
1347 ci_label->row = 0; /* we dont' pretend to support more */
1348 rf_component_label_set_partitionsize(ci_label,
1349 diskPtr->partitionSize);
1350 ci_label->column = column;
1351 raidflush_component_label(raidPtr, column);
1352 }
1353 /* XXXjld what about the spares? */
1354 }
1355
1356 return (retcode);
1357 case RAIDFRAME_SET_AUTOCONFIG:
1358 d = rf_set_autoconfig(raidPtr, *(int *) data);
1359 printf("raid%d: New autoconfig value is: %d\n",
1360 raidPtr->raidid, d);
1361 *(int *) data = d;
1362 return (retcode);
1363
1364 case RAIDFRAME_SET_ROOT:
1365 d = rf_set_rootpartition(raidPtr, *(int *) data);
1366 printf("raid%d: New rootpartition value is: %d\n",
1367 raidPtr->raidid, d);
1368 *(int *) data = d;
1369 return (retcode);
1370
1371 /* initialize all parity */
1372 case RAIDFRAME_REWRITEPARITY:
1373
1374 if (raidPtr->Layout.map->faultsTolerated == 0) {
1375 /* Parity for RAID 0 is trivially correct */
1376 raidPtr->parity_good = RF_RAID_CLEAN;
1377 return(0);
1378 }
1379
1380 if (raidPtr->parity_rewrite_in_progress == 1) {
1381 /* Re-write is already in progress! */
1382 return(EINVAL);
1383 }
1384
1385 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1386 rf_RewriteParityThread,
1387 raidPtr,"raid_parity");
1388 return (retcode);
1389
1390
1391 case RAIDFRAME_ADD_HOT_SPARE:
1392 sparePtr = (RF_SingleComponent_t *) data;
1393 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1394 retcode = rf_add_hot_spare(raidPtr, &component);
1395 return(retcode);
1396
1397 case RAIDFRAME_REMOVE_HOT_SPARE:
1398 return(retcode);
1399
1400 case RAIDFRAME_DELETE_COMPONENT:
1401 componentPtr = (RF_SingleComponent_t *)data;
1402 memcpy( &component, componentPtr,
1403 sizeof(RF_SingleComponent_t));
1404 retcode = rf_delete_component(raidPtr, &component);
1405 return(retcode);
1406
1407 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1408 componentPtr = (RF_SingleComponent_t *)data;
1409 memcpy( &component, componentPtr,
1410 sizeof(RF_SingleComponent_t));
1411 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1412 return(retcode);
1413
1414 case RAIDFRAME_REBUILD_IN_PLACE:
1415
1416 if (raidPtr->Layout.map->faultsTolerated == 0) {
1417 /* Can't do this on a RAID 0!! */
1418 return(EINVAL);
1419 }
1420
1421 if (raidPtr->recon_in_progress == 1) {
1422 /* a reconstruct is already in progress! */
1423 return(EINVAL);
1424 }
1425
1426 componentPtr = (RF_SingleComponent_t *) data;
1427 memcpy( &component, componentPtr,
1428 sizeof(RF_SingleComponent_t));
1429 component.row = 0; /* we don't support any more */
1430 column = component.column;
1431
1432 if ((column < 0) || (column >= raidPtr->numCol)) {
1433 return(EINVAL);
1434 }
1435
1436 rf_lock_mutex2(raidPtr->mutex);
1437 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1438 (raidPtr->numFailures > 0)) {
1439 /* XXX 0 above shouldn't be constant!!! */
1440 /* some component other than this has failed.
1441 Let's not make things worse than they already
1442 are... */
1443 printf("raid%d: Unable to reconstruct to disk at:\n",
1444 raidPtr->raidid);
1445 printf("raid%d: Col: %d Too many failures.\n",
1446 raidPtr->raidid, column);
1447 rf_unlock_mutex2(raidPtr->mutex);
1448 return (EINVAL);
1449 }
1450 if (raidPtr->Disks[column].status ==
1451 rf_ds_reconstructing) {
1452 printf("raid%d: Unable to reconstruct to disk at:\n",
1453 raidPtr->raidid);
1454 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column);
1455
1456 rf_unlock_mutex2(raidPtr->mutex);
1457 return (EINVAL);
1458 }
1459 if (raidPtr->Disks[column].status == rf_ds_spared) {
1460 rf_unlock_mutex2(raidPtr->mutex);
1461 return (EINVAL);
1462 }
1463 rf_unlock_mutex2(raidPtr->mutex);
1464
1465 RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
1466 if (rrint == NULL)
1467 return(ENOMEM);
1468
1469 rrint->col = column;
1470 rrint->raidPtr = raidPtr;
1471
1472 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1473 rf_ReconstructInPlaceThread,
1474 rrint, "raid_reconip");
1475 return(retcode);
1476
1477 case RAIDFRAME_GET_INFO:
1478 #ifdef COMPAT_NETBSD32
1479 case RAIDFRAME_GET_INFO32:
1480 #endif
1481 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1482 (RF_DeviceConfig_t *));
1483 if (d_cfg == NULL)
1484 return (ENOMEM);
1485 retcode = rf_get_info(raidPtr, d_cfg);
1486 if (retcode == 0) {
1487 #ifdef COMPAT_NETBSD32
1488 if (cmd == RAIDFRAME_GET_INFO32)
1489 ucfgp = NETBSD32PTR64(*(netbsd32_pointer_t *)data);
1490 else
1491 #endif
1492 ucfgp = *(RF_DeviceConfig_t **)data;
1493 retcode = copyout(d_cfg, ucfgp, sizeof(RF_DeviceConfig_t));
1494 }
1495 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1496
1497 return (retcode);
1498
1499 case RAIDFRAME_CHECK_PARITY:
1500 *(int *) data = raidPtr->parity_good;
1501 return (0);
1502
1503 case RAIDFRAME_PARITYMAP_STATUS:
1504 if (rf_paritymap_ineligible(raidPtr))
1505 return EINVAL;
1506 rf_paritymap_status(raidPtr->parity_map,
1507 (struct rf_pmstat *)data);
1508 return 0;
1509
1510 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1511 if (rf_paritymap_ineligible(raidPtr))
1512 return EINVAL;
1513 if (raidPtr->parity_map == NULL)
1514 return ENOENT; /* ??? */
1515 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1516 (struct rf_pmparams *)data, 1))
1517 return EINVAL;
1518 return 0;
1519
1520 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1521 if (rf_paritymap_ineligible(raidPtr))
1522 return EINVAL;
1523 *(int *) data = rf_paritymap_get_disable(raidPtr);
1524 return 0;
1525
1526 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1527 if (rf_paritymap_ineligible(raidPtr))
1528 return EINVAL;
1529 rf_paritymap_set_disable(raidPtr, *(int *)data);
1530 /* XXX should errors be passed up? */
1531 return 0;
1532
1533 case RAIDFRAME_RESET_ACCTOTALS:
1534 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1535 return (0);
1536
1537 case RAIDFRAME_GET_ACCTOTALS:
1538 totals = (RF_AccTotals_t *) data;
1539 *totals = raidPtr->acc_totals;
1540 return (0);
1541
1542 case RAIDFRAME_KEEP_ACCTOTALS:
1543 raidPtr->keep_acc_totals = *(int *)data;
1544 return (0);
1545
1546 case RAIDFRAME_GET_SIZE:
1547 *(int *) data = raidPtr->totalSectors;
1548 return (0);
1549
1550 /* fail a disk & optionally start reconstruction */
1551 case RAIDFRAME_FAIL_DISK:
1552 #ifdef COMPAT_80
1553 case RAIDFRAME_FAIL_DISK80:
1554 #endif
1555
1556 if (raidPtr->Layout.map->faultsTolerated == 0) {
1557 /* Can't do this on a RAID 0!! */
1558 return(EINVAL);
1559 }
1560
1561 rr = (struct rf_recon_req *) data;
1562 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1563 return (EINVAL);
1564
1565 rf_lock_mutex2(raidPtr->mutex);
1566 if (raidPtr->status == rf_rs_reconstructing) {
1567 /* you can't fail a disk while we're reconstructing! */
1568 /* XXX wrong for RAID6 */
1569 rf_unlock_mutex2(raidPtr->mutex);
1570 return (EINVAL);
1571 }
1572 if ((raidPtr->Disks[rr->col].status ==
1573 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1574 /* some other component has failed. Let's not make
1575 things worse. XXX wrong for RAID6 */
1576 rf_unlock_mutex2(raidPtr->mutex);
1577 return (EINVAL);
1578 }
1579 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1580 /* Can't fail a spared disk! */
1581 rf_unlock_mutex2(raidPtr->mutex);
1582 return (EINVAL);
1583 }
1584 rf_unlock_mutex2(raidPtr->mutex);
1585
1586 /* make a copy of the recon request so that we don't rely on
1587 * the user's buffer */
1588 RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
1589 if (rrint == NULL)
1590 return(ENOMEM);
1591 rrint->col = rr->col;
1592 rrint->flags = rr->flags;
1593 rrint->raidPtr = raidPtr;
1594
1595 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1596 rf_ReconThread,
1597 rrint, "raid_recon");
1598 return (0);
1599
1600 /* invoke a copyback operation after recon on whatever disk
1601 * needs it, if any */
1602 case RAIDFRAME_COPYBACK:
1603
1604 if (raidPtr->Layout.map->faultsTolerated == 0) {
1605 /* This makes no sense on a RAID 0!! */
1606 return(EINVAL);
1607 }
1608
1609 if (raidPtr->copyback_in_progress == 1) {
1610 /* Copyback is already in progress! */
1611 return(EINVAL);
1612 }
1613
1614 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1615 rf_CopybackThread,
1616 raidPtr,"raid_copyback");
1617 return (retcode);
1618
1619 /* return the percentage completion of reconstruction */
1620 case RAIDFRAME_CHECK_RECON_STATUS:
1621 if (raidPtr->Layout.map->faultsTolerated == 0) {
1622 /* This makes no sense on a RAID 0, so tell the
1623 user it's done. */
1624 *(int *) data = 100;
1625 return(0);
1626 }
1627 if (raidPtr->status != rf_rs_reconstructing)
1628 *(int *) data = 100;
1629 else {
1630 if (raidPtr->reconControl->numRUsTotal > 0) {
1631 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1632 } else {
1633 *(int *) data = 0;
1634 }
1635 }
1636 return (0);
1637 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1638 rf_check_recon_status_ext(raidPtr, data);
1639 return (0);
1640
1641 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1642 if (raidPtr->Layout.map->faultsTolerated == 0) {
1643 /* This makes no sense on a RAID 0, so tell the
1644 user it's done. */
1645 *(int *) data = 100;
1646 return(0);
1647 }
1648 if (raidPtr->parity_rewrite_in_progress == 1) {
1649 *(int *) data = 100 *
1650 raidPtr->parity_rewrite_stripes_done /
1651 raidPtr->Layout.numStripe;
1652 } else {
1653 *(int *) data = 100;
1654 }
1655 return (0);
1656
1657 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1658 rf_check_parityrewrite_status_ext(raidPtr, data);
1659 return (0);
1660
1661 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1662 if (raidPtr->Layout.map->faultsTolerated == 0) {
1663 /* This makes no sense on a RAID 0 */
1664 *(int *) data = 100;
1665 return(0);
1666 }
1667 if (raidPtr->copyback_in_progress == 1) {
1668 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1669 raidPtr->Layout.numStripe;
1670 } else {
1671 *(int *) data = 100;
1672 }
1673 return (0);
1674
1675 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1676 rf_check_copyback_status_ext(raidPtr, data);
1677 return 0;
1678
1679 case RAIDFRAME_SET_LAST_UNIT:
1680 for (column = 0; column < raidPtr->numCol; column++)
1681 if (raidPtr->Disks[column].status != rf_ds_optimal)
1682 return EBUSY;
1683
1684 for (column = 0; column < raidPtr->numCol; column++) {
1685 clabel = raidget_component_label(raidPtr, column);
1686 clabel->last_unit = *(int *)data;
1687 raidflush_component_label(raidPtr, column);
1688 }
1689 rs->sc_cflags |= RAIDF_UNIT_CHANGED;
1690 return 0;
1691
1692 /* the sparetable daemon calls this to wait for the kernel to
1693 * need a spare table. this ioctl does not return until a
1694 * spare table is needed. XXX -- calling mpsleep here in the
1695 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1696 * -- I should either compute the spare table in the kernel,
1697 * or have a different -- XXX XXX -- interface (a different
1698 * character device) for delivering the table -- XXX */
1699 #if 0
1700 case RAIDFRAME_SPARET_WAIT:
1701 rf_lock_mutex2(rf_sparet_wait_mutex);
1702 while (!rf_sparet_wait_queue)
1703 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1704 waitreq = rf_sparet_wait_queue;
1705 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1706 rf_unlock_mutex2(rf_sparet_wait_mutex);
1707
1708 /* structure assignment */
1709 *((RF_SparetWait_t *) data) = *waitreq;
1710
1711 RF_Free(waitreq, sizeof(*waitreq));
1712 return (0);
1713
1714 /* wakes up a process waiting on SPARET_WAIT and puts an error
1715 * code in it that will cause the dameon to exit */
1716 case RAIDFRAME_ABORT_SPARET_WAIT:
1717 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1718 waitreq->fcol = -1;
1719 rf_lock_mutex2(rf_sparet_wait_mutex);
1720 waitreq->next = rf_sparet_wait_queue;
1721 rf_sparet_wait_queue = waitreq;
1722 rf_broadcast_conf2(rf_sparet_wait_cv);
1723 rf_unlock_mutex2(rf_sparet_wait_mutex);
1724 return (0);
1725
1726 /* used by the spare table daemon to deliver a spare table
1727 * into the kernel */
1728 case RAIDFRAME_SEND_SPARET:
1729
1730 /* install the spare table */
1731 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1732
1733 /* respond to the requestor. the return status of the spare
1734 * table installation is passed in the "fcol" field */
1735 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1736 waitreq->fcol = retcode;
1737 rf_lock_mutex2(rf_sparet_wait_mutex);
1738 waitreq->next = rf_sparet_resp_queue;
1739 rf_sparet_resp_queue = waitreq;
1740 rf_broadcast_cond2(rf_sparet_resp_cv);
1741 rf_unlock_mutex2(rf_sparet_wait_mutex);
1742
1743 return (retcode);
1744 #endif
1745
1746 default:
1747 break; /* fall through to the os-specific code below */
1748
1749 }
1750
1751 if (!raidPtr->valid)
1752 return (EINVAL);
1753
1754 /*
1755 * Add support for "regular" device ioctls here.
1756 */
1757
1758 switch (cmd) {
1759 case DIOCGCACHE:
1760 retcode = rf_get_component_caches(raidPtr, (int *)data);
1761 break;
1762
1763 case DIOCCACHESYNC:
1764 retcode = rf_sync_component_caches(raidPtr);
1765 break;
1766
1767 default:
1768 retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
1769 break;
1770 }
1771
1772 return (retcode);
1773
1774 }
1775
1776
1777 /* raidinit -- complete the rest of the initialization for the
1778 RAIDframe device. */
1779
1780
/*
 * raidinit: finish bringing up a freshly configured RAID set.
 *
 * Attaches a pseudo-device for the unit, initializes the dk(4) and
 * disk(9) layers, publishes the geometry, allocates the buffer queue,
 * marks the unit RAIDF_INITED and kicks off wedge discovery.
 *
 * Called from the RAIDFRAME_CONFIGURE ioctl path and autoconfiguration
 * after rf_Configure() has succeeded.
 */
static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		/* NOTE(review): on failure RAIDF_INITED is never set, so
		 * the unit remains unusable; callers get no error code. */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	/* first-come-first-served queue, sorted by raw block number */
	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	/* probe for wedges (GPT partitions etc.) on the new device */
	dkwedge_discover(&dksc->sc_dkdev);
}
1836
1837 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1838 /* wake up the daemon & tell it to get us a spare table
1839 * XXX
1840 * the entries in the queues should be tagged with the raidPtr
1841 * so that in the extremely rare case that two recons happen at once,
1842 * we know for which device were requesting a spare table
1843 * XXX
1844 *
1845 * XXX This code is not currently used. GO
1846 */
/*
 * rf_GetSpareTableFromDaemon: hand a spare-table request to the
 * userland sparetable daemon and block until it delivers a response.
 *
 * The request is pushed onto rf_sparet_wait_queue (consumed by the
 * daemon via the RAIDFRAME_SPARET_WAIT ioctl) and the caller then
 * sleeps on rf_sparet_resp_cv until a response appears on
 * rf_sparet_resp_queue.  Returns the "fcol" status delivered by the
 * daemon (0 on success, error code otherwise).
 *
 * Note the response dequeued here is a different allocation from the
 * request we enqueued; we free the response, the daemon path frees
 * the request.
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int     retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1870 #endif
1871
1872 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1873 * bp & passes it down.
1874 * any calls originating in the kernel must use non-blocking I/O
1875 * do some extra sanity checking to return "appropriate" error values for
1876 * certain conditions (to make some standard utilities work)
1877 *
1878 * Formerly known as: rf_DoAccessKernel
1879 */
1880 void
1881 raidstart(RF_Raid_t *raidPtr)
1882 {
1883 struct raid_softc *rs;
1884 struct dk_softc *dksc;
1885
1886 rs = raidPtr->softc;
1887 dksc = &rs->sc_dksc;
1888 /* quick check to see if anything has died recently */
1889 rf_lock_mutex2(raidPtr->mutex);
1890 if (raidPtr->numNewFailures > 0) {
1891 rf_unlock_mutex2(raidPtr->mutex);
1892 rf_update_component_labels(raidPtr,
1893 RF_NORMAL_COMPONENT_UPDATE);
1894 rf_lock_mutex2(raidPtr->mutex);
1895 raidPtr->numNewFailures--;
1896 }
1897 rf_unlock_mutex2(raidPtr->mutex);
1898
1899 if ((rs->sc_flags & RAIDF_INITED) == 0) {
1900 printf("raid%d: raidstart not ready\n", raidPtr->raidid);
1901 return;
1902 }
1903
1904 dk_start(dksc, NULL);
1905 }
1906
/*
 * raiddoaccess: translate a struct buf into a RAIDframe access and
 * submit it asynchronously via rf_DoAccess().
 *
 * Returns 0 when the access was submitted, EAGAIN when the array has
 * no openings left (caller retries later), or ENOSPC when the request
 * lies outside the array or is not sector-aligned.
 */
static int
raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	daddr_t blocknum;
	int     do_async;
	int rc;

	/* bail out early if no openings are available; the decrement
	 * happens further below, after the request has been validated */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->openings == 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EAGAIN;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	blocknum = bp->b_rawblkno;

	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
		    (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to...
	 * but DO NOT TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* pb accounts for a trailing partial sector; sum is the
	 * exclusive end address, checked below for overflow/overrun */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	if (1 || rf_debugKernelAccess) { /* NOTE(review): "1 ||" forces this
					  * branch on; looks like a debug
					  * leftover -- harmless since
					  * db1_printf is conditional */
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			    (int) raid_addr, (int) sum, (int) num_blocks,
			    (int) pb, (int) bp->b_resid));
	}
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		rc = ENOSPC;
		goto done;
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	if (bp->b_bcount & raidPtr->sectorMask) {
		rc = ENOSPC;
		goto done;
	}
	db1_printf(("Calling DoAccess..\n"));


	/* claim one opening; released again when the access completes */
	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings--;
	rf_unlock_mutex2(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE.
	 * always condition on B_READ instead */

	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
			 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
			 do_async, raid_addr, num_blocks,
			 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

done:
	return rc;
}
1979
1980 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1981
/*
 * rf_DispatchKernelIO: issue a single component I/O described by a
 * RF_DiskQueueData_t.  Reads/writes are sent to the component via
 * bdev_strategy(); completion is delivered to KernelWakeupFunc().
 * NOP requests are completed immediately (used to unlock a locked
 * queue).  Always returns 0; errors surface through the callback.
 *
 * Called with the disk queue mutex held; it is dropped around
 * bdev_strategy() because that call may block.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* complete synchronously through the normal callback */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* set up the buf for this component's device/offset and
		 * point completion at KernelWakeupFunc */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
2055 /* this is the callback function associated with a I/O invoked from
2056 kernel code.
2057 */
/*
 * KernelWakeupFunc: biodone callback for component I/O issued by
 * rf_DispatchKernelIO().
 *
 * Records the I/O status in the request, optionally marks the
 * component failed on error (only if doing so would not exceed the
 * layout's fault tolerance), queues the request on the raid set's
 * iodone list, and wakes the raidio thread.  All of that happens
 * under raidPtr->iodone_lock.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures is consumed by raidstart(), which
			 * triggers a component label update */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2124
2125
2126 /*
2127 * initialize a buf structure for doing an I/O in the kernel.
2128 */
/*
 * InitBP: initialize a buf for a component I/O issued from the kernel.
 *
 * rw_flag is B_READ or B_WRITE; startSect/numSect are in units of
 * raid sectors (2^logBytesPerSector bytes each); cbFunc/cbArg become
 * b_iodone/b_private, so completion is delivered via biodone().
 * Panics if the resulting byte count is zero.
 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* bp->b_flags       = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	/* convert raid sectors to DEV_BSIZE blocks for b_blkno */
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}
2153
2154 /*
2155 * Wait interruptibly for an exclusive lock.
2156 *
2157 * XXX
2158 * Several drivers do this; it should be abstracted and made MP-safe.
2159 * (Hmm... where have we seen this warning before :-> GO )
2160 */
2161 static int
2162 raidlock(struct raid_softc *rs)
2163 {
2164 int error;
2165
2166 error = 0;
2167 mutex_enter(&rs->sc_mutex);
2168 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2169 rs->sc_flags |= RAIDF_WANTED;
2170 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2171 if (error != 0)
2172 goto done;
2173 }
2174 rs->sc_flags |= RAIDF_LOCKED;
2175 done:
2176 mutex_exit(&rs->sc_mutex);
2177 return (error);
2178 }
2179 /*
2180 * Unlock and wake up any waiters.
2181 */
2182 static void
2183 raidunlock(struct raid_softc *rs)
2184 {
2185
2186 mutex_enter(&rs->sc_mutex);
2187 rs->sc_flags &= ~RAIDF_LOCKED;
2188 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2189 rs->sc_flags &= ~RAIDF_WANTED;
2190 cv_broadcast(&rs->sc_cv);
2191 }
2192 mutex_exit(&rs->sc_mutex);
2193 }
2194
2195
2196 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2197 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2198 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2199
static daddr_t
rf_component_info_offset(void)
{

	/* Byte offset of the component label area on each component. */
	return RF_COMPONENT_INFO_OFFSET;
}
2206
2207 static daddr_t
2208 rf_component_info_size(unsigned secsize)
2209 {
2210 daddr_t info_size;
2211
2212 KASSERT(secsize);
2213 if (secsize > RF_COMPONENT_INFO_SIZE)
2214 info_size = secsize;
2215 else
2216 info_size = RF_COMPONENT_INFO_SIZE;
2217
2218 return info_size;
2219 }
2220
2221 static daddr_t
2222 rf_parity_map_offset(RF_Raid_t *raidPtr)
2223 {
2224 daddr_t map_offset;
2225
2226 KASSERT(raidPtr->bytesPerSector);
2227 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2228 map_offset = raidPtr->bytesPerSector;
2229 else
2230 map_offset = RF_COMPONENT_INFO_SIZE;
2231 map_offset += rf_component_info_offset();
2232
2233 return map_offset;
2234 }
2235
2236 static daddr_t
2237 rf_parity_map_size(RF_Raid_t *raidPtr)
2238 {
2239 daddr_t map_size;
2240
2241 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2242 map_size = raidPtr->bytesPerSector;
2243 else
2244 map_size = RF_PARITY_MAP_SIZE;
2245
2246 return map_size;
2247 }
2248
2249 int
2250 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2251 {
2252 RF_ComponentLabel_t *clabel;
2253
2254 clabel = raidget_component_label(raidPtr, col);
2255 clabel->clean = RF_RAID_CLEAN;
2256 raidflush_component_label(raidPtr, col);
2257 return(0);
2258 }
2259
2260
2261 int
2262 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2263 {
2264 RF_ComponentLabel_t *clabel;
2265
2266 clabel = raidget_component_label(raidPtr, col);
2267 clabel->clean = RF_RAID_DIRTY;
2268 raidflush_component_label(raidPtr, col);
2269 return(0);
2270 }
2271
2272 int
2273 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2274 {
2275 KASSERT(raidPtr->bytesPerSector);
2276 return raidread_component_label(raidPtr->bytesPerSector,
2277 raidPtr->Disks[col].dev,
2278 raidPtr->raid_cinfo[col].ci_vp,
2279 &raidPtr->raid_cinfo[col].ci_label);
2280 }
2281
/*
 * Return a pointer to the in-core component label for column `col'.
 * Callers may modify it; raidflush_component_label() writes it out.
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2287
2288 int
2289 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2290 {
2291 RF_ComponentLabel_t *label;
2292
2293 label = &raidPtr->raid_cinfo[col].ci_label;
2294 label->mod_counter = raidPtr->mod_counter;
2295 #ifndef RF_NO_PARITY_MAP
2296 label->parity_map_modcount = label->mod_counter;
2297 #endif
2298 return raidwrite_component_label(raidPtr->bytesPerSector,
2299 raidPtr->Disks[col].dev,
2300 raidPtr->raid_cinfo[col].ci_vp, label);
2301 }
2302
2303
2304 static int
2305 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2306 RF_ComponentLabel_t *clabel)
2307 {
2308 return raidread_component_area(dev, b_vp, clabel,
2309 sizeof(RF_ComponentLabel_t),
2310 rf_component_info_offset(),
2311 rf_component_info_size(secsize));
2312 }
2313
/* ARGSUSED */
/*
 * Read `msize' bytes of component metadata (label or parity map) into
 * `data' from byte offset `offset' of the raw component device.
 * `dsize' is the size of the reserved on-disk area; the whole area is
 * read in one transfer through a temporary buffer and only the first
 * msize bytes are copied out (callers size dsize >= msize).
 * Returns 0 on success or an errno.
 */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
	bp->b_resid = dsize;

	/* issue the read through the block device and wait for it */
	bdev_strategy(bp);
	error = biowait(bp);

	if (!error) {
		memcpy(data, bp->b_data, msize);
	}

	/* hand the temporary buffer back to the buffer cache */
	brelse(bp, 0);
	return(error);
}
2351
2352
2353 static int
2354 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2355 RF_ComponentLabel_t *clabel)
2356 {
2357 return raidwrite_component_area(dev, b_vp, clabel,
2358 sizeof(RF_ComponentLabel_t),
2359 rf_component_info_offset(),
2360 rf_component_info_size(secsize), 0);
2361 }
2362
/* ARGSUSED */
/*
 * Write `msize' bytes of component metadata at byte offset `offset',
 * zero-padding the transfer out to the full `dsize'-byte area.  If
 * `asyncp' is nonzero the write is issued B_ASYNC and we return 0
 * without waiting.
 *
 * NOTE(review): the async path never biowait()s or brelse()s the
 * buffer here; presumably B_ASYNC buffers are released at biodone
 * time -- verify against the buffer cache code.
 */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
	bp->b_resid = dsize;

	/* zero-fill the area, then drop the payload at the front */
	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	bdev_strategy(bp);
	if (asyncp)
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}
2397
2398 void
2399 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2400 {
2401 int c;
2402
2403 for (c = 0; c < raidPtr->numCol; c++) {
2404 /* Skip dead disks. */
2405 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2406 continue;
2407 /* XXXjld: what if an error occurs here? */
2408 raidwrite_component_area(raidPtr->Disks[c].dev,
2409 raidPtr->raid_cinfo[c].ci_vp, map,
2410 RF_PARITYMAP_NBYTE,
2411 rf_parity_map_offset(raidPtr),
2412 rf_parity_map_size(raidPtr), 0);
2413 }
2414 }
2415
2416 void
2417 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2418 {
2419 struct rf_paritymap_ondisk tmp;
2420 int c,first;
2421
2422 first=1;
2423 for (c = 0; c < raidPtr->numCol; c++) {
2424 /* Skip dead disks. */
2425 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2426 continue;
2427 raidread_component_area(raidPtr->Disks[c].dev,
2428 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2429 RF_PARITYMAP_NBYTE,
2430 rf_parity_map_offset(raidPtr),
2431 rf_parity_map_size(raidPtr));
2432 if (first) {
2433 memcpy(map, &tmp, sizeof(*map));
2434 first = 0;
2435 } else {
2436 rf_paritymap_merge(map, &tmp);
2437 }
2438 }
2439 }
2440
/*
 * Bump the array's modification counter and mark the component label
 * on every usable component dirty; in-use spares additionally get a
 * freshly initialized label recording which column they stand in for.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column claims this spare,
			   scol keeps its prior value (initially -1). */

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2500
2501
/*
 * Refresh the component labels on all healthy components and in-use
 * spares: bump the mod counter, note the optimal status and (unless
 * the unit number changed) the unit we are configured as, and -- on a
 * final update with good parity -- mark the labels clean as well.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2579
2580 void
2581 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2582 {
2583
2584 if (vp != NULL) {
2585 if (auto_configured == 1) {
2586 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2587 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2588 vput(vp);
2589
2590 } else {
2591 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2592 }
2593 }
2594 }
2595
2596
2597 void
2598 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2599 {
2600 int r,c;
2601 struct vnode *vp;
2602 int acd;
2603
2604
2605 /* We take this opportunity to close the vnodes like we should.. */
2606
2607 for (c = 0; c < raidPtr->numCol; c++) {
2608 vp = raidPtr->raid_cinfo[c].ci_vp;
2609 acd = raidPtr->Disks[c].auto_configured;
2610 rf_close_component(raidPtr, vp, acd);
2611 raidPtr->raid_cinfo[c].ci_vp = NULL;
2612 raidPtr->Disks[c].auto_configured = 0;
2613 }
2614
2615 for (r = 0; r < raidPtr->numSpare; r++) {
2616 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2617 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2618 rf_close_component(raidPtr, vp, acd);
2619 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2620 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2621 }
2622 }
2623
2624
2625 void
2626 rf_ReconThread(struct rf_recon_req_internal *req)
2627 {
2628 int s;
2629 RF_Raid_t *raidPtr;
2630
2631 s = splbio();
2632 raidPtr = (RF_Raid_t *) req->raidPtr;
2633 raidPtr->recon_in_progress = 1;
2634
2635 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2636 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2637
2638 RF_Free(req, sizeof(*req));
2639
2640 raidPtr->recon_in_progress = 0;
2641 splx(s);
2642
2643 /* That's all... */
2644 kthread_exit(0); /* does not return */
2645 }
2646
2647 void
2648 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2649 {
2650 int retcode;
2651 int s;
2652
2653 raidPtr->parity_rewrite_stripes_done = 0;
2654 raidPtr->parity_rewrite_in_progress = 1;
2655 s = splbio();
2656 retcode = rf_RewriteParity(raidPtr);
2657 splx(s);
2658 if (retcode) {
2659 printf("raid%d: Error re-writing parity (%d)!\n",
2660 raidPtr->raidid, retcode);
2661 } else {
2662 /* set the clean bit! If we shutdown correctly,
2663 the clean bit on each component label will get
2664 set */
2665 raidPtr->parity_good = RF_RAID_CLEAN;
2666 }
2667 raidPtr->parity_rewrite_in_progress = 0;
2668
2669 /* Anyone waiting for us to stop? If so, inform them... */
2670 if (raidPtr->waitShutdown) {
2671 wakeup(&raidPtr->parity_rewrite_in_progress);
2672 }
2673
2674 /* That's all... */
2675 kthread_exit(0); /* does not return */
2676 }
2677
2678
2679 void
2680 rf_CopybackThread(RF_Raid_t *raidPtr)
2681 {
2682 int s;
2683
2684 raidPtr->copyback_in_progress = 1;
2685 s = splbio();
2686 rf_CopybackReconstructedData(raidPtr);
2687 splx(s);
2688 raidPtr->copyback_in_progress = 0;
2689
2690 /* That's all... */
2691 kthread_exit(0); /* does not return */
2692 }
2693
2694
2695 void
2696 rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
2697 {
2698 int s;
2699 RF_Raid_t *raidPtr;
2700
2701 s = splbio();
2702 raidPtr = req->raidPtr;
2703 raidPtr->recon_in_progress = 1;
2704 rf_ReconstructInPlace(raidPtr, req->col);
2705 RF_Free(req, sizeof(*req));
2706 raidPtr->recon_in_progress = 0;
2707 splx(s);
2708
2709 /* That's all... */
2710 kthread_exit(0); /* does not return */
2711 }
2712
/*
 * Try to read a component label from the open vnode `vp'.  If the
 * label looks reasonable and fits the partition, prepend a new
 * RF_AutoConfig_t to `ac_list' (taking ownership of vp) and return
 * the new list head.  Otherwise the vnode is closed and released and
 * the list is returned unchanged.  On out-of-memory the WHOLE list is
 * freed and NULL returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* free everything accumulated so far, labels included */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label. Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;	/* list entry now owns the vnode */
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: drop the label and give the vnode back */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
2770
/*
 * Scan every disk-class device in the system for RAIDframe component
 * labels and return a list of candidates for autoconfiguration.
 * Wedges are scanned in a first pass, everything else in a second,
 * so a wedge is preferred over the raw partition that contains it.
 * For non-wedge disks each FS_RAID disklabel partition is probed; if
 * none is found the raw partition itself is probed as a fallback.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		    dv = deviter_next(&di)) {

			/* we are only interested in disks... */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies... */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CD's... */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md's... */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* only raidframe-typed wedges are candidates */
				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* rf_get_component takes ownership of vp */
				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists. Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
2974
2975
2976 int
2977 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
2978 {
2979
2980 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2981 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2982 ((clabel->clean == RF_RAID_CLEAN) ||
2983 (clabel->clean == RF_RAID_DIRTY)) &&
2984 clabel->row >=0 &&
2985 clabel->column >= 0 &&
2986 clabel->num_rows > 0 &&
2987 clabel->num_columns > 0 &&
2988 clabel->row < clabel->num_rows &&
2989 clabel->column < clabel->num_columns &&
2990 clabel->blockSize > 0 &&
2991 /*
2992 * numBlocksHi may contain garbage, but it is ok since
2993 * the type is unsigned. If it is really garbage,
2994 * rf_fix_old_label_size() will fix it.
2995 */
2996 rf_component_label_numblocks(clabel) > 0) {
2997 /*
2998 * label looks reasonable enough...
2999 * let's make sure it has no old garbage.
3000 */
3001 if (numsecs)
3002 rf_fix_old_label_size(clabel, numsecs);
3003 return(1);
3004 }
3005 return(0);
3006 }
3007
3008
3009 /*
3010 * For reasons yet unknown, some old component labels have garbage in
3011 * the newer numBlocksHi region, and this causes lossage. Since those
3012 * disks will also have numsecs set to less than 32 bits of sectors,
3013 * we can determine when this corruption has occurred, and fix it.
3014 *
3015 * The exact same problem, with the same unknown reason, happens to
3016 * the partitionSizeHi member as well.
3017 */
3018 static void
3019 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3020 {
3021
3022 if (numsecs < ((uint64_t)1 << 32)) {
3023 if (clabel->numBlocksHi) {
3024 printf("WARNING: total sectors < 32 bits, yet "
3025 "numBlocksHi set\n"
3026 "WARNING: resetting numBlocksHi to zero.\n");
3027 clabel->numBlocksHi = 0;
3028 }
3029
3030 if (clabel->partitionSizeHi) {
3031 printf("WARNING: total sectors < 32 bits, yet "
3032 "partitionSizeHi set\n"
3033 "WARNING: resetting partitionSizeHi to zero.\n");
3034 clabel->partitionSizeHi = 0;
3035 }
3036 }
3037 }
3038
3039
#ifdef DEBUG
/*
 * Dump the interesting fields of a component label to the console.
 * Debug builds only.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* decode of the 2-bit root_partition field */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3073
3074 RF_ConfigSet_t *
3075 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3076 {
3077 RF_AutoConfig_t *ac;
3078 RF_ConfigSet_t *config_sets;
3079 RF_ConfigSet_t *cset;
3080 RF_AutoConfig_t *ac_next;
3081
3082
3083 config_sets = NULL;
3084
3085 /* Go through the AutoConfig list, and figure out which components
3086 belong to what sets. */
3087 ac = ac_list;
3088 while(ac!=NULL) {
3089 /* we're going to putz with ac->next, so save it here
3090 for use at the end of the loop */
3091 ac_next = ac->next;
3092
3093 if (config_sets == NULL) {
3094 /* will need at least this one... */
3095 config_sets = (RF_ConfigSet_t *)
3096 malloc(sizeof(RF_ConfigSet_t),
3097 M_RAIDFRAME, M_NOWAIT);
3098 if (config_sets == NULL) {
3099 panic("rf_create_auto_sets: No memory!");
3100 }
3101 /* this one is easy :) */
3102 config_sets->ac = ac;
3103 config_sets->next = NULL;
3104 config_sets->rootable = 0;
3105 ac->next = NULL;
3106 } else {
3107 /* which set does this component fit into? */
3108 cset = config_sets;
3109 while(cset!=NULL) {
3110 if (rf_does_it_fit(cset, ac)) {
3111 /* looks like it matches... */
3112 ac->next = cset->ac;
3113 cset->ac = ac;
3114 break;
3115 }
3116 cset = cset->next;
3117 }
3118 if (cset==NULL) {
3119 /* didn't find a match above... new set..*/
3120 cset = (RF_ConfigSet_t *)
3121 malloc(sizeof(RF_ConfigSet_t),
3122 M_RAIDFRAME, M_NOWAIT);
3123 if (cset == NULL) {
3124 panic("rf_create_auto_sets: No memory!");
3125 }
3126 cset->ac = ac;
3127 ac->next = NULL;
3128 cset->next = config_sets;
3129 cset->rootable = 0;
3130 config_sets = cset;
3131 }
3132 }
3133 ac = ac_next;
3134 }
3135
3136
3137 return(config_sets);
3138 }
3139
3140 static int
3141 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3142 {
3143 RF_ComponentLabel_t *clabel1, *clabel2;
3144
3145 /* If this one matches the *first* one in the set, that's good
3146 enough, since the other members of the set would have been
3147 through here too... */
3148 /* note that we are not checking partitionSize here..
3149
3150 Note that we are also not checking the mod_counters here.
3151 If everything else matches except the mod_counter, that's
3152 good enough for this test. We will deal with the mod_counters
3153 a little later in the autoconfiguration process.
3154
3155 (clabel1->mod_counter == clabel2->mod_counter) &&
3156
3157 The reason we don't check for this is that failed disks
3158 will have lower modification counts. If those disks are
3159 not added to the set they used to belong to, then they will
3160 form their own set, which may result in 2 different sets,
3161 for example, competing to be configured at raid0, and
3162 perhaps competing to be the root filesystem set. If the
3163 wrong ones get configured, or both attempt to become /,
3164 weird behaviour and or serious lossage will occur. Thus we
3165 need to bring them into the fold here, and kick them out at
3166 a later point.
3167
3168 */
3169
3170 clabel1 = cset->ac->clabel;
3171 clabel2 = ac->clabel;
3172 if ((clabel1->version == clabel2->version) &&
3173 (clabel1->serial_number == clabel2->serial_number) &&
3174 (clabel1->num_rows == clabel2->num_rows) &&
3175 (clabel1->num_columns == clabel2->num_columns) &&
3176 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3177 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3178 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3179 (clabel1->parityConfig == clabel2->parityConfig) &&
3180 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3181 (clabel1->blockSize == clabel2->blockSize) &&
3182 rf_component_label_numblocks(clabel1) ==
3183 rf_component_label_numblocks(clabel2) &&
3184 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3185 (clabel1->root_partition == clabel2->root_partition) &&
3186 (clabel1->last_unit == clabel2->last_unit) &&
3187 (clabel1->config_order == clabel2->config_order)) {
3188 /* if it get's here, it almost *has* to be a match */
3189 } else {
3190 /* it's not consistent with somebody in the set..
3191 punt */
3192 return(0);
3193 }
3194 /* all was fine.. it must fit... */
3195 return(1);
3196 }
3197
/*
 * Decide whether the configuration set has enough live components to
 * be configured.  The authoritative mod_counter is the highest one
 * present in the set; components with a lower counter are treated as
 * missing.  RAID 1 is special-cased: components pair up (even, odd)
 * by column, and the set is only rejected when BOTH halves of a pair
 * are missing.  Returns 1 if configurable, 0 otherwise.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a component claiming column c at the
		   authoritative mod_counter */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* tolerance by level: RAID 0 survives none, RAID 4/5 one */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3300
3301 void
3302 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3303 RF_Raid_t *raidPtr)
3304 {
3305 RF_ComponentLabel_t *clabel;
3306 int i;
3307
3308 clabel = ac->clabel;
3309
3310 /* 1. Fill in the common stuff */
3311 config->numCol = clabel->num_columns;
3312 config->numSpare = 0; /* XXX should this be set here? */
3313 config->sectPerSU = clabel->sectPerSU;
3314 config->SUsPerPU = clabel->SUsPerPU;
3315 config->SUsPerRU = clabel->SUsPerRU;
3316 config->parityConfig = clabel->parityConfig;
3317 /* XXX... */
3318 strcpy(config->diskQueueType,"fifo");
3319 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3320 config->layoutSpecificSize = 0; /* XXX ?? */
3321
3322 while(ac!=NULL) {
3323 /* row/col values will be in range due to the checks
3324 in reasonable_label() */
3325 strcpy(config->devnames[0][ac->clabel->column],
3326 ac->devname);
3327 ac = ac->next;
3328 }
3329
3330 for(i=0;i<RF_MAXDBGV;i++) {
3331 config->debugVars[i][0] = 0;
3332 }
3333 }
3334
3335 int
3336 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3337 {
3338 RF_ComponentLabel_t *clabel;
3339 int column;
3340 int sparecol;
3341
3342 raidPtr->autoconfigure = new_value;
3343
3344 for(column=0; column<raidPtr->numCol; column++) {
3345 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3346 clabel = raidget_component_label(raidPtr, column);
3347 clabel->autoconfigure = new_value;
3348 raidflush_component_label(raidPtr, column);
3349 }
3350 }
3351 for(column = 0; column < raidPtr->numSpare ; column++) {
3352 sparecol = raidPtr->numCol + column;
3353 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3354 clabel = raidget_component_label(raidPtr, sparecol);
3355 clabel->autoconfigure = new_value;
3356 raidflush_component_label(raidPtr, sparecol);
3357 }
3358 }
3359 return(new_value);
3360 }
3361
3362 int
3363 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3364 {
3365 RF_ComponentLabel_t *clabel;
3366 int column;
3367 int sparecol;
3368
3369 raidPtr->root_partition = new_value;
3370 for(column=0; column<raidPtr->numCol; column++) {
3371 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3372 clabel = raidget_component_label(raidPtr, column);
3373 clabel->root_partition = new_value;
3374 raidflush_component_label(raidPtr, column);
3375 }
3376 }
3377 for(column = 0; column < raidPtr->numSpare ; column++) {
3378 sparecol = raidPtr->numCol + column;
3379 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3380 clabel = raidget_component_label(raidPtr, sparecol);
3381 clabel->root_partition = new_value;
3382 raidflush_component_label(raidPtr, sparecol);
3383 }
3384 }
3385 return(new_value);
3386 }
3387
3388 void
3389 rf_release_all_vps(RF_ConfigSet_t *cset)
3390 {
3391 RF_AutoConfig_t *ac;
3392
3393 ac = cset->ac;
3394 while(ac!=NULL) {
3395 /* Close the vp, and give it back */
3396 if (ac->vp) {
3397 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3398 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3399 vput(ac->vp);
3400 ac->vp = NULL;
3401 }
3402 ac = ac->next;
3403 }
3404 }
3405
3406
3407 void
3408 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3409 {
3410 RF_AutoConfig_t *ac;
3411 RF_AutoConfig_t *next_ac;
3412
3413 ac = cset->ac;
3414 while(ac!=NULL) {
3415 next_ac = ac->next;
3416 /* nuke the label */
3417 free(ac->clabel, M_RAIDFRAME);
3418 /* cleanup the config structure */
3419 free(ac, M_RAIDFRAME);
3420 /* "next.." */
3421 ac = next_ac;
3422 }
3423 /* and, finally, nuke the config set */
3424 free(cset, M_RAIDFRAME);
3425 }
3426
3427
/*
 * raid_init_component_label: initialize *clabel from the current
 * in-core state of raidPtr.  Fills in version, geometry, layout,
 * status and configuration fields; the caller is responsible for
 * writing the label out to the component.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* rows are always 1 in this (post-row-removal) world */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3460
/*
 * rf_auto_config_set: bring up a RAID set from the auto-configuration
 * information in 'cset'.  Allocates a config structure, finds (or
 * creates) a free raid unit, and runs the normal configuration path.
 * Returns the configured softc, or NULL on failure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	 */

	/*
	 * Scan upward from the unit recorded in the label until we
	 * find a slot whose softc is not already a valid (configured)
	 * set.  NOTE(review): the second raidget() argument appears
	 * to mean "create if missing" -- confirm against raidget().
	 */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	    sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed; release the unit we grabbed */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3544
3545 void
3546 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3547 size_t xmin, size_t xmax)
3548 {
3549 int error;
3550
3551 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3552 pool_sethiwat(p, xmax);
3553 if ((error = pool_prime(p, xmin)) != 0)
3554 panic("%s: failed to prime pool: %d", __func__, error);
3555 pool_setlowat(p, xmin);
3556 }
3557
3558 /*
3559 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3560 * to see if there is IO pending and if that IO could possibly be done
3561 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3562 * otherwise.
3563 *
3564 */
3565 int
3566 rf_buf_queue_check(RF_Raid_t *raidPtr)
3567 {
3568 struct raid_softc *rs;
3569 struct dk_softc *dksc;
3570
3571 rs = raidPtr->softc;
3572 dksc = &rs->sc_dksc;
3573
3574 if ((rs->sc_flags & RAIDF_INITED) == 0)
3575 return 1;
3576
3577 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3578 /* there is work to do */
3579 return 0;
3580 }
3581 /* default is nothing to do */
3582 return 1;
3583 }
3584
3585 int
3586 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3587 {
3588 uint64_t numsecs;
3589 unsigned secsize;
3590 int error;
3591
3592 error = getdisksize(vp, &numsecs, &secsize);
3593 if (error == 0) {
3594 diskPtr->blockSize = secsize;
3595 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3596 diskPtr->partitionSize = numsecs;
3597 return 0;
3598 }
3599 return error;
3600 }
3601
/*
 * Autoconfiguration glue: raid(4) pseudo-device instances always
 * match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3607
/*
 * Autoconfiguration glue: nothing to do at attach time; real setup
 * happens when a set is configured.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
3612
3613
3614 static int
3615 raid_detach(device_t self, int flags)
3616 {
3617 int error;
3618 struct raid_softc *rs = raidsoftc(self);
3619
3620 if (rs == NULL)
3621 return ENXIO;
3622
3623 if ((error = raidlock(rs)) != 0)
3624 return (error);
3625
3626 error = raid_detach_unlocked(rs);
3627
3628 raidunlock(rs);
3629
3630 /* XXX raid can be referenced here */
3631
3632 if (error)
3633 return error;
3634
3635 /* Free the softc */
3636 raidput(rs);
3637
3638 return 0;
3639 }
3640
3641 static void
3642 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
3643 {
3644 struct dk_softc *dksc = &rs->sc_dksc;
3645 struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
3646
3647 memset(dg, 0, sizeof(*dg));
3648
3649 dg->dg_secperunit = raidPtr->totalSectors;
3650 dg->dg_secsize = raidPtr->bytesPerSector;
3651 dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
3652 dg->dg_ntracks = 4 * raidPtr->numCol;
3653
3654 disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
3655 }
3656
3657 /*
3658 * Get cache info for all the components (including spares).
3659 * Returns intersection of all the cache flags of all disks, or first
3660 * error if any encountered.
3661 * XXXfua feature flags can change as spares are added - lock down somehow
3662 */
3663 static int
3664 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3665 {
3666 int c;
3667 int error;
3668 int dkwhole = 0, dkpart;
3669
3670 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3671 /*
3672 * Check any non-dead disk, even when currently being
3673 * reconstructed.
3674 */
3675 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3676 || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3677 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3678 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3679 if (error) {
3680 if (error != ENODEV) {
3681 printf("raid%d: get cache for component %s failed\n",
3682 raidPtr->raidid,
3683 raidPtr->Disks[c].devname);
3684 }
3685
3686 return error;
3687 }
3688
3689 if (c == 0)
3690 dkwhole = dkpart;
3691 else
3692 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3693 }
3694 }
3695
3696 *data = dkwhole;
3697
3698 return 0;
3699 }
3700
3701 /*
3702 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3703 * We end up returning whatever error was returned by the first cache flush
3704 * that fails.
3705 */
3706
3707 int
3708 rf_sync_component_caches(RF_Raid_t *raidPtr)
3709 {
3710 int c, sparecol;
3711 int e,error;
3712 int force = 1;
3713
3714 error = 0;
3715 for (c = 0; c < raidPtr->numCol; c++) {
3716 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3717 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3718 &force, FWRITE, NOCRED);
3719 if (e) {
3720 if (e != ENODEV)
3721 printf("raid%d: cache flush to component %s failed.\n",
3722 raidPtr->raidid, raidPtr->Disks[c].devname);
3723 if (error == 0) {
3724 error = e;
3725 }
3726 }
3727 }
3728 }
3729
3730 for( c = 0; c < raidPtr->numSpare ; c++) {
3731 sparecol = raidPtr->numCol + c;
3732 /* Need to ensure that the reconstruct actually completed! */
3733 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3734 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3735 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3736 if (e) {
3737 if (e != ENODEV)
3738 printf("raid%d: cache flush to component %s failed.\n",
3739 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3740 if (error == 0) {
3741 error = e;
3742 }
3743 }
3744 }
3745 }
3746 return error;
3747 }
3748
3749 /* Fill in info with the current status */
3750 void
3751 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3752 {
3753
3754 if (raidPtr->status != rf_rs_reconstructing) {
3755 info->total = 100;
3756 info->completed = 100;
3757 } else {
3758 info->total = raidPtr->reconControl->numRUsTotal;
3759 info->completed = raidPtr->reconControl->numRUsComplete;
3760 }
3761 info->remaining = info->total - info->completed;
3762 }
3763
3764 /* Fill in info with the current status */
3765 void
3766 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3767 {
3768
3769 if (raidPtr->parity_rewrite_in_progress == 1) {
3770 info->total = raidPtr->Layout.numStripe;
3771 info->completed = raidPtr->parity_rewrite_stripes_done;
3772 } else {
3773 info->completed = 100;
3774 info->total = 100;
3775 }
3776 info->remaining = info->total - info->completed;
3777 }
3778
3779 /* Fill in info with the current status */
3780 void
3781 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3782 {
3783
3784 if (raidPtr->copyback_in_progress == 1) {
3785 info->total = raidPtr->Layout.numStripe;
3786 info->completed = raidPtr->copyback_stripes_done;
3787 info->remaining = info->total - info->completed;
3788 } else {
3789 info->remaining = 0;
3790 info->completed = 100;
3791 info->total = 100;
3792 }
3793 }
3794
3795 /* Fill in config with the current info */
3796 int
3797 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3798 {
3799 int d, i, j;
3800
3801 if (!raidPtr->valid)
3802 return (ENODEV);
3803 config->cols = raidPtr->numCol;
3804 config->ndevs = raidPtr->numCol;
3805 if (config->ndevs >= RF_MAX_DISKS)
3806 return (ENOMEM);
3807 config->nspares = raidPtr->numSpare;
3808 if (config->nspares >= RF_MAX_DISKS)
3809 return (ENOMEM);
3810 config->maxqdepth = raidPtr->maxQueueDepth;
3811 d = 0;
3812 for (j = 0; j < config->cols; j++) {
3813 config->devs[d] = raidPtr->Disks[j];
3814 d++;
3815 }
3816 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3817 config->spares[i] = raidPtr->Disks[j];
3818 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3819 /* XXX: raidctl(8) expects to see this as a used spare */
3820 config->spares[i].status = rf_ds_used_spare;
3821 }
3822 }
3823 return 0;
3824 }
3825
3826 int
3827 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3828 {
3829 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3830 RF_ComponentLabel_t *raid_clabel;
3831 int column = clabel->column;
3832
3833 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3834 return EINVAL;
3835 raid_clabel = raidget_component_label(raidPtr, column);
3836 memcpy(clabel, raid_clabel, sizeof *clabel);
3837
3838 return 0;
3839 }
3840
3841 /*
3842 * Module interface
3843 */
3844
3845 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr");
3846
3847 #ifdef _MODULE
3848 CFDRIVER_DECL(raid, DV_DISK, NULL);
3849 #endif
3850
3851 static int raid_modcmd(modcmd_t, void *);
3852 static int raid_modcmd_init(void);
3853 static int raid_modcmd_fini(void);
3854
3855 static int
3856 raid_modcmd(modcmd_t cmd, void *data)
3857 {
3858 int error;
3859
3860 error = 0;
3861 switch (cmd) {
3862 case MODULE_CMD_INIT:
3863 error = raid_modcmd_init();
3864 break;
3865 case MODULE_CMD_FINI:
3866 error = raid_modcmd_fini();
3867 break;
3868 default:
3869 error = ENOTTY;
3870 break;
3871 }
3872 return error;
3873 }
3874
/*
 * raid_modcmd_init: module initialization.  Sets up the global lock
 * and (optionally) the spare-table wait machinery, attaches the
 * devsw, cfdriver and cfattach entries -- rolling back in reverse
 * order on failure -- boots RAIDframe, and registers the finalizer
 * that performs boot-time auto-configuration.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 presumably lets devsw_attach() pick the majors -- confirm
	   against devsw_attach(9).  EEXIST (already attached, e.g.
	   builtin) is tolerated. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* roll back the devsw attach */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* roll back cfdriver and devsw attaches */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* error is always 0 here (all failures returned above) */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		/* non-fatal: continue without boot-time autoconfig */
		error = 0;
	}

	return error;
}
3945
/*
 * raid_modcmd_fini: module teardown.  Refuses to unload while any
 * raid unit exists, then detaches cfattach, cfdriver and devsw in
 * reverse order of attachment -- re-attaching what was already
 * detached if a later step fails -- and finally shuts RAIDframe down
 * and destroys the global lock.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist.  */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* roll back: restore the cfattach we just removed */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* roll back: restore cfdriver and cfattach */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
3995