/*	$NetBSD: rf_netbsdkintf.c,v 1.298.2.4 2014/08/20 00:03:49 tls Exp $	*/
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.298.2.4 2014/08/20 00:03:49 tls Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129
130 #include <prop/proplib.h>
131
132 #include <dev/raidframe/raidframevar.h>
133 #include <dev/raidframe/raidframeio.h>
134 #include <dev/raidframe/rf_paritymap.h>
135
136 #include "rf_raid.h"
137 #include "rf_copyback.h"
138 #include "rf_dag.h"
139 #include "rf_dagflags.h"
140 #include "rf_desc.h"
141 #include "rf_diskqueue.h"
142 #include "rf_etimer.h"
143 #include "rf_general.h"
144 #include "rf_kintf.h"
145 #include "rf_options.h"
146 #include "rf_driver.h"
147 #include "rf_parityscan.h"
148 #include "rf_threadstuff.h"
149
150 #ifdef COMPAT_50
151 #include "rf_compat50.h"
152 #endif
153
154 #ifdef DEBUG
155 int rf_kdebug_level = 0;
156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
157 #else /* DEBUG */
158 #define db1_printf(a) { }
159 #endif /* DEBUG */
160
161 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
162 static rf_declare_mutex2(rf_sparet_wait_mutex);
163 static rf_declare_cond2(rf_sparet_wait_cv);
164 static rf_declare_cond2(rf_sparet_resp_cv);
165
166 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
167 * spare table */
168 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
169 * installation process */
170 #endif
171
172 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
173
174 /* prototypes */
175 static void KernelWakeupFunc(struct buf *);
176 static void InitBP(struct buf *, struct vnode *, unsigned,
177 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
178 void *, int, struct proc *);
179 struct raid_softc;
180 static void raidinit(struct raid_softc *);
181
182 void raidattach(int);
183 static int raid_match(device_t, cfdata_t, void *);
184 static void raid_attach(device_t, device_t, void *);
185 static int raid_detach(device_t, int);
186
187 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
188 daddr_t, daddr_t);
189 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t, int);
191
192 static int raidwrite_component_label(unsigned,
193 dev_t, struct vnode *, RF_ComponentLabel_t *);
194 static int raidread_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196
197
198 dev_type_open(raidopen);
199 dev_type_close(raidclose);
200 dev_type_read(raidread);
201 dev_type_write(raidwrite);
202 dev_type_ioctl(raidioctl);
203 dev_type_strategy(raidstrategy);
204 dev_type_dump(raiddump);
205 dev_type_size(raidsize);
206
/*
 * Block-device switch: entry points used when raidN is accessed as a
 * block device (mounted filesystems, swap, crash dumps).
 */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

/*
 * Character-device switch: raw (character) access to the same unit.
 * Reads and writes go through physio() via raidread()/raidwrite().
 */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
232
233 static void raidminphys(struct buf *);
234
235 static struct dkdriver rf_dkdriver = { raidstrategy, raidminphys };
236
/*
 * Per-unit software state for one RAID pseudo-device.  Units are
 * created lazily by raidget() and linked on the global `raids' list
 * (protected by raid_lock).
 */
struct raid_softc {
	device_t sc_dev;	/* autoconf device handle */
	int     sc_unit;	/* raidN unit number */
	int     sc_flags;	/* flags (RAIDF_*, below) */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;		/* RAIDframe core state for this set */
	LIST_ENTRY(raid_softc) sc_link;	/* linkage on the global `raids' list */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* extract the unit number from a raid dev_t */
#define	raidunit(x)	DISKUNIT(x)
258
259 extern struct cfdriver raid_cd;
260 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
261 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
262 DVF_DETACH_SHUTDOWN);
263
264 /*
265 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
266 * Be aware that large numbers can allow the driver to consume a lot of
267 * kernel memory, especially on writes, and in degraded mode reads.
268 *
269 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
270 * a single 64K write will typically require 64K for the old data,
271 * 64K for the old parity, and 64K for the new parity, for a total
272 * of 192K (if the parity buffer is not re-used immediately).
273 * Even it if is used immediately, that's still 128K, which when multiplied
274 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
275 *
276 * Now in degraded mode, for example, a 64K read on the above setup may
277 * require data reconstruction, which will require *all* of the 4 remaining
278 * disks to participate -- 4 * 32K/disk == 128K again.
279 */
280
281 #ifndef RAIDOUTSTANDING
282 #define RAIDOUTSTANDING 6
283 #endif
284
285 #define RAIDLABELDEV(dev) \
286 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
287
288 /* declared here, and made public, for the benefit of KVM stuff.. */
289
290 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
291 struct disklabel *);
292 static void raidgetdisklabel(dev_t);
293 static void raidmakedisklabel(struct raid_softc *);
294
295 static int raidlock(struct raid_softc *);
296 static void raidunlock(struct raid_softc *);
297
298 static int raid_detach_unlocked(struct raid_softc *);
299
300 static void rf_markalldirty(RF_Raid_t *);
301 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
302
303 void rf_ReconThread(struct rf_recon_req *);
304 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
305 void rf_CopybackThread(RF_Raid_t *raidPtr);
306 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
307 int rf_autoconfig(device_t);
308 void rf_buildroothack(RF_ConfigSet_t *);
309
310 RF_AutoConfig_t *rf_find_raid_components(void);
311 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
312 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
313 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
314 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
315 int rf_set_autoconfig(RF_Raid_t *, int);
316 int rf_set_rootpartition(RF_Raid_t *, int);
317 void rf_release_all_vps(RF_ConfigSet_t *);
318 void rf_cleanup_config_set(RF_ConfigSet_t *);
319 int rf_have_enough_components(RF_ConfigSet_t *);
320 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
321 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
322
323 /*
324 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
325 * Note that this is overridden by having RAID_AUTOCONFIG as an option
326 * in the kernel config file.
327 */
328 #ifdef RAID_AUTOCONFIG
329 int raidautoconfig = 1;
330 #else
331 int raidautoconfig = 0;
332 #endif
333 static bool raidautoconfigdone = false;
334
335 struct RF_Pools_s rf_pools;
336
337 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
338 static kmutex_t raid_lock;
339
340 static struct raid_softc *
341 raidcreate(int unit) {
342 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
343 if (sc == NULL) {
344 #ifdef DIAGNOSTIC
345 printf("%s: out of memory\n", __func__);
346 #endif
347 return NULL;
348 }
349 sc->sc_unit = unit;
350 bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
351 return sc;
352 }
353
/*
 * raiddestroy: release a raid_softc created by raidcreate().  The
 * buffer queue must be freed before the structure that owns it; the
 * caller must already have unlinked sc from the global list.
 */
static void
raiddestroy(struct raid_softc *sc) {
	bufq_free(sc->buf_queue);
	kmem_free(sc, sizeof(*sc));
}
359
360 static struct raid_softc *
361 raidget(int unit) {
362 struct raid_softc *sc;
363 if (unit < 0) {
364 #ifdef DIAGNOSTIC
365 panic("%s: unit %d!", __func__, unit);
366 #endif
367 return NULL;
368 }
369 mutex_enter(&raid_lock);
370 LIST_FOREACH(sc, &raids, sc_link) {
371 if (sc->sc_unit == unit) {
372 mutex_exit(&raid_lock);
373 return sc;
374 }
375 }
376 mutex_exit(&raid_lock);
377 if ((sc = raidcreate(unit)) == NULL)
378 return NULL;
379 mutex_enter(&raid_lock);
380 LIST_INSERT_HEAD(&raids, sc, sc_link);
381 mutex_exit(&raid_lock);
382 return sc;
383 }
384
/*
 * raidput: unlink sc from the global `raids' list (under raid_lock)
 * and reclaim it.  sc must not be referenced afterwards.
 */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
392
/*
 * raidattach: pseudo-device attach routine, called once at boot.
 * `num' (the count from the kernel config) is unused here -- units are
 * created lazily by raidget().
 *
 * Initializes driver-global locks and (optionally) the spare-table
 * wait queues, boots the RAIDframe core, hooks up the autoconf
 * cfattach, and registers a config finalizer so RAID auto-configuration
 * runs only after all real hardware has attached.
 */
void
raidattach(int num)
{
	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* This is where all the initialization stuff gets done. */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	if (rf_BootRaidframe() == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
425
/*
 * rf_autoconfig: config_finalize(9) callback.  Performs one-shot
 * auto-configuration of RAID sets once all real devices have attached.
 *
 * Returns 1 if it did work (finalization will loop once more), 0 when
 * auto-config is disabled or has already run.
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}
463
/*
 * rf_containsboot: return non-zero if any component of RAID set `r'
 * lives on device `bdv' (typically booted_device).  For wedge ("dkN")
 * components, the comparison is made against the wedge's parent disk
 * name instead of the wedge itself.
 *
 * NOTE(review): the match is a strncmp() prefix test of length
 * strlen(bootname), so boot device "sd1" would also match a component
 * on "sd10" -- presumably tolerable in practice, but worth confirming.
 */
static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname = device_xname(bdv);
	size_t len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		/* skip the leading "/dev/" of the component path */
		devname += sizeof("/dev/") - 1;
		if (strncmp(devname, "dk", 2) == 0) {
			/* wedge component: compare the parent disk name */
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}
487
/*
 * rf_buildroothack: walk the list of auto-configured component sets,
 * configure every eligible one, and then decide whether a configured
 * set should become the root device.
 *
 * A set is configured when it has enough components and its labels
 * request autoconfiguration; sets that are skipped or fail have their
 * component vnodes released.  Root selection: with exactly one rootable
 * set we (possibly) override booted_device; with several candidates we
 * try to disambiguate via booted_device (MD code), else fall back to
 * RB_ASKNAME.  If the user hardwired root (rootspec), we touch nothing.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	struct raid_softc *sc, *rsc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		/* override root if forced (root_partition == 1), if there
		   is no boot device, or if the boot device is a component
		   of this set */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
598
599
600 int
601 raidsize(dev_t dev)
602 {
603 struct raid_softc *rs;
604 struct disklabel *lp;
605 int part, unit, omask, size;
606
607 unit = raidunit(dev);
608 if ((rs = raidget(unit)) == NULL)
609 return -1;
610 if ((rs->sc_flags & RAIDF_INITED) == 0)
611 return (-1);
612
613 part = DISKPART(dev);
614 omask = rs->sc_dkdev.dk_openmask & (1 << part);
615 lp = rs->sc_dkdev.dk_label;
616
617 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
618 return (-1);
619
620 if (lp->d_partitions[part].p_fstype != FS_SWAP)
621 size = -1;
622 else
623 size = lp->d_partitions[part].p_size *
624 (lp->d_secsize / DEV_BSIZE);
625
626 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
627 return (-1);
628
629 return (size);
630
631 }
632
/*
 * raiddump: crash-dump entry point.  Writes `size' bytes at `va' to
 * block `blkno' of the dump partition.
 *
 * Only RAID 1 sets (1 data + 1 parity column) are supported, since a
 * dump must go to a single component in one contiguous write.  A live
 * component is preferred; failing that, a used spare (spared master
 * before spared slave).  The final write goes directly through the
 * component's bdevsw d_dump, offset by the partition start plus
 * RF_PROTECTED_SECTORS.  Returns 0 or an errno.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	raidPtr = &rs->sc_r;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* dumps must be in whole DEV_BSIZE blocks */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* refuse to write past the end of the RAID device */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
/* ARGSUSED */
/*
 * raidopen: open entry point for both block and character devices.
 *
 * Refuses opens while the unit is shutting down, and non-raw opens
 * while wedges are configured.  On the first open of a configured set,
 * reads the disklabel and marks all components dirty so that an
 * unclean shutdown can be detected later.  Tracks per-mode open masks
 * so the unit cannot be unconfigured while in use.
 */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* first open of a configured set: (re)read the disklabel */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * raidclose: close entry point for both block and character devices.
 *
 * Clears the per-mode open bit for this partition.  On the last close
 * of a still-configured set, writes out final ("clean") component
 * labels so a later boot knows the set was shut down tidily.  Always
 * returns 0 once the unit lock has been obtained.
 */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
		    RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
		 */
	}

	raidunlock(rs);
	return (0);

}
902
/*
 * raidstrategy: block I/O entry point.
 *
 * Validates the unit and buffer, bounds-checks the transfer against
 * the raw device size (RAW_PART) or the disklabel (other partitions),
 * then queues the buffer on the per-unit bufq and wakes the RAIDframe
 * I/O thread via iodone_cv.  On any error the buffer is completed
 * immediately with biodone() and b_resid set to the full count.
 */
void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size; /* device size in DEV_BSIZE unit */

		/* convert totalSectors to DEV_BSIZE units, shifting in
		   whichever direction the sector size requires */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* scheduled the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
973 /* ARGSUSED */
974 int
975 raidread(dev_t dev, struct uio *uio, int flags)
976 {
977 int unit = raidunit(dev);
978 struct raid_softc *rs;
979
980 if ((rs = raidget(unit)) == NULL)
981 return ENXIO;
982
983 if ((rs->sc_flags & RAIDF_INITED) == 0)
984 return (ENXIO);
985
986 return (physio(raidstrategy, NULL, dev, B_READ, raidminphys, uio));
987
988 }
989 /* ARGSUSED */
990 int
991 raidwrite(dev_t dev, struct uio *uio, int flags)
992 {
993 int unit = raidunit(dev);
994 struct raid_softc *rs;
995
996 if ((rs = raidget(unit)) == NULL)
997 return ENXIO;
998
999 if ((rs->sc_flags & RAIDF_INITED) == 0)
1000 return (ENXIO);
1001
1002 return (physio(raidstrategy, NULL, dev, B_WRITE, raidminphys, uio));
1003
1004 }
1005
/*
 * raid_detach_unlocked: tear down a RAID unit.  The caller must hold
 * the unit lock (raidlock()).
 *
 * Fails with EBUSY while any partition is open.  If the set is
 * configured, the RAIDframe core is shut down first; then the disk(9)
 * structures are detached and destroyed.  Returns 0 on success or an
 * errno from rf_Shutdown().
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}
1037
1038 int
1039 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1040 {
1041 int unit = raidunit(dev);
1042 int error = 0;
1043 int part, pmask, s;
1044 cfdata_t cf;
1045 struct raid_softc *rs;
1046 RF_Config_t *k_cfg, *u_cfg;
1047 RF_Raid_t *raidPtr;
1048 RF_RaidDisk_t *diskPtr;
1049 RF_AccTotals_t *totals;
1050 RF_DeviceConfig_t *d_cfg, **ucfgp;
1051 u_char *specific_buf;
1052 int retcode = 0;
1053 int column;
1054 /* int raidid; */
1055 struct rf_recon_req *rrcopy, *rr;
1056 RF_ComponentLabel_t *clabel;
1057 RF_ComponentLabel_t *ci_label;
1058 RF_ComponentLabel_t **clabel_ptr;
1059 RF_SingleComponent_t *sparePtr,*componentPtr;
1060 RF_SingleComponent_t component;
1061 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
1062 int i, j, d;
1063 #ifdef __HAVE_OLD_DISKLABEL
1064 struct disklabel newlabel;
1065 #endif
1066 struct dkwedge_info *dkw;
1067
1068 if ((rs = raidget(unit)) == NULL)
1069 return ENXIO;
1070 raidPtr = &rs->sc_r;
1071
1072 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1073 (int) DISKPART(dev), (int) unit, cmd));
1074
1075 /* Must be open for writes for these commands... */
1076 switch (cmd) {
1077 #ifdef DIOCGSECTORSIZE
1078 case DIOCGSECTORSIZE:
1079 *(u_int *)data = raidPtr->bytesPerSector;
1080 return 0;
1081 case DIOCGMEDIASIZE:
1082 *(off_t *)data =
1083 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1084 return 0;
1085 #endif
1086 case DIOCSDINFO:
1087 case DIOCWDINFO:
1088 #ifdef __HAVE_OLD_DISKLABEL
1089 case ODIOCWDINFO:
1090 case ODIOCSDINFO:
1091 #endif
1092 case DIOCWLABEL:
1093 case DIOCAWEDGE:
1094 case DIOCDWEDGE:
1095 case DIOCSSTRATEGY:
1096 if ((flag & FWRITE) == 0)
1097 return (EBADF);
1098 }
1099
1100 /* Must be initialized for these... */
1101 switch (cmd) {
1102 case DIOCGDINFO:
1103 case DIOCSDINFO:
1104 case DIOCWDINFO:
1105 #ifdef __HAVE_OLD_DISKLABEL
1106 case ODIOCGDINFO:
1107 case ODIOCWDINFO:
1108 case ODIOCSDINFO:
1109 case ODIOCGDEFLABEL:
1110 #endif
1111 case DIOCGPART:
1112 case DIOCWLABEL:
1113 case DIOCGDEFLABEL:
1114 case DIOCAWEDGE:
1115 case DIOCDWEDGE:
1116 case DIOCLWEDGES:
1117 case DIOCCACHESYNC:
1118 case RAIDFRAME_SHUTDOWN:
1119 case RAIDFRAME_REWRITEPARITY:
1120 case RAIDFRAME_GET_INFO:
1121 case RAIDFRAME_RESET_ACCTOTALS:
1122 case RAIDFRAME_GET_ACCTOTALS:
1123 case RAIDFRAME_KEEP_ACCTOTALS:
1124 case RAIDFRAME_GET_SIZE:
1125 case RAIDFRAME_FAIL_DISK:
1126 case RAIDFRAME_COPYBACK:
1127 case RAIDFRAME_CHECK_RECON_STATUS:
1128 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1129 case RAIDFRAME_GET_COMPONENT_LABEL:
1130 case RAIDFRAME_SET_COMPONENT_LABEL:
1131 case RAIDFRAME_ADD_HOT_SPARE:
1132 case RAIDFRAME_REMOVE_HOT_SPARE:
1133 case RAIDFRAME_INIT_LABELS:
1134 case RAIDFRAME_REBUILD_IN_PLACE:
1135 case RAIDFRAME_CHECK_PARITY:
1136 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1137 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1138 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1139 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1140 case RAIDFRAME_SET_AUTOCONFIG:
1141 case RAIDFRAME_SET_ROOT:
1142 case RAIDFRAME_DELETE_COMPONENT:
1143 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1144 case RAIDFRAME_PARITYMAP_STATUS:
1145 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1146 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1147 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1148 case DIOCGSTRATEGY:
1149 case DIOCSSTRATEGY:
1150 if ((rs->sc_flags & RAIDF_INITED) == 0)
1151 return (ENXIO);
1152 }
1153
1154 switch (cmd) {
1155 #ifdef COMPAT_50
1156 case RAIDFRAME_GET_INFO50:
1157 return rf_get_info50(raidPtr, data);
1158
1159 case RAIDFRAME_CONFIGURE50:
1160 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1161 return retcode;
1162 goto config;
1163 #endif
1164 /* configure the system */
1165 case RAIDFRAME_CONFIGURE:
1166
1167 if (raidPtr->valid) {
1168 /* There is a valid RAID set running on this unit! */
1169 printf("raid%d: Device already configured!\n",unit);
1170 return(EINVAL);
1171 }
1172
1173 /* copy-in the configuration information */
1174 /* data points to a pointer to the configuration structure */
1175
1176 u_cfg = *((RF_Config_t **) data);
1177 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1178 if (k_cfg == NULL) {
1179 return (ENOMEM);
1180 }
1181 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1182 if (retcode) {
1183 RF_Free(k_cfg, sizeof(RF_Config_t));
1184 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1185 retcode));
1186 return (retcode);
1187 }
1188 goto config;
1189 config:
1190 /* allocate a buffer for the layout-specific data, and copy it
1191 * in */
1192 if (k_cfg->layoutSpecificSize) {
1193 if (k_cfg->layoutSpecificSize > 10000) {
1194 /* sanity check */
1195 RF_Free(k_cfg, sizeof(RF_Config_t));
1196 return (EINVAL);
1197 }
1198 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1199 (u_char *));
1200 if (specific_buf == NULL) {
1201 RF_Free(k_cfg, sizeof(RF_Config_t));
1202 return (ENOMEM);
1203 }
1204 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1205 k_cfg->layoutSpecificSize);
1206 if (retcode) {
1207 RF_Free(k_cfg, sizeof(RF_Config_t));
1208 RF_Free(specific_buf,
1209 k_cfg->layoutSpecificSize);
1210 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1211 retcode));
1212 return (retcode);
1213 }
1214 } else
1215 specific_buf = NULL;
1216 k_cfg->layoutSpecific = specific_buf;
1217
1218 /* should do some kind of sanity check on the configuration.
1219 * Store the sum of all the bytes in the last byte? */
1220
1221 /* configure the system */
1222
1223 /*
1224 * Clear the entire RAID descriptor, just to make sure
1225 * there is no stale data left in the case of a
1226 * reconfiguration
1227 */
1228 memset(raidPtr, 0, sizeof(*raidPtr));
1229 raidPtr->softc = rs;
1230 raidPtr->raidid = unit;
1231
1232 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1233
1234 if (retcode == 0) {
1235
1236 /* allow this many simultaneous IO's to
1237 this RAID device */
1238 raidPtr->openings = RAIDOUTSTANDING;
1239
1240 raidinit(rs);
1241 rf_markalldirty(raidPtr);
1242 }
1243 /* free the buffers. No return code here. */
1244 if (k_cfg->layoutSpecificSize) {
1245 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1246 }
1247 RF_Free(k_cfg, sizeof(RF_Config_t));
1248
1249 return (retcode);
1250
1251 /* shutdown the system */
1252 case RAIDFRAME_SHUTDOWN:
1253
1254 part = DISKPART(dev);
1255 pmask = (1 << part);
1256
1257 if ((error = raidlock(rs)) != 0)
1258 return (error);
1259
1260 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1261 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1262 (rs->sc_dkdev.dk_copenmask & pmask)))
1263 retcode = EBUSY;
1264 else {
1265 rs->sc_flags |= RAIDF_SHUTDOWN;
1266 rs->sc_dkdev.dk_copenmask &= ~pmask;
1267 rs->sc_dkdev.dk_bopenmask &= ~pmask;
1268 rs->sc_dkdev.dk_openmask &= ~pmask;
1269 retcode = 0;
1270 }
1271
1272 raidunlock(rs);
1273
1274 if (retcode != 0)
1275 return retcode;
1276
1277 /* free the pseudo device attach bits */
1278
1279 cf = device_cfdata(rs->sc_dev);
1280 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
1281 free(cf, M_RAIDFRAME);
1282
1283 return (retcode);
1284 case RAIDFRAME_GET_COMPONENT_LABEL:
1285 clabel_ptr = (RF_ComponentLabel_t **) data;
1286 /* need to read the component label for the disk indicated
1287 by row,column in clabel */
1288
1289 /*
1290 * Perhaps there should be an option to skip the in-core
1291 * copy and hit the disk, as with disklabel(8).
1292 */
1293 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1294
1295 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
1296
1297 if (retcode) {
1298 RF_Free(clabel, sizeof(*clabel));
1299 return retcode;
1300 }
1301
1302 clabel->row = 0; /* Don't allow looking at anything else.*/
1303
1304 column = clabel->column;
1305
1306 if ((column < 0) || (column >= raidPtr->numCol +
1307 raidPtr->numSpare)) {
1308 RF_Free(clabel, sizeof(*clabel));
1309 return EINVAL;
1310 }
1311
1312 RF_Free(clabel, sizeof(*clabel));
1313
1314 clabel = raidget_component_label(raidPtr, column);
1315
1316 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
1317
1318 #if 0
1319 case RAIDFRAME_SET_COMPONENT_LABEL:
1320 clabel = (RF_ComponentLabel_t *) data;
1321
1322 /* XXX check the label for valid stuff... */
1323 /* Note that some things *should not* get modified --
1324 the user should be re-initing the labels instead of
1325 trying to patch things.
1326 */
1327
1328 raidid = raidPtr->raidid;
1329 #ifdef DEBUG
1330 printf("raid%d: Got component label:\n", raidid);
1331 printf("raid%d: Version: %d\n", raidid, clabel->version);
1332 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1333 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1334 printf("raid%d: Column: %d\n", raidid, clabel->column);
1335 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1336 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1337 printf("raid%d: Status: %d\n", raidid, clabel->status);
1338 #endif
1339 clabel->row = 0;
1340 column = clabel->column;
1341
1342 if ((column < 0) || (column >= raidPtr->numCol)) {
1343 return(EINVAL);
1344 }
1345
1346 /* XXX this isn't allowed to do anything for now :-) */
1347
1348 /* XXX and before it is, we need to fill in the rest
1349 of the fields!?!?!?! */
1350 memcpy(raidget_component_label(raidPtr, column),
1351 clabel, sizeof(*clabel));
1352 raidflush_component_label(raidPtr, column);
1353 return (0);
1354 #endif
1355
1356 case RAIDFRAME_INIT_LABELS:
1357 clabel = (RF_ComponentLabel_t *) data;
1358 /*
1359 we only want the serial number from
1360 the above. We get all the rest of the information
1361 from the config that was used to create this RAID
1362 set.
1363 */
1364
1365 raidPtr->serial_number = clabel->serial_number;
1366
1367 for(column=0;column<raidPtr->numCol;column++) {
1368 diskPtr = &raidPtr->Disks[column];
1369 if (!RF_DEAD_DISK(diskPtr->status)) {
1370 ci_label = raidget_component_label(raidPtr,
1371 column);
1372 /* Zeroing this is important. */
1373 memset(ci_label, 0, sizeof(*ci_label));
1374 raid_init_component_label(raidPtr, ci_label);
1375 ci_label->serial_number =
1376 raidPtr->serial_number;
1377 ci_label->row = 0; /* we dont' pretend to support more */
1378 rf_component_label_set_partitionsize(ci_label,
1379 diskPtr->partitionSize);
1380 ci_label->column = column;
1381 raidflush_component_label(raidPtr, column);
1382 }
1383 /* XXXjld what about the spares? */
1384 }
1385
1386 return (retcode);
1387 case RAIDFRAME_SET_AUTOCONFIG:
1388 d = rf_set_autoconfig(raidPtr, *(int *) data);
1389 printf("raid%d: New autoconfig value is: %d\n",
1390 raidPtr->raidid, d);
1391 *(int *) data = d;
1392 return (retcode);
1393
1394 case RAIDFRAME_SET_ROOT:
1395 d = rf_set_rootpartition(raidPtr, *(int *) data);
1396 printf("raid%d: New rootpartition value is: %d\n",
1397 raidPtr->raidid, d);
1398 *(int *) data = d;
1399 return (retcode);
1400
1401 /* initialize all parity */
1402 case RAIDFRAME_REWRITEPARITY:
1403
1404 if (raidPtr->Layout.map->faultsTolerated == 0) {
1405 /* Parity for RAID 0 is trivially correct */
1406 raidPtr->parity_good = RF_RAID_CLEAN;
1407 return(0);
1408 }
1409
1410 if (raidPtr->parity_rewrite_in_progress == 1) {
1411 /* Re-write is already in progress! */
1412 return(EINVAL);
1413 }
1414
1415 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1416 rf_RewriteParityThread,
1417 raidPtr,"raid_parity");
1418 return (retcode);
1419
1420
1421 case RAIDFRAME_ADD_HOT_SPARE:
1422 sparePtr = (RF_SingleComponent_t *) data;
1423 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1424 retcode = rf_add_hot_spare(raidPtr, &component);
1425 return(retcode);
1426
1427 case RAIDFRAME_REMOVE_HOT_SPARE:
1428 return(retcode);
1429
1430 case RAIDFRAME_DELETE_COMPONENT:
1431 componentPtr = (RF_SingleComponent_t *)data;
1432 memcpy( &component, componentPtr,
1433 sizeof(RF_SingleComponent_t));
1434 retcode = rf_delete_component(raidPtr, &component);
1435 return(retcode);
1436
1437 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1438 componentPtr = (RF_SingleComponent_t *)data;
1439 memcpy( &component, componentPtr,
1440 sizeof(RF_SingleComponent_t));
1441 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1442 return(retcode);
1443
1444 case RAIDFRAME_REBUILD_IN_PLACE:
1445
1446 if (raidPtr->Layout.map->faultsTolerated == 0) {
1447 /* Can't do this on a RAID 0!! */
1448 return(EINVAL);
1449 }
1450
1451 if (raidPtr->recon_in_progress == 1) {
1452 /* a reconstruct is already in progress! */
1453 return(EINVAL);
1454 }
1455
1456 componentPtr = (RF_SingleComponent_t *) data;
1457 memcpy( &component, componentPtr,
1458 sizeof(RF_SingleComponent_t));
1459 component.row = 0; /* we don't support any more */
1460 column = component.column;
1461
1462 if ((column < 0) || (column >= raidPtr->numCol)) {
1463 return(EINVAL);
1464 }
1465
1466 rf_lock_mutex2(raidPtr->mutex);
1467 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1468 (raidPtr->numFailures > 0)) {
1469 /* XXX 0 above shouldn't be constant!!! */
1470 /* some component other than this has failed.
1471 Let's not make things worse than they already
1472 are... */
1473 printf("raid%d: Unable to reconstruct to disk at:\n",
1474 raidPtr->raidid);
1475 printf("raid%d: Col: %d Too many failures.\n",
1476 raidPtr->raidid, column);
1477 rf_unlock_mutex2(raidPtr->mutex);
1478 return (EINVAL);
1479 }
1480 if (raidPtr->Disks[column].status ==
1481 rf_ds_reconstructing) {
1482 printf("raid%d: Unable to reconstruct to disk at:\n",
1483 raidPtr->raidid);
1484 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column);
1485
1486 rf_unlock_mutex2(raidPtr->mutex);
1487 return (EINVAL);
1488 }
1489 if (raidPtr->Disks[column].status == rf_ds_spared) {
1490 rf_unlock_mutex2(raidPtr->mutex);
1491 return (EINVAL);
1492 }
1493 rf_unlock_mutex2(raidPtr->mutex);
1494
1495 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1496 if (rrcopy == NULL)
1497 return(ENOMEM);
1498
1499 rrcopy->raidPtr = (void *) raidPtr;
1500 rrcopy->col = column;
1501
1502 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1503 rf_ReconstructInPlaceThread,
1504 rrcopy,"raid_reconip");
1505 return(retcode);
1506
1507 case RAIDFRAME_GET_INFO:
1508 if (!raidPtr->valid)
1509 return (ENODEV);
1510 ucfgp = (RF_DeviceConfig_t **) data;
1511 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1512 (RF_DeviceConfig_t *));
1513 if (d_cfg == NULL)
1514 return (ENOMEM);
1515 d_cfg->rows = 1; /* there is only 1 row now */
1516 d_cfg->cols = raidPtr->numCol;
1517 d_cfg->ndevs = raidPtr->numCol;
1518 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1519 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1520 return (ENOMEM);
1521 }
1522 d_cfg->nspares = raidPtr->numSpare;
1523 if (d_cfg->nspares >= RF_MAX_DISKS) {
1524 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1525 return (ENOMEM);
1526 }
1527 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1528 d = 0;
1529 for (j = 0; j < d_cfg->cols; j++) {
1530 d_cfg->devs[d] = raidPtr->Disks[j];
1531 d++;
1532 }
1533 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1534 d_cfg->spares[i] = raidPtr->Disks[j];
1535 }
1536 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1537 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1538
1539 return (retcode);
1540
1541 case RAIDFRAME_CHECK_PARITY:
1542 *(int *) data = raidPtr->parity_good;
1543 return (0);
1544
1545 case RAIDFRAME_PARITYMAP_STATUS:
1546 if (rf_paritymap_ineligible(raidPtr))
1547 return EINVAL;
1548 rf_paritymap_status(raidPtr->parity_map,
1549 (struct rf_pmstat *)data);
1550 return 0;
1551
1552 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1553 if (rf_paritymap_ineligible(raidPtr))
1554 return EINVAL;
1555 if (raidPtr->parity_map == NULL)
1556 return ENOENT; /* ??? */
1557 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1558 (struct rf_pmparams *)data, 1))
1559 return EINVAL;
1560 return 0;
1561
1562 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1563 if (rf_paritymap_ineligible(raidPtr))
1564 return EINVAL;
1565 *(int *) data = rf_paritymap_get_disable(raidPtr);
1566 return 0;
1567
1568 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1569 if (rf_paritymap_ineligible(raidPtr))
1570 return EINVAL;
1571 rf_paritymap_set_disable(raidPtr, *(int *)data);
1572 /* XXX should errors be passed up? */
1573 return 0;
1574
1575 case RAIDFRAME_RESET_ACCTOTALS:
1576 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1577 return (0);
1578
1579 case RAIDFRAME_GET_ACCTOTALS:
1580 totals = (RF_AccTotals_t *) data;
1581 *totals = raidPtr->acc_totals;
1582 return (0);
1583
1584 case RAIDFRAME_KEEP_ACCTOTALS:
1585 raidPtr->keep_acc_totals = *(int *)data;
1586 return (0);
1587
1588 case RAIDFRAME_GET_SIZE:
1589 *(int *) data = raidPtr->totalSectors;
1590 return (0);
1591
1592 /* fail a disk & optionally start reconstruction */
1593 case RAIDFRAME_FAIL_DISK:
1594
1595 if (raidPtr->Layout.map->faultsTolerated == 0) {
1596 /* Can't do this on a RAID 0!! */
1597 return(EINVAL);
1598 }
1599
1600 rr = (struct rf_recon_req *) data;
1601 rr->row = 0;
1602 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1603 return (EINVAL);
1604
1605
1606 rf_lock_mutex2(raidPtr->mutex);
1607 if (raidPtr->status == rf_rs_reconstructing) {
1608 /* you can't fail a disk while we're reconstructing! */
1609 /* XXX wrong for RAID6 */
1610 rf_unlock_mutex2(raidPtr->mutex);
1611 return (EINVAL);
1612 }
1613 if ((raidPtr->Disks[rr->col].status ==
1614 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1615 /* some other component has failed. Let's not make
1616 things worse. XXX wrong for RAID6 */
1617 rf_unlock_mutex2(raidPtr->mutex);
1618 return (EINVAL);
1619 }
1620 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1621 /* Can't fail a spared disk! */
1622 rf_unlock_mutex2(raidPtr->mutex);
1623 return (EINVAL);
1624 }
1625 rf_unlock_mutex2(raidPtr->mutex);
1626
1627 /* make a copy of the recon request so that we don't rely on
1628 * the user's buffer */
1629 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1630 if (rrcopy == NULL)
1631 return(ENOMEM);
1632 memcpy(rrcopy, rr, sizeof(*rr));
1633 rrcopy->raidPtr = (void *) raidPtr;
1634
1635 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1636 rf_ReconThread,
1637 rrcopy,"raid_recon");
1638 return (0);
1639
1640 /* invoke a copyback operation after recon on whatever disk
1641 * needs it, if any */
1642 case RAIDFRAME_COPYBACK:
1643
1644 if (raidPtr->Layout.map->faultsTolerated == 0) {
1645 /* This makes no sense on a RAID 0!! */
1646 return(EINVAL);
1647 }
1648
1649 if (raidPtr->copyback_in_progress == 1) {
1650 /* Copyback is already in progress! */
1651 return(EINVAL);
1652 }
1653
1654 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1655 rf_CopybackThread,
1656 raidPtr,"raid_copyback");
1657 return (retcode);
1658
1659 /* return the percentage completion of reconstruction */
1660 case RAIDFRAME_CHECK_RECON_STATUS:
1661 if (raidPtr->Layout.map->faultsTolerated == 0) {
1662 /* This makes no sense on a RAID 0, so tell the
1663 user it's done. */
1664 *(int *) data = 100;
1665 return(0);
1666 }
1667 if (raidPtr->status != rf_rs_reconstructing)
1668 *(int *) data = 100;
1669 else {
1670 if (raidPtr->reconControl->numRUsTotal > 0) {
1671 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1672 } else {
1673 *(int *) data = 0;
1674 }
1675 }
1676 return (0);
1677 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1678 progressInfoPtr = (RF_ProgressInfo_t **) data;
1679 if (raidPtr->status != rf_rs_reconstructing) {
1680 progressInfo.remaining = 0;
1681 progressInfo.completed = 100;
1682 progressInfo.total = 100;
1683 } else {
1684 progressInfo.total =
1685 raidPtr->reconControl->numRUsTotal;
1686 progressInfo.completed =
1687 raidPtr->reconControl->numRUsComplete;
1688 progressInfo.remaining = progressInfo.total -
1689 progressInfo.completed;
1690 }
1691 retcode = copyout(&progressInfo, *progressInfoPtr,
1692 sizeof(RF_ProgressInfo_t));
1693 return (retcode);
1694
1695 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1696 if (raidPtr->Layout.map->faultsTolerated == 0) {
1697 /* This makes no sense on a RAID 0, so tell the
1698 user it's done. */
1699 *(int *) data = 100;
1700 return(0);
1701 }
1702 if (raidPtr->parity_rewrite_in_progress == 1) {
1703 *(int *) data = 100 *
1704 raidPtr->parity_rewrite_stripes_done /
1705 raidPtr->Layout.numStripe;
1706 } else {
1707 *(int *) data = 100;
1708 }
1709 return (0);
1710
1711 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1712 progressInfoPtr = (RF_ProgressInfo_t **) data;
1713 if (raidPtr->parity_rewrite_in_progress == 1) {
1714 progressInfo.total = raidPtr->Layout.numStripe;
1715 progressInfo.completed =
1716 raidPtr->parity_rewrite_stripes_done;
1717 progressInfo.remaining = progressInfo.total -
1718 progressInfo.completed;
1719 } else {
1720 progressInfo.remaining = 0;
1721 progressInfo.completed = 100;
1722 progressInfo.total = 100;
1723 }
1724 retcode = copyout(&progressInfo, *progressInfoPtr,
1725 sizeof(RF_ProgressInfo_t));
1726 return (retcode);
1727
1728 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1729 if (raidPtr->Layout.map->faultsTolerated == 0) {
1730 /* This makes no sense on a RAID 0 */
1731 *(int *) data = 100;
1732 return(0);
1733 }
1734 if (raidPtr->copyback_in_progress == 1) {
1735 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1736 raidPtr->Layout.numStripe;
1737 } else {
1738 *(int *) data = 100;
1739 }
1740 return (0);
1741
1742 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1743 progressInfoPtr = (RF_ProgressInfo_t **) data;
1744 if (raidPtr->copyback_in_progress == 1) {
1745 progressInfo.total = raidPtr->Layout.numStripe;
1746 progressInfo.completed =
1747 raidPtr->copyback_stripes_done;
1748 progressInfo.remaining = progressInfo.total -
1749 progressInfo.completed;
1750 } else {
1751 progressInfo.remaining = 0;
1752 progressInfo.completed = 100;
1753 progressInfo.total = 100;
1754 }
1755 retcode = copyout(&progressInfo, *progressInfoPtr,
1756 sizeof(RF_ProgressInfo_t));
1757 return (retcode);
1758
1759 /* the sparetable daemon calls this to wait for the kernel to
1760 * need a spare table. this ioctl does not return until a
1761 * spare table is needed. XXX -- calling mpsleep here in the
1762 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1763 * -- I should either compute the spare table in the kernel,
1764 * or have a different -- XXX XXX -- interface (a different
1765 * character device) for delivering the table -- XXX */
1766 #if 0
1767 case RAIDFRAME_SPARET_WAIT:
1768 rf_lock_mutex2(rf_sparet_wait_mutex);
1769 while (!rf_sparet_wait_queue)
1770 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1771 waitreq = rf_sparet_wait_queue;
1772 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1773 rf_unlock_mutex2(rf_sparet_wait_mutex);
1774
1775 /* structure assignment */
1776 *((RF_SparetWait_t *) data) = *waitreq;
1777
1778 RF_Free(waitreq, sizeof(*waitreq));
1779 return (0);
1780
1781 /* wakes up a process waiting on SPARET_WAIT and puts an error
1782 * code in it that will cause the dameon to exit */
1783 case RAIDFRAME_ABORT_SPARET_WAIT:
1784 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1785 waitreq->fcol = -1;
1786 rf_lock_mutex2(rf_sparet_wait_mutex);
1787 waitreq->next = rf_sparet_wait_queue;
1788 rf_sparet_wait_queue = waitreq;
1789 rf_broadcast_conf2(rf_sparet_wait_cv);
1790 rf_unlock_mutex2(rf_sparet_wait_mutex);
1791 return (0);
1792
1793 /* used by the spare table daemon to deliver a spare table
1794 * into the kernel */
1795 case RAIDFRAME_SEND_SPARET:
1796
1797 /* install the spare table */
1798 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1799
1800 /* respond to the requestor. the return status of the spare
1801 * table installation is passed in the "fcol" field */
1802 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1803 waitreq->fcol = retcode;
1804 rf_lock_mutex2(rf_sparet_wait_mutex);
1805 waitreq->next = rf_sparet_resp_queue;
1806 rf_sparet_resp_queue = waitreq;
1807 rf_broadcast_cond2(rf_sparet_resp_cv);
1808 rf_unlock_mutex2(rf_sparet_wait_mutex);
1809
1810 return (retcode);
1811 #endif
1812
1813 default:
1814 break; /* fall through to the os-specific code below */
1815
1816 }
1817
1818 if (!raidPtr->valid)
1819 return (EINVAL);
1820
1821 /*
1822 * Add support for "regular" device ioctls here.
1823 */
1824
1825 error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
1826 if (error != EPASSTHROUGH)
1827 return (error);
1828
1829 switch (cmd) {
1830 case DIOCGDINFO:
1831 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1832 break;
1833 #ifdef __HAVE_OLD_DISKLABEL
1834 case ODIOCGDINFO:
1835 newlabel = *(rs->sc_dkdev.dk_label);
1836 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1837 return ENOTTY;
1838 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1839 break;
1840 #endif
1841
1842 case DIOCGPART:
1843 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1844 ((struct partinfo *) data)->part =
1845 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1846 break;
1847
1848 case DIOCWDINFO:
1849 case DIOCSDINFO:
1850 #ifdef __HAVE_OLD_DISKLABEL
1851 case ODIOCWDINFO:
1852 case ODIOCSDINFO:
1853 #endif
1854 {
1855 struct disklabel *lp;
1856 #ifdef __HAVE_OLD_DISKLABEL
1857 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1858 memset(&newlabel, 0, sizeof newlabel);
1859 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1860 lp = &newlabel;
1861 } else
1862 #endif
1863 lp = (struct disklabel *)data;
1864
1865 if ((error = raidlock(rs)) != 0)
1866 return (error);
1867
1868 rs->sc_flags |= RAIDF_LABELLING;
1869
1870 error = setdisklabel(rs->sc_dkdev.dk_label,
1871 lp, 0, rs->sc_dkdev.dk_cpulabel);
1872 if (error == 0) {
1873 if (cmd == DIOCWDINFO
1874 #ifdef __HAVE_OLD_DISKLABEL
1875 || cmd == ODIOCWDINFO
1876 #endif
1877 )
1878 error = writedisklabel(RAIDLABELDEV(dev),
1879 raidstrategy, rs->sc_dkdev.dk_label,
1880 rs->sc_dkdev.dk_cpulabel);
1881 }
1882 rs->sc_flags &= ~RAIDF_LABELLING;
1883
1884 raidunlock(rs);
1885
1886 if (error)
1887 return (error);
1888 break;
1889 }
1890
1891 case DIOCWLABEL:
1892 if (*(int *) data != 0)
1893 rs->sc_flags |= RAIDF_WLABEL;
1894 else
1895 rs->sc_flags &= ~RAIDF_WLABEL;
1896 break;
1897
1898 case DIOCGDEFLABEL:
1899 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1900 break;
1901
1902 #ifdef __HAVE_OLD_DISKLABEL
1903 case ODIOCGDEFLABEL:
1904 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1905 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1906 return ENOTTY;
1907 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1908 break;
1909 #endif
1910
1911 case DIOCAWEDGE:
1912 case DIOCDWEDGE:
1913 dkw = (void *)data;
1914
1915 /* If the ioctl happens here, the parent is us. */
1916 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1917 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1918
1919 case DIOCLWEDGES:
1920 return dkwedge_list(&rs->sc_dkdev,
1921 (struct dkwedge_list *)data, l);
1922 case DIOCCACHESYNC:
1923 return rf_sync_component_caches(raidPtr);
1924
1925 case DIOCGSTRATEGY:
1926 {
1927 struct disk_strategy *dks = (void *)data;
1928
1929 s = splbio();
1930 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1931 sizeof(dks->dks_name));
1932 splx(s);
1933 dks->dks_paramlen = 0;
1934
1935 return 0;
1936 }
1937
1938 case DIOCSSTRATEGY:
1939 {
1940 struct disk_strategy *dks = (void *)data;
1941 struct bufq_state *new;
1942 struct bufq_state *old;
1943
1944 if (dks->dks_param != NULL) {
1945 return EINVAL;
1946 }
1947 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1948 error = bufq_alloc(&new, dks->dks_name,
1949 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1950 if (error) {
1951 return error;
1952 }
1953 s = splbio();
1954 old = rs->buf_queue;
1955 bufq_move(new, old);
1956 rs->buf_queue = new;
1957 splx(s);
1958 bufq_free(old);
1959
1960 return 0;
1961 }
1962
1963 default:
1964 retcode = ENOTTY;
1965 }
1966 return (retcode);
1967
1968 }
1969
1970
1971 /* raidinit -- complete the rest of the initialization for the
1972 RAIDframe device. */
1973
1974
/*
 * raidinit: finish bringing up an already-rf_Configure()d RAID set.
 *
 * Attaches the pseudo device, initializes and attaches the disk(9)
 * structure, records the set's size, and kicks off wedge discovery.
 * The ordering of the calls below matters; see the comments inline.
 * On config_attach_pseudo() failure the INITED flag is cleared and
 * the function returns with the set left unattached (no error is
 * reported to the caller -- void return).
 */
static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	int     unit;
	RF_Raid_t *raidPtr = &rs->sc_r;

	unit = raidPtr->raidid;


	/* XXX should check return code first... */
	/* NOTE(review): the flag is set before the attach below succeeds;
	   it is rolled back on failure, but there is a window where the
	   set looks INITED without a device. */
	rs->sc_flags |= RAIDF_INITED;

	/* snprintf() bounds the copy; the name is truncated if "raid%d"
	 * cannot fit in sc_xname. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	rs->sc_dev = config_attach_pseudo(cf);

	if (rs->sc_dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		       raidPtr->raidid);
		rs->sc_flags &= ~RAIDF_INITED;
		free(cf, M_RAIDFRAME);
		return;
	}

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
	disk_attach(&rs->sc_dkdev);
	disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* scan for wedges now that the disk is attached */
	dkwedge_discover(&rs->sc_dkdev);

	rf_set_geometry(rs, raidPtr);

}
2026 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
2027 /* wake up the daemon & tell it to get us a spare table
2028 * XXX
2029 * the entries in the queues should be tagged with the raidPtr
2030 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
2032 * XXX
2033 *
2034 * XXX This code is not currently used. GO
2035 */
/*
 * rf_GetSpareTableFromDaemon: hand a spare-table request to the
 * user-space sparetable daemon and wait for its response.
 *
 * Enqueues 'req' on rf_sparet_wait_queue (waking any daemon blocked
 * in RAIDFRAME_SPARET_WAIT), then sleeps until a response appears on
 * rf_sparet_resp_queue.  Both queues are protected by
 * rf_sparet_wait_mutex.  Returns the daemon's status from the
 * response's fcol field.  Caller-supplied 'req' is consumed by the
 * daemon path; the response element dequeued here is freed locally.
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int     retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* rf_wait_cond2() drops the mutex while asleep and re-takes it
	 * before returning (standard condvar protocol); the stale
	 * reference to "mpsleep" predates the rf_*_cond2 API. */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
2059 #endif
2060
2061 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2062 * bp & passes it down.
2063 * any calls originating in the kernel must use non-blocking I/O
2064 * do some extra sanity checking to return "appropriate" error values for
2065 * certain conditions (to make some standard utilities work)
2066 *
2067 * Formerly known as: rf_DoAccessKernel
2068 */
/*
 * raidstart: drain the per-set buffer queue and launch I/O.
 *
 * Pops buffers off rs->buf_queue while raidPtr->openings permits,
 * converts each partition-relative b_blkno to a raid address, sanity
 * checks the range and sector alignment (failing the buf with ENOSPC
 * or EINVAL respectively), and submits the request via rf_DoAccess()
 * as non-blocking async I/O.
 *
 * Lock discipline: raidPtr->mutex is held on entry to each iteration
 * of the while loop (it guards 'openings' and numNewFailures) and is
 * dropped around the bufq/biodone/rf_DoAccess work; every 'continue'
 * path re-acquires it first.  The function exits with the mutex
 * released.
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* drop the lock for the label update; it re-locks raidPtr
		 * internally as needed */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* convert from DEV_BSIZE units to raid sectors */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* NOTE(review): "1 ||" forces this debug branch on
		 * unconditionally -- looks like a leftover; db1_printf is
		 * presumably compiled out in normal builds. */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* range check; the "< raid_addr" etc. comparisons also catch
		 * arithmetic wraparound of the unsigned sum */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that are not a whole number of sectors */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2186
2187
2188
2189
2190 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2191
2192 int
2193 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
2194 {
2195 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2196 struct buf *bp;
2197
2198 req->queue = queue;
2199 bp = req->bp;
2200
2201 switch (req->type) {
2202 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
2203 /* XXX need to do something extra here.. */
2204 /* I'm leaving this in, as I've never actually seen it used,
2205 * and I'd like folks to report it... GO */
2206 printf(("WAKEUP CALLED\n"));
2207 queue->numOutstanding++;
2208
2209 bp->b_flags = 0;
2210 bp->b_private = req;
2211
2212 KernelWakeupFunc(bp);
2213 break;
2214
2215 case RF_IO_TYPE_READ:
2216 case RF_IO_TYPE_WRITE:
2217 #if RF_ACC_TRACE > 0
2218 if (req->tracerec) {
2219 RF_ETIMER_START(req->tracerec->timer);
2220 }
2221 #endif
2222 InitBP(bp, queue->rf_cinfo->ci_vp,
2223 op, queue->rf_cinfo->ci_dev,
2224 req->sectorOffset, req->numSector,
2225 req->buf, KernelWakeupFunc, (void *) req,
2226 queue->raidPtr->logBytesPerSector, req->b_proc);
2227
2228 if (rf_debugKernelAccess) {
2229 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2230 (long) bp->b_blkno));
2231 }
2232 queue->numOutstanding++;
2233 queue->last_deq_sector = req->sectorOffset;
2234 /* acc wouldn't have been let in if there were any pending
2235 * reqs at any other priority */
2236 queue->curPriority = req->priority;
2237
2238 db1_printf(("Going for %c to unit %d col %d\n",
2239 req->type, queue->raidPtr->raidid,
2240 queue->col));
2241 db1_printf(("sector %d count %d (%d bytes) %d\n",
2242 (int) req->sectorOffset, (int) req->numSector,
2243 (int) (req->numSector <<
2244 queue->raidPtr->logBytesPerSector),
2245 (int) queue->raidPtr->logBytesPerSector));
2246
2247 /*
2248 * XXX: drop lock here since this can block at
2249 * least with backing SCSI devices. Retake it
2250 * to minimize fuss with calling interfaces.
2251 */
2252
2253 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2254 bdev_strategy(bp);
2255 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2256 break;
2257
2258 default:
2259 panic("bad req->type in rf_DispatchKernelIO");
2260 }
2261 db1_printf(("Exiting from DispatchKernelIO\n"));
2262
2263 return (0);
2264 }
2265 /* this is the callback function associated with a I/O invoked from
2266 kernel code.
2267 */
/*
 * Callback (b_iodone) for component I/O issued by rf_DispatchKernelIO.
 * Runs from biodone(); recovers the request from bp->b_private, records
 * errors (possibly failing the component), and hands the request to the
 * raidio thread via the raidPtr->iodone queue.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* rf_DispatchKernelIO stashed the request here. */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	/* All iodone-list manipulation happens under iodone_lock. */
	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		/* Account the elapsed disk time to this access's trace. */
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		/* Only fail a component that is currently healthy (optimal
		 * or an in-use spare) and only while we can still tolerate
		 * another failure; otherwise leave the state alone. */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2333
2334
2335 /*
2336 * initialize a buf structure for doing an I/O in the kernel.
2337 */
2338 static void
2339 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2340 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2341 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2342 struct proc *b_proc)
2343 {
2344 /* bp->b_flags = B_PHYS | rw_flag; */
2345 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2346 bp->b_oflags = 0;
2347 bp->b_cflags = 0;
2348 bp->b_bcount = numSect << logBytesPerSector;
2349 bp->b_bufsize = bp->b_bcount;
2350 bp->b_error = 0;
2351 bp->b_dev = dev;
2352 bp->b_data = bf;
2353 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2354 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2355 if (bp->b_bcount == 0) {
2356 panic("bp->b_bcount is zero in InitBP!!");
2357 }
2358 bp->b_proc = b_proc;
2359 bp->b_iodone = cbFunc;
2360 bp->b_private = cbArg;
2361 }
2362
2363 static void
2364 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2365 struct disklabel *lp)
2366 {
2367 memset(lp, 0, sizeof(*lp));
2368
2369 /* fabricate a label... */
2370 lp->d_secperunit = raidPtr->totalSectors;
2371 lp->d_secsize = raidPtr->bytesPerSector;
2372 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2373 lp->d_ntracks = 4 * raidPtr->numCol;
2374 lp->d_ncylinders = raidPtr->totalSectors /
2375 (lp->d_nsectors * lp->d_ntracks);
2376 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2377
2378 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2379 lp->d_type = DTYPE_RAID;
2380 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2381 lp->d_rpm = 3600;
2382 lp->d_interleave = 1;
2383 lp->d_flags = 0;
2384
2385 lp->d_partitions[RAW_PART].p_offset = 0;
2386 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2387 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2388 lp->d_npartitions = RAW_PART + 1;
2389
2390 lp->d_magic = DISKMAGIC;
2391 lp->d_magic2 = DISKMAGIC;
2392 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2393
2394 }
2395 /*
2396 * Read the disklabel from the raid device. If one is not present, fake one
2397 * up.
2398 */
/*
 * Load the disklabel for raid unit `dev' into the softc: start from a
 * fabricated default, then try to read a real label from the raw
 * partition; fall back to a fake label if none is found.  Warns (but
 * does not reject) if the on-disk label disagrees with the array size.
 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const char *errstring;
	struct disklabel *lp;
	struct cpu_disklabel *clp;
	RF_Raid_t *raidPtr;

	if ((rs = raidget(unit)) == NULL)
		return;

	lp = rs->sc_dkdev.dk_label;
	clp = rs->sc_dkdev.dk_cpulabel;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = &rs->sc_r;

	/* Seed lp with sane defaults in case no label can be read. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%" PRIu32 ") != "
			    "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		/* Warn about any partition extending past the array end. */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%" PRIu64 ")\n",
				    unit, rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
2457 /*
2458 * Take care of things one might want to take care of in the event
2459 * that a disklabel isn't present.
2460 */
2461 static void
2462 raidmakedisklabel(struct raid_softc *rs)
2463 {
2464 struct disklabel *lp = rs->sc_dkdev.dk_label;
2465 db1_printf(("Making a label..\n"));
2466
2467 /*
2468 * For historical reasons, if there's no disklabel present
2469 * the raw partition must be marked FS_BSDFFS.
2470 */
2471
2472 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2473
2474 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2475
2476 lp->d_checksum = dkcksum(lp);
2477 }
2478 /*
2479 * Wait interruptibly for an exclusive lock.
2480 *
2481 * XXX
2482 * Several drivers do this; it should be abstracted and made MP-safe.
2483 * (Hmm... where have we seen this warning before :-> GO )
2484 */
2485 static int
2486 raidlock(struct raid_softc *rs)
2487 {
2488 int error;
2489
2490 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2491 rs->sc_flags |= RAIDF_WANTED;
2492 if ((error =
2493 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2494 return (error);
2495 }
2496 rs->sc_flags |= RAIDF_LOCKED;
2497 return (0);
2498 }
2499 /*
2500 * Unlock and wake up any waiters.
2501 */
2502 static void
2503 raidunlock(struct raid_softc *rs)
2504 {
2505
2506 rs->sc_flags &= ~RAIDF_LOCKED;
2507 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2508 rs->sc_flags &= ~RAIDF_WANTED;
2509 wakeup(rs);
2510 }
2511 }
2512
2513
2514 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2515 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2516 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2517
2518 static daddr_t
2519 rf_component_info_offset(void)
2520 {
2521
2522 return RF_COMPONENT_INFO_OFFSET;
2523 }
2524
2525 static daddr_t
2526 rf_component_info_size(unsigned secsize)
2527 {
2528 daddr_t info_size;
2529
2530 KASSERT(secsize);
2531 if (secsize > RF_COMPONENT_INFO_SIZE)
2532 info_size = secsize;
2533 else
2534 info_size = RF_COMPONENT_INFO_SIZE;
2535
2536 return info_size;
2537 }
2538
2539 static daddr_t
2540 rf_parity_map_offset(RF_Raid_t *raidPtr)
2541 {
2542 daddr_t map_offset;
2543
2544 KASSERT(raidPtr->bytesPerSector);
2545 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2546 map_offset = raidPtr->bytesPerSector;
2547 else
2548 map_offset = RF_COMPONENT_INFO_SIZE;
2549 map_offset += rf_component_info_offset();
2550
2551 return map_offset;
2552 }
2553
2554 static daddr_t
2555 rf_parity_map_size(RF_Raid_t *raidPtr)
2556 {
2557 daddr_t map_size;
2558
2559 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2560 map_size = raidPtr->bytesPerSector;
2561 else
2562 map_size = RF_PARITY_MAP_SIZE;
2563
2564 return map_size;
2565 }
2566
2567 int
2568 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2569 {
2570 RF_ComponentLabel_t *clabel;
2571
2572 clabel = raidget_component_label(raidPtr, col);
2573 clabel->clean = RF_RAID_CLEAN;
2574 raidflush_component_label(raidPtr, col);
2575 return(0);
2576 }
2577
2578
2579 int
2580 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2581 {
2582 RF_ComponentLabel_t *clabel;
2583
2584 clabel = raidget_component_label(raidPtr, col);
2585 clabel->clean = RF_RAID_DIRTY;
2586 raidflush_component_label(raidPtr, col);
2587 return(0);
2588 }
2589
/*
 * Read component `col's label from disk into the in-core copy
 * (raid_cinfo[col].ci_label).  Returns the raidread_component_label()
 * error code (0 on success).
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2599
/*
 * Return a pointer to component `col's in-core label.  Callers modify
 * it in place and then use raidflush_component_label() to persist it.
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2605
/*
 * Write component `col's in-core label out to disk, stamping it with
 * the array's current mod_counter first (and, when parity maps are
 * enabled, keeping parity_map_modcount in sync).  Returns the
 * raidwrite_component_label() error code.
 */
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2620
2621
/*
 * Read a component label from the label area of the given component
 * device into *clabel.  Thin wrapper around raidread_component_area()
 * with the standard label offset/size.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2631
2632 /* ARGSUSED */
2633 static int
2634 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2635 size_t msize, daddr_t offset, daddr_t dsize)
2636 {
2637 struct buf *bp;
2638 const struct bdevsw *bdev;
2639 int error;
2640
2641 /* XXX should probably ensure that we don't try to do this if
2642 someone has changed rf_protected_sectors. */
2643
2644 if (b_vp == NULL) {
2645 /* For whatever reason, this component is not valid.
2646 Don't try to read a component label from it. */
2647 return(EINVAL);
2648 }
2649
2650 /* get a block of the appropriate size... */
2651 bp = geteblk((int)dsize);
2652 bp->b_dev = dev;
2653
2654 /* get our ducks in a row for the read */
2655 bp->b_blkno = offset / DEV_BSIZE;
2656 bp->b_bcount = dsize;
2657 bp->b_flags |= B_READ;
2658 bp->b_resid = dsize;
2659
2660 bdev = bdevsw_lookup(bp->b_dev);
2661 if (bdev == NULL)
2662 return (ENXIO);
2663 (*bdev->d_strategy)(bp);
2664
2665 error = biowait(bp);
2666
2667 if (!error) {
2668 memcpy(data, bp->b_data, msize);
2669 }
2670
2671 brelse(bp, 0);
2672 return(error);
2673 }
2674
2675
/*
 * Write *clabel to the label area of the given component device
 * (synchronously).  Thin wrapper around raidwrite_component_area()
 * with the standard label offset/size.
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2685
2686 /* ARGSUSED */
2687 static int
2688 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2689 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2690 {
2691 struct buf *bp;
2692 const struct bdevsw *bdev;
2693 int error;
2694
2695 /* get a block of the appropriate size... */
2696 bp = geteblk((int)dsize);
2697 bp->b_dev = dev;
2698
2699 /* get our ducks in a row for the write */
2700 bp->b_blkno = offset / DEV_BSIZE;
2701 bp->b_bcount = dsize;
2702 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2703 bp->b_resid = dsize;
2704
2705 memset(bp->b_data, 0, dsize);
2706 memcpy(bp->b_data, data, msize);
2707
2708 bdev = bdevsw_lookup(bp->b_dev);
2709 if (bdev == NULL)
2710 return (ENXIO);
2711 (*bdev->d_strategy)(bp);
2712 if (asyncp)
2713 return 0;
2714 error = biowait(bp);
2715 brelse(bp, 0);
2716 if (error) {
2717 #if 1
2718 printf("Failed to write RAID component info!\n");
2719 #endif
2720 }
2721
2722 return(error);
2723 }
2724
2725 void
2726 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2727 {
2728 int c;
2729
2730 for (c = 0; c < raidPtr->numCol; c++) {
2731 /* Skip dead disks. */
2732 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2733 continue;
2734 /* XXXjld: what if an error occurs here? */
2735 raidwrite_component_area(raidPtr->Disks[c].dev,
2736 raidPtr->raid_cinfo[c].ci_vp, map,
2737 RF_PARITYMAP_NBYTE,
2738 rf_parity_map_offset(raidPtr),
2739 rf_parity_map_size(raidPtr), 0);
2740 }
2741 }
2742
2743 void
2744 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2745 {
2746 struct rf_paritymap_ondisk tmp;
2747 int c,first;
2748
2749 first=1;
2750 for (c = 0; c < raidPtr->numCol; c++) {
2751 /* Skip dead disks. */
2752 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2753 continue;
2754 raidread_component_area(raidPtr->Disks[c].dev,
2755 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2756 RF_PARITYMAP_NBYTE,
2757 rf_parity_map_offset(raidPtr),
2758 rf_parity_map_size(raidPtr));
2759 if (first) {
2760 memcpy(map, &tmp, sizeof(*map));
2761 first = 0;
2762 } else {
2763 rf_paritymap_merge(map, &tmp);
2764 }
2765 }
2766 }
2767
/*
 * Bump the array's mod_counter and mark every usable component (and
 * every in-use spare) dirty on disk.  Called when the set goes active
 * so an unclean shutdown can be detected later.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare is standing in for.
			 * NOTE(review): scol stays -1 (or its previous
			 * value) if no column references this spare --
			 * confirm that cannot happen for rf_ds_used_spare. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			/* Re-derive the label from current array state. */
			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2827
2828
/*
 * Push updated component labels (status, last_unit, mod_counter) to
 * every optimal component and every in-use spare.  When `final' is
 * RF_FINAL_COMPONENT_UPDATE and parity is known good, also set the
 * clean bit -- i.e. record an orderly shutdown.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare replaced. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2903
2904 void
2905 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2906 {
2907
2908 if (vp != NULL) {
2909 if (auto_configured == 1) {
2910 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2911 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2912 vput(vp);
2913
2914 } else {
2915 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2916 }
2917 }
2918 }
2919
2920
2921 void
2922 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2923 {
2924 int r,c;
2925 struct vnode *vp;
2926 int acd;
2927
2928
2929 /* We take this opportunity to close the vnodes like we should.. */
2930
2931 for (c = 0; c < raidPtr->numCol; c++) {
2932 vp = raidPtr->raid_cinfo[c].ci_vp;
2933 acd = raidPtr->Disks[c].auto_configured;
2934 rf_close_component(raidPtr, vp, acd);
2935 raidPtr->raid_cinfo[c].ci_vp = NULL;
2936 raidPtr->Disks[c].auto_configured = 0;
2937 }
2938
2939 for (r = 0; r < raidPtr->numSpare; r++) {
2940 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2941 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2942 rf_close_component(raidPtr, vp, acd);
2943 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2944 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2945 }
2946 }
2947
2948
2949 void
2950 rf_ReconThread(struct rf_recon_req *req)
2951 {
2952 int s;
2953 RF_Raid_t *raidPtr;
2954
2955 s = splbio();
2956 raidPtr = (RF_Raid_t *) req->raidPtr;
2957 raidPtr->recon_in_progress = 1;
2958
2959 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2960 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2961
2962 RF_Free(req, sizeof(*req));
2963
2964 raidPtr->recon_in_progress = 0;
2965 splx(s);
2966
2967 /* That's all... */
2968 kthread_exit(0); /* does not return */
2969 }
2970
2971 void
2972 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2973 {
2974 int retcode;
2975 int s;
2976
2977 raidPtr->parity_rewrite_stripes_done = 0;
2978 raidPtr->parity_rewrite_in_progress = 1;
2979 s = splbio();
2980 retcode = rf_RewriteParity(raidPtr);
2981 splx(s);
2982 if (retcode) {
2983 printf("raid%d: Error re-writing parity (%d)!\n",
2984 raidPtr->raidid, retcode);
2985 } else {
2986 /* set the clean bit! If we shutdown correctly,
2987 the clean bit on each component label will get
2988 set */
2989 raidPtr->parity_good = RF_RAID_CLEAN;
2990 }
2991 raidPtr->parity_rewrite_in_progress = 0;
2992
2993 /* Anyone waiting for us to stop? If so, inform them... */
2994 if (raidPtr->waitShutdown) {
2995 wakeup(&raidPtr->parity_rewrite_in_progress);
2996 }
2997
2998 /* That's all... */
2999 kthread_exit(0); /* does not return */
3000 }
3001
3002
3003 void
3004 rf_CopybackThread(RF_Raid_t *raidPtr)
3005 {
3006 int s;
3007
3008 raidPtr->copyback_in_progress = 1;
3009 s = splbio();
3010 rf_CopybackReconstructedData(raidPtr);
3011 splx(s);
3012 raidPtr->copyback_in_progress = 0;
3013
3014 /* That's all... */
3015 kthread_exit(0); /* does not return */
3016 }
3017
3018
3019 void
3020 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
3021 {
3022 int s;
3023 RF_Raid_t *raidPtr;
3024
3025 s = splbio();
3026 raidPtr = req->raidPtr;
3027 raidPtr->recon_in_progress = 1;
3028 rf_ReconstructInPlace(raidPtr, req->col);
3029 RF_Free(req, sizeof(*req));
3030 raidPtr->recon_in_progress = 0;
3031 splx(s);
3032
3033 /* That's all... */
3034 kthread_exit(0); /* does not return */
3035 }
3036
/*
 * Try to read a component label from the open device (dev/vp).  If the
 * label is reasonable, prepend a new RF_AutoConfig_t for it to ac_list
 * and keep vp open; otherwise close vp and free the label.  Returns the
 * (possibly updated) list head, or NULL after freeing the entire list
 * on allocation failure.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* Out of memory: tear down everything collected so far. */
		    while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;	/* keep the vnode open for config */
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: no usable label, so release the vnode too */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3094
/*
 * Scan every disk-class device in the system for RAIDframe components:
 * dk wedges of type RAIDframe, disklabel partitions of type FS_RAID,
 * and (when neither is present) the raw partition itself.  Returns a
 * linked list of RF_AutoConfig_t entries (each with an open vnode and
 * a validated component label), or NULL if none were found.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		/* dk wedges use a flat minor; others use disk/partition
		 * encoding with RAW_PART */
		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* Wedges: accept only RAIDframe-typed wedges. */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component takes ownership of vp on success */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3276
3277
3278 int
3279 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3280 {
3281
3282 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3283 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3284 ((clabel->clean == RF_RAID_CLEAN) ||
3285 (clabel->clean == RF_RAID_DIRTY)) &&
3286 clabel->row >=0 &&
3287 clabel->column >= 0 &&
3288 clabel->num_rows > 0 &&
3289 clabel->num_columns > 0 &&
3290 clabel->row < clabel->num_rows &&
3291 clabel->column < clabel->num_columns &&
3292 clabel->blockSize > 0 &&
3293 /*
3294 * numBlocksHi may contain garbage, but it is ok since
3295 * the type is unsigned. If it is really garbage,
3296 * rf_fix_old_label_size() will fix it.
3297 */
3298 rf_component_label_numblocks(clabel) > 0) {
3299 /*
3300 * label looks reasonable enough...
3301 * let's make sure it has no old garbage.
3302 */
3303 if (numsecs)
3304 rf_fix_old_label_size(clabel, numsecs);
3305 return(1);
3306 }
3307 return(0);
3308 }
3309
3310
3311 /*
3312 * For reasons yet unknown, some old component labels have garbage in
3313 * the newer numBlocksHi region, and this causes lossage. Since those
3314 * disks will also have numsecs set to less than 32 bits of sectors,
3315 * we can determine when this corruption has occurred, and fix it.
3316 *
3317 * The exact same problem, with the same unknown reason, happens to
3318 * the partitionSizeHi member as well.
3319 */
3320 static void
3321 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3322 {
3323
3324 if (numsecs < ((uint64_t)1 << 32)) {
3325 if (clabel->numBlocksHi) {
3326 printf("WARNING: total sectors < 32 bits, yet "
3327 "numBlocksHi set\n"
3328 "WARNING: resetting numBlocksHi to zero.\n");
3329 clabel->numBlocksHi = 0;
3330 }
3331
3332 if (clabel->partitionSizeHi) {
3333 printf("WARNING: total sectors < 32 bits, yet "
3334 "partitionSizeHi set\n"
3335 "WARNING: resetting partitionSizeHi to zero.\n");
3336 clabel->partitionSizeHi = 0;
3337 }
3338 }
3339 }
3340
3341
#ifdef DEBUG
/* Dump the interesting fields of a component label to the console. */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	static const char *rp[] = {
		"No", "Force", "Soft", "*invalid*"
	};
	uint64_t nblk;

	nblk = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, nblk);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif
}
#endif
3375
3376 RF_ConfigSet_t *
3377 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3378 {
3379 RF_AutoConfig_t *ac;
3380 RF_ConfigSet_t *config_sets;
3381 RF_ConfigSet_t *cset;
3382 RF_AutoConfig_t *ac_next;
3383
3384
3385 config_sets = NULL;
3386
3387 /* Go through the AutoConfig list, and figure out which components
3388 belong to what sets. */
3389 ac = ac_list;
3390 while(ac!=NULL) {
3391 /* we're going to putz with ac->next, so save it here
3392 for use at the end of the loop */
3393 ac_next = ac->next;
3394
3395 if (config_sets == NULL) {
3396 /* will need at least this one... */
3397 config_sets = (RF_ConfigSet_t *)
3398 malloc(sizeof(RF_ConfigSet_t),
3399 M_RAIDFRAME, M_NOWAIT);
3400 if (config_sets == NULL) {
3401 panic("rf_create_auto_sets: No memory!");
3402 }
3403 /* this one is easy :) */
3404 config_sets->ac = ac;
3405 config_sets->next = NULL;
3406 config_sets->rootable = 0;
3407 ac->next = NULL;
3408 } else {
3409 /* which set does this component fit into? */
3410 cset = config_sets;
3411 while(cset!=NULL) {
3412 if (rf_does_it_fit(cset, ac)) {
3413 /* looks like it matches... */
3414 ac->next = cset->ac;
3415 cset->ac = ac;
3416 break;
3417 }
3418 cset = cset->next;
3419 }
3420 if (cset==NULL) {
3421 /* didn't find a match above... new set..*/
3422 cset = (RF_ConfigSet_t *)
3423 malloc(sizeof(RF_ConfigSet_t),
3424 M_RAIDFRAME, M_NOWAIT);
3425 if (cset == NULL) {
3426 panic("rf_create_auto_sets: No memory!");
3427 }
3428 cset->ac = ac;
3429 ac->next = NULL;
3430 cset->next = config_sets;
3431 cset->rootable = 0;
3432 config_sets = cset;
3433 }
3434 }
3435 ac = ac_next;
3436 }
3437
3438
3439 return(config_sets);
3440 }
3441
3442 static int
3443 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3444 {
3445 RF_ComponentLabel_t *clabel1, *clabel2;
3446
3447 /* If this one matches the *first* one in the set, that's good
3448 enough, since the other members of the set would have been
3449 through here too... */
3450 /* note that we are not checking partitionSize here..
3451
3452 Note that we are also not checking the mod_counters here.
3453 If everything else matches except the mod_counter, that's
3454 good enough for this test. We will deal with the mod_counters
3455 a little later in the autoconfiguration process.
3456
3457 (clabel1->mod_counter == clabel2->mod_counter) &&
3458
3459 The reason we don't check for this is that failed disks
3460 will have lower modification counts. If those disks are
3461 not added to the set they used to belong to, then they will
3462 form their own set, which may result in 2 different sets,
3463 for example, competing to be configured at raid0, and
3464 perhaps competing to be the root filesystem set. If the
3465 wrong ones get configured, or both attempt to become /,
3466 weird behaviour and or serious lossage will occur. Thus we
3467 need to bring them into the fold here, and kick them out at
3468 a later point.
3469
3470 */
3471
3472 clabel1 = cset->ac->clabel;
3473 clabel2 = ac->clabel;
3474 if ((clabel1->version == clabel2->version) &&
3475 (clabel1->serial_number == clabel2->serial_number) &&
3476 (clabel1->num_rows == clabel2->num_rows) &&
3477 (clabel1->num_columns == clabel2->num_columns) &&
3478 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3479 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3480 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3481 (clabel1->parityConfig == clabel2->parityConfig) &&
3482 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3483 (clabel1->blockSize == clabel2->blockSize) &&
3484 rf_component_label_numblocks(clabel1) ==
3485 rf_component_label_numblocks(clabel2) &&
3486 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3487 (clabel1->root_partition == clabel2->root_partition) &&
3488 (clabel1->last_unit == clabel2->last_unit) &&
3489 (clabel1->config_order == clabel2->config_order)) {
3490 /* if it get's here, it almost *has* to be a match */
3491 } else {
3492 /* it's not consistent with somebody in the set..
3493 punt */
3494 return(0);
3495 }
3496 /* all was fine.. it must fit... */
3497 return(1);
3498 }
3499
/*
 * Decide whether config set 'cset' has enough live, current components
 * to be worth configuring.  Returns 1 if the set can be configured,
 * 0 if too many members are missing for its parity level.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;		/* columns the set is supposed to have */
	int num_missing;	/* columns with no current component */
	int mod_counter;	/* highest mod_counter seen in the set */
	int mod_counter_found;
	int even_pair_failed;	/* RAID 1: even half of current pair missing */
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set. If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	   The authoritative value is the highest one present; components
	   carrying a lower value are stale (e.g. disks that failed and
	   missed later label updates). */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* Look for a component for column 'c' that is current,
		   i.e. carries the winning mod_counter. */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd component, i.e. the
			   second half of a mirror pair, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next pair.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 cannot survive any missing component; RAID 4/5 can
	   survive exactly one.  (RAID 1 was fully handled above.) */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3602
3603 void
3604 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3605 RF_Raid_t *raidPtr)
3606 {
3607 RF_ComponentLabel_t *clabel;
3608 int i;
3609
3610 clabel = ac->clabel;
3611
3612 /* 1. Fill in the common stuff */
3613 config->numRow = clabel->num_rows = 1;
3614 config->numCol = clabel->num_columns;
3615 config->numSpare = 0; /* XXX should this be set here? */
3616 config->sectPerSU = clabel->sectPerSU;
3617 config->SUsPerPU = clabel->SUsPerPU;
3618 config->SUsPerRU = clabel->SUsPerRU;
3619 config->parityConfig = clabel->parityConfig;
3620 /* XXX... */
3621 strcpy(config->diskQueueType,"fifo");
3622 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3623 config->layoutSpecificSize = 0; /* XXX ?? */
3624
3625 while(ac!=NULL) {
3626 /* row/col values will be in range due to the checks
3627 in reasonable_label() */
3628 strcpy(config->devnames[0][ac->clabel->column],
3629 ac->devname);
3630 ac = ac->next;
3631 }
3632
3633 for(i=0;i<RF_MAXDBGV;i++) {
3634 config->debugVars[i][0] = 0;
3635 }
3636 }
3637
3638 int
3639 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3640 {
3641 RF_ComponentLabel_t *clabel;
3642 int column;
3643 int sparecol;
3644
3645 raidPtr->autoconfigure = new_value;
3646
3647 for(column=0; column<raidPtr->numCol; column++) {
3648 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3649 clabel = raidget_component_label(raidPtr, column);
3650 clabel->autoconfigure = new_value;
3651 raidflush_component_label(raidPtr, column);
3652 }
3653 }
3654 for(column = 0; column < raidPtr->numSpare ; column++) {
3655 sparecol = raidPtr->numCol + column;
3656 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3657 clabel = raidget_component_label(raidPtr, sparecol);
3658 clabel->autoconfigure = new_value;
3659 raidflush_component_label(raidPtr, sparecol);
3660 }
3661 }
3662 return(new_value);
3663 }
3664
3665 int
3666 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3667 {
3668 RF_ComponentLabel_t *clabel;
3669 int column;
3670 int sparecol;
3671
3672 raidPtr->root_partition = new_value;
3673 for(column=0; column<raidPtr->numCol; column++) {
3674 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3675 clabel = raidget_component_label(raidPtr, column);
3676 clabel->root_partition = new_value;
3677 raidflush_component_label(raidPtr, column);
3678 }
3679 }
3680 for(column = 0; column < raidPtr->numSpare ; column++) {
3681 sparecol = raidPtr->numCol + column;
3682 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3683 clabel = raidget_component_label(raidPtr, sparecol);
3684 clabel->root_partition = new_value;
3685 raidflush_component_label(raidPtr, sparecol);
3686 }
3687 }
3688 return(new_value);
3689 }
3690
3691 void
3692 rf_release_all_vps(RF_ConfigSet_t *cset)
3693 {
3694 RF_AutoConfig_t *ac;
3695
3696 ac = cset->ac;
3697 while(ac!=NULL) {
3698 /* Close the vp, and give it back */
3699 if (ac->vp) {
3700 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3701 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3702 vput(ac->vp);
3703 ac->vp = NULL;
3704 }
3705 ac = ac->next;
3706 }
3707 }
3708
3709
3710 void
3711 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3712 {
3713 RF_AutoConfig_t *ac;
3714 RF_AutoConfig_t *next_ac;
3715
3716 ac = cset->ac;
3717 while(ac!=NULL) {
3718 next_ac = ac->next;
3719 /* nuke the label */
3720 free(ac->clabel, M_RAIDFRAME);
3721 /* cleanup the config structure */
3722 free(ac, M_RAIDFRAME);
3723 /* "next.." */
3724 ac = next_ac;
3725 }
3726 /* and, finally, nuke the config set */
3727 free(cset, M_RAIDFRAME);
3728 }
3729
3730
/*
 * Populate a component label from the current state of the RAID set.
 * Every field is overwritten; the label is always written out dirty.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* Geometry of the set. */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	/* Striping parameters from the in-core layout. */
	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3763
/*
 * Autoconfigure one config set: build an RF_Config_t from the
 * components' labels and bring the set up at the first free raid unit
 * at or after the unit it was last configured on.  Returns the softc
 * of the configured set, or NULL on failure (out of memory, or
 * rf_Configure() failed).
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("Out of mem!?!?\n");
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* Walk forward from the label's preferred unit until we find a
	   softc that is not already configured. */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed: give the unit back. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3837
3838 void
3839 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3840 {
3841 struct buf *bp;
3842 struct raid_softc *rs;
3843
3844 bp = (struct buf *)desc->bp;
3845 rs = desc->raidPtr->softc;
3846 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3847 (bp->b_flags & B_READ));
3848 }
3849
/*
 * Initialize a RAIDframe item pool: back it at IPL_BIO, cap it at
 * 'xmax' items, and pre-allocate and keep at least 'xmin' items on
 * hand so allocations in the I/O path don't have to sleep.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3859
3860 /*
3861 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
3862 * if there is IO pending and if that IO could possibly be done for a
3863 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3864 * otherwise.
3865 *
3866 */
3867
3868 int
3869 rf_buf_queue_check(RF_Raid_t *raidPtr)
3870 {
3871 struct raid_softc *rs = raidPtr->softc;
3872 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
3873 /* there is work to do */
3874 return 0;
3875 }
3876 /* default is nothing to do */
3877 return 1;
3878 }
3879
3880 int
3881 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3882 {
3883 uint64_t numsecs;
3884 unsigned secsize;
3885 int error;
3886
3887 error = getdisksize(vp, &numsecs, &secsize);
3888 if (error == 0) {
3889 diskPtr->blockSize = secsize;
3890 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3891 diskPtr->partitionSize = numsecs;
3892 return 0;
3893 }
3894 return error;
3895 }
3896
/*
 * Autoconfiguration match routine for the raid pseudo-device.
 * Instances are created on demand, so every probe succeeds.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3902
/*
 * Autoconfiguration attach routine.  Nothing to do here: the real
 * per-unit setup happens when the unit is configured (see raidinit()
 * and the ioctl/autoconfig paths).
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3908
3909
3910 static int
3911 raid_detach(device_t self, int flags)
3912 {
3913 int error;
3914 struct raid_softc *rs = raidget(device_unit(self));
3915
3916 if (rs == NULL)
3917 return ENXIO;
3918
3919 if ((error = raidlock(rs)) != 0)
3920 return (error);
3921
3922 error = raid_detach_unlocked(rs);
3923
3924 raidunlock(rs);
3925
3926 /* XXXkd: raidput(rs) ??? */
3927
3928 return error;
3929 }
3930
/*
 * Synthesize a disk geometry for the logical RAID disk and publish it.
 * The sectors/track and tracks/cylinder values are fabricated (there
 * is no physical geometry for a RAID set); only secperunit and secsize
 * are real.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	/* fabricated: one "track" per data stripe */
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	/* fabricated: scale track count with the number of columns */
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
}
3945
3946 /*
3947 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3948 * We end up returning whatever error was returned by the first cache flush
3949 * that fails.
3950 */
3951
3952 int
3953 rf_sync_component_caches(RF_Raid_t *raidPtr)
3954 {
3955 int c, sparecol;
3956 int e,error;
3957 int force = 1;
3958
3959 error = 0;
3960 for (c = 0; c < raidPtr->numCol; c++) {
3961 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3962 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3963 &force, FWRITE, NOCRED);
3964 if (e) {
3965 if (e != ENODEV)
3966 printf("raid%d: cache flush to component %s failed.\n",
3967 raidPtr->raidid, raidPtr->Disks[c].devname);
3968 if (error == 0) {
3969 error = e;
3970 }
3971 }
3972 }
3973 }
3974
3975 for( c = 0; c < raidPtr->numSpare ; c++) {
3976 sparecol = raidPtr->numCol + c;
3977 /* Need to ensure that the reconstruct actually completed! */
3978 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3979 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3980 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3981 if (e) {
3982 if (e != ENODEV)
3983 printf("raid%d: cache flush to component %s failed.\n",
3984 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3985 if (error == 0) {
3986 error = e;
3987 }
3988 }
3989 }
3990 }
3991 return error;
3992 }
3993
3994 static void
3995 raidminphys(struct buf *bp)
3996 {
3997 dev_t dev;
3998 int unit;
3999 struct raid_softc *rs;
4000 RF_Raid_t *raidPtr;
4001 long xmax;
4002
4003 dev = bp->b_dev;
4004 unit = raidunit(dev);
4005 rs = raidget(unit);
4006 raidPtr = &(rs->sc_r);
4007
4008 xmax = raidPtr->Layout.numDataCol * MAXPHYS;
4009
4010 if (bp->b_bcount > xmax) {
4011 bp->b_bcount = xmax;
4012 }
4013 }
4014