rf_netbsdkintf.c revision 1.295.6.4 1 /* $NetBSD: rf_netbsdkintf.c,v 1.295.6.4 2014/12/22 04:11:38 msaitoh Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.295.6.4 2014/12/22 04:11:38 msaitoh Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #include "raid.h"
110 #endif
111
112 #include <sys/param.h>
113 #include <sys/errno.h>
114 #include <sys/pool.h>
115 #include <sys/proc.h>
116 #include <sys/queue.h>
117 #include <sys/disk.h>
118 #include <sys/device.h>
119 #include <sys/stat.h>
120 #include <sys/ioctl.h>
121 #include <sys/fcntl.h>
122 #include <sys/systm.h>
123 #include <sys/vnode.h>
124 #include <sys/disklabel.h>
125 #include <sys/conf.h>
126 #include <sys/buf.h>
127 #include <sys/bufq.h>
128 #include <sys/reboot.h>
129 #include <sys/kauth.h>
130
131 #include <prop/proplib.h>
132
133 #include <dev/raidframe/raidframevar.h>
134 #include <dev/raidframe/raidframeio.h>
135 #include <dev/raidframe/rf_paritymap.h>
136
137 #include "rf_raid.h"
138 #include "rf_copyback.h"
139 #include "rf_dag.h"
140 #include "rf_dagflags.h"
141 #include "rf_desc.h"
142 #include "rf_diskqueue.h"
143 #include "rf_etimer.h"
144 #include "rf_general.h"
145 #include "rf_kintf.h"
146 #include "rf_options.h"
147 #include "rf_driver.h"
148 #include "rf_parityscan.h"
149 #include "rf_threadstuff.h"
150
151 #ifdef COMPAT_50
152 #include "rf_compat50.h"
153 #endif
154
155 #ifdef DEBUG
156 int rf_kdebug_level = 0;
157 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
158 #else /* DEBUG */
159 #define db1_printf(a) { }
160 #endif /* DEBUG */
161
162 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
163
164 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
165 static rf_declare_mutex2(rf_sparet_wait_mutex);
166 static rf_declare_cond2(rf_sparet_wait_cv);
167 static rf_declare_cond2(rf_sparet_resp_cv);
168
169 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
170 * spare table */
171 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
172 * installation process */
173 #endif
174
175 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
176
177 /* prototypes */
178 static void KernelWakeupFunc(struct buf *);
179 static void InitBP(struct buf *, struct vnode *, unsigned,
180 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
181 void *, int, struct proc *);
182 static void raidinit(RF_Raid_t *);
183
184 void raidattach(int);
185 static int raid_match(device_t, cfdata_t, void *);
186 static void raid_attach(device_t, device_t, void *);
187 static int raid_detach(device_t, int);
188
189 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t);
191 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
192 daddr_t, daddr_t, int);
193
194 static int raidwrite_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196 static int raidread_component_label(unsigned,
197 dev_t, struct vnode *, RF_ComponentLabel_t *);
198
199
200 dev_type_open(raidopen);
201 dev_type_close(raidclose);
202 dev_type_read(raidread);
203 dev_type_write(raidwrite);
204 dev_type_ioctl(raidioctl);
205 dev_type_strategy(raidstrategy);
206 dev_type_dump(raiddump);
207 dev_type_size(raidsize);
208
209 const struct bdevsw raid_bdevsw = {
210 raidopen, raidclose, raidstrategy, raidioctl,
211 raiddump, raidsize, D_DISK
212 };
213
214 const struct cdevsw raid_cdevsw = {
215 raidopen, raidclose, raidread, raidwrite, raidioctl,
216 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
217 };
218
219 static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
220
221 /* XXX Not sure if the following should be replacing the raidPtrs above,
222 or if it should be used in conjunction with that...
223 */
224
/*
 * Per-unit driver state.  One of these exists for each configured
 * raid(4) unit; the array is allocated in raidattach() and indexed
 * by unit number (see raidunit()).
 */
struct raid_softc {
	device_t sc_dev;		/* autoconf device handle */
	int     sc_flags;	/* flags (RAIDF_*, see below) */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device, in DEV_BSIZE blocks */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
};
234 /* sc_flags */
235 #define RAIDF_INITED 0x01 /* unit has been initialized */
236 #define RAIDF_WLABEL 0x02 /* label area is writable */
237 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
238 #define RAIDF_SHUTDOWN 0x08 /* unit is being shutdown */
239 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
240 #define RAIDF_LOCKED 0x80 /* unit is locked */
241
242 #define raidunit(x) DISKUNIT(x)
243 int numraid = 0;
244
245 extern struct cfdriver raid_cd;
246 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
247 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
248 DVF_DETACH_SHUTDOWN);
249
250 /*
251 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
252 * Be aware that large numbers can allow the driver to consume a lot of
253 * kernel memory, especially on writes, and in degraded mode reads.
254 *
255 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
256 * a single 64K write will typically require 64K for the old data,
257 * 64K for the old parity, and 64K for the new parity, for a total
258 * of 192K (if the parity buffer is not re-used immediately).
259 * Even it if is used immediately, that's still 128K, which when multiplied
260 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
261 *
262 * Now in degraded mode, for example, a 64K read on the above setup may
263 * require data reconstruction, which will require *all* of the 4 remaining
264 * disks to participate -- 4 * 32K/disk == 128K again.
265 */
266
267 #ifndef RAIDOUTSTANDING
268 #define RAIDOUTSTANDING 6
269 #endif
270
271 #define RAIDLABELDEV(dev) \
272 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
273
274 /* declared here, and made public, for the benefit of KVM stuff.. */
275 struct raid_softc *raid_softc;
276
277 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
278 struct disklabel *);
279 static void raidgetdisklabel(dev_t);
280 static void raidmakedisklabel(struct raid_softc *);
281
282 static int raidlock(struct raid_softc *);
283 static void raidunlock(struct raid_softc *);
284
285 static int raid_detach_unlocked(struct raid_softc *);
286
287 static void rf_markalldirty(RF_Raid_t *);
288 static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
289
290 void rf_ReconThread(struct rf_recon_req *);
291 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
292 void rf_CopybackThread(RF_Raid_t *raidPtr);
293 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
294 int rf_autoconfig(device_t);
295 void rf_buildroothack(RF_ConfigSet_t *);
296
297 RF_AutoConfig_t *rf_find_raid_components(void);
298 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
299 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
300 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
301 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
302 int rf_set_autoconfig(RF_Raid_t *, int);
303 int rf_set_rootpartition(RF_Raid_t *, int);
304 void rf_release_all_vps(RF_ConfigSet_t *);
305 void rf_cleanup_config_set(RF_ConfigSet_t *);
306 int rf_have_enough_components(RF_ConfigSet_t *);
307 int rf_auto_config_set(RF_ConfigSet_t *, int *);
308 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
309
310 /*
311 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
312 * Note that this is overridden by having RAID_AUTOCONFIG as an option
313 * in the kernel config file.
314 */
315 #ifdef RAID_AUTOCONFIG
316 int raidautoconfig = 1;
317 #else
318 int raidautoconfig = 0;
319 #endif
320 static bool raidautoconfigdone = false;
321
322 struct RF_Pools_s rf_pools;
323
/*
 * raidattach: pseudo-device attach routine, called once at boot with
 * the number of raid units to provide ("pseudo-device raid N").
 * Allocates the global raidPtrs[] and raid_softc[] arrays, boots the
 * RAIDframe core, registers the cfattach, and schedules rf_autoconfig()
 * to run after all real hardware has been found.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

	aprint_debug("raidattach: Asked for %d units\n", num);

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* Synchronization for the spare-table installation handshake
	   with the userland helper process. */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		/* NB: raidPtrs is not freed here; the driver is simply
		   left unusable.  XXX noted for a future cleanup. */
		aprint_error("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			/* Truncate the number of usable units to what we
			   actually managed to allocate. */
			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
			numraid = raidID;
			return;
		}
	}

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
403
/*
 * rf_autoconfig: config_finalize(9) hook.  Scans all disks for RAID
 * component labels, groups them into sets, and configures the
 * auto-configurable ones.  Runs at most once; returns non-zero the
 * first time (work was done) and 0 on subsequent calls.
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}
431
/*
 * rf_buildroothack: configure each auto-configurable set, then try to
 * decide whether one of the resulting raid units should become the
 * root device.  Consumes (frees) the config_sets list.
 *
 * Root election: if exactly one configured set is marked rootable, use
 * it.  If several are, fall back to asking the MD code which physical
 * device we booted from and pick the raid set containing it; if that
 * is still ambiguous, set RB_ASKNAME so the user is prompted.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int col;
	int num_root;
	char *devname;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure==1) {
			retcode = rf_auto_config_set(cset,&raidID);
			if (!retcode) {
				aprint_debug("raid%d: configured ok\n", raidID);
				if (cset->rootable) {
					rootID = raidID;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	if (num_root == 1) {
		booted_device = raid_softc[rootID].sc_dev;
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */

		if (booted_device == NULL)
			cpu_rootconf();
		if (booted_device == NULL)
			return;

		/* Re-elect: count only rootable sets that contain the
		   component we actually booted from. */
		num_root = 0;
		for (raidID = 0; raidID < numraid; raidID++) {
			if (raidPtrs[raidID]->valid == 0)
				continue;

			if (raidPtrs[raidID]->root_partition == 0)
				continue;

			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
				devname = raidPtrs[raidID]->Disks[col].devname;
				/* skip the "/dev/" prefix so we can compare
				   against the device's xname */
				devname += sizeof("/dev/") - 1;
				if (strncmp(devname, device_xname(booted_device),
					    strlen(device_xname(booted_device))) != 0)
					continue;
				aprint_debug("raid%d includes boot device %s\n",
					     raidID, devname);
				num_root++;
				rootID = raidID;
			}
		}

		if (num_root == 1) {
			booted_device = raid_softc[rootID].sc_dev;
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
526
527
/*
 * raidsize: d_psize entry point.  Returns the size (in DEV_BSIZE
 * blocks) of the given partition if it is of type FS_SWAP, else -1.
 * If the partition is not already open, it is transiently opened and
 * closed around the label lookup, mirroring other disk drivers.
 */
int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, unit, omask, size;

	unit = raidunit(dev);
	if (unit >= numraid)
		return (-1);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = rs->sc_dkdev.dk_openmask & (1 << part);
	lp = rs->sc_dkdev.dk_label;

	/* omask == 0 means the partition was not open; open it
	   temporarily so the label is valid. */
	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
		return (-1);

	/* dumping is only supported to swap partitions */
	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
		return (-1);

	return (size);

}
562
/*
 * raiddump: d_dump entry point for kernel crash dumps.  Only RAID 1
 * sets are supported (one data + one parity column); the dump is
 * written directly to a single live component, bypassing RAIDframe.
 *
 * Returns 0 on success or an errno.  `blkno' is relative to the
 * partition being dumped to; `size' must be a multiple of DEV_BSIZE.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);

	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* refuse to write past the end of the raid device */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one?  Find which column this
			   spare is standing in for. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
/* ARGSUSED */
/*
 * raidopen: d_open entry point for both block and character devices.
 * Validates the unit and partition, reads the disklabel on first open,
 * records the open in the appropriate openmask, and marks all
 * components dirty on the first open of a configured set.
 */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	/* refuse new opens while the unit is being torn down */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* first open of a configured, wedge-less unit: (re)read the
	   disklabel */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_nwedges == 0) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(raidPtrs[unit]);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * raidclose: d_close entry point.  Clears the partition's bit in the
 * appropriate openmask; on the last close of a still-configured unit,
 * writes out final ("clean") component labels.  Always returns 0.
 */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
		    RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
		 */
	}

	raidunlock(rs);
	return (0);

}
837
838 void
839 raidstrategy(struct buf *bp)
840 {
841 unsigned int raidID = raidunit(bp->b_dev);
842 RF_Raid_t *raidPtr;
843 struct raid_softc *rs = &raid_softc[raidID];
844 int wlabel;
845
846 if ((rs->sc_flags & RAIDF_INITED) ==0) {
847 bp->b_error = ENXIO;
848 goto done;
849 }
850 if (raidID >= numraid || !raidPtrs[raidID]) {
851 bp->b_error = ENODEV;
852 goto done;
853 }
854 raidPtr = raidPtrs[raidID];
855 if (!raidPtr->valid) {
856 bp->b_error = ENODEV;
857 goto done;
858 }
859 if (bp->b_bcount == 0) {
860 db1_printf(("b_bcount is zero..\n"));
861 goto done;
862 }
863
864 /*
865 * Do bounds checking and adjust transfer. If there's an
866 * error, the bounds check will flag that for us.
867 */
868
869 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
870 if (DISKPART(bp->b_dev) == RAW_PART) {
871 uint64_t size; /* device size in DEV_BSIZE unit */
872
873 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
874 size = raidPtr->totalSectors <<
875 (raidPtr->logBytesPerSector - DEV_BSHIFT);
876 } else {
877 size = raidPtr->totalSectors >>
878 (DEV_BSHIFT - raidPtr->logBytesPerSector);
879 }
880 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
881 goto done;
882 }
883 } else {
884 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
885 db1_printf(("Bounds check failed!!:%d %d\n",
886 (int) bp->b_blkno, (int) wlabel));
887 goto done;
888 }
889 }
890
891 rf_lock_mutex2(raidPtr->iodone_lock);
892
893 bp->b_resid = 0;
894
895 /* stuff it onto our queue */
896 bufq_put(rs->buf_queue, bp);
897
898 /* scheduled the IO to happen at the next convenient time */
899 rf_signal_cond2(raidPtr->iodone_cv);
900 rf_unlock_mutex2(raidPtr->iodone_lock);
901
902 return;
903
904 done:
905 bp->b_resid = bp->b_bcount;
906 biodone(bp);
907 }
908 /* ARGSUSED */
909 int
910 raidread(dev_t dev, struct uio *uio, int flags)
911 {
912 int unit = raidunit(dev);
913 struct raid_softc *rs;
914
915 if (unit >= numraid)
916 return (ENXIO);
917 rs = &raid_softc[unit];
918
919 if ((rs->sc_flags & RAIDF_INITED) == 0)
920 return (ENXIO);
921
922 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
923
924 }
925 /* ARGSUSED */
926 int
927 raidwrite(dev_t dev, struct uio *uio, int flags)
928 {
929 int unit = raidunit(dev);
930 struct raid_softc *rs;
931
932 if (unit >= numraid)
933 return (ENXIO);
934 rs = &raid_softc[unit];
935
936 if ((rs->sc_flags & RAIDF_INITED) == 0)
937 return (ENXIO);
938
939 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
940
941 }
942
/*
 * raid_detach_unlocked: tear down a raid unit.  Caller must hold the
 * softc lock (raidlock).  Refuses with EBUSY while any partition is
 * open; otherwise shuts down the RAIDframe engine (if configured) and
 * detaches/destroys the generic disk.  Returns 0 or an errno.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = raidPtrs[device_unit(rs->sc_dev)];

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		/* only clear the flags once the shutdown succeeded */
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}
974
975 int
976 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
977 {
978 int unit = raidunit(dev);
979 int error = 0;
980 int part, pmask, s;
981 cfdata_t cf;
982 struct raid_softc *rs;
983 RF_Config_t *k_cfg, *u_cfg;
984 RF_Raid_t *raidPtr;
985 RF_RaidDisk_t *diskPtr;
986 RF_AccTotals_t *totals;
987 RF_DeviceConfig_t *d_cfg, **ucfgp;
988 u_char *specific_buf;
989 int retcode = 0;
990 int column;
991 /* int raidid; */
992 struct rf_recon_req *rrcopy, *rr;
993 RF_ComponentLabel_t *clabel;
994 RF_ComponentLabel_t *ci_label;
995 RF_ComponentLabel_t **clabel_ptr;
996 RF_SingleComponent_t *sparePtr,*componentPtr;
997 RF_SingleComponent_t component;
998 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
999 int i, j, d;
1000 #ifdef __HAVE_OLD_DISKLABEL
1001 struct disklabel newlabel;
1002 #endif
1003 struct dkwedge_info *dkw;
1004
1005 if (unit >= numraid)
1006 return (ENXIO);
1007 rs = &raid_softc[unit];
1008 raidPtr = raidPtrs[unit];
1009
1010 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1011 (int) DISKPART(dev), (int) unit, cmd));
1012
1013 /* Must be open for writes for these commands... */
1014 switch (cmd) {
1015 #ifdef DIOCGSECTORSIZE
1016 case DIOCGSECTORSIZE:
1017 *(u_int *)data = raidPtr->bytesPerSector;
1018 return 0;
1019 case DIOCGMEDIASIZE:
1020 *(off_t *)data =
1021 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1022 return 0;
1023 #endif
1024 case DIOCSDINFO:
1025 case DIOCWDINFO:
1026 #ifdef __HAVE_OLD_DISKLABEL
1027 case ODIOCWDINFO:
1028 case ODIOCSDINFO:
1029 #endif
1030 case DIOCWLABEL:
1031 case DIOCAWEDGE:
1032 case DIOCDWEDGE:
1033 case DIOCSSTRATEGY:
1034 if ((flag & FWRITE) == 0)
1035 return (EBADF);
1036 }
1037
1038 /* Must be initialized for these... */
1039 switch (cmd) {
1040 case DIOCGDINFO:
1041 case DIOCSDINFO:
1042 case DIOCWDINFO:
1043 #ifdef __HAVE_OLD_DISKLABEL
1044 case ODIOCGDINFO:
1045 case ODIOCWDINFO:
1046 case ODIOCSDINFO:
1047 case ODIOCGDEFLABEL:
1048 #endif
1049 case DIOCGPART:
1050 case DIOCWLABEL:
1051 case DIOCGDEFLABEL:
1052 case DIOCAWEDGE:
1053 case DIOCDWEDGE:
1054 case DIOCLWEDGES:
1055 case DIOCCACHESYNC:
1056 case RAIDFRAME_SHUTDOWN:
1057 case RAIDFRAME_REWRITEPARITY:
1058 case RAIDFRAME_GET_INFO:
1059 case RAIDFRAME_RESET_ACCTOTALS:
1060 case RAIDFRAME_GET_ACCTOTALS:
1061 case RAIDFRAME_KEEP_ACCTOTALS:
1062 case RAIDFRAME_GET_SIZE:
1063 case RAIDFRAME_FAIL_DISK:
1064 case RAIDFRAME_COPYBACK:
1065 case RAIDFRAME_CHECK_RECON_STATUS:
1066 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1067 case RAIDFRAME_GET_COMPONENT_LABEL:
1068 case RAIDFRAME_SET_COMPONENT_LABEL:
1069 case RAIDFRAME_ADD_HOT_SPARE:
1070 case RAIDFRAME_REMOVE_HOT_SPARE:
1071 case RAIDFRAME_INIT_LABELS:
1072 case RAIDFRAME_REBUILD_IN_PLACE:
1073 case RAIDFRAME_CHECK_PARITY:
1074 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1075 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1076 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1077 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1078 case RAIDFRAME_SET_AUTOCONFIG:
1079 case RAIDFRAME_SET_ROOT:
1080 case RAIDFRAME_DELETE_COMPONENT:
1081 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1082 case RAIDFRAME_PARITYMAP_STATUS:
1083 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1084 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1085 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1086 case DIOCGSTRATEGY:
1087 case DIOCSSTRATEGY:
1088 if ((rs->sc_flags & RAIDF_INITED) == 0)
1089 return (ENXIO);
1090 }
1091
1092 switch (cmd) {
1093 #ifdef COMPAT_50
1094 case RAIDFRAME_GET_INFO50:
1095 return rf_get_info50(raidPtr, data);
1096
1097 case RAIDFRAME_CONFIGURE50:
1098 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1099 return retcode;
1100 goto config;
1101 #endif
1102 /* configure the system */
1103 case RAIDFRAME_CONFIGURE:
1104
1105 if (raidPtr->valid) {
1106 /* There is a valid RAID set running on this unit! */
1107 printf("raid%d: Device already configured!\n",unit);
1108 return(EINVAL);
1109 }
1110
1111 /* copy-in the configuration information */
1112 /* data points to a pointer to the configuration structure */
1113
1114 u_cfg = *((RF_Config_t **) data);
1115 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1116 if (k_cfg == NULL) {
1117 return (ENOMEM);
1118 }
1119 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1120 if (retcode) {
1121 RF_Free(k_cfg, sizeof(RF_Config_t));
1122 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1123 retcode));
1124 return (retcode);
1125 }
1126 goto config;
1127 config:
1128 /* allocate a buffer for the layout-specific data, and copy it
1129 * in */
1130 if (k_cfg->layoutSpecificSize) {
1131 if (k_cfg->layoutSpecificSize > 10000) {
1132 /* sanity check */
1133 RF_Free(k_cfg, sizeof(RF_Config_t));
1134 return (EINVAL);
1135 }
1136 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1137 (u_char *));
1138 if (specific_buf == NULL) {
1139 RF_Free(k_cfg, sizeof(RF_Config_t));
1140 return (ENOMEM);
1141 }
1142 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1143 k_cfg->layoutSpecificSize);
1144 if (retcode) {
1145 RF_Free(k_cfg, sizeof(RF_Config_t));
1146 RF_Free(specific_buf,
1147 k_cfg->layoutSpecificSize);
1148 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1149 retcode));
1150 return (retcode);
1151 }
1152 } else
1153 specific_buf = NULL;
1154 k_cfg->layoutSpecific = specific_buf;
1155
1156 /* should do some kind of sanity check on the configuration.
1157 * Store the sum of all the bytes in the last byte? */
1158
1159 /* configure the system */
1160
1161 /*
1162 * Clear the entire RAID descriptor, just to make sure
1163 * there is no stale data left in the case of a
1164 * reconfiguration
1165 */
1166 memset(raidPtr, 0, sizeof(*raidPtr));
1167 raidPtr->raidid = unit;
1168
1169 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1170
1171 if (retcode == 0) {
1172
1173 /* allow this many simultaneous IO's to
1174 this RAID device */
1175 raidPtr->openings = RAIDOUTSTANDING;
1176
1177 raidinit(raidPtr);
1178 rf_markalldirty(raidPtr);
1179 }
1180 /* free the buffers. No return code here. */
1181 if (k_cfg->layoutSpecificSize) {
1182 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1183 }
1184 RF_Free(k_cfg, sizeof(RF_Config_t));
1185
1186 return (retcode);
1187
1188 /* shutdown the system */
1189 case RAIDFRAME_SHUTDOWN:
1190
1191 part = DISKPART(dev);
1192 pmask = (1 << part);
1193
1194 if ((error = raidlock(rs)) != 0)
1195 return (error);
1196
1197 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1198 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1199 (rs->sc_dkdev.dk_copenmask & pmask)))
1200 retcode = EBUSY;
1201 else {
1202 rs->sc_flags |= RAIDF_SHUTDOWN;
1203 rs->sc_dkdev.dk_copenmask &= ~pmask;
1204 rs->sc_dkdev.dk_bopenmask &= ~pmask;
1205 rs->sc_dkdev.dk_openmask &= ~pmask;
1206 retcode = 0;
1207 }
1208
1209 raidunlock(rs);
1210
1211 if (retcode != 0)
1212 return retcode;
1213
1214 /* free the pseudo device attach bits */
1215
1216 cf = device_cfdata(rs->sc_dev);
1217 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
1218 free(cf, M_RAIDFRAME);
1219
1220 return (retcode);
1221 case RAIDFRAME_GET_COMPONENT_LABEL:
1222 clabel_ptr = (RF_ComponentLabel_t **) data;
1223 /* need to read the component label for the disk indicated
1224 by row,column in clabel */
1225
1226 /*
1227 * Perhaps there should be an option to skip the in-core
1228 * copy and hit the disk, as with disklabel(8).
1229 */
1230 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1231
1232 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
1233
1234 if (retcode) {
1235 RF_Free(clabel, sizeof(*clabel));
1236 return retcode;
1237 }
1238
1239 clabel->row = 0; /* Don't allow looking at anything else.*/
1240
1241 column = clabel->column;
1242
1243 if ((column < 0) || (column >= raidPtr->numCol +
1244 raidPtr->numSpare)) {
1245 RF_Free(clabel, sizeof(*clabel));
1246 return EINVAL;
1247 }
1248
1249 RF_Free(clabel, sizeof(*clabel));
1250
1251 clabel = raidget_component_label(raidPtr, column);
1252
1253 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
1254
1255 #if 0
1256 case RAIDFRAME_SET_COMPONENT_LABEL:
1257 clabel = (RF_ComponentLabel_t *) data;
1258
1259 /* XXX check the label for valid stuff... */
1260 /* Note that some things *should not* get modified --
1261 the user should be re-initing the labels instead of
1262 trying to patch things.
1263 */
1264
1265 raidid = raidPtr->raidid;
1266 #ifdef DEBUG
1267 printf("raid%d: Got component label:\n", raidid);
1268 printf("raid%d: Version: %d\n", raidid, clabel->version);
1269 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1270 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1271 printf("raid%d: Column: %d\n", raidid, clabel->column);
1272 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1273 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1274 printf("raid%d: Status: %d\n", raidid, clabel->status);
1275 #endif
1276 clabel->row = 0;
1277 column = clabel->column;
1278
1279 if ((column < 0) || (column >= raidPtr->numCol)) {
1280 return(EINVAL);
1281 }
1282
1283 /* XXX this isn't allowed to do anything for now :-) */
1284
1285 /* XXX and before it is, we need to fill in the rest
1286 of the fields!?!?!?! */
1287 memcpy(raidget_component_label(raidPtr, column),
1288 clabel, sizeof(*clabel));
1289 raidflush_component_label(raidPtr, column);
1290 return (0);
1291 #endif
1292
1293 case RAIDFRAME_INIT_LABELS:
1294 clabel = (RF_ComponentLabel_t *) data;
1295 /*
1296 we only want the serial number from
1297 the above. We get all the rest of the information
1298 from the config that was used to create this RAID
1299 set.
1300 */
1301
1302 raidPtr->serial_number = clabel->serial_number;
1303
1304 for(column=0;column<raidPtr->numCol;column++) {
1305 diskPtr = &raidPtr->Disks[column];
1306 if (!RF_DEAD_DISK(diskPtr->status)) {
1307 ci_label = raidget_component_label(raidPtr,
1308 column);
1309 /* Zeroing this is important. */
1310 memset(ci_label, 0, sizeof(*ci_label));
1311 raid_init_component_label(raidPtr, ci_label);
1312 ci_label->serial_number =
1313 raidPtr->serial_number;
1314 ci_label->row = 0; /* we dont' pretend to support more */
1315 rf_component_label_set_partitionsize(ci_label,
1316 diskPtr->partitionSize);
1317 ci_label->column = column;
1318 raidflush_component_label(raidPtr, column);
1319 }
1320 /* XXXjld what about the spares? */
1321 }
1322
1323 return (retcode);
1324 case RAIDFRAME_SET_AUTOCONFIG:
1325 d = rf_set_autoconfig(raidPtr, *(int *) data);
1326 printf("raid%d: New autoconfig value is: %d\n",
1327 raidPtr->raidid, d);
1328 *(int *) data = d;
1329 return (retcode);
1330
1331 case RAIDFRAME_SET_ROOT:
1332 d = rf_set_rootpartition(raidPtr, *(int *) data);
1333 printf("raid%d: New rootpartition value is: %d\n",
1334 raidPtr->raidid, d);
1335 *(int *) data = d;
1336 return (retcode);
1337
1338 /* initialize all parity */
1339 case RAIDFRAME_REWRITEPARITY:
1340
1341 if (raidPtr->Layout.map->faultsTolerated == 0) {
1342 /* Parity for RAID 0 is trivially correct */
1343 raidPtr->parity_good = RF_RAID_CLEAN;
1344 return(0);
1345 }
1346
1347 if (raidPtr->parity_rewrite_in_progress == 1) {
1348 /* Re-write is already in progress! */
1349 return(EINVAL);
1350 }
1351
1352 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1353 rf_RewriteParityThread,
1354 raidPtr,"raid_parity");
1355 return (retcode);
1356
1357
1358 case RAIDFRAME_ADD_HOT_SPARE:
1359 sparePtr = (RF_SingleComponent_t *) data;
1360 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1361 retcode = rf_add_hot_spare(raidPtr, &component);
1362 return(retcode);
1363
1364 case RAIDFRAME_REMOVE_HOT_SPARE:
1365 return(retcode);
1366
1367 case RAIDFRAME_DELETE_COMPONENT:
1368 componentPtr = (RF_SingleComponent_t *)data;
1369 memcpy( &component, componentPtr,
1370 sizeof(RF_SingleComponent_t));
1371 retcode = rf_delete_component(raidPtr, &component);
1372 return(retcode);
1373
1374 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1375 componentPtr = (RF_SingleComponent_t *)data;
1376 memcpy( &component, componentPtr,
1377 sizeof(RF_SingleComponent_t));
1378 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1379 return(retcode);
1380
1381 case RAIDFRAME_REBUILD_IN_PLACE:
1382
1383 if (raidPtr->Layout.map->faultsTolerated == 0) {
1384 /* Can't do this on a RAID 0!! */
1385 return(EINVAL);
1386 }
1387
1388 if (raidPtr->recon_in_progress == 1) {
1389 /* a reconstruct is already in progress! */
1390 return(EINVAL);
1391 }
1392
1393 componentPtr = (RF_SingleComponent_t *) data;
1394 memcpy( &component, componentPtr,
1395 sizeof(RF_SingleComponent_t));
1396 component.row = 0; /* we don't support any more */
1397 column = component.column;
1398
1399 if ((column < 0) || (column >= raidPtr->numCol)) {
1400 return(EINVAL);
1401 }
1402
1403 rf_lock_mutex2(raidPtr->mutex);
1404 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1405 (raidPtr->numFailures > 0)) {
1406 /* XXX 0 above shouldn't be constant!!! */
1407 /* some component other than this has failed.
1408 Let's not make things worse than they already
1409 are... */
1410 printf("raid%d: Unable to reconstruct to disk at:\n",
1411 raidPtr->raidid);
1412 printf("raid%d: Col: %d Too many failures.\n",
1413 raidPtr->raidid, column);
1414 rf_unlock_mutex2(raidPtr->mutex);
1415 return (EINVAL);
1416 }
1417 if (raidPtr->Disks[column].status ==
1418 rf_ds_reconstructing) {
1419 printf("raid%d: Unable to reconstruct to disk at:\n",
1420 raidPtr->raidid);
1421 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1422
1423 rf_unlock_mutex2(raidPtr->mutex);
1424 return (EINVAL);
1425 }
1426 if (raidPtr->Disks[column].status == rf_ds_spared) {
1427 rf_unlock_mutex2(raidPtr->mutex);
1428 return (EINVAL);
1429 }
1430 rf_unlock_mutex2(raidPtr->mutex);
1431
1432 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1433 if (rrcopy == NULL)
1434 return(ENOMEM);
1435
1436 rrcopy->raidPtr = (void *) raidPtr;
1437 rrcopy->col = column;
1438
1439 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1440 rf_ReconstructInPlaceThread,
1441 rrcopy,"raid_reconip");
1442 return(retcode);
1443
1444 case RAIDFRAME_GET_INFO:
1445 if (!raidPtr->valid)
1446 return (ENODEV);
1447 ucfgp = (RF_DeviceConfig_t **) data;
1448 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1449 (RF_DeviceConfig_t *));
1450 if (d_cfg == NULL)
1451 return (ENOMEM);
1452 d_cfg->rows = 1; /* there is only 1 row now */
1453 d_cfg->cols = raidPtr->numCol;
1454 d_cfg->ndevs = raidPtr->numCol;
1455 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1456 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1457 return (ENOMEM);
1458 }
1459 d_cfg->nspares = raidPtr->numSpare;
1460 if (d_cfg->nspares >= RF_MAX_DISKS) {
1461 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1462 return (ENOMEM);
1463 }
1464 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1465 d = 0;
1466 for (j = 0; j < d_cfg->cols; j++) {
1467 d_cfg->devs[d] = raidPtr->Disks[j];
1468 d++;
1469 }
1470 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1471 d_cfg->spares[i] = raidPtr->Disks[j];
1472 if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
1473 /* XXX: raidctl(8) expects to see this as a used spare */
1474 d_cfg->spares[i].status = rf_ds_used_spare;
1475 }
1476 }
1477 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1478 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1479
1480 return (retcode);
1481
1482 case RAIDFRAME_CHECK_PARITY:
1483 *(int *) data = raidPtr->parity_good;
1484 return (0);
1485
1486 case RAIDFRAME_PARITYMAP_STATUS:
1487 if (rf_paritymap_ineligible(raidPtr))
1488 return EINVAL;
1489 rf_paritymap_status(raidPtr->parity_map,
1490 (struct rf_pmstat *)data);
1491 return 0;
1492
1493 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1494 if (rf_paritymap_ineligible(raidPtr))
1495 return EINVAL;
1496 if (raidPtr->parity_map == NULL)
1497 return ENOENT; /* ??? */
1498 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1499 (struct rf_pmparams *)data, 1))
1500 return EINVAL;
1501 return 0;
1502
1503 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1504 if (rf_paritymap_ineligible(raidPtr))
1505 return EINVAL;
1506 *(int *) data = rf_paritymap_get_disable(raidPtr);
1507 return 0;
1508
1509 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1510 if (rf_paritymap_ineligible(raidPtr))
1511 return EINVAL;
1512 rf_paritymap_set_disable(raidPtr, *(int *)data);
1513 /* XXX should errors be passed up? */
1514 return 0;
1515
1516 case RAIDFRAME_RESET_ACCTOTALS:
1517 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1518 return (0);
1519
1520 case RAIDFRAME_GET_ACCTOTALS:
1521 totals = (RF_AccTotals_t *) data;
1522 *totals = raidPtr->acc_totals;
1523 return (0);
1524
1525 case RAIDFRAME_KEEP_ACCTOTALS:
1526 raidPtr->keep_acc_totals = *(int *)data;
1527 return (0);
1528
1529 case RAIDFRAME_GET_SIZE:
1530 *(int *) data = raidPtr->totalSectors;
1531 return (0);
1532
1533 /* fail a disk & optionally start reconstruction */
1534 case RAIDFRAME_FAIL_DISK:
1535
1536 if (raidPtr->Layout.map->faultsTolerated == 0) {
1537 /* Can't do this on a RAID 0!! */
1538 return(EINVAL);
1539 }
1540
1541 rr = (struct rf_recon_req *) data;
1542 rr->row = 0;
1543 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1544 return (EINVAL);
1545
1546
1547 rf_lock_mutex2(raidPtr->mutex);
1548 if (raidPtr->status == rf_rs_reconstructing) {
1549 /* you can't fail a disk while we're reconstructing! */
1550 /* XXX wrong for RAID6 */
1551 rf_unlock_mutex2(raidPtr->mutex);
1552 return (EINVAL);
1553 }
1554 if ((raidPtr->Disks[rr->col].status ==
1555 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1556 /* some other component has failed. Let's not make
1557 things worse. XXX wrong for RAID6 */
1558 rf_unlock_mutex2(raidPtr->mutex);
1559 return (EINVAL);
1560 }
1561 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1562 /* Can't fail a spared disk! */
1563 rf_unlock_mutex2(raidPtr->mutex);
1564 return (EINVAL);
1565 }
1566 rf_unlock_mutex2(raidPtr->mutex);
1567
1568 /* make a copy of the recon request so that we don't rely on
1569 * the user's buffer */
1570 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1571 if (rrcopy == NULL)
1572 return(ENOMEM);
1573 memcpy(rrcopy, rr, sizeof(*rr));
1574 rrcopy->raidPtr = (void *) raidPtr;
1575
1576 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1577 rf_ReconThread,
1578 rrcopy,"raid_recon");
1579 return (0);
1580
1581 /* invoke a copyback operation after recon on whatever disk
1582 * needs it, if any */
1583 case RAIDFRAME_COPYBACK:
1584
1585 if (raidPtr->Layout.map->faultsTolerated == 0) {
1586 /* This makes no sense on a RAID 0!! */
1587 return(EINVAL);
1588 }
1589
1590 if (raidPtr->copyback_in_progress == 1) {
1591 /* Copyback is already in progress! */
1592 return(EINVAL);
1593 }
1594
1595 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1596 rf_CopybackThread,
1597 raidPtr,"raid_copyback");
1598 return (retcode);
1599
1600 /* return the percentage completion of reconstruction */
1601 case RAIDFRAME_CHECK_RECON_STATUS:
1602 if (raidPtr->Layout.map->faultsTolerated == 0) {
1603 /* This makes no sense on a RAID 0, so tell the
1604 user it's done. */
1605 *(int *) data = 100;
1606 return(0);
1607 }
1608 if (raidPtr->status != rf_rs_reconstructing)
1609 *(int *) data = 100;
1610 else {
1611 if (raidPtr->reconControl->numRUsTotal > 0) {
1612 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1613 } else {
1614 *(int *) data = 0;
1615 }
1616 }
1617 return (0);
1618 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1619 progressInfoPtr = (RF_ProgressInfo_t **) data;
1620 if (raidPtr->status != rf_rs_reconstructing) {
1621 progressInfo.remaining = 0;
1622 progressInfo.completed = 100;
1623 progressInfo.total = 100;
1624 } else {
1625 progressInfo.total =
1626 raidPtr->reconControl->numRUsTotal;
1627 progressInfo.completed =
1628 raidPtr->reconControl->numRUsComplete;
1629 progressInfo.remaining = progressInfo.total -
1630 progressInfo.completed;
1631 }
1632 retcode = copyout(&progressInfo, *progressInfoPtr,
1633 sizeof(RF_ProgressInfo_t));
1634 return (retcode);
1635
1636 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1637 if (raidPtr->Layout.map->faultsTolerated == 0) {
1638 /* This makes no sense on a RAID 0, so tell the
1639 user it's done. */
1640 *(int *) data = 100;
1641 return(0);
1642 }
1643 if (raidPtr->parity_rewrite_in_progress == 1) {
1644 *(int *) data = 100 *
1645 raidPtr->parity_rewrite_stripes_done /
1646 raidPtr->Layout.numStripe;
1647 } else {
1648 *(int *) data = 100;
1649 }
1650 return (0);
1651
1652 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1653 progressInfoPtr = (RF_ProgressInfo_t **) data;
1654 if (raidPtr->parity_rewrite_in_progress == 1) {
1655 progressInfo.total = raidPtr->Layout.numStripe;
1656 progressInfo.completed =
1657 raidPtr->parity_rewrite_stripes_done;
1658 progressInfo.remaining = progressInfo.total -
1659 progressInfo.completed;
1660 } else {
1661 progressInfo.remaining = 0;
1662 progressInfo.completed = 100;
1663 progressInfo.total = 100;
1664 }
1665 retcode = copyout(&progressInfo, *progressInfoPtr,
1666 sizeof(RF_ProgressInfo_t));
1667 return (retcode);
1668
1669 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1670 if (raidPtr->Layout.map->faultsTolerated == 0) {
1671 /* This makes no sense on a RAID 0 */
1672 *(int *) data = 100;
1673 return(0);
1674 }
1675 if (raidPtr->copyback_in_progress == 1) {
1676 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1677 raidPtr->Layout.numStripe;
1678 } else {
1679 *(int *) data = 100;
1680 }
1681 return (0);
1682
1683 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1684 progressInfoPtr = (RF_ProgressInfo_t **) data;
1685 if (raidPtr->copyback_in_progress == 1) {
1686 progressInfo.total = raidPtr->Layout.numStripe;
1687 progressInfo.completed =
1688 raidPtr->copyback_stripes_done;
1689 progressInfo.remaining = progressInfo.total -
1690 progressInfo.completed;
1691 } else {
1692 progressInfo.remaining = 0;
1693 progressInfo.completed = 100;
1694 progressInfo.total = 100;
1695 }
1696 retcode = copyout(&progressInfo, *progressInfoPtr,
1697 sizeof(RF_ProgressInfo_t));
1698 return (retcode);
1699
1700 /* the sparetable daemon calls this to wait for the kernel to
1701 * need a spare table. this ioctl does not return until a
1702 * spare table is needed. XXX -- calling mpsleep here in the
1703 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1704 * -- I should either compute the spare table in the kernel,
1705 * or have a different -- XXX XXX -- interface (a different
1706 * character device) for delivering the table -- XXX */
1707 #if 0
1708 case RAIDFRAME_SPARET_WAIT:
1709 rf_lock_mutex2(rf_sparet_wait_mutex);
1710 while (!rf_sparet_wait_queue)
1711 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1712 waitreq = rf_sparet_wait_queue;
1713 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1714 rf_unlock_mutex2(rf_sparet_wait_mutex);
1715
1716 /* structure assignment */
1717 *((RF_SparetWait_t *) data) = *waitreq;
1718
1719 RF_Free(waitreq, sizeof(*waitreq));
1720 return (0);
1721
1722 /* wakes up a process waiting on SPARET_WAIT and puts an error
1723 * code in it that will cause the dameon to exit */
1724 case RAIDFRAME_ABORT_SPARET_WAIT:
1725 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1726 waitreq->fcol = -1;
1727 rf_lock_mutex2(rf_sparet_wait_mutex);
1728 waitreq->next = rf_sparet_wait_queue;
1729 rf_sparet_wait_queue = waitreq;
1730 rf_broadcast_conf2(rf_sparet_wait_cv);
1731 rf_unlock_mutex2(rf_sparet_wait_mutex);
1732 return (0);
1733
1734 /* used by the spare table daemon to deliver a spare table
1735 * into the kernel */
1736 case RAIDFRAME_SEND_SPARET:
1737
1738 /* install the spare table */
1739 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1740
1741 /* respond to the requestor. the return status of the spare
1742 * table installation is passed in the "fcol" field */
1743 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1744 waitreq->fcol = retcode;
1745 rf_lock_mutex2(rf_sparet_wait_mutex);
1746 waitreq->next = rf_sparet_resp_queue;
1747 rf_sparet_resp_queue = waitreq;
1748 rf_broadcast_cond2(rf_sparet_resp_cv);
1749 rf_unlock_mutex2(rf_sparet_wait_mutex);
1750
1751 return (retcode);
1752 #endif
1753
1754 default:
1755 break; /* fall through to the os-specific code below */
1756
1757 }
1758
1759 if (!raidPtr->valid)
1760 return (EINVAL);
1761
1762 /*
1763 * Add support for "regular" device ioctls here.
1764 */
1765
1766 error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
1767 if (error != EPASSTHROUGH)
1768 return (error);
1769
1770 switch (cmd) {
1771 case DIOCGDINFO:
1772 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1773 break;
1774 #ifdef __HAVE_OLD_DISKLABEL
1775 case ODIOCGDINFO:
1776 newlabel = *(rs->sc_dkdev.dk_label);
1777 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1778 return ENOTTY;
1779 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1780 break;
1781 #endif
1782
1783 case DIOCGPART:
1784 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1785 ((struct partinfo *) data)->part =
1786 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1787 break;
1788
1789 case DIOCWDINFO:
1790 case DIOCSDINFO:
1791 #ifdef __HAVE_OLD_DISKLABEL
1792 case ODIOCWDINFO:
1793 case ODIOCSDINFO:
1794 #endif
1795 {
1796 struct disklabel *lp;
1797 #ifdef __HAVE_OLD_DISKLABEL
1798 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1799 memset(&newlabel, 0, sizeof newlabel);
1800 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1801 lp = &newlabel;
1802 } else
1803 #endif
1804 lp = (struct disklabel *)data;
1805
1806 if ((error = raidlock(rs)) != 0)
1807 return (error);
1808
1809 rs->sc_flags |= RAIDF_LABELLING;
1810
1811 error = setdisklabel(rs->sc_dkdev.dk_label,
1812 lp, 0, rs->sc_dkdev.dk_cpulabel);
1813 if (error == 0) {
1814 if (cmd == DIOCWDINFO
1815 #ifdef __HAVE_OLD_DISKLABEL
1816 || cmd == ODIOCWDINFO
1817 #endif
1818 )
1819 error = writedisklabel(RAIDLABELDEV(dev),
1820 raidstrategy, rs->sc_dkdev.dk_label,
1821 rs->sc_dkdev.dk_cpulabel);
1822 }
1823 rs->sc_flags &= ~RAIDF_LABELLING;
1824
1825 raidunlock(rs);
1826
1827 if (error)
1828 return (error);
1829 break;
1830 }
1831
1832 case DIOCWLABEL:
1833 if (*(int *) data != 0)
1834 rs->sc_flags |= RAIDF_WLABEL;
1835 else
1836 rs->sc_flags &= ~RAIDF_WLABEL;
1837 break;
1838
1839 case DIOCGDEFLABEL:
1840 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1841 break;
1842
1843 #ifdef __HAVE_OLD_DISKLABEL
1844 case ODIOCGDEFLABEL:
1845 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1846 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1847 return ENOTTY;
1848 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1849 break;
1850 #endif
1851
1852 case DIOCAWEDGE:
1853 case DIOCDWEDGE:
1854 dkw = (void *)data;
1855
1856 /* If the ioctl happens here, the parent is us. */
1857 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1858 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1859
1860 case DIOCLWEDGES:
1861 return dkwedge_list(&rs->sc_dkdev,
1862 (struct dkwedge_list *)data, l);
1863 case DIOCCACHESYNC:
1864 return rf_sync_component_caches(raidPtr);
1865
1866 case DIOCGSTRATEGY:
1867 {
1868 struct disk_strategy *dks = (void *)data;
1869
1870 s = splbio();
1871 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1872 sizeof(dks->dks_name));
1873 splx(s);
1874 dks->dks_paramlen = 0;
1875
1876 return 0;
1877 }
1878
1879 case DIOCSSTRATEGY:
1880 {
1881 struct disk_strategy *dks = (void *)data;
1882 struct bufq_state *new;
1883 struct bufq_state *old;
1884
1885 if (dks->dks_param != NULL) {
1886 return EINVAL;
1887 }
1888 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1889 error = bufq_alloc(&new, dks->dks_name,
1890 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1891 if (error) {
1892 return error;
1893 }
1894 s = splbio();
1895 old = rs->buf_queue;
1896 bufq_move(new, old);
1897 rs->buf_queue = new;
1898 splx(s);
1899 bufq_free(old);
1900
1901 return 0;
1902 }
1903
1904 default:
1905 retcode = ENOTTY;
1906 }
1907 return (retcode);
1908
1909 }
1910
1911
1912 /* raidinit -- complete the rest of the initialization for the
1913 RAIDframe device. */
1914
1915
1916 static void
1917 raidinit(RF_Raid_t *raidPtr)
1918 {
1919 cfdata_t cf;
1920 struct raid_softc *rs;
1921 int unit;
1922
1923 unit = raidPtr->raidid;
1924
1925 rs = &raid_softc[unit];
1926
1927 /* XXX should check return code first... */
1928 rs->sc_flags |= RAIDF_INITED;
1929
1930 /* XXX doesn't check bounds. */
1931 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1932
1933 /* attach the pseudo device */
1934 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1935 cf->cf_name = raid_cd.cd_name;
1936 cf->cf_atname = raid_cd.cd_name;
1937 cf->cf_unit = unit;
1938 cf->cf_fstate = FSTATE_STAR;
1939
1940 rs->sc_dev = config_attach_pseudo(cf);
1941
1942 if (rs->sc_dev == NULL) {
1943 printf("raid%d: config_attach_pseudo failed\n",
1944 raidPtr->raidid);
1945 rs->sc_flags &= ~RAIDF_INITED;
1946 free(cf, M_RAIDFRAME);
1947 return;
1948 }
1949
1950 /* disk_attach actually creates space for the CPU disklabel, among
1951 * other things, so it's critical to call this *BEFORE* we try putzing
1952 * with disklabels. */
1953
1954 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
1955 disk_attach(&rs->sc_dkdev);
1956 disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);
1957
1958 /* XXX There may be a weird interaction here between this, and
1959 * protectedSectors, as used in RAIDframe. */
1960
1961 rs->sc_size = raidPtr->totalSectors;
1962
1963 dkwedge_discover(&rs->sc_dkdev);
1964
1965 rf_set_properties(rs, raidPtr);
1966
1967 }
1968 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1969 /* wake up the daemon & tell it to get us a spare table
1970 * XXX
1971 * the entries in the queues should be tagged with the raidPtr
1972 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
1974 * XXX
1975 *
1976 * XXX This code is not currently used. GO
1977 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* Post "req" on the wait queue and wake the sparetable daemon,
	 * which is sleeping in the RAIDFRAME_SPARET_WAIT ioctl. */
	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* Sleep until a response shows up; rf_wait_cond2() drops the
	 * mutex while asleep (the old comment referred to mpsleep). */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	/* Dequeue the daemon's response; note "req" is reused to point
	 * at the response entry from here on. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	/* The installation status is passed back in the "fcol" field. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
2001 #endif
2002
2003 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2004 * bp & passes it down.
2005 * any calls originating in the kernel must use non-blocking I/O
2006 * do some extra sanity checking to return "appropriate" error values for
2007 * certain conditions (to make some standard utilities work)
2008 *
2009 * Formerly known as: rf_DoAccessKernel
2010 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* drop the mutex: updating component labels does I/O */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Loop invariant: raidPtr->mutex is held at the top of each
	 * iteration and on exit from the loop; it is dropped while we
	 * dequeue and dispatch a buffer. */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		/* sum = end of the transfer in sectors; the extra (sum < x)
		 * tests below catch arithmetic wrap-around */
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* reject transfers that run past the end of the array */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* retake the mutex to restore the loop invariant */
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that aren't a multiple of the sector size */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening for this I/O */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* submission failed: fail this buffer, but keep
			 * draining the queue */
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2131
2132
2133
2134
2135 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2136
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	/* Map RAIDframe's I/O type onto the buf-layer read/write flag. */
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	/* Remember which queue this request came from; the completion
	 * handler (KernelWakeupFunc) needs it to find the raidPtr. */
	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Complete the NOP immediately; no real device I/O. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up the buf to target the component's vnode/device,
		 * with KernelWakeupFunc as the biodone callback. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with an I/O invoked from
   kernel code.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* The originating request was stashed in b_private by InitBP()
	 * (or by the NOP path in rf_DispatchKernelIO()). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		/* A component is failed only while it is currently healthy
		 * (optimal or used_spare) and the set can still tolerate
		 * another failure; otherwise the error is just recorded
		 * in req->error below. */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2278
2279
2280 /*
2281 * initialize a buf structure for doing an I/O in the kernel.
2282 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	/* Byte count derived from the sector count and per-sector shift. */
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	/* Convert the sector address to bytes, then to DEV_BSIZE blocks. */
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	/* cbFunc/cbArg: completion callback invoked from biodone(). */
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}
2307
2308 static void
2309 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2310 struct disklabel *lp)
2311 {
2312 memset(lp, 0, sizeof(*lp));
2313
2314 /* fabricate a label... */
2315 lp->d_secperunit = raidPtr->totalSectors;
2316 lp->d_secsize = raidPtr->bytesPerSector;
2317 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2318 lp->d_ntracks = 4 * raidPtr->numCol;
2319 lp->d_ncylinders = raidPtr->totalSectors /
2320 (lp->d_nsectors * lp->d_ntracks);
2321 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2322
2323 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2324 lp->d_type = DTYPE_RAID;
2325 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2326 lp->d_rpm = 3600;
2327 lp->d_interleave = 1;
2328 lp->d_flags = 0;
2329
2330 lp->d_partitions[RAW_PART].p_offset = 0;
2331 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2332 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2333 lp->d_npartitions = RAW_PART + 1;
2334
2335 lp->d_magic = DISKMAGIC;
2336 lp->d_magic2 = DISKMAGIC;
2337 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2338
2339 }
2340 /*
2341 * Read the disklabel from the raid device. If one is not present, fake one
2342 * up.
2343 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* Start from a fabricated default; readdisklabel() below may
	 * overwrite it with an on-disk label. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and an old disklabel may be
		 * used if one is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%" PRIu32 ") != "
			    "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%" PRIu64 ")\n",
				    unit, rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
2396 /*
2397 * Take care of things one might want to take care of in the event
2398 * that a disklabel isn't present.
2399 */
2400 static void
2401 raidmakedisklabel(struct raid_softc *rs)
2402 {
2403 struct disklabel *lp = rs->sc_dkdev.dk_label;
2404 db1_printf(("Making a label..\n"));
2405
2406 /*
2407 * For historical reasons, if there's no disklabel present
2408 * the raw partition must be marked FS_BSDFFS.
2409 */
2410
2411 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2412
2413 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2414
2415 lp->d_checksum = dkcksum(lp);
2416 }
2417 /*
2418 * Wait interruptibly for an exclusive lock.
2419 *
2420 * XXX
2421 * Several drivers do this; it should be abstracted and made MP-safe.
2422 * (Hmm... where have we seen this warning before :-> GO )
2423 */
2424 static int
2425 raidlock(struct raid_softc *rs)
2426 {
2427 int error;
2428
2429 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2430 rs->sc_flags |= RAIDF_WANTED;
2431 if ((error =
2432 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2433 return (error);
2434 }
2435 rs->sc_flags |= RAIDF_LOCKED;
2436 return (0);
2437 }
2438 /*
2439 * Unlock and wake up any waiters.
2440 */
2441 static void
2442 raidunlock(struct raid_softc *rs)
2443 {
2444
2445 rs->sc_flags &= ~RAIDF_LOCKED;
2446 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2447 rs->sc_flags &= ~RAIDF_WANTED;
2448 wakeup(rs);
2449 }
2450 }
2451
2452
2453 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2454 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2455 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2456
2457 static daddr_t
2458 rf_component_info_offset(void)
2459 {
2460
2461 return RF_COMPONENT_INFO_OFFSET;
2462 }
2463
2464 static daddr_t
2465 rf_component_info_size(unsigned secsize)
2466 {
2467 daddr_t info_size;
2468
2469 KASSERT(secsize);
2470 if (secsize > RF_COMPONENT_INFO_SIZE)
2471 info_size = secsize;
2472 else
2473 info_size = RF_COMPONENT_INFO_SIZE;
2474
2475 return info_size;
2476 }
2477
2478 static daddr_t
2479 rf_parity_map_offset(RF_Raid_t *raidPtr)
2480 {
2481 daddr_t map_offset;
2482
2483 KASSERT(raidPtr->bytesPerSector);
2484 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2485 map_offset = raidPtr->bytesPerSector;
2486 else
2487 map_offset = RF_COMPONENT_INFO_SIZE;
2488 map_offset += rf_component_info_offset();
2489
2490 return map_offset;
2491 }
2492
2493 static daddr_t
2494 rf_parity_map_size(RF_Raid_t *raidPtr)
2495 {
2496 daddr_t map_size;
2497
2498 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2499 map_size = raidPtr->bytesPerSector;
2500 else
2501 map_size = RF_PARITY_MAP_SIZE;
2502
2503 return map_size;
2504 }
2505
2506 int
2507 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2508 {
2509 RF_ComponentLabel_t *clabel;
2510
2511 clabel = raidget_component_label(raidPtr, col);
2512 clabel->clean = RF_RAID_CLEAN;
2513 raidflush_component_label(raidPtr, col);
2514 return(0);
2515 }
2516
2517
2518 int
2519 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2520 {
2521 RF_ComponentLabel_t *clabel;
2522
2523 clabel = raidget_component_label(raidPtr, col);
2524 clabel->clean = RF_RAID_DIRTY;
2525 raidflush_component_label(raidPtr, col);
2526 return(0);
2527 }
2528
2529 int
2530 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2531 {
2532 KASSERT(raidPtr->bytesPerSector);
2533 return raidread_component_label(raidPtr->bytesPerSector,
2534 raidPtr->Disks[col].dev,
2535 raidPtr->raid_cinfo[col].ci_vp,
2536 &raidPtr->raid_cinfo[col].ci_label);
2537 }
2538
2539 RF_ComponentLabel_t *
2540 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2541 {
2542 return &raidPtr->raid_cinfo[col].ci_label;
2543 }
2544
2545 int
2546 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2547 {
2548 RF_ComponentLabel_t *label;
2549
2550 label = &raidPtr->raid_cinfo[col].ci_label;
2551 label->mod_counter = raidPtr->mod_counter;
2552 #ifndef RF_NO_PARITY_MAP
2553 label->parity_map_modcount = label->mod_counter;
2554 #endif
2555 return raidwrite_component_label(raidPtr->bytesPerSector,
2556 raidPtr->Disks[col].dev,
2557 raidPtr->raid_cinfo[col].ci_vp, label);
2558 }
2559
2560
2561 static int
2562 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2563 RF_ComponentLabel_t *clabel)
2564 {
2565 return raidread_component_area(dev, b_vp, clabel,
2566 sizeof(RF_ComponentLabel_t),
2567 rf_component_info_offset(),
2568 rf_component_info_size(secsize));
2569 }
2570
2571 /* ARGSUSED */
2572 static int
2573 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2574 size_t msize, daddr_t offset, daddr_t dsize)
2575 {
2576 struct buf *bp;
2577 const struct bdevsw *bdev;
2578 int error;
2579
2580 /* XXX should probably ensure that we don't try to do this if
2581 someone has changed rf_protected_sectors. */
2582
2583 if (b_vp == NULL) {
2584 /* For whatever reason, this component is not valid.
2585 Don't try to read a component label from it. */
2586 return(EINVAL);
2587 }
2588
2589 /* get a block of the appropriate size... */
2590 bp = geteblk((int)dsize);
2591 bp->b_dev = dev;
2592
2593 /* get our ducks in a row for the read */
2594 bp->b_blkno = offset / DEV_BSIZE;
2595 bp->b_bcount = dsize;
2596 bp->b_flags |= B_READ;
2597 bp->b_resid = dsize;
2598
2599 bdev = bdevsw_lookup(bp->b_dev);
2600 if (bdev == NULL)
2601 return (ENXIO);
2602 (*bdev->d_strategy)(bp);
2603
2604 error = biowait(bp);
2605
2606 if (!error) {
2607 memcpy(data, bp->b_data, msize);
2608 }
2609
2610 brelse(bp, 0);
2611 return(error);
2612 }
2613
2614
2615 static int
2616 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2617 RF_ComponentLabel_t *clabel)
2618 {
2619 return raidwrite_component_area(dev, b_vp, clabel,
2620 sizeof(RF_ComponentLabel_t),
2621 rf_component_info_offset(),
2622 rf_component_info_size(secsize), 0);
2623 }
2624
2625 /* ARGSUSED */
2626 static int
2627 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2628 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2629 {
2630 struct buf *bp;
2631 const struct bdevsw *bdev;
2632 int error;
2633
2634 /* get a block of the appropriate size... */
2635 bp = geteblk((int)dsize);
2636 bp->b_dev = dev;
2637
2638 /* get our ducks in a row for the write */
2639 bp->b_blkno = offset / DEV_BSIZE;
2640 bp->b_bcount = dsize;
2641 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2642 bp->b_resid = dsize;
2643
2644 memset(bp->b_data, 0, dsize);
2645 memcpy(bp->b_data, data, msize);
2646
2647 bdev = bdevsw_lookup(bp->b_dev);
2648 if (bdev == NULL)
2649 return (ENXIO);
2650 (*bdev->d_strategy)(bp);
2651 if (asyncp)
2652 return 0;
2653 error = biowait(bp);
2654 brelse(bp, 0);
2655 if (error) {
2656 #if 1
2657 printf("Failed to write RAID component info!\n");
2658 #endif
2659 }
2660
2661 return(error);
2662 }
2663
2664 void
2665 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2666 {
2667 int c;
2668
2669 for (c = 0; c < raidPtr->numCol; c++) {
2670 /* Skip dead disks. */
2671 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2672 continue;
2673 /* XXXjld: what if an error occurs here? */
2674 raidwrite_component_area(raidPtr->Disks[c].dev,
2675 raidPtr->raid_cinfo[c].ci_vp, map,
2676 RF_PARITYMAP_NBYTE,
2677 rf_parity_map_offset(raidPtr),
2678 rf_parity_map_size(raidPtr), 0);
2679 }
2680 }
2681
2682 void
2683 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2684 {
2685 struct rf_paritymap_ondisk tmp;
2686 int c,first;
2687
2688 first=1;
2689 for (c = 0; c < raidPtr->numCol; c++) {
2690 /* Skip dead disks. */
2691 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2692 continue;
2693 raidread_component_area(raidPtr->Disks[c].dev,
2694 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2695 RF_PARITYMAP_NBYTE,
2696 rf_parity_map_offset(raidPtr),
2697 rf_parity_map_size(raidPtr));
2698 if (first) {
2699 memcpy(map, &tmp, sizeof(*map));
2700 first = 0;
2701 } else {
2702 rf_paritymap_merge(map, &tmp);
2703 }
2704 }
2705 }
2706
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	/* Bump the mod counter so the freshly-written labels supersede
	 * any stale on-disk copies. */
	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	/* Now handle the in-use spares, which live past numCol. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare is standing in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2766
2767
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	/* New mod counter so these labels supersede older on-disk copies. */
	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			/* Only set the clean bit on the final update
			 * (e.g. shutdown) and only if parity is known good. */
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	/* Same treatment for any spares that are in active use. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare is standing in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2842
2843 void
2844 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2845 {
2846
2847 if (vp != NULL) {
2848 if (auto_configured == 1) {
2849 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2850 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2851 vput(vp);
2852
2853 } else {
2854 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2855 }
2856 }
2857 }
2858
2859
2860 void
2861 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2862 {
2863 int r,c;
2864 struct vnode *vp;
2865 int acd;
2866
2867
2868 /* We take this opportunity to close the vnodes like we should.. */
2869
2870 for (c = 0; c < raidPtr->numCol; c++) {
2871 vp = raidPtr->raid_cinfo[c].ci_vp;
2872 acd = raidPtr->Disks[c].auto_configured;
2873 rf_close_component(raidPtr, vp, acd);
2874 raidPtr->raid_cinfo[c].ci_vp = NULL;
2875 raidPtr->Disks[c].auto_configured = 0;
2876 }
2877
2878 for (r = 0; r < raidPtr->numSpare; r++) {
2879 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2880 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2881 rf_close_component(raidPtr, vp, acd);
2882 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2883 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2884 }
2885 }
2886
2887
2888 void
2889 rf_ReconThread(struct rf_recon_req *req)
2890 {
2891 int s;
2892 RF_Raid_t *raidPtr;
2893
2894 s = splbio();
2895 raidPtr = (RF_Raid_t *) req->raidPtr;
2896 raidPtr->recon_in_progress = 1;
2897
2898 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2899 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2900
2901 RF_Free(req, sizeof(*req));
2902
2903 raidPtr->recon_in_progress = 0;
2904 splx(s);
2905
2906 /* That's all... */
2907 kthread_exit(0); /* does not return */
2908 }
2909
2910 void
2911 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2912 {
2913 int retcode;
2914 int s;
2915
2916 raidPtr->parity_rewrite_stripes_done = 0;
2917 raidPtr->parity_rewrite_in_progress = 1;
2918 s = splbio();
2919 retcode = rf_RewriteParity(raidPtr);
2920 splx(s);
2921 if (retcode) {
2922 printf("raid%d: Error re-writing parity (%d)!\n",
2923 raidPtr->raidid, retcode);
2924 } else {
2925 /* set the clean bit! If we shutdown correctly,
2926 the clean bit on each component label will get
2927 set */
2928 raidPtr->parity_good = RF_RAID_CLEAN;
2929 }
2930 raidPtr->parity_rewrite_in_progress = 0;
2931
2932 /* Anyone waiting for us to stop? If so, inform them... */
2933 if (raidPtr->waitShutdown) {
2934 wakeup(&raidPtr->parity_rewrite_in_progress);
2935 }
2936
2937 /* That's all... */
2938 kthread_exit(0); /* does not return */
2939 }
2940
2941
2942 void
2943 rf_CopybackThread(RF_Raid_t *raidPtr)
2944 {
2945 int s;
2946
2947 raidPtr->copyback_in_progress = 1;
2948 s = splbio();
2949 rf_CopybackReconstructedData(raidPtr);
2950 splx(s);
2951 raidPtr->copyback_in_progress = 0;
2952
2953 /* That's all... */
2954 kthread_exit(0); /* does not return */
2955 }
2956
2957
2958 void
2959 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2960 {
2961 int s;
2962 RF_Raid_t *raidPtr;
2963
2964 s = splbio();
2965 raidPtr = req->raidPtr;
2966 raidPtr->recon_in_progress = 1;
2967 rf_ReconstructInPlace(raidPtr, req->col);
2968 RF_Free(req, sizeof(*req));
2969 raidPtr->recon_in_progress = 0;
2970 splx(s);
2971
2972 /* That's all... */
2973 kthread_exit(0); /* does not return */
2974 }
2975
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
		/* Out of memory: tear down the entire list built so far
		 * (each entry owns its clabel) and give up. */
oomem:
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			/* Prepend the new entry; it takes ownership of
			 * clabel and of the (still open) vnode. */
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		/* Not a RAID component: free the label and close/release
		 * the vnode the caller handed us. */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3033
/*
 * Scan every disk-class device in the system for RAIDframe components
 * (in wedges, in FS_RAID disklabel partitions, or on the raw device)
 * and return a list of candidates for auto-configuration.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	    dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* Wedges carry their type in dkw_ptype; only
			 * RAIDframe-typed wedges are candidates. */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component() takes ownership of vp. */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			/* rf_get_component() takes ownership of vp. */
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
			    label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3215
3216
3217 int
3218 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3219 {
3220
3221 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3222 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3223 ((clabel->clean == RF_RAID_CLEAN) ||
3224 (clabel->clean == RF_RAID_DIRTY)) &&
3225 clabel->row >=0 &&
3226 clabel->column >= 0 &&
3227 clabel->num_rows > 0 &&
3228 clabel->num_columns > 0 &&
3229 clabel->row < clabel->num_rows &&
3230 clabel->column < clabel->num_columns &&
3231 clabel->blockSize > 0 &&
3232 /*
3233 * numBlocksHi may contain garbage, but it is ok since
3234 * the type is unsigned. If it is really garbage,
3235 * rf_fix_old_label_size() will fix it.
3236 */
3237 rf_component_label_numblocks(clabel) > 0) {
3238 /*
3239 * label looks reasonable enough...
3240 * let's make sure it has no old garbage.
3241 */
3242 if (numsecs)
3243 rf_fix_old_label_size(clabel, numsecs);
3244 return(1);
3245 }
3246 return(0);
3247 }
3248
3249
3250 /*
3251 * For reasons yet unknown, some old component labels have garbage in
3252 * the newer numBlocksHi region, and this causes lossage. Since those
3253 * disks will also have numsecs set to less than 32 bits of sectors,
3254 * we can determine when this corruption has occured, and fix it.
3255 *
3256 * The exact same problem, with the same unknown reason, happens to
3257 * the partitionSizeHi member as well.
3258 */
3259 static void
3260 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3261 {
3262
3263 if (numsecs < ((uint64_t)1 << 32)) {
3264 if (clabel->numBlocksHi) {
3265 printf("WARNING: total sectors < 32 bits, yet "
3266 "numBlocksHi set\n"
3267 "WARNING: resetting numBlocksHi to zero.\n");
3268 clabel->numBlocksHi = 0;
3269 }
3270
3271 if (clabel->partitionSizeHi) {
3272 printf("WARNING: total sectors < 32 bits, yet "
3273 "partitionSizeHi set\n"
3274 "WARNING: resetting partitionSizeHi to zero.\n");
3275 clabel->partitionSizeHi = 0;
3276 }
3277 }
3278 }
3279
3280
3281 #ifdef DEBUG
/* Pretty-print a component label to the console (DEBUG kernels only). */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;

	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No");
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
3310 #endif
3311
/*
 * Partition the list of auto-discovered components into configuration
 * sets (one set per RAID array), matching via rf_does_it_fit().
 * Consumes ac_list: each component is re-linked into its set's list.
 */
RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t *ac_list)
{
	RF_AutoConfig_t *ac;
	RF_ConfigSet_t *config_sets;
	RF_ConfigSet_t *cset;
	RF_AutoConfig_t *ac_next;


	config_sets = NULL;

	/* Go through the AutoConfig list, and figure out which components
	   belong to what sets.  */
	ac = ac_list;
	while(ac!=NULL) {
		/* we're going to putz with ac->next, so save it here
		   for use at the end of the loop */
		ac_next = ac->next;

		if (config_sets == NULL) {
			/* will need at least this one... */
			config_sets = (RF_ConfigSet_t *)
				malloc(sizeof(RF_ConfigSet_t),
				       M_RAIDFRAME, M_NOWAIT);
			if (config_sets == NULL) {
				panic("rf_create_auto_sets: No memory!");
			}
			/* this one is easy :) */
			config_sets->ac = ac;
			config_sets->next = NULL;
			config_sets->rootable = 0;
			ac->next = NULL;
		} else {
			/* which set does this component fit into? */
			cset = config_sets;
			while(cset!=NULL) {
				if (rf_does_it_fit(cset, ac)) {
					/* looks like it matches... */
					/* prepend to this set's list */
					ac->next = cset->ac;
					cset->ac = ac;
					break;
				}
				cset = cset->next;
			}
			if (cset==NULL) {
				/* didn't find a match above... new set..*/
				cset = (RF_ConfigSet_t *)
					malloc(sizeof(RF_ConfigSet_t),
					       M_RAIDFRAME, M_NOWAIT);
				if (cset == NULL) {
					panic("rf_create_auto_sets: No memory!");
				}
				cset->ac = ac;
				ac->next = NULL;
				cset->next = config_sets;
				cset->rootable = 0;
				config_sets = cset;
			}
		}
		ac = ac_next;
	}


	return(config_sets);
}
3377
3378 static int
3379 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3380 {
3381 RF_ComponentLabel_t *clabel1, *clabel2;
3382
3383 /* If this one matches the *first* one in the set, that's good
3384 enough, since the other members of the set would have been
3385 through here too... */
3386 /* note that we are not checking partitionSize here..
3387
3388 Note that we are also not checking the mod_counters here.
3389 If everything else matches execpt the mod_counter, that's
3390 good enough for this test. We will deal with the mod_counters
3391 a little later in the autoconfiguration process.
3392
3393 (clabel1->mod_counter == clabel2->mod_counter) &&
3394
3395 The reason we don't check for this is that failed disks
3396 will have lower modification counts. If those disks are
3397 not added to the set they used to belong to, then they will
3398 form their own set, which may result in 2 different sets,
3399 for example, competing to be configured at raid0, and
3400 perhaps competing to be the root filesystem set. If the
3401 wrong ones get configured, or both attempt to become /,
3402 weird behaviour and or serious lossage will occur. Thus we
3403 need to bring them into the fold here, and kick them out at
3404 a later point.
3405
3406 */
3407
3408 clabel1 = cset->ac->clabel;
3409 clabel2 = ac->clabel;
3410 if ((clabel1->version == clabel2->version) &&
3411 (clabel1->serial_number == clabel2->serial_number) &&
3412 (clabel1->num_rows == clabel2->num_rows) &&
3413 (clabel1->num_columns == clabel2->num_columns) &&
3414 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3415 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3416 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3417 (clabel1->parityConfig == clabel2->parityConfig) &&
3418 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3419 (clabel1->blockSize == clabel2->blockSize) &&
3420 rf_component_label_numblocks(clabel1) ==
3421 rf_component_label_numblocks(clabel2) &&
3422 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3423 (clabel1->root_partition == clabel2->root_partition) &&
3424 (clabel1->last_unit == clabel2->last_unit) &&
3425 (clabel1->config_order == clabel2->config_order)) {
3426 /* if it get's here, it almost *has* to be a match */
3427 } else {
3428 /* it's not consistent with somebody in the set..
3429 punt */
3430 return(0);
3431 }
3432 /* all was fine.. it must fit... */
3433 return(1);
3434 }
3435
/*
 * Decide whether config set 'cset' has enough live components to be
 * configured.  Returns 1 if configuration can be attempted, 0 if too
 * many components are missing for the set's RAID level.
 *
 * Only components whose mod_counter equals the set's highest
 * mod_counter are counted as present; stale (failed) components with
 * lower counters are treated as missing.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	/* geometry and level are taken from the first component's label */
	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	   The set's authoritative counter is the maximum found on any
	   member. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			/* first component seen: seed the counter */
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* for each column, try to find a current (max mod_counter)
	   component occupying it */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate
	   at most one */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3538
3539 void
3540 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3541 RF_Raid_t *raidPtr)
3542 {
3543 RF_ComponentLabel_t *clabel;
3544 int i;
3545
3546 clabel = ac->clabel;
3547
3548 /* 1. Fill in the common stuff */
3549 config->numRow = clabel->num_rows = 1;
3550 config->numCol = clabel->num_columns;
3551 config->numSpare = 0; /* XXX should this be set here? */
3552 config->sectPerSU = clabel->sectPerSU;
3553 config->SUsPerPU = clabel->SUsPerPU;
3554 config->SUsPerRU = clabel->SUsPerRU;
3555 config->parityConfig = clabel->parityConfig;
3556 /* XXX... */
3557 strcpy(config->diskQueueType,"fifo");
3558 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3559 config->layoutSpecificSize = 0; /* XXX ?? */
3560
3561 while(ac!=NULL) {
3562 /* row/col values will be in range due to the checks
3563 in reasonable_label() */
3564 strcpy(config->devnames[0][ac->clabel->column],
3565 ac->devname);
3566 ac = ac->next;
3567 }
3568
3569 for(i=0;i<RF_MAXDBGV;i++) {
3570 config->debugVars[i][0] = 0;
3571 }
3572 }
3573
3574 int
3575 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3576 {
3577 RF_ComponentLabel_t *clabel;
3578 int column;
3579 int sparecol;
3580
3581 raidPtr->autoconfigure = new_value;
3582
3583 for(column=0; column<raidPtr->numCol; column++) {
3584 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3585 clabel = raidget_component_label(raidPtr, column);
3586 clabel->autoconfigure = new_value;
3587 raidflush_component_label(raidPtr, column);
3588 }
3589 }
3590 for(column = 0; column < raidPtr->numSpare ; column++) {
3591 sparecol = raidPtr->numCol + column;
3592 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3593 clabel = raidget_component_label(raidPtr, sparecol);
3594 clabel->autoconfigure = new_value;
3595 raidflush_component_label(raidPtr, sparecol);
3596 }
3597 }
3598 return(new_value);
3599 }
3600
3601 int
3602 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3603 {
3604 RF_ComponentLabel_t *clabel;
3605 int column;
3606 int sparecol;
3607
3608 raidPtr->root_partition = new_value;
3609 for(column=0; column<raidPtr->numCol; column++) {
3610 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3611 clabel = raidget_component_label(raidPtr, column);
3612 clabel->root_partition = new_value;
3613 raidflush_component_label(raidPtr, column);
3614 }
3615 }
3616 for(column = 0; column < raidPtr->numSpare ; column++) {
3617 sparecol = raidPtr->numCol + column;
3618 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3619 clabel = raidget_component_label(raidPtr, sparecol);
3620 clabel->root_partition = new_value;
3621 raidflush_component_label(raidPtr, sparecol);
3622 }
3623 }
3624 return(new_value);
3625 }
3626
3627 void
3628 rf_release_all_vps(RF_ConfigSet_t *cset)
3629 {
3630 RF_AutoConfig_t *ac;
3631
3632 ac = cset->ac;
3633 while(ac!=NULL) {
3634 /* Close the vp, and give it back */
3635 if (ac->vp) {
3636 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3637 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3638 vput(ac->vp);
3639 ac->vp = NULL;
3640 }
3641 ac = ac->next;
3642 }
3643 }
3644
3645
3646 void
3647 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3648 {
3649 RF_AutoConfig_t *ac;
3650 RF_AutoConfig_t *next_ac;
3651
3652 ac = cset->ac;
3653 while(ac!=NULL) {
3654 next_ac = ac->next;
3655 /* nuke the label */
3656 free(ac->clabel, M_RAIDFRAME);
3657 /* cleanup the config structure */
3658 free(ac, M_RAIDFRAME);
3659 /* "next.." */
3660 ac = next_ac;
3661 }
3662 /* and, finally, nuke the config set */
3663 free(cset, M_RAIDFRAME);
3664 }
3665
3666
/*
 * Populate a component label from the current state of the array.
 * The caller is responsible for writing the label out (e.g. via
 * raidflush_component_label()).
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	/* layout geometry */
	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	/* splits the 64-bit sector count into the label's lo/hi fields */
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3699
/*
 * Configure one auto-detected config set as a live RAID device.
 * Tries to reuse the unit number recorded in the component label
 * (last_unit); otherwise picks the highest-numbered free unit.
 * On success returns 0 with *unit set to the unit used; returns
 * non-zero on failure (early failures leave *unit at -1).
 */
int
rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	int retcode;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	retcode = 0;
	*unit = -1;

	/* 1. Create a config structure */

	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
				       M_RAIDFRAME,
				       M_NOWAIT);
	if (config==NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return(1);
	}

	memset(config, 0, sizeof(RF_Config_t));

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	if ((raidID < 0) || (raidID >= numraid)) {
		/* let's not wander off into lala land. */
		raidID = numraid - 1;
	}
	if (raidPtrs[raidID]->valid != 0) {

		/*
		   Nope... Go looking for an alternative...
		   Start high so we don't immediately use raid0 if that's
		   not taken.
		*/

		for(raidID = numraid - 1; raidID >= 0; raidID--) {
			if (raidPtrs[raidID]->valid == 0) {
				/* can use this one! */
				break;
			}
		}
	}

	if (raidID < 0) {
		/* punt... */
		printf("Unable to auto configure this set!\n");
		printf("(Out of RAID devs!)\n");
		free(config, M_RAIDFRAME);
		return(1);
	}

#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = raidPtrs[raidID];

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	retcode = rf_Configure(raidPtr, config, cset->ac);

	if (retcode == 0) {
		/* configuration succeeded: finish kernel-side setup */
		raidinit(raidPtrs[raidID]);

		rf_markalldirty(raidPtrs[raidID]);
		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
		if (cset->ac->clabel->root_partition==1) {
			/* everything configured just fine.  Make a note
			   that this set is eligible to be root. */
			cset->rootable = 1;
			/* XXX do this here? */
			raidPtrs[raidID]->root_partition = 1;
		}
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);

	*unit = raidID;
	return(retcode);
}
3800
3801 void
3802 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3803 {
3804 struct buf *bp;
3805
3806 bp = (struct buf *)desc->bp;
3807 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3808 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3809 }
3810
/*
 * Initialize a pool at IPL_BIO and pre-populate it: high-water mark
 * of xmax items, primed and low-watered at xmin items.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3820
3821 /*
3822 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3823 * if there is IO pending and if that IO could possibly be done for a
3824 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3825 * otherwise.
3826 *
3827 */
3828
3829 int
3830 rf_buf_queue_check(int raidid)
3831 {
3832 if ((bufq_peek(raid_softc[raidid].buf_queue) != NULL) &&
3833 raidPtrs[raidid]->openings > 0) {
3834 /* there is work to do */
3835 return 0;
3836 }
3837 /* default is nothing to do */
3838 return 1;
3839 }
3840
3841 int
3842 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3843 {
3844 uint64_t numsecs;
3845 unsigned secsize;
3846 int error;
3847
3848 error = getdisksize(vp, &numsecs, &secsize);
3849 if (error == 0) {
3850 diskPtr->blockSize = secsize;
3851 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3852 diskPtr->partitionSize = numsecs;
3853 return 0;
3854 }
3855 return error;
3856 }
3857
/*
 * Autoconf match function: raid pseudo-devices always match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3863
/*
 * Autoconf attach function: nothing to do at attach time; the device
 * is set up elsewhere when a RAID set is actually configured.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3869
3870
3871 static int
3872 raid_detach(device_t self, int flags)
3873 {
3874 int error;
3875 struct raid_softc *rs = &raid_softc[device_unit(self)];
3876
3877 if ((error = raidlock(rs)) != 0)
3878 return (error);
3879
3880 error = raid_detach_unlocked(rs);
3881
3882 raidunlock(rs);
3883
3884 return error;
3885 }
3886
/*
 * Publish the array's disk geometry via proplib device properties,
 * replacing (and releasing) any previously attached disk-info
 * dictionary.  The cylinder/track numbers are synthetic: one "track"
 * is a data stripe and each cylinder holds 4*numCol tracks.
 */
static void
rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	prop_dictionary_t disk_info, odisk_info, geom;
	disk_info = prop_dictionary_create();
	geom = prop_dictionary_create();
	prop_dictionary_set_uint64(geom, "sectors-per-unit",
				   raidPtr->totalSectors);
	prop_dictionary_set_uint32(geom, "sector-size",
				   raidPtr->bytesPerSector);

	prop_dictionary_set_uint16(geom, "sectors-per-track",
				   raidPtr->Layout.dataSectorsPerStripe);
	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
				   4 * raidPtr->numCol);

	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
	    raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
	    (4 * raidPtr->numCol)));

	/* disk_info takes its own reference on geom; drop ours */
	prop_dictionary_set(disk_info, "geometry", geom);
	prop_object_release(geom);
	prop_dictionary_set(device_properties(rs->sc_dev),
			    "disk-info", disk_info);
	/* swap in the new dictionary and release the old one, if any */
	odisk_info = rs->sc_dkdev.dk_info;
	rs->sc_dkdev.dk_info = disk_info;
	if (odisk_info)
		prop_object_release(odisk_info);
}
3916
3917 /*
3918 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3919 * We end up returning whatever error was returned by the first cache flush
3920 * that fails.
3921 */
3922
3923 int
3924 rf_sync_component_caches(RF_Raid_t *raidPtr)
3925 {
3926 int c, sparecol;
3927 int e,error;
3928 int force = 1;
3929
3930 error = 0;
3931 for (c = 0; c < raidPtr->numCol; c++) {
3932 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3933 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3934 &force, FWRITE, NOCRED);
3935 if (e) {
3936 if (e != ENODEV)
3937 printf("raid%d: cache flush to component %s failed.\n",
3938 raidPtr->raidid, raidPtr->Disks[c].devname);
3939 if (error == 0) {
3940 error = e;
3941 }
3942 }
3943 }
3944 }
3945
3946 for( c = 0; c < raidPtr->numSpare ; c++) {
3947 sparecol = raidPtr->numCol + c;
3948 /* Need to ensure that the reconstruct actually completed! */
3949 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3950 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3951 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3952 if (e) {
3953 if (e != ENODEV)
3954 printf("raid%d: cache flush to component %s failed.\n",
3955 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3956 if (error == 0) {
3957 error = e;
3958 }
3959 }
3960 }
3961 }
3962 return error;
3963 }
3964