rf_netbsdkintf.c revision 1.250.4.13.2.1 1 /* $NetBSD: rf_netbsdkintf.c,v 1.250.4.13.2.1 2014/11/20 12:25:10 sborrill Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 /*
32 * Copyright (c) 1990, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * This code is derived from software contributed to Berkeley by
36 * the Systems Programming Group of the University of Utah Computer
37 * Science Department.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * from: Utah $Hdr: cd.c 1.6 90/11/28$
64 *
65 * @(#)cd.c 8.2 (Berkeley) 11/16/93
66 */
67
68 /*
69 * Copyright (c) 1988 University of Utah.
70 *
71 * This code is derived from software contributed to Berkeley by
72 * the Systems Programming Group of the University of Utah Computer
73 * Science Department.
74 *
75 * Redistribution and use in source and binary forms, with or without
76 * modification, are permitted provided that the following conditions
77 * are met:
78 * 1. Redistributions of source code must retain the above copyright
79 * notice, this list of conditions and the following disclaimer.
80 * 2. Redistributions in binary form must reproduce the above copyright
81 * notice, this list of conditions and the following disclaimer in the
82 * documentation and/or other materials provided with the distribution.
83 * 3. All advertising materials mentioning features or use of this software
84 * must display the following acknowledgement:
85 * This product includes software developed by the University of
86 * California, Berkeley and its contributors.
87 * 4. Neither the name of the University nor the names of its contributors
88 * may be used to endorse or promote products derived from this software
89 * without specific prior written permission.
90 *
91 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
92 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
93 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
94 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
95 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
96 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
97 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
98 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
99 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
100 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
101 * SUCH DAMAGE.
102 *
103 * from: Utah $Hdr: cd.c 1.6 90/11/28$
104 *
105 * @(#)cd.c 8.2 (Berkeley) 11/16/93
106 */
107
108 /*
109 * Copyright (c) 1995 Carnegie-Mellon University.
110 * All rights reserved.
111 *
112 * Authors: Mark Holland, Jim Zelenka
113 *
114 * Permission to use, copy, modify and distribute this software and
115 * its documentation is hereby granted, provided that both the copyright
116 * notice and this permission notice appear in all copies of the
117 * software, derivative works or modified versions, and any portions
118 * thereof, and that both notices appear in supporting documentation.
119 *
120 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
121 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
122 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
123 *
124 * Carnegie Mellon requests users of this software to return to
125 *
126 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
127 * School of Computer Science
128 * Carnegie Mellon University
129 * Pittsburgh PA 15213-3890
130 *
131 * any improvements or extensions that they make and grant Carnegie the
132 * rights to redistribute these changes.
133 */
134
135 /***********************************************************
136 *
137 * rf_kintf.c -- the kernel interface routines for RAIDframe
138 *
139 ***********************************************************/
140
141 #include <sys/cdefs.h>
142 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.250.4.13.2.1 2014/11/20 12:25:10 sborrill Exp $");
143
144 #include <sys/param.h>
145 #include <sys/errno.h>
146 #include <sys/pool.h>
147 #include <sys/proc.h>
148 #include <sys/queue.h>
149 #include <sys/disk.h>
150 #include <sys/device.h>
151 #include <sys/stat.h>
152 #include <sys/ioctl.h>
153 #include <sys/fcntl.h>
154 #include <sys/systm.h>
155 #include <sys/vnode.h>
156 #include <sys/disklabel.h>
157 #include <sys/conf.h>
158 #include <sys/buf.h>
159 #include <sys/bufq.h>
160 #include <sys/user.h>
161 #include <sys/reboot.h>
162 #include <sys/kauth.h>
163
164 #include <prop/proplib.h>
165
166 #include <dev/raidframe/raidframevar.h>
167 #include <dev/raidframe/raidframeio.h>
168 #include <dev/raidframe/rf_paritymap.h>
169 #include "raid.h"
170 #include "opt_raid_autoconfig.h"
171 #include "rf_raid.h"
172 #include "rf_copyback.h"
173 #include "rf_dag.h"
174 #include "rf_dagflags.h"
175 #include "rf_desc.h"
176 #include "rf_diskqueue.h"
177 #include "rf_etimer.h"
178 #include "rf_general.h"
179 #include "rf_kintf.h"
180 #include "rf_options.h"
181 #include "rf_driver.h"
182 #include "rf_parityscan.h"
183 #include "rf_threadstuff.h"
184
185 #ifdef DEBUG
186 int rf_kdebug_level = 0;
187 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
188 #else /* DEBUG */
189 #define db1_printf(a) { }
190 #endif /* DEBUG */
191
192 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
193
194 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
195 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
196
197 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
198 * spare table */
199 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
200 * installation process */
201 #endif
202
203 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
204
205 /* prototypes */
206 static void KernelWakeupFunc(struct buf *);
207 static void InitBP(struct buf *, struct vnode *, unsigned,
208 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
209 void *, int, struct proc *);
210 static void raidinit(RF_Raid_t *);
211
212 void raidattach(int);
213 static int raid_match(struct device *, struct cfdata *, void *);
214 static void raid_attach(struct device *, struct device *, void *);
215 static int raid_detach(struct device *, int);
216
217 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
218 daddr_t, daddr_t);
219 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
220 daddr_t, daddr_t, int);
221
222 static int raidwrite_component_label(unsigned,
223 dev_t, struct vnode *, RF_ComponentLabel_t *);
224 static int raidread_component_label(unsigned,
225 dev_t, struct vnode *, RF_ComponentLabel_t *);
226
227
228 dev_type_open(raidopen);
229 dev_type_close(raidclose);
230 dev_type_read(raidread);
231 dev_type_write(raidwrite);
232 dev_type_ioctl(raidioctl);
233 dev_type_strategy(raidstrategy);
234 dev_type_dump(raiddump);
235 dev_type_size(raidsize);
236
237 const struct bdevsw raid_bdevsw = {
238 raidopen, raidclose, raidstrategy, raidioctl,
239 raiddump, raidsize, D_DISK
240 };
241
242 const struct cdevsw raid_cdevsw = {
243 raidopen, raidclose, raidread, raidwrite, raidioctl,
244 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
245 };
246
247 static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
248
249 /* XXX Not sure if the following should be replacing the raidPtrs above,
250 or if it should be used in conjunction with that...
251 */
252
/*
 * Per-unit driver state ("softc").  One entry per configured RAID
 * unit lives in the global raid_softc[] array, indexed by unit number.
 */
struct raid_softc {
	struct device *sc_dev;	/* autoconf device handle */
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
268
269 #define raidunit(x) DISKUNIT(x)
270 int numraid = 0;
271
272 extern struct cfdriver raid_cd;
273 CFATTACH_DECL_NEW(raid, sizeof(struct raid_softc),
274 raid_match, raid_attach, raid_detach, NULL);
275
276 /*
277 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
278 * Be aware that large numbers can allow the driver to consume a lot of
279 * kernel memory, especially on writes, and in degraded mode reads.
280 *
281 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
282 * a single 64K write will typically require 64K for the old data,
283 * 64K for the old parity, and 64K for the new parity, for a total
284 * of 192K (if the parity buffer is not re-used immediately).
285 * Even it if is used immediately, that's still 128K, which when multiplied
286 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
287 *
288 * Now in degraded mode, for example, a 64K read on the above setup may
289 * require data reconstruction, which will require *all* of the 4 remaining
290 * disks to participate -- 4 * 32K/disk == 128K again.
291 */
292
293 #ifndef RAIDOUTSTANDING
294 #define RAIDOUTSTANDING 6
295 #endif
296
297 #define RAIDLABELDEV(dev) \
298 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
299
300 /* declared here, and made public, for the benefit of KVM stuff.. */
301 struct raid_softc *raid_softc;
302
303 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
304 struct disklabel *);
305 static void raidgetdisklabel(dev_t);
306 static void raidmakedisklabel(struct raid_softc *);
307
308 static int raidlock(struct raid_softc *);
309 static void raidunlock(struct raid_softc *);
310
311 static void rf_markalldirty(RF_Raid_t *);
312 static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
313
314 void rf_ReconThread(struct rf_recon_req *);
315 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
316 void rf_CopybackThread(RF_Raid_t *raidPtr);
317 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
318 int rf_autoconfig(struct device *self);
319 void rf_buildroothack(RF_ConfigSet_t *);
320
321 RF_AutoConfig_t *rf_find_raid_components(void);
322 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
323 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
324 static int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
325 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
326 int rf_set_autoconfig(RF_Raid_t *, int);
327 int rf_set_rootpartition(RF_Raid_t *, int);
328 void rf_release_all_vps(RF_ConfigSet_t *);
329 void rf_cleanup_config_set(RF_ConfigSet_t *);
330 int rf_have_enough_components(RF_ConfigSet_t *);
331 int rf_auto_config_set(RF_ConfigSet_t *, int *);
332 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
333
334 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
335 allow autoconfig to take place.
336 Note that this is overridden by having
337 RAID_AUTOCONFIG as an option in the
338 kernel config file. */
339
340 struct RF_Pools_s rf_pools;
341
/*
 * raidattach -- pseudo-device attach routine, called once at boot with
 * the number of RAID units ("num") configured into the kernel.
 *
 * Allocates the global raidPtrs[] descriptor array and the raid_softc[]
 * array, boots the RAIDframe engine, attaches the autoconf glue, and
 * registers a finalizer so that auto-configurable RAID sets are only
 * assembled after all real hardware devices have been found.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

	aprint_debug("raidattach: Asked for %d units\n", num);

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_mutex_init(&rf_sparet_wait_mutex);

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		aprint_normal("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		aprint_error("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
			/* Shrink the visible unit count so later code
			   never touches the unallocated tail of raidPtrs[]. */
			numraid = raidID;
			return;
		}
	}

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
421
422 int
423 rf_autoconfig(struct device *self)
424 {
425 RF_AutoConfig_t *ac_list;
426 RF_ConfigSet_t *config_sets;
427
428 if (raidautoconfig == 0)
429 return (0);
430
431 /* XXX This code can only be run once. */
432 raidautoconfig = 0;
433
434 /* 1. locate all RAID components on the system */
435 aprint_debug("Searching for RAID components...\n");
436 ac_list = rf_find_raid_components();
437
438 /* 2. Sort them into their respective sets. */
439 config_sets = rf_create_auto_sets(ac_list);
440
441 /*
442 * 3. Evaluate each set andconfigure the valid ones.
443 * This gets done in rf_buildroothack().
444 */
445 rf_buildroothack(config_sets);
446
447 return 1;
448 }
449
/*
 * rf_buildroothack -- second stage of RAID autoconfiguration.
 *
 * Walks the list of component sets produced by rf_create_auto_sets(),
 * configures each set that is complete and marked for autoconfig, and
 * releases the resources of the rest.  Afterwards, if exactly one
 * configured set is root-able, nominate it as booted_device; if
 * several qualify, try to match them against the device the machine
 * actually booted from, and fall back to RB_ASKNAME when the choice
 * remains ambiguous.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int col;
	int num_root;
	char *devname;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while(cset != NULL ) {
		/* Grab the link now: rf_cleanup_config_set() below
		   frees cset. */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure==1) {
			retcode = rf_auto_config_set(cset,&raidID);
			if (!retcode) {
				aprint_debug("raid%d: configured ok\n", raidID);
				if (cset->rootable) {
					rootID = raidID;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	if (num_root == 1) {
		booted_device = raid_softc[rootID].sc_dev;
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */

		if (booted_device == NULL)
			cpu_rootconf();
		if (booted_device == NULL)
			return;

		/* Recount, this time keeping only the root-able sets
		   that actually contain the component we booted from. */
		num_root = 0;
		for (raidID = 0; raidID < numraid; raidID++) {
			if (raidPtrs[raidID]->valid == 0)
				continue;

			if (raidPtrs[raidID]->root_partition == 0)
				continue;

			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
				devname = raidPtrs[raidID]->Disks[col].devname;
				/* Skip the "/dev/" prefix before comparing
				   against the autoconf device name. */
				devname += sizeof("/dev/") - 1;
				if (strncmp(devname, device_xname(booted_device),
					    strlen(device_xname(booted_device))) != 0)
					continue;
				aprint_debug("raid%d includes boot device %s\n",
				       raidID, devname);
				num_root++;
				rootID = raidID;
			}
		}

		if (num_root == 1) {
			booted_device = raid_softc[rootID].sc_dev;
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
544
545
546 int
547 raidsize(dev_t dev)
548 {
549 struct raid_softc *rs;
550 struct disklabel *lp;
551 int part, unit, omask, size;
552
553 unit = raidunit(dev);
554 if (unit >= numraid)
555 return (-1);
556 rs = &raid_softc[unit];
557
558 if ((rs->sc_flags & RAIDF_INITED) == 0)
559 return (-1);
560
561 part = DISKPART(dev);
562 omask = rs->sc_dkdev.dk_openmask & (1 << part);
563 lp = rs->sc_dkdev.dk_label;
564
565 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
566 return (-1);
567
568 if (lp->d_partitions[part].p_fstype != FS_SWAP)
569 size = -1;
570 else
571 size = lp->d_partitions[part].p_size *
572 (lp->d_secsize / DEV_BSIZE);
573
574 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
575 return (-1);
576
577 return (size);
578
579 }
580
/*
 * raiddump -- kernel crash-dump entry point.
 *
 * Only RAID 1 sets (one data + one parity column) are supported: the
 * dump is written directly to a single live component (or a used
 * spare standing in for one), bypassing the normal RAIDframe I/O
 * path, which cannot be trusted during a crash.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);

	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one? */
			/* Find which column this spare replaces. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
/*
 * raidopen -- open entry point (block and character).
 *
 * Takes the per-unit lock, refuses non-raw-partition opens while
 * wedges exist, (re)reads the disklabel on the first open of an
 * initialized unit, validates the partition, records the open in the
 * char/block open masks, and marks all components dirty on the first
 * open so that an unclean shutdown can be detected later.  The
 * success path falls through the "bad:" label, which only drops the
 * per-unit lock.
 */
/* ARGSUSED */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
/*
 * raidclose -- close entry point (block and character).
 *
 * Clears this partition's bit from the appropriate open mask.  On the
 * last close of an initialized unit the component labels get their
 * final "clean" update, and if the system is shutting down the RAID
 * set is shut down and its autoconf device and disk structures are
 * torn down as well.
 *
 * NOTE(review): errors from rf_Shutdown()/config_detach() are
 * collected in `error' but the function always returns 0 -- callers
 * of close cannot usefully act on them anyway; confirm intentional.
 */
/* ARGSUSED */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct cfdata *cf;
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
						 RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			error = rf_Shutdown(raidPtrs[unit]);

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* detach the device */

			cf = device_cfdata(rs->sc_dev);
			error = config_detach(rs->sc_dev, DETACH_QUIET);
			free(cf, M_RAIDFRAME);

			/* Detach the disk. */
			dkwedge_delall(&rs->sc_dkdev);
			disk_detach(&rs->sc_dkdev);
			disk_destroy(&rs->sc_dkdev);
		}
	}

	raidunlock(rs);
	return (0);

}
864
865 void
866 raidstrategy(struct buf *bp)
867 {
868 int s;
869
870 unsigned int raidID = raidunit(bp->b_dev);
871 RF_Raid_t *raidPtr;
872 struct raid_softc *rs = &raid_softc[raidID];
873 int wlabel;
874
875 if ((rs->sc_flags & RAIDF_INITED) ==0) {
876 bp->b_error = ENXIO;
877 goto done;
878 }
879 if (raidID >= numraid || !raidPtrs[raidID]) {
880 bp->b_error = ENODEV;
881 goto done;
882 }
883 raidPtr = raidPtrs[raidID];
884 if (!raidPtr->valid) {
885 bp->b_error = ENODEV;
886 goto done;
887 }
888 if (bp->b_bcount == 0) {
889 db1_printf(("b_bcount is zero..\n"));
890 goto done;
891 }
892
893 /*
894 * Do bounds checking and adjust transfer. If there's an
895 * error, the bounds check will flag that for us.
896 */
897
898 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
899 if (DISKPART(bp->b_dev) == RAW_PART) {
900 uint64_t size; /* device size in DEV_BSIZE unit */
901
902 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
903 size = raidPtr->totalSectors <<
904 (raidPtr->logBytesPerSector - DEV_BSHIFT);
905 } else {
906 size = raidPtr->totalSectors >>
907 (DEV_BSHIFT - raidPtr->logBytesPerSector);
908 }
909 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
910 goto done;
911 }
912 } else {
913 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
914 db1_printf(("Bounds check failed!!:%d %d\n",
915 (int) bp->b_blkno, (int) wlabel));
916 goto done;
917 }
918 }
919 s = splbio();
920
921 bp->b_resid = 0;
922
923 /* stuff it onto our queue */
924 BUFQ_PUT(rs->buf_queue, bp);
925
926 /* scheduled the IO to happen at the next convenient time */
927 wakeup(&(raidPtrs[raidID]->iodone));
928
929 splx(s);
930 return;
931
932 done:
933 bp->b_resid = bp->b_bcount;
934 biodone(bp);
935 }
936 /* ARGSUSED */
937 int
938 raidread(dev_t dev, struct uio *uio, int flags)
939 {
940 int unit = raidunit(dev);
941 struct raid_softc *rs;
942
943 if (unit >= numraid)
944 return (ENXIO);
945 rs = &raid_softc[unit];
946
947 if ((rs->sc_flags & RAIDF_INITED) == 0)
948 return (ENXIO);
949
950 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
951
952 }
953 /* ARGSUSED */
954 int
955 raidwrite(dev_t dev, struct uio *uio, int flags)
956 {
957 int unit = raidunit(dev);
958 struct raid_softc *rs;
959
960 if (unit >= numraid)
961 return (ENXIO);
962 rs = &raid_softc[unit];
963
964 if ((rs->sc_flags & RAIDF_INITED) == 0)
965 return (ENXIO);
966
967 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
968
969 }
970
971 int
972 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
973 {
974 int unit = raidunit(dev);
975 int error = 0;
976 int part, pmask, s;
977 struct cfdata *cf;
978 struct raid_softc *rs;
979 RF_Config_t *k_cfg, *u_cfg;
980 RF_Raid_t *raidPtr;
981 RF_RaidDisk_t *diskPtr;
982 RF_AccTotals_t *totals;
983 RF_DeviceConfig_t *d_cfg, **ucfgp;
984 u_char *specific_buf;
985 int retcode = 0;
986 int column;
987 /* int raidid; */
988 struct rf_recon_req *rrcopy, *rr;
989 RF_ComponentLabel_t *clabel;
990 RF_ComponentLabel_t *ci_label;
991 RF_ComponentLabel_t **clabel_ptr;
992 RF_SingleComponent_t *sparePtr,*componentPtr;
993 RF_SingleComponent_t component;
994 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
995 int i, j, d;
996 #ifdef __HAVE_OLD_DISKLABEL
997 struct disklabel newlabel;
998 #endif
999 struct dkwedge_info *dkw;
1000
1001 if (unit >= numraid)
1002 return (ENXIO);
1003 rs = &raid_softc[unit];
1004 raidPtr = raidPtrs[unit];
1005
1006 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1007 (int) DISKPART(dev), (int) unit, cmd));
1008
1009 /* Must be open for writes for these commands... */
1010 switch (cmd) {
1011 #ifdef DIOCGSECTORSIZE
1012 case DIOCGSECTORSIZE:
1013 *(u_int *)data = raidPtr->bytesPerSector;
1014 return 0;
1015 case DIOCGMEDIASIZE:
1016 *(off_t *)data =
1017 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1018 return 0;
1019 #endif
1020 case DIOCSDINFO:
1021 case DIOCWDINFO:
1022 #ifdef __HAVE_OLD_DISKLABEL
1023 case ODIOCWDINFO:
1024 case ODIOCSDINFO:
1025 #endif
1026 case DIOCWLABEL:
1027 case DIOCAWEDGE:
1028 case DIOCDWEDGE:
1029 case DIOCSSTRATEGY:
1030 if ((flag & FWRITE) == 0)
1031 return (EBADF);
1032 }
1033
1034 /* Must be initialized for these... */
1035 switch (cmd) {
1036 case DIOCGDINFO:
1037 case DIOCSDINFO:
1038 case DIOCWDINFO:
1039 #ifdef __HAVE_OLD_DISKLABEL
1040 case ODIOCGDINFO:
1041 case ODIOCWDINFO:
1042 case ODIOCSDINFO:
1043 case ODIOCGDEFLABEL:
1044 #endif
1045 case DIOCGPART:
1046 case DIOCWLABEL:
1047 case DIOCGDEFLABEL:
1048 case DIOCAWEDGE:
1049 case DIOCDWEDGE:
1050 case DIOCLWEDGES:
1051 case DIOCCACHESYNC:
1052 case RAIDFRAME_SHUTDOWN:
1053 case RAIDFRAME_REWRITEPARITY:
1054 case RAIDFRAME_GET_INFO:
1055 case RAIDFRAME_RESET_ACCTOTALS:
1056 case RAIDFRAME_GET_ACCTOTALS:
1057 case RAIDFRAME_KEEP_ACCTOTALS:
1058 case RAIDFRAME_GET_SIZE:
1059 case RAIDFRAME_FAIL_DISK:
1060 case RAIDFRAME_COPYBACK:
1061 case RAIDFRAME_CHECK_RECON_STATUS:
1062 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1063 case RAIDFRAME_GET_COMPONENT_LABEL:
1064 case RAIDFRAME_SET_COMPONENT_LABEL:
1065 case RAIDFRAME_ADD_HOT_SPARE:
1066 case RAIDFRAME_REMOVE_HOT_SPARE:
1067 case RAIDFRAME_INIT_LABELS:
1068 case RAIDFRAME_REBUILD_IN_PLACE:
1069 case RAIDFRAME_CHECK_PARITY:
1070 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1071 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1072 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1073 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1074 case RAIDFRAME_SET_AUTOCONFIG:
1075 case RAIDFRAME_SET_ROOT:
1076 case RAIDFRAME_DELETE_COMPONENT:
1077 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1078 case RAIDFRAME_PARITYMAP_STATUS:
1079 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1080 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1081 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1082 case DIOCGSTRATEGY:
1083 case DIOCSSTRATEGY:
1084 if ((rs->sc_flags & RAIDF_INITED) == 0)
1085 return (ENXIO);
1086 }
1087
1088 switch (cmd) {
1089
1090 /* configure the system */
1091 case RAIDFRAME_CONFIGURE:
1092
1093 if (raidPtr->valid) {
1094 /* There is a valid RAID set running on this unit! */
1095 printf("raid%d: Device already configured!\n",unit);
1096 return(EINVAL);
1097 }
1098
1099 /* copy-in the configuration information */
1100 /* data points to a pointer to the configuration structure */
1101
1102 u_cfg = *((RF_Config_t **) data);
1103 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1104 if (k_cfg == NULL) {
1105 return (ENOMEM);
1106 }
1107 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1108 if (retcode) {
1109 RF_Free(k_cfg, sizeof(RF_Config_t));
1110 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1111 retcode));
1112 return (retcode);
1113 }
1114 /* allocate a buffer for the layout-specific data, and copy it
1115 * in */
1116 if (k_cfg->layoutSpecificSize) {
1117 if (k_cfg->layoutSpecificSize > 10000) {
1118 /* sanity check */
1119 RF_Free(k_cfg, sizeof(RF_Config_t));
1120 return (EINVAL);
1121 }
1122 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1123 (u_char *));
1124 if (specific_buf == NULL) {
1125 RF_Free(k_cfg, sizeof(RF_Config_t));
1126 return (ENOMEM);
1127 }
1128 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1129 k_cfg->layoutSpecificSize);
1130 if (retcode) {
1131 RF_Free(k_cfg, sizeof(RF_Config_t));
1132 RF_Free(specific_buf,
1133 k_cfg->layoutSpecificSize);
1134 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1135 retcode));
1136 return (retcode);
1137 }
1138 } else
1139 specific_buf = NULL;
1140 k_cfg->layoutSpecific = specific_buf;
1141
1142 /* should do some kind of sanity check on the configuration.
1143 * Store the sum of all the bytes in the last byte? */
1144
1145 /* configure the system */
1146
1147 /*
1148 * Clear the entire RAID descriptor, just to make sure
1149 * there is no stale data left in the case of a
1150 * reconfiguration
1151 */
1152 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
1153 raidPtr->raidid = unit;
1154
1155 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1156
1157 if (retcode == 0) {
1158
1159 /* allow this many simultaneous IO's to
1160 this RAID device */
1161 raidPtr->openings = RAIDOUTSTANDING;
1162
1163 raidinit(raidPtr);
1164 rf_markalldirty(raidPtr);
1165 }
1166 /* free the buffers. No return code here. */
1167 if (k_cfg->layoutSpecificSize) {
1168 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1169 }
1170 RF_Free(k_cfg, sizeof(RF_Config_t));
1171
1172 return (retcode);
1173
1174 /* shutdown the system */
1175 case RAIDFRAME_SHUTDOWN:
1176
1177 if ((error = raidlock(rs)) != 0)
1178 return (error);
1179
1180 /*
1181 * If somebody has a partition mounted, we shouldn't
1182 * shutdown.
1183 */
1184
1185 part = DISKPART(dev);
1186 pmask = (1 << part);
1187 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1188 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1189 (rs->sc_dkdev.dk_copenmask & pmask))) {
1190 raidunlock(rs);
1191 return (EBUSY);
1192 }
1193
1194 retcode = rf_Shutdown(raidPtr);
1195
1196 /* It's no longer initialized... */
1197 rs->sc_flags &= ~RAIDF_INITED;
1198
1199 /* free the pseudo device attach bits */
1200
1201 cf = device_cfdata(rs->sc_dev);
1202 /* XXX this causes us to not return any errors
1203 from the above call to rf_Shutdown() */
1204 retcode = config_detach(rs->sc_dev, DETACH_QUIET);
1205 free(cf, M_RAIDFRAME);
1206
1207 /* Detach the disk. */
1208 dkwedge_delall(&rs->sc_dkdev);
1209 disk_detach(&rs->sc_dkdev);
1210 disk_destroy(&rs->sc_dkdev);
1211
1212 raidunlock(rs);
1213
1214 return (retcode);
1215 case RAIDFRAME_GET_COMPONENT_LABEL:
1216 clabel_ptr = (RF_ComponentLabel_t **) data;
1217 /* need to read the component label for the disk indicated
1218 by row,column in clabel */
1219
1220 /*
1221 * Perhaps there should be an option to skip the in-core
1222 * copy and hit the disk, as with disklabel(8).
1223 */
1224 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1225
1226 retcode = copyin( *clabel_ptr, clabel,
1227 sizeof(RF_ComponentLabel_t));
1228
1229 if (retcode) {
1230 return(retcode);
1231 }
1232
1233 clabel->row = 0; /* Don't allow looking at anything else.*/
1234
1235 column = clabel->column;
1236
1237 if ((column < 0) || (column >= raidPtr->numCol +
1238 raidPtr->numSpare)) {
1239 return(EINVAL);
1240 }
1241
1242 RF_Free(clabel, sizeof(*clabel));
1243
1244 clabel = raidget_component_label(raidPtr, column);
1245
1246 if (retcode == 0) {
1247 retcode = copyout(clabel, *clabel_ptr,
1248 sizeof(RF_ComponentLabel_t));
1249 }
1250 return (retcode);
1251
1252 #if 0
1253 case RAIDFRAME_SET_COMPONENT_LABEL:
1254 clabel = (RF_ComponentLabel_t *) data;
1255
1256 /* XXX check the label for valid stuff... */
1257 /* Note that some things *should not* get modified --
1258 the user should be re-initing the labels instead of
1259 trying to patch things.
1260 */
1261
1262 raidid = raidPtr->raidid;
1263 #ifdef DEBUG
1264 printf("raid%d: Got component label:\n", raidid);
1265 printf("raid%d: Version: %d\n", raidid, clabel->version);
1266 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1267 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1268 printf("raid%d: Column: %d\n", raidid, clabel->column);
1269 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1270 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1271 printf("raid%d: Status: %d\n", raidid, clabel->status);
1272 #endif
1273 clabel->row = 0;
1274 column = clabel->column;
1275
1276 if ((column < 0) || (column >= raidPtr->numCol)) {
1277 return(EINVAL);
1278 }
1279
1280 /* XXX this isn't allowed to do anything for now :-) */
1281
1282 /* XXX and before it is, we need to fill in the rest
1283 of the fields!?!?!?! */
1284 memcpy(raidget_component_label(raidPtr, column),
1285 clabel, sizeof(*clabel));
1286 raidflush_component_label(raidPtr, column);
1287 return (0);
1288 #endif
1289
1290 case RAIDFRAME_INIT_LABELS:
1291 clabel = (RF_ComponentLabel_t *) data;
1292 /*
1293 we only want the serial number from
1294 the above. We get all the rest of the information
1295 from the config that was used to create this RAID
1296 set.
1297 */
1298
1299 raidPtr->serial_number = clabel->serial_number;
1300
1301 for(column=0;column<raidPtr->numCol;column++) {
1302 diskPtr = &raidPtr->Disks[column];
1303 if (!RF_DEAD_DISK(diskPtr->status)) {
1304 ci_label = raidget_component_label(raidPtr,
1305 column);
1306 /* Zeroing this is important. */
1307 memset(ci_label, 0, sizeof(*ci_label));
1308 raid_init_component_label(raidPtr, ci_label);
1309 ci_label->serial_number =
1310 raidPtr->serial_number;
1311 ci_label->row = 0; /* we dont' pretend to support more */
1312 rf_component_label_set_partitionsize(ci_label,
1313 diskPtr->partitionSize);
1314 ci_label->column = column;
1315 raidflush_component_label(raidPtr, column);
1316 }
1317 /* XXXjld what about the spares? */
1318 }
1319
1320 return (retcode);
1321 case RAIDFRAME_SET_AUTOCONFIG:
1322 d = rf_set_autoconfig(raidPtr, *(int *) data);
1323 printf("raid%d: New autoconfig value is: %d\n",
1324 raidPtr->raidid, d);
1325 *(int *) data = d;
1326 return (retcode);
1327
1328 case RAIDFRAME_SET_ROOT:
1329 d = rf_set_rootpartition(raidPtr, *(int *) data);
1330 printf("raid%d: New rootpartition value is: %d\n",
1331 raidPtr->raidid, d);
1332 *(int *) data = d;
1333 return (retcode);
1334
1335 /* initialize all parity */
1336 case RAIDFRAME_REWRITEPARITY:
1337
1338 if (raidPtr->Layout.map->faultsTolerated == 0) {
1339 /* Parity for RAID 0 is trivially correct */
1340 raidPtr->parity_good = RF_RAID_CLEAN;
1341 return(0);
1342 }
1343
1344 if (raidPtr->parity_rewrite_in_progress == 1) {
1345 /* Re-write is already in progress! */
1346 return(EINVAL);
1347 }
1348
1349 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1350 rf_RewriteParityThread,
1351 raidPtr,"raid_parity");
1352 return (retcode);
1353
1354
1355 case RAIDFRAME_ADD_HOT_SPARE:
1356 sparePtr = (RF_SingleComponent_t *) data;
1357 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1358 retcode = rf_add_hot_spare(raidPtr, &component);
1359 return(retcode);
1360
1361 case RAIDFRAME_REMOVE_HOT_SPARE:
1362 return(retcode);
1363
1364 case RAIDFRAME_DELETE_COMPONENT:
1365 componentPtr = (RF_SingleComponent_t *)data;
1366 memcpy( &component, componentPtr,
1367 sizeof(RF_SingleComponent_t));
1368 retcode = rf_delete_component(raidPtr, &component);
1369 return(retcode);
1370
1371 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1372 componentPtr = (RF_SingleComponent_t *)data;
1373 memcpy( &component, componentPtr,
1374 sizeof(RF_SingleComponent_t));
1375 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1376 return(retcode);
1377
1378 case RAIDFRAME_REBUILD_IN_PLACE:
1379
1380 if (raidPtr->Layout.map->faultsTolerated == 0) {
1381 /* Can't do this on a RAID 0!! */
1382 return(EINVAL);
1383 }
1384
1385 if (raidPtr->recon_in_progress == 1) {
1386 /* a reconstruct is already in progress! */
1387 return(EINVAL);
1388 }
1389
1390 componentPtr = (RF_SingleComponent_t *) data;
1391 memcpy( &component, componentPtr,
1392 sizeof(RF_SingleComponent_t));
1393 component.row = 0; /* we don't support any more */
1394 column = component.column;
1395
1396 if ((column < 0) || (column >= raidPtr->numCol)) {
1397 return(EINVAL);
1398 }
1399
1400 RF_LOCK_MUTEX(raidPtr->mutex);
1401 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1402 (raidPtr->numFailures > 0)) {
1403 /* XXX 0 above shouldn't be constant!!! */
1404 /* some component other than this has failed.
1405 Let's not make things worse than they already
1406 are... */
1407 printf("raid%d: Unable to reconstruct to disk at:\n",
1408 raidPtr->raidid);
1409 printf("raid%d: Col: %d Too many failures.\n",
1410 raidPtr->raidid, column);
1411 RF_UNLOCK_MUTEX(raidPtr->mutex);
1412 return (EINVAL);
1413 }
1414 if (raidPtr->Disks[column].status ==
1415 rf_ds_reconstructing) {
1416 printf("raid%d: Unable to reconstruct to disk at:\n",
1417 raidPtr->raidid);
1418 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1419
1420 RF_UNLOCK_MUTEX(raidPtr->mutex);
1421 return (EINVAL);
1422 }
1423 if (raidPtr->Disks[column].status == rf_ds_spared) {
1424 RF_UNLOCK_MUTEX(raidPtr->mutex);
1425 return (EINVAL);
1426 }
1427 RF_UNLOCK_MUTEX(raidPtr->mutex);
1428
1429 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1430 if (rrcopy == NULL)
1431 return(ENOMEM);
1432
1433 rrcopy->raidPtr = (void *) raidPtr;
1434 rrcopy->col = column;
1435
1436 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1437 rf_ReconstructInPlaceThread,
1438 rrcopy,"raid_reconip");
1439 return(retcode);
1440
1441 case RAIDFRAME_GET_INFO:
1442 if (!raidPtr->valid)
1443 return (ENODEV);
1444 ucfgp = (RF_DeviceConfig_t **) data;
1445 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1446 (RF_DeviceConfig_t *));
1447 if (d_cfg == NULL)
1448 return (ENOMEM);
1449 d_cfg->rows = 1; /* there is only 1 row now */
1450 d_cfg->cols = raidPtr->numCol;
1451 d_cfg->ndevs = raidPtr->numCol;
1452 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1453 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1454 return (ENOMEM);
1455 }
1456 d_cfg->nspares = raidPtr->numSpare;
1457 if (d_cfg->nspares >= RF_MAX_DISKS) {
1458 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1459 return (ENOMEM);
1460 }
1461 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1462 d = 0;
1463 for (j = 0; j < d_cfg->cols; j++) {
1464 d_cfg->devs[d] = raidPtr->Disks[j];
1465 d++;
1466 }
1467 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1468 d_cfg->spares[i] = raidPtr->Disks[j];
1469 if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
1470 /* XXX: raidctl(8) expects to see this as a used spare */
1471 d_cfg->spares[i].status = rf_ds_used_spare;
1472 }
1473 }
1474 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1475 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1476
1477 return (retcode);
1478
1479 case RAIDFRAME_CHECK_PARITY:
1480 *(int *) data = raidPtr->parity_good;
1481 return (0);
1482
1483 case RAIDFRAME_PARITYMAP_STATUS:
1484 rf_paritymap_status(raidPtr->parity_map,
1485 (struct rf_pmstat *)data);
1486 return 0;
1487
1488 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1489 if (raidPtr->parity_map == NULL)
1490 return ENOENT; /* ??? */
1491 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1492 (struct rf_pmparams *)data, 1))
1493 return EINVAL;
1494 return 0;
1495
1496 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1497 *(int *) data = rf_paritymap_get_disable(raidPtr);
1498 return 0;
1499
1500 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1501 rf_paritymap_set_disable(raidPtr, *(int *)data);
1502 /* XXX should errors be passed up? */
1503 return 0;
1504
1505 case RAIDFRAME_RESET_ACCTOTALS:
1506 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1507 return (0);
1508
1509 case RAIDFRAME_GET_ACCTOTALS:
1510 totals = (RF_AccTotals_t *) data;
1511 *totals = raidPtr->acc_totals;
1512 return (0);
1513
1514 case RAIDFRAME_KEEP_ACCTOTALS:
1515 raidPtr->keep_acc_totals = *(int *)data;
1516 return (0);
1517
1518 case RAIDFRAME_GET_SIZE:
1519 *(int *) data = raidPtr->totalSectors;
1520 return (0);
1521
1522 /* fail a disk & optionally start reconstruction */
1523 case RAIDFRAME_FAIL_DISK:
1524
1525 if (raidPtr->Layout.map->faultsTolerated == 0) {
1526 /* Can't do this on a RAID 0!! */
1527 return(EINVAL);
1528 }
1529
1530 rr = (struct rf_recon_req *) data;
1531 rr->row = 0;
1532 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1533 return (EINVAL);
1534
1535
1536 RF_LOCK_MUTEX(raidPtr->mutex);
1537 if (raidPtr->status == rf_rs_reconstructing) {
1538 /* you can't fail a disk while we're reconstructing! */
1539 /* XXX wrong for RAID6 */
1540 RF_UNLOCK_MUTEX(raidPtr->mutex);
1541 return (EINVAL);
1542 }
1543 if ((raidPtr->Disks[rr->col].status ==
1544 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1545 /* some other component has failed. Let's not make
1546 things worse. XXX wrong for RAID6 */
1547 RF_UNLOCK_MUTEX(raidPtr->mutex);
1548 return (EINVAL);
1549 }
1550 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1551 /* Can't fail a spared disk! */
1552 RF_UNLOCK_MUTEX(raidPtr->mutex);
1553 return (EINVAL);
1554 }
1555 RF_UNLOCK_MUTEX(raidPtr->mutex);
1556
1557 /* make a copy of the recon request so that we don't rely on
1558 * the user's buffer */
1559 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1560 if (rrcopy == NULL)
1561 return(ENOMEM);
1562 memcpy(rrcopy, rr, sizeof(*rr));
1563 rrcopy->raidPtr = (void *) raidPtr;
1564
1565 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1566 rf_ReconThread,
1567 rrcopy,"raid_recon");
1568 return (0);
1569
1570 /* invoke a copyback operation after recon on whatever disk
1571 * needs it, if any */
1572 case RAIDFRAME_COPYBACK:
1573
1574 if (raidPtr->Layout.map->faultsTolerated == 0) {
1575 /* This makes no sense on a RAID 0!! */
1576 return(EINVAL);
1577 }
1578
1579 if (raidPtr->copyback_in_progress == 1) {
1580 /* Copyback is already in progress! */
1581 return(EINVAL);
1582 }
1583
1584 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1585 rf_CopybackThread,
1586 raidPtr,"raid_copyback");
1587 return (retcode);
1588
1589 /* return the percentage completion of reconstruction */
1590 case RAIDFRAME_CHECK_RECON_STATUS:
1591 if (raidPtr->Layout.map->faultsTolerated == 0) {
1592 /* This makes no sense on a RAID 0, so tell the
1593 user it's done. */
1594 *(int *) data = 100;
1595 return(0);
1596 }
1597 if (raidPtr->status != rf_rs_reconstructing)
1598 *(int *) data = 100;
1599 else {
1600 if (raidPtr->reconControl->numRUsTotal > 0) {
1601 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1602 } else {
1603 *(int *) data = 0;
1604 }
1605 }
1606 return (0);
1607 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1608 progressInfoPtr = (RF_ProgressInfo_t **) data;
1609 if (raidPtr->status != rf_rs_reconstructing) {
1610 progressInfo.remaining = 0;
1611 progressInfo.completed = 100;
1612 progressInfo.total = 100;
1613 } else {
1614 progressInfo.total =
1615 raidPtr->reconControl->numRUsTotal;
1616 progressInfo.completed =
1617 raidPtr->reconControl->numRUsComplete;
1618 progressInfo.remaining = progressInfo.total -
1619 progressInfo.completed;
1620 }
1621 retcode = copyout(&progressInfo, *progressInfoPtr,
1622 sizeof(RF_ProgressInfo_t));
1623 return (retcode);
1624
1625 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1626 if (raidPtr->Layout.map->faultsTolerated == 0) {
1627 /* This makes no sense on a RAID 0, so tell the
1628 user it's done. */
1629 *(int *) data = 100;
1630 return(0);
1631 }
1632 if (raidPtr->parity_rewrite_in_progress == 1) {
1633 *(int *) data = 100 *
1634 raidPtr->parity_rewrite_stripes_done /
1635 raidPtr->Layout.numStripe;
1636 } else {
1637 *(int *) data = 100;
1638 }
1639 return (0);
1640
1641 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1642 progressInfoPtr = (RF_ProgressInfo_t **) data;
1643 if (raidPtr->parity_rewrite_in_progress == 1) {
1644 progressInfo.total = raidPtr->Layout.numStripe;
1645 progressInfo.completed =
1646 raidPtr->parity_rewrite_stripes_done;
1647 progressInfo.remaining = progressInfo.total -
1648 progressInfo.completed;
1649 } else {
1650 progressInfo.remaining = 0;
1651 progressInfo.completed = 100;
1652 progressInfo.total = 100;
1653 }
1654 retcode = copyout(&progressInfo, *progressInfoPtr,
1655 sizeof(RF_ProgressInfo_t));
1656 return (retcode);
1657
1658 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1659 if (raidPtr->Layout.map->faultsTolerated == 0) {
1660 /* This makes no sense on a RAID 0 */
1661 *(int *) data = 100;
1662 return(0);
1663 }
1664 if (raidPtr->copyback_in_progress == 1) {
1665 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1666 raidPtr->Layout.numStripe;
1667 } else {
1668 *(int *) data = 100;
1669 }
1670 return (0);
1671
1672 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1673 progressInfoPtr = (RF_ProgressInfo_t **) data;
1674 if (raidPtr->copyback_in_progress == 1) {
1675 progressInfo.total = raidPtr->Layout.numStripe;
1676 progressInfo.completed =
1677 raidPtr->copyback_stripes_done;
1678 progressInfo.remaining = progressInfo.total -
1679 progressInfo.completed;
1680 } else {
1681 progressInfo.remaining = 0;
1682 progressInfo.completed = 100;
1683 progressInfo.total = 100;
1684 }
1685 retcode = copyout(&progressInfo, *progressInfoPtr,
1686 sizeof(RF_ProgressInfo_t));
1687 return (retcode);
1688
1689 /* the sparetable daemon calls this to wait for the kernel to
1690 * need a spare table. this ioctl does not return until a
1691 * spare table is needed. XXX -- calling mpsleep here in the
1692 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1693 * -- I should either compute the spare table in the kernel,
1694 * or have a different -- XXX XXX -- interface (a different
1695 * character device) for delivering the table -- XXX */
1696 #if 0
1697 case RAIDFRAME_SPARET_WAIT:
1698 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1699 while (!rf_sparet_wait_queue)
1700 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1701 waitreq = rf_sparet_wait_queue;
1702 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1703 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1704
1705 /* structure assignment */
1706 *((RF_SparetWait_t *) data) = *waitreq;
1707
1708 RF_Free(waitreq, sizeof(*waitreq));
1709 return (0);
1710
1711 /* wakes up a process waiting on SPARET_WAIT and puts an error
	 * code in it that will cause the daemon to exit */
1713 case RAIDFRAME_ABORT_SPARET_WAIT:
1714 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1715 waitreq->fcol = -1;
1716 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1717 waitreq->next = rf_sparet_wait_queue;
1718 rf_sparet_wait_queue = waitreq;
1719 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1720 wakeup(&rf_sparet_wait_queue);
1721 return (0);
1722
1723 /* used by the spare table daemon to deliver a spare table
1724 * into the kernel */
1725 case RAIDFRAME_SEND_SPARET:
1726
1727 /* install the spare table */
1728 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1729
1730 /* respond to the requestor. the return status of the spare
1731 * table installation is passed in the "fcol" field */
1732 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1733 waitreq->fcol = retcode;
1734 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1735 waitreq->next = rf_sparet_resp_queue;
1736 rf_sparet_resp_queue = waitreq;
1737 wakeup(&rf_sparet_resp_queue);
1738 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1739
1740 return (retcode);
1741 #endif
1742
1743 default:
1744 break; /* fall through to the os-specific code below */
1745
1746 }
1747
1748 if (!raidPtr->valid)
1749 return (EINVAL);
1750
1751 /*
1752 * Add support for "regular" device ioctls here.
1753 */
1754
1755 switch (cmd) {
1756 case DIOCGDINFO:
1757 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1758 break;
1759 #ifdef __HAVE_OLD_DISKLABEL
1760 case ODIOCGDINFO:
1761 newlabel = *(rs->sc_dkdev.dk_label);
1762 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1763 return ENOTTY;
1764 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1765 break;
1766 #endif
1767
1768 case DIOCGPART:
1769 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1770 ((struct partinfo *) data)->part =
1771 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1772 break;
1773
1774 case DIOCWDINFO:
1775 case DIOCSDINFO:
1776 #ifdef __HAVE_OLD_DISKLABEL
1777 case ODIOCWDINFO:
1778 case ODIOCSDINFO:
1779 #endif
1780 {
1781 struct disklabel *lp;
1782 #ifdef __HAVE_OLD_DISKLABEL
1783 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1784 memset(&newlabel, 0, sizeof newlabel);
1785 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1786 lp = &newlabel;
1787 } else
1788 #endif
1789 lp = (struct disklabel *)data;
1790
1791 if ((error = raidlock(rs)) != 0)
1792 return (error);
1793
1794 rs->sc_flags |= RAIDF_LABELLING;
1795
1796 error = setdisklabel(rs->sc_dkdev.dk_label,
1797 lp, 0, rs->sc_dkdev.dk_cpulabel);
1798 if (error == 0) {
1799 if (cmd == DIOCWDINFO
1800 #ifdef __HAVE_OLD_DISKLABEL
1801 || cmd == ODIOCWDINFO
1802 #endif
1803 )
1804 error = writedisklabel(RAIDLABELDEV(dev),
1805 raidstrategy, rs->sc_dkdev.dk_label,
1806 rs->sc_dkdev.dk_cpulabel);
1807 }
1808 rs->sc_flags &= ~RAIDF_LABELLING;
1809
1810 raidunlock(rs);
1811
1812 if (error)
1813 return (error);
1814 break;
1815 }
1816
1817 case DIOCWLABEL:
1818 if (*(int *) data != 0)
1819 rs->sc_flags |= RAIDF_WLABEL;
1820 else
1821 rs->sc_flags &= ~RAIDF_WLABEL;
1822 break;
1823
1824 case DIOCGDEFLABEL:
1825 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1826 break;
1827
1828 #ifdef __HAVE_OLD_DISKLABEL
1829 case ODIOCGDEFLABEL:
1830 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1831 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1832 return ENOTTY;
1833 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1834 break;
1835 #endif
1836
1837 case DIOCAWEDGE:
1838 case DIOCDWEDGE:
1839 dkw = (void *)data;
1840
1841 /* If the ioctl happens here, the parent is us. */
1842 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1843 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1844
1845 case DIOCLWEDGES:
1846 return dkwedge_list(&rs->sc_dkdev,
1847 (struct dkwedge_list *)data, l);
1848 case DIOCCACHESYNC:
1849 return rf_sync_component_caches(raidPtr);
1850
1851 case DIOCGSTRATEGY:
1852 {
1853 struct disk_strategy *dks = (void *)data;
1854
1855 s = splbio();
1856 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1857 sizeof(dks->dks_name));
1858 splx(s);
1859 dks->dks_paramlen = 0;
1860
1861 return 0;
1862 }
1863
1864 case DIOCSSTRATEGY:
1865 {
1866 struct disk_strategy *dks = (void *)data;
1867 struct bufq_state *new;
1868 struct bufq_state *old;
1869
1870 if (dks->dks_param != NULL) {
1871 return EINVAL;
1872 }
1873 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1874 error = bufq_alloc(&new, dks->dks_name,
1875 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1876 if (error) {
1877 return error;
1878 }
1879 s = splbio();
1880 old = rs->buf_queue;
1881 bufq_move(new, old);
1882 rs->buf_queue = new;
1883 splx(s);
1884 bufq_free(old);
1885
1886 return 0;
1887 }
1888
1889 default:
1890 retcode = ENOTTY;
1891 }
1892 return (retcode);
1893
1894 }
1895
1896
1897 /* raidinit -- complete the rest of the initialization for the
1898 RAIDframe device. */
1899
1900
1901 static void
1902 raidinit(RF_Raid_t *raidPtr)
1903 {
1904 struct cfdata *cf;
1905 struct raid_softc *rs;
1906 int unit;
1907
1908 unit = raidPtr->raidid;
1909
1910 rs = &raid_softc[unit];
1911
1912 /* XXX should check return code first... */
1913 rs->sc_flags |= RAIDF_INITED;
1914
1915 /* XXX doesn't check bounds. */
1916 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1917
1918 /* attach the pseudo device */
1919 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1920 cf->cf_name = raid_cd.cd_name;
1921 cf->cf_atname = raid_cd.cd_name;
1922 cf->cf_unit = unit;
1923 cf->cf_fstate = FSTATE_STAR;
1924
1925 rs->sc_dev = config_attach_pseudo(cf);
1926
1927 if (rs->sc_dev==NULL) {
1928 printf("raid%d: config_attach_pseudo failed\n",
1929 raidPtr->raidid);
1930 }
1931
1932 /* disk_attach actually creates space for the CPU disklabel, among
1933 * other things, so it's critical to call this *BEFORE* we try putzing
1934 * with disklabels. */
1935
1936 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
1937 disk_attach(&rs->sc_dkdev);
1938 disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);
1939
1940 /* XXX There may be a weird interaction here between this, and
1941 * protectedSectors, as used in RAIDframe. */
1942
1943 rs->sc_size = raidPtr->totalSectors;
1944
1945 dkwedge_discover(&rs->sc_dkdev);
1946
1947 rf_set_properties(rs, raidPtr);
1948
1949 }
1950 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1951 /* wake up the daemon & tell it to get us a spare table
1952 * XXX
1953 * the entries in the queues should be tagged with the raidPtr
1954 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
1956 * XXX
1957 *
1958 * XXX This code is not currently used. GO
1959 */
/*
 * Hand a spare-table request off to the user-land sparetable daemon
 * (which is blocked in RAIDFRAME_SPARET_WAIT) and sleep until the
 * daemon delivers a response.  Returns the status the daemon passed
 * back in the "fcol" field of the response structure.
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* queue the request where the daemon will find it, and wake it */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * Historical note: this was originally mpsleep(), which dropped
	 * the mutex across the sleep.  tsleep(9) is not told about
	 * rf_sparet_wait_mutex at all, so as written we appear to sleep
	 * with the mutex held -- NOTE(review): confirm that is intended;
	 * per the comment above this function, this code path is not
	 * currently used.
	 */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		       "raidframe getsparetable", 0);
	}
	/* dequeue the daemon's response */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1984 #endif
1985
1986 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1987 * bp & passes it down.
1988 * any calls originating in the kernel must use non-blocking I/O
1989 * do some extra sanity checking to return "appropriate" error values for
1990 * certain conditions (to make some standard utilities work)
1991 *
1992 * Formerly known as: rf_DoAccessKernel
1993 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* drop the mutex around the call -- the label update
		 * path does its own locking */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Loop invariant: raidPtr->mutex is held at every evaluation of
	 * the while-condition; each `continue` path re-takes it first. */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* convert from DEV_BSIZE blocks to RAID sectors */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			/* non-raw partition: add the partition offset */
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb: one extra sector if the byte count ends partway
		 * into a sector */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* reject requests past the end of the set; the three
		 * "sum <" comparisons also catch arithmetic wrap-around */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* re-take the mutex before the loop test */
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that are not a whole number of sectors */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one of the available openings for this I/O;
		 * only the decrement needs the mutex */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* rf_DoAccess() failed outright: fail the buf
			 * here and keep iterating.
			 * NOTE(review): the opening consumed above is not
			 * re-incremented on this path -- confirm the
			 * completion machinery accounts for it. */
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
2114
2115
2116
2117
2118 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2119
/*
 * Dispatch one low-level I/O request (read, write, or a queue-unlock NOP)
 * to the component device backing `queue'.  Called with the disk queue
 * lock held; the lock is dropped around bdev_strategy() because that call
 * can block.  Completion is delivered asynchronously via KernelWakeupFunc().
 * Always returns 0.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	/* Map the RAIDframe I/O direction onto buf(9) flags. */
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;

#if DIAGNOSTIC
	if (queue->raidPtr->raidid >= numraid) {
		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
		       numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO");
	}
#endif

	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Complete the NOP immediately; no device I/O is issued. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Prepare the buf for the component device strategy call;
		 * KernelWakeupFunc() is installed as the iodone handler. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
2202 /* this is the callback function associated with a I/O invoked from
2203 kernel code.
2204 */
/*
 * Biodone callback for component I/O issued by rf_DispatchKernelIO().
 * Records the error status (failing the component if necessary), moves
 * the request onto the raidPtr->iodone queue and wakes the raidio thread,
 * which performs the actual completion processing.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));

	/* The originating request was stashed in b_private by InitBP(). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		/* The component is only failed while it is currently
		   in service (optimal or used_spare) and while losing
		   it stays within the layout's fault tolerance. */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */

	req->error = bp->b_error;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}
2274
2275
2276
2277 /*
2278 * initialize a buf structure for doing an I/O in the kernel.
2279 */
/*
 * Initialize a buf for a component-device I/O issued from kernel mode.
 *
 * rw_flag is B_READ or B_WRITE; startSect/numSect are in units of the
 * array's logical sectors (2^logBytesPerSector bytes each); cbFunc/cbArg
 * become the biodone handler and its b_private argument.
 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	/* Convert a logical-sector offset to DEV_BSIZE units for b_blkno. */
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	/* A zero-length transfer indicates a caller bug; fail loudly. */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}
2304
2305 static void
2306 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2307 struct disklabel *lp)
2308 {
2309 memset(lp, 0, sizeof(*lp));
2310
2311 /* fabricate a label... */
2312 lp->d_secperunit = raidPtr->totalSectors;
2313 lp->d_secsize = raidPtr->bytesPerSector;
2314 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2315 lp->d_ntracks = 4 * raidPtr->numCol;
2316 lp->d_ncylinders = raidPtr->totalSectors /
2317 (lp->d_nsectors * lp->d_ntracks);
2318 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2319
2320 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2321 lp->d_type = DTYPE_RAID;
2322 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2323 lp->d_rpm = 3600;
2324 lp->d_interleave = 1;
2325 lp->d_flags = 0;
2326
2327 lp->d_partitions[RAW_PART].p_offset = 0;
2328 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2329 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2330 lp->d_npartitions = RAW_PART + 1;
2331
2332 lp->d_magic = DISKMAGIC;
2333 lp->d_magic2 = DISKMAGIC;
2334 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2335
2336 }
2337 /*
2338 * Read the disklabel from the raid device. If one is not present, fake one
2339 * up.
2340 */
/*
 * Read the disklabel from the raid device.  If one is not present, fake one
 * up.  Also sanity-checks a found label against the current array size,
 * since reconfiguration with a different interleave can change the total
 * size while an old on-disk label survives.
 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* Start from a fabricated default; readdisklabel() may replace it. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* No usable on-disk label; install a default one. */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%" PRIu32 ") != "
			    "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			/* Warn (but don't fix) partitions past end of array. */
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%" PRIu64 ")\n",
				    unit, rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
2393 /*
2394 * Take care of things one might want to take care of in the event
2395 * that a disklabel isn't present.
2396 */
2397 static void
2398 raidmakedisklabel(struct raid_softc *rs)
2399 {
2400 struct disklabel *lp = rs->sc_dkdev.dk_label;
2401 db1_printf(("Making a label..\n"));
2402
2403 /*
2404 * For historical reasons, if there's no disklabel present
2405 * the raw partition must be marked FS_BSDFFS.
2406 */
2407
2408 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2409
2410 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2411
2412 lp->d_checksum = dkcksum(lp);
2413 }
2414 /*
2415 * Wait interruptibly for an exclusive lock.
2416 *
2417 * XXX
2418 * Several drivers do this; it should be abstracted and made MP-safe.
2419 * (Hmm... where have we seen this warning before :-> GO )
2420 */
2421 static int
2422 raidlock(struct raid_softc *rs)
2423 {
2424 int error;
2425
2426 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2427 rs->sc_flags |= RAIDF_WANTED;
2428 if ((error =
2429 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2430 return (error);
2431 }
2432 rs->sc_flags |= RAIDF_LOCKED;
2433 return (0);
2434 }
2435 /*
2436 * Unlock and wake up any waiters.
2437 */
2438 static void
2439 raidunlock(struct raid_softc *rs)
2440 {
2441
2442 rs->sc_flags &= ~RAIDF_LOCKED;
2443 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2444 rs->sc_flags &= ~RAIDF_WANTED;
2445 wakeup(rs);
2446 }
2447 }
2448
2449
2450 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2451 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2452 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2453
2454 static daddr_t
2455 rf_component_info_offset(void)
2456 {
2457
2458 return RF_COMPONENT_INFO_OFFSET;
2459 }
2460
2461 static daddr_t
2462 rf_component_info_size(unsigned secsize)
2463 {
2464 daddr_t info_size;
2465
2466 KASSERT(secsize);
2467 if (secsize > RF_COMPONENT_INFO_SIZE)
2468 info_size = secsize;
2469 else
2470 info_size = RF_COMPONENT_INFO_SIZE;
2471
2472 return info_size;
2473 }
2474
2475 static daddr_t
2476 rf_parity_map_offset(RF_Raid_t *raidPtr)
2477 {
2478 daddr_t map_offset;
2479
2480 KASSERT(raidPtr->bytesPerSector);
2481 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2482 map_offset = raidPtr->bytesPerSector;
2483 else
2484 map_offset = RF_COMPONENT_INFO_SIZE;
2485 map_offset += rf_component_info_offset();
2486
2487 return map_offset;
2488 }
2489
2490 static daddr_t
2491 rf_parity_map_size(RF_Raid_t *raidPtr)
2492 {
2493 daddr_t map_size;
2494
2495 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2496 map_size = raidPtr->bytesPerSector;
2497 else
2498 map_size = RF_PARITY_MAP_SIZE;
2499
2500 return map_size;
2501 }
2502
2503 int
2504 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2505 {
2506 RF_ComponentLabel_t *clabel;
2507
2508 clabel = raidget_component_label(raidPtr, col);
2509 clabel->clean = RF_RAID_CLEAN;
2510 raidflush_component_label(raidPtr, col);
2511 return(0);
2512 }
2513
2514
2515 int
2516 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2517 {
2518 RF_ComponentLabel_t *clabel;
2519
2520 clabel = raidget_component_label(raidPtr, col);
2521 clabel->clean = RF_RAID_DIRTY;
2522 raidflush_component_label(raidPtr, col);
2523 return(0);
2524 }
2525
2526 int
2527 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2528 {
2529 KASSERT(raidPtr->bytesPerSector);
2530 return raidread_component_label(raidPtr->bytesPerSector,
2531 raidPtr->Disks[col].dev,
2532 raidPtr->raid_cinfo[col].ci_vp,
2533 &raidPtr->raid_cinfo[col].ci_label);
2534 }
2535
2536 RF_ComponentLabel_t *
2537 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2538 {
2539 return &raidPtr->raid_cinfo[col].ci_label;
2540 }
2541
2542 int
2543 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2544 {
2545 RF_ComponentLabel_t *label;
2546
2547 label = &raidPtr->raid_cinfo[col].ci_label;
2548 label->mod_counter = raidPtr->mod_counter;
2549 #ifndef RF_NO_PARITY_MAP
2550 label->parity_map_modcount = label->mod_counter;
2551 #endif
2552 return raidwrite_component_label(raidPtr->bytesPerSector,
2553 raidPtr->Disks[col].dev,
2554 raidPtr->raid_cinfo[col].ci_vp, label);
2555 }
2556
2557
2558 static int
2559 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2560 RF_ComponentLabel_t *clabel)
2561 {
2562 return raidread_component_area(dev, b_vp, clabel,
2563 sizeof(RF_ComponentLabel_t),
2564 rf_component_info_offset(),
2565 rf_component_info_size(secsize));
2566 }
2567
2568 /* ARGSUSED */
2569 static int
2570 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2571 size_t msize, daddr_t offset, daddr_t dsize)
2572 {
2573 struct buf *bp;
2574 const struct bdevsw *bdev;
2575 int error;
2576
2577 /* XXX should probably ensure that we don't try to do this if
2578 someone has changed rf_protected_sectors. */
2579
2580 if (b_vp == NULL) {
2581 /* For whatever reason, this component is not valid.
2582 Don't try to read a component label from it. */
2583 return(EINVAL);
2584 }
2585
2586 /* get a block of the appropriate size... */
2587 bp = geteblk((int)dsize);
2588 bp->b_dev = dev;
2589
2590 /* get our ducks in a row for the read */
2591 bp->b_blkno = offset / DEV_BSIZE;
2592 bp->b_bcount = dsize;
2593 bp->b_flags |= B_READ;
2594 bp->b_resid = dsize;
2595
2596 bdev = bdevsw_lookup(bp->b_dev);
2597 if (bdev == NULL)
2598 return (ENXIO);
2599 (*bdev->d_strategy)(bp);
2600
2601 error = biowait(bp);
2602
2603 if (!error) {
2604 memcpy(data, bp->b_data, msize);
2605 }
2606
2607 brelse(bp, 0);
2608 return(error);
2609 }
2610
2611
2612 static int
2613 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2614 RF_ComponentLabel_t *clabel)
2615 {
2616 return raidwrite_component_area(dev, b_vp, clabel,
2617 sizeof(RF_ComponentLabel_t),
2618 rf_component_info_offset(),
2619 rf_component_info_size(secsize), 0);
2620 }
2621
2622 /* ARGSUSED */
2623 static int
2624 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2625 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2626 {
2627 struct buf *bp;
2628 const struct bdevsw *bdev;
2629 int error;
2630
2631 /* get a block of the appropriate size... */
2632 bp = geteblk((int)dsize);
2633 bp->b_dev = dev;
2634
2635 /* get our ducks in a row for the write */
2636 bp->b_blkno = offset / DEV_BSIZE;
2637 bp->b_bcount = dsize;
2638 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2639 bp->b_resid = dsize;
2640
2641 memset(bp->b_data, 0, dsize);
2642 memcpy(bp->b_data, data, msize);
2643
2644 bdev = bdevsw_lookup(bp->b_dev);
2645 if (bdev == NULL)
2646 return (ENXIO);
2647 (*bdev->d_strategy)(bp);
2648 if (asyncp)
2649 return 0;
2650 error = biowait(bp);
2651 brelse(bp, 0);
2652 if (error) {
2653 #if 1
2654 printf("Failed to write RAID component info!\n");
2655 #endif
2656 }
2657
2658 return(error);
2659 }
2660
2661 void
2662 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2663 {
2664 int c;
2665
2666 for (c = 0; c < raidPtr->numCol; c++) {
2667 /* Skip dead disks. */
2668 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2669 continue;
2670 /* XXXjld: what if an error occurs here? */
2671 raidwrite_component_area(raidPtr->Disks[c].dev,
2672 raidPtr->raid_cinfo[c].ci_vp, map,
2673 RF_PARITYMAP_NBYTE,
2674 rf_parity_map_offset(raidPtr),
2675 rf_parity_map_size(raidPtr), 0);
2676 }
2677 }
2678
2679 void
2680 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2681 {
2682 struct rf_paritymap_ondisk tmp;
2683 int c,first;
2684
2685 first=1;
2686 for (c = 0; c < raidPtr->numCol; c++) {
2687 /* Skip dead disks. */
2688 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2689 continue;
2690 raidread_component_area(raidPtr->Disks[c].dev,
2691 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2692 RF_PARITYMAP_NBYTE,
2693 rf_parity_map_offset(raidPtr),
2694 rf_parity_map_size(raidPtr));
2695 if (first) {
2696 memcpy(map, &tmp, sizeof(*map));
2697 first = 0;
2698 } else {
2699 rf_paritymap_merge(map, &tmp);
2700 }
2701 }
2702 }
2703
/*
 * Bump the array's mod_counter and mark every usable component (and
 * every in-use spare) dirty on disk.  Called when the array goes into
 * service so that an unclean shutdown can be detected later.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		/* Spares live directly after the numCol data columns. */
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the data column this spare stands in for.
			   NOTE(review): if no column points at this spare,
			   scol keeps its previous value (-1 or stale) --
			   presumably that can't happen for a used spare;
			   verify. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2763
2764
/*
 * Bump mod_counter and rewrite the component labels of all optimal
 * components and in-use spares.  When `final' is
 * RF_FINAL_COMPONENT_UPDATE and parity is known good, also mark the
 * components clean (the normal shutdown path).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		/* Spares live directly after the numCol data columns. */
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the data column this spare replaced.
			   NOTE(review): scol stays -1/stale if no column
			   references this spare -- presumably impossible
			   for a used spare; verify. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2839
2840 void
2841 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2842 {
2843
2844 if (vp != NULL) {
2845 if (auto_configured == 1) {
2846 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2847 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2848 vput(vp);
2849
2850 } else {
2851 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2852 }
2853 }
2854 }
2855
2856
2857 void
2858 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2859 {
2860 int r,c;
2861 struct vnode *vp;
2862 int acd;
2863
2864
2865 /* We take this opportunity to close the vnodes like we should.. */
2866
2867 for (c = 0; c < raidPtr->numCol; c++) {
2868 vp = raidPtr->raid_cinfo[c].ci_vp;
2869 acd = raidPtr->Disks[c].auto_configured;
2870 rf_close_component(raidPtr, vp, acd);
2871 raidPtr->raid_cinfo[c].ci_vp = NULL;
2872 raidPtr->Disks[c].auto_configured = 0;
2873 }
2874
2875 for (r = 0; r < raidPtr->numSpare; r++) {
2876 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2877 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2878 rf_close_component(raidPtr, vp, acd);
2879 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2880 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2881 }
2882 }
2883
2884
2885 void
2886 rf_ReconThread(struct rf_recon_req *req)
2887 {
2888 int s;
2889 RF_Raid_t *raidPtr;
2890
2891 s = splbio();
2892 raidPtr = (RF_Raid_t *) req->raidPtr;
2893 raidPtr->recon_in_progress = 1;
2894
2895 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2896 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2897
2898 RF_Free(req, sizeof(*req));
2899
2900 raidPtr->recon_in_progress = 0;
2901 splx(s);
2902
2903 /* That's all... */
2904 kthread_exit(0); /* does not return */
2905 }
2906
2907 void
2908 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2909 {
2910 int retcode;
2911 int s;
2912
2913 raidPtr->parity_rewrite_stripes_done = 0;
2914 raidPtr->parity_rewrite_in_progress = 1;
2915 s = splbio();
2916 retcode = rf_RewriteParity(raidPtr);
2917 splx(s);
2918 if (retcode) {
2919 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2920 } else {
2921 /* set the clean bit! If we shutdown correctly,
2922 the clean bit on each component label will get
2923 set */
2924 raidPtr->parity_good = RF_RAID_CLEAN;
2925 }
2926 raidPtr->parity_rewrite_in_progress = 0;
2927
2928 /* Anyone waiting for us to stop? If so, inform them... */
2929 if (raidPtr->waitShutdown) {
2930 wakeup(&raidPtr->parity_rewrite_in_progress);
2931 }
2932
2933 /* That's all... */
2934 kthread_exit(0); /* does not return */
2935 }
2936
2937
2938 void
2939 rf_CopybackThread(RF_Raid_t *raidPtr)
2940 {
2941 int s;
2942
2943 raidPtr->copyback_in_progress = 1;
2944 s = splbio();
2945 rf_CopybackReconstructedData(raidPtr);
2946 splx(s);
2947 raidPtr->copyback_in_progress = 0;
2948
2949 /* That's all... */
2950 kthread_exit(0); /* does not return */
2951 }
2952
2953
2954 void
2955 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2956 {
2957 int s;
2958 RF_Raid_t *raidPtr;
2959
2960 s = splbio();
2961 raidPtr = req->raidPtr;
2962 raidPtr->recon_in_progress = 1;
2963 rf_ReconstructInPlace(raidPtr, req->col);
2964 RF_Free(req, sizeof(*req));
2965 raidPtr->recon_in_progress = 0;
2966 splx(s);
2967
2968 /* That's all... */
2969 kthread_exit(0); /* does not return */
2970 }
2971
/*
 * Try to read a component label from (dev, vp).  If the label looks
 * reasonable, prepend a new RF_AutoConfig_t for it to ac_list and keep
 * the vnode open (ownership moves to the list entry); otherwise close
 * the vnode and leave ac_list unchanged.  On allocation failure the
 * whole list is torn down and NULL is returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* Out of memory: free every entry (and its label) built
		   so far and give up on autoconfiguration. */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;	/* list entry now owns the vnode */
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: no entry was created, so release the label
		   buffer and close/unlock the vnode ourselves. */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3029
3030 RF_AutoConfig_t *
3031 rf_find_raid_components()
3032 {
3033 struct vnode *vp;
3034 struct disklabel label;
3035 struct device *dv;
3036 dev_t dev;
3037 int bmajor, bminor, wedge;
3038 int error;
3039 int i;
3040 RF_AutoConfig_t *ac_list;
3041 uint64_t numsecs;
3042 unsigned secsize;
3043
3044 RF_ASSERT(raidPtr->bytesPerSector < rf_component_info_offset());
3045
3046 /* initialize the AutoConfig list */
3047 ac_list = NULL;
3048
3049 /* we begin by trolling through *all* the devices on the system */
3050
3051 for (dv = alldevs.tqh_first; dv != NULL;
3052 dv = dv->dv_list.tqe_next) {
3053
3054 /* we are only interested in disks... */
3055 if (device_class(dv) != DV_DISK)
3056 continue;
3057
3058 /* we don't care about floppies... */
3059 if (device_is_a(dv, "fd")) {
3060 continue;
3061 }
3062
3063 /* we don't care about CD's... */
3064 if (device_is_a(dv, "cd")) {
3065 continue;
3066 }
3067
3068 /* we don't care about md's... */
3069 if (device_is_a(dv, "md")) {
3070 continue;
3071 }
3072
3073 /* hdfd is the Atari/Hades floppy driver */
3074 if (device_is_a(dv, "hdfd")) {
3075 continue;
3076 }
3077
3078 /* fdisa is the Atari/Milan floppy driver */
3079 if (device_is_a(dv, "fdisa")) {
3080 continue;
3081 }
3082
3083 /* need to find the device_name_to_block_device_major stuff */
3084 bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
3085
3086 /* get a vnode for the raw partition of this disk */
3087
3088 wedge = device_is_a(dv, "dk");
3089 bminor = minor(device_unit(dv));
3090 dev = wedge ? makedev(bmajor, bminor) :
3091 MAKEDISKDEV(bmajor, bminor, RAW_PART);
3092 if (bdevvp(dev, &vp))
3093 panic("RAID can't alloc vnode");
3094
3095 error = VOP_OPEN(vp, FREAD, NOCRED);
3096
3097 if (error) {
3098 /* "Who cares." Continue looking
3099 for something that exists*/
3100 vput(vp);
3101 continue;
3102 }
3103
3104 error = getdisksize(vp, &numsecs, &secsize);
3105 if (error) {
3106 vput(vp);
3107 continue;
3108 }
3109 if (wedge) {
3110 struct dkwedge_info dkw;
3111 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
3112 NOCRED);
3113 if (error) {
3114 printf("RAIDframe: can't get wedge info for "
3115 "dev %s (%d)\n", device_xname(dv), error);
3116 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3117 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3118 vput(vp);
3119 continue;
3120 }
3121
3122 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
3123 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3124 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3125 vput(vp);
3126 continue;
3127 }
3128
3129 ac_list = rf_get_component(ac_list, dev, vp,
3130 device_xname(dv), dkw.dkw_size, numsecs, secsize);
3131 continue;
3132 }
3133
3134 /* Ok, the disk exists. Go get the disklabel. */
3135 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
3136 if (error) {
3137 /*
3138 * XXX can't happen - open() would
3139 * have errored out (or faked up one)
3140 */
3141 if (error != ENOTTY)
3142 printf("RAIDframe: can't get label for dev "
3143 "%s (%d)\n", device_xname(dv), error);
3144 }
3145
3146 /* don't need this any more. We'll allocate it again
3147 a little later if we really do... */
3148 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3149 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3150 vput(vp);
3151
3152 if (error)
3153 continue;
3154
3155 for (i = 0; i < label.d_npartitions; i++) {
3156 char cname[sizeof(ac_list->devname)];
3157
3158 /* We only support partitions marked as RAID */
3159 if (label.d_partitions[i].p_fstype != FS_RAID)
3160 continue;
3161
3162 dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
3163 if (bdevvp(dev, &vp))
3164 panic("RAID can't alloc vnode");
3165
3166 error = VOP_OPEN(vp, FREAD, NOCRED);
3167 if (error) {
3168 /* Whatever... */
3169 vput(vp);
3170 continue;
3171 }
3172 snprintf(cname, sizeof(cname), "%s%c",
3173 device_xname(dv), 'a' + i);
3174 ac_list = rf_get_component(ac_list, dev, vp, cname,
3175 label.d_partitions[i].p_size, numsecs, secsize);
3176 }
3177 }
3178 return ac_list;
3179 }
3180
3181
/*
 * Sanity-check a component label read off disk.  Returns 1 (after
 * scrubbing stale Hi fields via rf_fix_old_label_size()) when the label
 * is plausible, 0 otherwise.
 */
static int
rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
{

	/* Accept either supported label version, a valid clean flag,
	   row/column coordinates inside the stated geometry, and
	   positive block size/count. */
	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
	    ((clabel->clean == RF_RAID_CLEAN) ||
	     (clabel->clean == RF_RAID_DIRTY)) &&
	    clabel->row >=0 &&
	    clabel->column >= 0 &&
	    clabel->num_rows > 0 &&
	    clabel->num_columns > 0 &&
	    clabel->row < clabel->num_rows &&
	    clabel->column < clabel->num_columns &&
	    clabel->blockSize > 0 &&
	    /*
	     * numBlocksHi may contain garbage, but it is ok since
	     * the type is unsigned.  If it is really garbage,
	     * rf_fix_old_label_size() will fix it.
	     */
	    rf_component_label_numblocks(clabel) > 0) {
		/*
		 * label looks reasonable enough...
		 * let's make sure it has no old garbage.
		 */
		rf_fix_old_label_size(clabel, numsecs);
		return(1);
	}
	return(0);
}
3212
3213
3214 /*
3215 * For reasons yet unknown, some old component labels have garbage in
3216 * the newer numBlocksHi region, and this causes lossage. Since those
3217 * disks will also have numsecs set to less than 32 bits of sectors,
3218 * we can determine when this corruption has occured, and fix it.
3219 *
3220 * The exact same problem, with the same unknown reason, happens to
3221 * the partitionSizeHi member as well.
3222 */
3223 static void
3224 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3225 {
3226
3227 if (numsecs < ((uint64_t)1 << 32)) {
3228 if (clabel->numBlocksHi) {
3229 printf("WARNING: total sectors < 32 bits, yet "
3230 "numBlocksHi set\n"
3231 "WARNING: resetting numBlocksHi to zero.\n");
3232 clabel->numBlocksHi = 0;
3233 }
3234
3235 if (clabel->partitionSizeHi) {
3236 printf("WARNING: total sectors < 32 bits, yet "
3237 "partitionSizeHi set\n"
3238 "WARNING: resetting partitionSizeHi to zero.\n");
3239 clabel->partitionSizeHi = 0;
3240 }
3241 }
3242 }
3243
3244
3245 #ifdef DEBUG
/*
 * Debug helper: dump the interesting fields of a component label to
 * the console.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;

	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status );
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No" );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
3274 #endif
3275
/*
 * Partition the flat list of autoconfig candidates into configuration
 * sets: each set groups the components that (per rf_does_it_fit())
 * belong to the same array.  Consumes ac_list's link fields -- each
 * RF_AutoConfig_t is relinked into exactly one set.  Panics on
 * allocation failure.  Returns the list of sets.
 */
RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t *ac_list)
{
	RF_AutoConfig_t *ac;
	RF_ConfigSet_t *config_sets;
	RF_ConfigSet_t *cset;
	RF_AutoConfig_t *ac_next;


	config_sets = NULL;

	/* Go through the AutoConfig list, and figure out which components
	   belong to what sets.  */
	ac = ac_list;
	while(ac!=NULL) {
		/* we're going to putz with ac->next, so save it here
		   for use at the end of the loop */
		ac_next = ac->next;

		if (config_sets == NULL) {
			/* will need at least this one... */
			config_sets = (RF_ConfigSet_t *)
				malloc(sizeof(RF_ConfigSet_t),
				       M_RAIDFRAME, M_NOWAIT);
			if (config_sets == NULL) {
				panic("rf_create_auto_sets: No memory!");
			}
			/* this one is easy :) */
			config_sets->ac = ac;
			config_sets->next = NULL;
			config_sets->rootable = 0;
			ac->next = NULL;
		} else {
			/* which set does this component fit into? */
			cset = config_sets;
			while(cset!=NULL) {
				if (rf_does_it_fit(cset, ac)) {
					/* looks like it matches...
					   prepend to the set's list */
					ac->next = cset->ac;
					cset->ac = ac;
					break;
				}
				cset = cset->next;
			}
			if (cset==NULL) {
				/* didn't find a match above... new set..*/
				cset = (RF_ConfigSet_t *)
					malloc(sizeof(RF_ConfigSet_t),
					       M_RAIDFRAME, M_NOWAIT);
				if (cset == NULL) {
					panic("rf_create_auto_sets: No memory!");
				}
				cset->ac = ac;
				ac->next = NULL;
				cset->next = config_sets;
				cset->rootable = 0;
				config_sets = cset;
			}
		}
		ac = ac_next;
	}


	return(config_sets);
}
3341
3342 static int
3343 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3344 {
3345 RF_ComponentLabel_t *clabel1, *clabel2;
3346
3347 /* If this one matches the *first* one in the set, that's good
3348 enough, since the other members of the set would have been
3349 through here too... */
3350 /* note that we are not checking partitionSize here..
3351
3352 Note that we are also not checking the mod_counters here.
3353 If everything else matches execpt the mod_counter, that's
3354 good enough for this test. We will deal with the mod_counters
3355 a little later in the autoconfiguration process.
3356
3357 (clabel1->mod_counter == clabel2->mod_counter) &&
3358
3359 The reason we don't check for this is that failed disks
3360 will have lower modification counts. If those disks are
3361 not added to the set they used to belong to, then they will
3362 form their own set, which may result in 2 different sets,
3363 for example, competing to be configured at raid0, and
3364 perhaps competing to be the root filesystem set. If the
3365 wrong ones get configured, or both attempt to become /,
3366 weird behaviour and or serious lossage will occur. Thus we
3367 need to bring them into the fold here, and kick them out at
3368 a later point.
3369
3370 */
3371
3372 clabel1 = cset->ac->clabel;
3373 clabel2 = ac->clabel;
3374 if ((clabel1->version == clabel2->version) &&
3375 (clabel1->serial_number == clabel2->serial_number) &&
3376 (clabel1->num_rows == clabel2->num_rows) &&
3377 (clabel1->num_columns == clabel2->num_columns) &&
3378 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3379 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3380 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3381 (clabel1->parityConfig == clabel2->parityConfig) &&
3382 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3383 (clabel1->blockSize == clabel2->blockSize) &&
3384 rf_component_label_numblocks(clabel1) ==
3385 rf_component_label_numblocks(clabel2) &&
3386 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3387 (clabel1->root_partition == clabel2->root_partition) &&
3388 (clabel1->last_unit == clabel2->last_unit) &&
3389 (clabel1->config_order == clabel2->config_order)) {
3390 /* if it get's here, it almost *has* to be a match */
3391 } else {
3392 /* it's not consistent with somebody in the set..
3393 punt */
3394 return(0);
3395 }
3396 /* all was fine.. it must fit... */
3397 return(1);
3398 }
3399
/*
 * Check that config set 'cset' still has enough live components to be
 * configured.  Returns 1 if the set is usable, 0 if too many
 * components are missing.  Only components whose mod_counter matches
 * the highest mod_counter in the set are counted as present.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set. If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */
	/* (the maximum over all members: failed members have stale,
	   lower counters) */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	/* For each column, search the set for an up-to-date component. */
	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an odd component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next pair.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate
	   exactly one.  (RAID 1 was fully handled in the loop above.) */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3502
3503 void
3504 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3505 RF_Raid_t *raidPtr)
3506 {
3507 RF_ComponentLabel_t *clabel;
3508 int i;
3509
3510 clabel = ac->clabel;
3511
3512 /* 1. Fill in the common stuff */
3513 config->numRow = clabel->num_rows = 1;
3514 config->numCol = clabel->num_columns;
3515 config->numSpare = 0; /* XXX should this be set here? */
3516 config->sectPerSU = clabel->sectPerSU;
3517 config->SUsPerPU = clabel->SUsPerPU;
3518 config->SUsPerRU = clabel->SUsPerRU;
3519 config->parityConfig = clabel->parityConfig;
3520 /* XXX... */
3521 strcpy(config->diskQueueType,"fifo");
3522 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3523 config->layoutSpecificSize = 0; /* XXX ?? */
3524
3525 while(ac!=NULL) {
3526 /* row/col values will be in range due to the checks
3527 in reasonable_label() */
3528 strcpy(config->devnames[0][ac->clabel->column],
3529 ac->devname);
3530 ac = ac->next;
3531 }
3532
3533 for(i=0;i<RF_MAXDBGV;i++) {
3534 config->debugVars[i][0] = 0;
3535 }
3536 }
3537
3538 int
3539 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3540 {
3541 RF_ComponentLabel_t *clabel;
3542 int column;
3543 int sparecol;
3544
3545 raidPtr->autoconfigure = new_value;
3546
3547 for(column=0; column<raidPtr->numCol; column++) {
3548 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3549 clabel = raidget_component_label(raidPtr, column);
3550 clabel->autoconfigure = new_value;
3551 raidflush_component_label(raidPtr, column);
3552 }
3553 }
3554 for(column = 0; column < raidPtr->numSpare ; column++) {
3555 sparecol = raidPtr->numCol + column;
3556 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3557 clabel = raidget_component_label(raidPtr, sparecol);
3558 clabel->autoconfigure = new_value;
3559 raidflush_component_label(raidPtr, sparecol);
3560 }
3561 }
3562 return(new_value);
3563 }
3564
3565 int
3566 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3567 {
3568 RF_ComponentLabel_t *clabel;
3569 int column;
3570 int sparecol;
3571
3572 raidPtr->root_partition = new_value;
3573 for(column=0; column<raidPtr->numCol; column++) {
3574 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3575 clabel = raidget_component_label(raidPtr, column);
3576 clabel->root_partition = new_value;
3577 raidflush_component_label(raidPtr, column);
3578 }
3579 }
3580 for(column = 0; column < raidPtr->numSpare ; column++) {
3581 sparecol = raidPtr->numCol + column;
3582 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3583 clabel = raidget_component_label(raidPtr, sparecol);
3584 clabel->root_partition = new_value;
3585 raidflush_component_label(raidPtr, sparecol);
3586 }
3587 }
3588 return(new_value);
3589 }
3590
3591 void
3592 rf_release_all_vps(RF_ConfigSet_t *cset)
3593 {
3594 RF_AutoConfig_t *ac;
3595
3596 ac = cset->ac;
3597 while(ac!=NULL) {
3598 /* Close the vp, and give it back */
3599 if (ac->vp) {
3600 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3601 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3602 vput(ac->vp);
3603 ac->vp = NULL;
3604 }
3605 ac = ac->next;
3606 }
3607 }
3608
3609
3610 void
3611 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3612 {
3613 RF_AutoConfig_t *ac;
3614 RF_AutoConfig_t *next_ac;
3615
3616 ac = cset->ac;
3617 while(ac!=NULL) {
3618 next_ac = ac->next;
3619 /* nuke the label */
3620 free(ac->clabel, M_RAIDFRAME);
3621 /* cleanup the config structure */
3622 free(ac, M_RAIDFRAME);
3623 /* "next.." */
3624 ac = next_ac;
3625 }
3626 /* and, finally, nuke the config set */
3627 free(cset, M_RAIDFRAME);
3628 }
3629
3630
/*
 * Fill in a component label from the current state of the RAID set.
 * The column/row fields of the label are NOT set here; callers are
 * responsible for per-component fields.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	/* splits the 64-bit sector count across the label's Hi/Lo fields */
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3663
3664 int
3665 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3666 {
3667 RF_Raid_t *raidPtr;
3668 RF_Config_t *config;
3669 int raidID;
3670 int retcode;
3671
3672 #ifdef DEBUG
3673 printf("RAID autoconfigure\n");
3674 #endif
3675
3676 retcode = 0;
3677 *unit = -1;
3678
3679 /* 1. Create a config structure */
3680
3681 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3682 M_RAIDFRAME,
3683 M_NOWAIT);
3684 if (config==NULL) {
3685 printf("Out of mem!?!?\n");
3686 /* XXX do something more intelligent here. */
3687 return(1);
3688 }
3689
3690 memset(config, 0, sizeof(RF_Config_t));
3691
3692 /*
3693 2. Figure out what RAID ID this one is supposed to live at
3694 See if we can get the same RAID dev that it was configured
3695 on last time..
3696 */
3697
3698 raidID = cset->ac->clabel->last_unit;
3699 if ((raidID < 0) || (raidID >= numraid)) {
3700 /* let's not wander off into lala land. */
3701 raidID = numraid - 1;
3702 }
3703 if (raidPtrs[raidID]->valid != 0) {
3704
3705 /*
3706 Nope... Go looking for an alternative...
3707 Start high so we don't immediately use raid0 if that's
3708 not taken.
3709 */
3710
3711 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3712 if (raidPtrs[raidID]->valid == 0) {
3713 /* can use this one! */
3714 break;
3715 }
3716 }
3717 }
3718
3719 if (raidID < 0) {
3720 /* punt... */
3721 printf("Unable to auto configure this set!\n");
3722 printf("(Out of RAID devs!)\n");
3723 free(config, M_RAIDFRAME);
3724 return(1);
3725 }
3726
3727 #ifdef DEBUG
3728 printf("Configuring raid%d:\n",raidID);
3729 #endif
3730
3731 raidPtr = raidPtrs[raidID];
3732
3733 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3734 raidPtr->raidid = raidID;
3735 raidPtr->openings = RAIDOUTSTANDING;
3736
3737 /* 3. Build the configuration structure */
3738 rf_create_configuration(cset->ac, config, raidPtr);
3739
3740 /* 4. Do the configuration */
3741 retcode = rf_Configure(raidPtr, config, cset->ac);
3742
3743 if (retcode == 0) {
3744
3745 raidinit(raidPtrs[raidID]);
3746
3747 rf_markalldirty(raidPtrs[raidID]);
3748 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3749 if (cset->ac->clabel->root_partition==1) {
3750 /* everything configured just fine. Make a note
3751 that this set is eligible to be root. */
3752 cset->rootable = 1;
3753 /* XXX do this here? */
3754 raidPtrs[raidID]->root_partition = 1;
3755 }
3756 }
3757
3758 /* 5. Cleanup */
3759 free(config, M_RAIDFRAME);
3760
3761 *unit = raidID;
3762 return(retcode);
3763 }
3764
3765 void
3766 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3767 {
3768 struct buf *bp;
3769
3770 bp = (struct buf *)desc->bp;
3771 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3772 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3773 }
3774
/*
 * Initialize pool 'p' for objects of 'size' bytes, pre-allocate
 * 'xmin' items, and set the low/high watermarks to xmin/xmax.
 * 'w_chan' is the wait channel name shown while sleeping on the pool.
 * NOTE(review): the return value of pool_prime() is ignored here;
 * priming may silently fall short of xmin items.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
    size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3784
3785 /*
3786 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3787 * if there is IO pending and if that IO could possibly be done for a
3788 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3789 * otherwise.
3790 *
3791 */
3792
3793 int
3794 rf_buf_queue_check(int raidid)
3795 {
3796 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
3797 raidPtrs[raidid]->openings > 0) {
3798 /* there is work to do */
3799 return 0;
3800 }
3801 /* default is nothing to do */
3802 return 1;
3803 }
3804
3805 int
3806 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
3807 {
3808 struct partinfo dpart;
3809 struct dkwedge_info dkw;
3810 int error;
3811
3812 error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred);
3813 if (error == 0) {
3814 diskPtr->blockSize = dpart.disklab->d_secsize;
3815 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
3816 diskPtr->partitionSize = dpart.part->p_size;
3817 return 0;
3818 }
3819
3820 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred);
3821 if (error == 0) {
3822 struct disk *pdk;
3823
3824 if ((pdk = disk_find(dkw.dkw_parent)) != NULL)
3825 diskPtr->blockSize = DEV_BSIZE << pdk->dk_blkshift;
3826 else
3827 diskPtr->blockSize = 512; /* XXX */
3828 diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
3829 diskPtr->partitionSize = dkw.dkw_size;
3830 return 0;
3831 }
3832 return error;
3833 }
3834
/*
 * Autoconfiguration match routine for the raid pseudo-device.
 * Always matches.
 */
static int
raid_match(struct device *self, struct cfdata *cfdata,
    void *aux)
{
	return 1;
}
3841
/*
 * Autoconfiguration attach routine for the raid pseudo-device.
 * Intentionally empty: real setup happens elsewhere (raidinit).
 */
static void
raid_attach(struct device *parent, struct device *self,
    void *aux)
{

}
3848
3849
3850 static int
3851 raid_detach(struct device *self, int flags)
3852 {
3853 struct raid_softc *rs = (struct raid_softc *)self;
3854
3855 if (rs->sc_flags & RAIDF_INITED)
3856 return EBUSY;
3857
3858 return 0;
3859 }
3860
/*
 * Publish a synthetic disk geometry for the raid unit via the device
 * properties dictionary, replacing any previous disk-info dictionary.
 * NOTE(review): sectors-per-track and tracks-per-cylinder are
 * fabricated from the stripe geometry (one data stripe per "track",
 * 4*numCol "tracks" per cylinder) — presumably just to give
 * disklabel-style consumers plausible values; confirm before relying
 * on them.
 */
static void
rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	prop_dictionary_t disk_info, odisk_info, geom;
	disk_info = prop_dictionary_create();
	geom = prop_dictionary_create();
	prop_dictionary_set_uint64(geom, "sectors-per-unit",
	    raidPtr->totalSectors);
	prop_dictionary_set_uint32(geom, "sector-size",
	    raidPtr->bytesPerSector);

	prop_dictionary_set_uint16(geom, "sectors-per-track",
	    raidPtr->Layout.dataSectorsPerStripe);
	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
	    4 * raidPtr->numCol);

	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
	    raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
	    (4 * raidPtr->numCol)));

	/* geom is retained by disk_info; drop our reference. */
	prop_dictionary_set(disk_info, "geometry", geom);
	prop_object_release(geom);
	prop_dictionary_set(device_properties(rs->sc_dev),
	    "disk-info", disk_info);
	/* Swap in the new dictionary and release the old one, if any. */
	odisk_info = rs->sc_dkdev.dk_info;
	rs->sc_dkdev.dk_info = disk_info;
	if (odisk_info)
		prop_object_release(odisk_info);
}
3890
3891 /*
3892 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3893 * We end up returning whatever error was returned by the first cache flush
3894 * that fails.
3895 */
3896
3897 int
3898 rf_sync_component_caches(RF_Raid_t *raidPtr)
3899 {
3900 int c, sparecol;
3901 int e,error;
3902 int force = 1;
3903
3904 error = 0;
3905 for (c = 0; c < raidPtr->numCol; c++) {
3906 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3907 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3908 &force, FWRITE, NOCRED);
3909 if (e) {
3910 if (e != ENODEV)
3911 printf("raid%d: cache flush to component %s failed.\n",
3912 raidPtr->raidid, raidPtr->Disks[c].devname);
3913 if (error == 0) {
3914 error = e;
3915 }
3916 }
3917 }
3918 }
3919
3920 for( c = 0; c < raidPtr->numSpare ; c++) {
3921 sparecol = raidPtr->numCol + c;
3922 /* Need to ensure that the reconstruct actually completed! */
3923 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3924 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3925 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3926 if (e) {
3927 if (e != ENODEV)
3928 printf("raid%d: cache flush to component %s failed.\n",
3929 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3930 if (error == 0) {
3931 error = e;
3932 }
3933 }
3934 }
3935 }
3936 return error;
3937 }
3938