rf_netbsdkintf.c revision 1.298.2.2 1 /* $NetBSD: rf_netbsdkintf.c,v 1.298.2.2 2013/02/25 00:29:30 tls Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.298.2.2 2013/02/25 00:29:30 tls Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #include "raid.h"
110 #endif
111
112 #include <sys/param.h>
113 #include <sys/errno.h>
114 #include <sys/pool.h>
115 #include <sys/proc.h>
116 #include <sys/queue.h>
117 #include <sys/disk.h>
118 #include <sys/device.h>
119 #include <sys/stat.h>
120 #include <sys/ioctl.h>
121 #include <sys/fcntl.h>
122 #include <sys/systm.h>
123 #include <sys/vnode.h>
124 #include <sys/disklabel.h>
125 #include <sys/conf.h>
126 #include <sys/buf.h>
127 #include <sys/bufq.h>
128 #include <sys/reboot.h>
129 #include <sys/kauth.h>
130
131 #include <prop/proplib.h>
132
133 #include <dev/raidframe/raidframevar.h>
134 #include <dev/raidframe/raidframeio.h>
135 #include <dev/raidframe/rf_paritymap.h>
136
137 #include "rf_raid.h"
138 #include "rf_copyback.h"
139 #include "rf_dag.h"
140 #include "rf_dagflags.h"
141 #include "rf_desc.h"
142 #include "rf_diskqueue.h"
143 #include "rf_etimer.h"
144 #include "rf_general.h"
145 #include "rf_kintf.h"
146 #include "rf_options.h"
147 #include "rf_driver.h"
148 #include "rf_parityscan.h"
149 #include "rf_threadstuff.h"
150
151 #ifdef COMPAT_50
152 #include "rf_compat50.h"
153 #endif
154
155 #ifdef DEBUG
156 int rf_kdebug_level = 0;
157 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
158 #else /* DEBUG */
159 #define db1_printf(a) { }
160 #endif /* DEBUG */
161
162 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
163
164 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
165 static rf_declare_mutex2(rf_sparet_wait_mutex);
166 static rf_declare_cond2(rf_sparet_wait_cv);
167 static rf_declare_cond2(rf_sparet_resp_cv);
168
169 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
170 * spare table */
171 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
172 * installation process */
173 #endif
174
175 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
176
177 /* prototypes */
178 static void KernelWakeupFunc(struct buf *);
179 static void InitBP(struct buf *, struct vnode *, unsigned,
180 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
181 void *, int, struct proc *);
182 static void raidinit(RF_Raid_t *);
183
184 void raidattach(int);
185 static int raid_match(device_t, cfdata_t, void *);
186 static void raid_attach(device_t, device_t, void *);
187 static int raid_detach(device_t, int);
188
189 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t);
191 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
192 daddr_t, daddr_t, int);
193
194 static int raidwrite_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196 static int raidread_component_label(unsigned,
197 dev_t, struct vnode *, RF_ComponentLabel_t *);
198
199
200 dev_type_open(raidopen);
201 dev_type_close(raidclose);
202 dev_type_read(raidread);
203 dev_type_write(raidwrite);
204 dev_type_ioctl(raidioctl);
205 dev_type_strategy(raidstrategy);
206 dev_type_dump(raiddump);
207 dev_type_size(raidsize);
208
209 const struct bdevsw raid_bdevsw = {
210 raidopen, raidclose, raidstrategy, raidioctl,
211 raiddump, raidsize, D_DISK
212 };
213
214 const struct cdevsw raid_cdevsw = {
215 raidopen, raidclose, raidread, raidwrite, raidioctl,
216 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
217 };
218
219 static void raidminphys(struct buf *);
220
221 static struct dkdriver rf_dkdriver = { raidstrategy, raidminphys };
222
223 /* XXX Not sure if the following should be replacing the raidPtrs above,
224 or if it should be used in conjunction with that...
225 */
226
/*
 * Per-unit software state for a RAID pseudo-device.  One of these is
 * allocated for each configured unit in raidattach().
 */
struct raid_softc {
	device_t sc_dev;		/* autoconf device handle for this unit */
	int     sc_flags;		/* flags (RAIDF_* below) */
	int     sc_cflags;		/* configuration flags */
	uint64_t sc_size;		/* size of the raid device */
	char    sc_xname[20];		/* XXX external name */
	struct disk sc_dkdev;		/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
};
236 /* sc_flags */
237 #define RAIDF_INITED 0x01 /* unit has been initialized */
238 #define RAIDF_WLABEL 0x02 /* label area is writable */
239 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
240 #define RAIDF_SHUTDOWN 0x08 /* unit is being shutdown */
241 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
242 #define RAIDF_LOCKED 0x80 /* unit is locked */
243
244 #define raidunit(x) DISKUNIT(x)
245 int numraid = 0;
246
247 extern struct cfdriver raid_cd;
248 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
249 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
250 DVF_DETACH_SHUTDOWN);
251
252 /*
253 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
254 * Be aware that large numbers can allow the driver to consume a lot of
255 * kernel memory, especially on writes, and in degraded mode reads.
256 *
257 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
258 * a single 64K write will typically require 64K for the old data,
259 * 64K for the old parity, and 64K for the new parity, for a total
260 * of 192K (if the parity buffer is not re-used immediately).
261 * Even it if is used immediately, that's still 128K, which when multiplied
262 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
263 *
264 * Now in degraded mode, for example, a 64K read on the above setup may
265 * require data reconstruction, which will require *all* of the 4 remaining
266 * disks to participate -- 4 * 32K/disk == 128K again.
267 */
268
269 #ifndef RAIDOUTSTANDING
270 #define RAIDOUTSTANDING 6
271 #endif
272
273 #define RAIDLABELDEV(dev) \
274 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
275
276 /* declared here, and made public, for the benefit of KVM stuff.. */
277 struct raid_softc *raid_softc;
278
279 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
280 struct disklabel *);
281 static void raidgetdisklabel(dev_t);
282 static void raidmakedisklabel(struct raid_softc *);
283
284 static int raidlock(struct raid_softc *);
285 static void raidunlock(struct raid_softc *);
286
287 static int raid_detach_unlocked(struct raid_softc *);
288
289 static void rf_markalldirty(RF_Raid_t *);
290 static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
291
292 void rf_ReconThread(struct rf_recon_req *);
293 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
294 void rf_CopybackThread(RF_Raid_t *raidPtr);
295 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
296 int rf_autoconfig(device_t);
297 void rf_buildroothack(RF_ConfigSet_t *);
298
299 RF_AutoConfig_t *rf_find_raid_components(void);
300 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
301 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
302 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
303 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
304 int rf_set_autoconfig(RF_Raid_t *, int);
305 int rf_set_rootpartition(RF_Raid_t *, int);
306 void rf_release_all_vps(RF_ConfigSet_t *);
307 void rf_cleanup_config_set(RF_ConfigSet_t *);
308 int rf_have_enough_components(RF_ConfigSet_t *);
309 int rf_auto_config_set(RF_ConfigSet_t *, int *);
310 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
311
312 /*
313 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
314 * Note that this is overridden by having RAID_AUTOCONFIG as an option
315 * in the kernel config file.
316 */
317 #ifdef RAID_AUTOCONFIG
318 int raidautoconfig = 1;
319 #else
320 int raidautoconfig = 0;
321 #endif
322 static bool raidautoconfigdone = false;
323
324 struct RF_Pools_s rf_pools;
325
/*
 * raidattach:
 *
 *	Pseudo-device attach routine; called once at boot with the number
 *	of units requested in the kernel config.  Allocates the global
 *	raidPtrs[] and raid_softc[] arrays, boots the RAIDframe core,
 *	attaches the autoconf glue, and registers a config finalizer that
 *	will run component auto-configuration after real hardware has
 *	been found.
 *
 *	On partial allocation failure, numraid is trimmed to the number
 *	of units actually set up so the rest of the driver stays within
 *	bounds.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

	aprint_debug("raidattach: Asked for %d units\n", num);

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* Synchronization for spare-table installation requests. */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		aprint_error("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
			/* Trim numraid so only fully-initialized units
			 * are ever referenced. */
			numraid = raidID;
			return;
		}
	}

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
405
406 int
407 rf_autoconfig(device_t self)
408 {
409 RF_AutoConfig_t *ac_list;
410 RF_ConfigSet_t *config_sets;
411
412 if (!raidautoconfig || raidautoconfigdone == true)
413 return (0);
414
415 /* XXX This code can only be run once. */
416 raidautoconfigdone = true;
417
418 /* 1. locate all RAID components on the system */
419 aprint_debug("Searching for RAID components...\n");
420 ac_list = rf_find_raid_components();
421
422 /* 2. Sort them into their respective sets. */
423 config_sets = rf_create_auto_sets(ac_list);
424
425 /*
426 * 3. Evaluate each set and configure the valid ones.
427 * This gets done in rf_buildroothack().
428 */
429 rf_buildroothack(config_sets);
430
431 return 1;
432 }
433
/*
 * rf_buildroothack:
 *
 *	Walk the list of auto-detected configuration sets, configuring
 *	every set that has enough components and is marked for
 *	autoconfiguration.  Afterwards, if exactly one configured set is
 *	rootable, point booted_device at it; if several are, try to
 *	disambiguate by matching component names against the device we
 *	actually booted from, and fall back to asking the user
 *	(RB_ASKNAME) when that fails.  Consumes (frees) config_sets.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int col;
	int num_root;
	char *devname;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		/* save the link now; rf_cleanup_config_set() frees cset */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure==1) {
			retcode = rf_auto_config_set(cset,&raidID);
			if (!retcode) {
				aprint_debug("raid%d: configured ok\n", raidID);
				if (cset->rootable) {
					rootID = raidID;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	if (num_root == 1) {
		if (raid_softc[rootID].sc_dkdev.dk_nwedges != 0) {
			/* XXX: How do we find the real root partition? */
			char cname[sizeof(cset->ac->devname)];
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(raid_softc[rootID].sc_dev), 'a');
			booted_device = dkwedge_find_by_wname(cname);
		} else
			booted_device = raid_softc[rootID].sc_dev;
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */

		if (booted_device == NULL)
			cpu_rootconf();
		if (booted_device == NULL)
			return;

		/*
		 * Recount the rootable sets, keeping only those that
		 * contain the component we actually booted from.
		 */
		num_root = 0;
		for (raidID = 0; raidID < numraid; raidID++) {
			if (raidPtrs[raidID]->valid == 0)
				continue;

			if (raidPtrs[raidID]->root_partition == 0)
				continue;

			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
				devname = raidPtrs[raidID]->Disks[col].devname;
				/* skip the "/dev/" prefix for comparison */
				devname += sizeof("/dev/") - 1;
				if (strncmp(devname, device_xname(booted_device),
					    strlen(device_xname(booted_device))) != 0)
					continue;
				aprint_debug("raid%d includes boot device %s\n",
					     raidID, devname);
				num_root++;
				rootID = raidID;
			}
		}

		if (num_root == 1) {
			booted_device = raid_softc[rootID].sc_dev;
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
535
536
537 int
538 raidsize(dev_t dev)
539 {
540 struct raid_softc *rs;
541 struct disklabel *lp;
542 int part, unit, omask, size;
543
544 unit = raidunit(dev);
545 if (unit >= numraid)
546 return (-1);
547 rs = &raid_softc[unit];
548
549 if ((rs->sc_flags & RAIDF_INITED) == 0)
550 return (-1);
551
552 part = DISKPART(dev);
553 omask = rs->sc_dkdev.dk_openmask & (1 << part);
554 lp = rs->sc_dkdev.dk_label;
555
556 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
557 return (-1);
558
559 if (lp->d_partitions[part].p_fstype != FS_SWAP)
560 size = -1;
561 else
562 size = lp->d_partitions[part].p_size *
563 (lp->d_secsize / DEV_BSIZE);
564
565 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
566 return (-1);
567
568 return (size);
569
570 }
571
/*
 * raiddump:
 *
 *	Crash-dump entry point.  Only RAID 1 sets (one data column, one
 *	parity column) are supported: the dump is written directly to a
 *	single live component, bypassing the normal RAIDframe I/O path.
 *	blkno is relative to the partition being dumped to; va/size
 *	describe the memory to write (size must be a multiple of
 *	DEV_BSIZE).
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);

	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* dumps must be whole DEV_BSIZE blocks */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* refuse to write past the end of the RAID device */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one?  Find which column (if
			   any) this spare is standing in for. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	*/

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
/* ARGSUSED */
/*
 * raidopen:
 *
 *	Open entry point (both block and character).  Takes the unit
 *	lock for the duration, validates the partition, records the
 *	open in the appropriate openmask, and — on the first open of a
 *	configured set — marks all components dirty so an unclean
 *	shutdown can be detected later.  The success path falls through
 *	the "bad" label with error == 0.
 */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	/* refuse new opens while the unit is being torn down */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* re-read the disklabel on the first open of a configured set */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(raidPtrs[unit]);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
795 /* ARGSUSED */
796 int
797 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
798 {
799 int unit = raidunit(dev);
800 struct raid_softc *rs;
801 int error = 0;
802 int part;
803
804 if (unit >= numraid)
805 return (ENXIO);
806 rs = &raid_softc[unit];
807
808 if ((error = raidlock(rs)) != 0)
809 return (error);
810
811 part = DISKPART(dev);
812
813 /* ...that much closer to allowing unconfiguration... */
814 switch (fmt) {
815 case S_IFCHR:
816 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
817 break;
818
819 case S_IFBLK:
820 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
821 break;
822 }
823 rs->sc_dkdev.dk_openmask =
824 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
825
826 if ((rs->sc_dkdev.dk_openmask == 0) &&
827 ((rs->sc_flags & RAIDF_INITED) != 0)) {
828 /* Last one... device is not unconfigured yet.
829 Device shutdown has taken care of setting the
830 clean bits if RAIDF_INITED is not set
831 mark things as clean... */
832
833 rf_update_component_labels(raidPtrs[unit],
834 RF_FINAL_COMPONENT_UPDATE);
835
836 /* If the kernel is shutting down, it will detach
837 * this RAID set soon enough.
838 */
839 }
840
841 raidunlock(rs);
842 return (0);
843
844 }
845
846 void
847 raidstrategy(struct buf *bp)
848 {
849 unsigned int raidID = raidunit(bp->b_dev);
850 RF_Raid_t *raidPtr;
851 struct raid_softc *rs = &raid_softc[raidID];
852 int wlabel;
853
854 if ((rs->sc_flags & RAIDF_INITED) ==0) {
855 bp->b_error = ENXIO;
856 goto done;
857 }
858 if (raidID >= numraid || !raidPtrs[raidID]) {
859 bp->b_error = ENODEV;
860 goto done;
861 }
862 raidPtr = raidPtrs[raidID];
863 if (!raidPtr->valid) {
864 bp->b_error = ENODEV;
865 goto done;
866 }
867 if (bp->b_bcount == 0) {
868 db1_printf(("b_bcount is zero..\n"));
869 goto done;
870 }
871
872 /*
873 * Do bounds checking and adjust transfer. If there's an
874 * error, the bounds check will flag that for us.
875 */
876
877 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
878 if (DISKPART(bp->b_dev) == RAW_PART) {
879 uint64_t size; /* device size in DEV_BSIZE unit */
880
881 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
882 size = raidPtr->totalSectors <<
883 (raidPtr->logBytesPerSector - DEV_BSHIFT);
884 } else {
885 size = raidPtr->totalSectors >>
886 (DEV_BSHIFT - raidPtr->logBytesPerSector);
887 }
888 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
889 goto done;
890 }
891 } else {
892 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
893 db1_printf(("Bounds check failed!!:%d %d\n",
894 (int) bp->b_blkno, (int) wlabel));
895 goto done;
896 }
897 }
898
899 rf_lock_mutex2(raidPtr->iodone_lock);
900
901 bp->b_resid = 0;
902
903 /* stuff it onto our queue */
904 bufq_put(rs->buf_queue, bp);
905
906 /* scheduled the IO to happen at the next convenient time */
907 rf_signal_cond2(raidPtr->iodone_cv);
908 rf_unlock_mutex2(raidPtr->iodone_lock);
909
910 return;
911
912 done:
913 bp->b_resid = bp->b_bcount;
914 biodone(bp);
915 }
916 /* ARGSUSED */
917 int
918 raidread(dev_t dev, struct uio *uio, int flags)
919 {
920 int unit = raidunit(dev);
921 struct raid_softc *rs;
922
923 if (unit >= numraid)
924 return (ENXIO);
925 rs = &raid_softc[unit];
926
927 if ((rs->sc_flags & RAIDF_INITED) == 0)
928 return (ENXIO);
929
930 return (physio(raidstrategy, NULL, dev, B_READ, raidminphys, uio));
931
932 }
933 /* ARGSUSED */
934 int
935 raidwrite(dev_t dev, struct uio *uio, int flags)
936 {
937 int unit = raidunit(dev);
938 struct raid_softc *rs;
939
940 if (unit >= numraid)
941 return (ENXIO);
942 rs = &raid_softc[unit];
943
944 if ((rs->sc_flags & RAIDF_INITED) == 0)
945 return (ENXIO);
946
947 return (physio(raidstrategy, NULL, dev, B_WRITE, raidminphys, uio));
948
949 }
950
951 static int
952 raid_detach_unlocked(struct raid_softc *rs)
953 {
954 int error;
955 RF_Raid_t *raidPtr;
956
957 raidPtr = raidPtrs[device_unit(rs->sc_dev)];
958
959 /*
960 * If somebody has a partition mounted, we shouldn't
961 * shutdown.
962 */
963 if (rs->sc_dkdev.dk_openmask != 0)
964 return EBUSY;
965
966 if ((rs->sc_flags & RAIDF_INITED) == 0)
967 ; /* not initialized: nothing to do */
968 else if ((error = rf_Shutdown(raidPtr)) != 0)
969 return error;
970 else
971 rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);
972
973 /* Detach the disk. */
974 dkwedge_delall(&rs->sc_dkdev);
975 disk_detach(&rs->sc_dkdev);
976 disk_destroy(&rs->sc_dkdev);
977
978 aprint_normal_dev(rs->sc_dev, "detached\n");
979
980 return 0;
981 }
982
983 int
984 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
985 {
986 int unit = raidunit(dev);
987 int error = 0;
988 int part, pmask, s;
989 cfdata_t cf;
990 struct raid_softc *rs;
991 RF_Config_t *k_cfg, *u_cfg;
992 RF_Raid_t *raidPtr;
993 RF_RaidDisk_t *diskPtr;
994 RF_AccTotals_t *totals;
995 RF_DeviceConfig_t *d_cfg, **ucfgp;
996 u_char *specific_buf;
997 int retcode = 0;
998 int column;
999 /* int raidid; */
1000 struct rf_recon_req *rrcopy, *rr;
1001 RF_ComponentLabel_t *clabel;
1002 RF_ComponentLabel_t *ci_label;
1003 RF_ComponentLabel_t **clabel_ptr;
1004 RF_SingleComponent_t *sparePtr,*componentPtr;
1005 RF_SingleComponent_t component;
1006 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
1007 int i, j, d;
1008 #ifdef __HAVE_OLD_DISKLABEL
1009 struct disklabel newlabel;
1010 #endif
1011 struct dkwedge_info *dkw;
1012
1013 if (unit >= numraid)
1014 return (ENXIO);
1015 rs = &raid_softc[unit];
1016 raidPtr = raidPtrs[unit];
1017
1018 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1019 (int) DISKPART(dev), (int) unit, cmd));
1020
1021 /* Must be open for writes for these commands... */
1022 switch (cmd) {
1023 #ifdef DIOCGSECTORSIZE
1024 case DIOCGSECTORSIZE:
1025 *(u_int *)data = raidPtr->bytesPerSector;
1026 return 0;
1027 case DIOCGMEDIASIZE:
1028 *(off_t *)data =
1029 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1030 return 0;
1031 #endif
1032 case DIOCSDINFO:
1033 case DIOCWDINFO:
1034 #ifdef __HAVE_OLD_DISKLABEL
1035 case ODIOCWDINFO:
1036 case ODIOCSDINFO:
1037 #endif
1038 case DIOCWLABEL:
1039 case DIOCAWEDGE:
1040 case DIOCDWEDGE:
1041 case DIOCSSTRATEGY:
1042 if ((flag & FWRITE) == 0)
1043 return (EBADF);
1044 }
1045
1046 /* Must be initialized for these... */
1047 switch (cmd) {
1048 case DIOCGDINFO:
1049 case DIOCSDINFO:
1050 case DIOCWDINFO:
1051 #ifdef __HAVE_OLD_DISKLABEL
1052 case ODIOCGDINFO:
1053 case ODIOCWDINFO:
1054 case ODIOCSDINFO:
1055 case ODIOCGDEFLABEL:
1056 #endif
1057 case DIOCGPART:
1058 case DIOCWLABEL:
1059 case DIOCGDEFLABEL:
1060 case DIOCAWEDGE:
1061 case DIOCDWEDGE:
1062 case DIOCLWEDGES:
1063 case DIOCCACHESYNC:
1064 case RAIDFRAME_SHUTDOWN:
1065 case RAIDFRAME_REWRITEPARITY:
1066 case RAIDFRAME_GET_INFO:
1067 case RAIDFRAME_RESET_ACCTOTALS:
1068 case RAIDFRAME_GET_ACCTOTALS:
1069 case RAIDFRAME_KEEP_ACCTOTALS:
1070 case RAIDFRAME_GET_SIZE:
1071 case RAIDFRAME_FAIL_DISK:
1072 case RAIDFRAME_COPYBACK:
1073 case RAIDFRAME_CHECK_RECON_STATUS:
1074 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1075 case RAIDFRAME_GET_COMPONENT_LABEL:
1076 case RAIDFRAME_SET_COMPONENT_LABEL:
1077 case RAIDFRAME_ADD_HOT_SPARE:
1078 case RAIDFRAME_REMOVE_HOT_SPARE:
1079 case RAIDFRAME_INIT_LABELS:
1080 case RAIDFRAME_REBUILD_IN_PLACE:
1081 case RAIDFRAME_CHECK_PARITY:
1082 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1083 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1084 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1085 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1086 case RAIDFRAME_SET_AUTOCONFIG:
1087 case RAIDFRAME_SET_ROOT:
1088 case RAIDFRAME_DELETE_COMPONENT:
1089 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1090 case RAIDFRAME_PARITYMAP_STATUS:
1091 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1092 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1093 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1094 case DIOCGSTRATEGY:
1095 case DIOCSSTRATEGY:
1096 if ((rs->sc_flags & RAIDF_INITED) == 0)
1097 return (ENXIO);
1098 }
1099
1100 switch (cmd) {
1101 #ifdef COMPAT_50
1102 case RAIDFRAME_GET_INFO50:
1103 return rf_get_info50(raidPtr, data);
1104
1105 case RAIDFRAME_CONFIGURE50:
1106 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1107 return retcode;
1108 goto config;
1109 #endif
1110 /* configure the system */
1111 case RAIDFRAME_CONFIGURE:
1112
1113 if (raidPtr->valid) {
1114 /* There is a valid RAID set running on this unit! */
1115 printf("raid%d: Device already configured!\n",unit);
1116 return(EINVAL);
1117 }
1118
1119 /* copy-in the configuration information */
1120 /* data points to a pointer to the configuration structure */
1121
1122 u_cfg = *((RF_Config_t **) data);
1123 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1124 if (k_cfg == NULL) {
1125 return (ENOMEM);
1126 }
1127 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1128 if (retcode) {
1129 RF_Free(k_cfg, sizeof(RF_Config_t));
1130 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1131 retcode));
1132 return (retcode);
1133 }
1134 goto config;
1135 config:
1136 /* allocate a buffer for the layout-specific data, and copy it
1137 * in */
1138 if (k_cfg->layoutSpecificSize) {
1139 if (k_cfg->layoutSpecificSize > 10000) {
1140 /* sanity check */
1141 RF_Free(k_cfg, sizeof(RF_Config_t));
1142 return (EINVAL);
1143 }
1144 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1145 (u_char *));
1146 if (specific_buf == NULL) {
1147 RF_Free(k_cfg, sizeof(RF_Config_t));
1148 return (ENOMEM);
1149 }
1150 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1151 k_cfg->layoutSpecificSize);
1152 if (retcode) {
1153 RF_Free(k_cfg, sizeof(RF_Config_t));
1154 RF_Free(specific_buf,
1155 k_cfg->layoutSpecificSize);
1156 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1157 retcode));
1158 return (retcode);
1159 }
1160 } else
1161 specific_buf = NULL;
1162 k_cfg->layoutSpecific = specific_buf;
1163
1164 /* should do some kind of sanity check on the configuration.
1165 * Store the sum of all the bytes in the last byte? */
1166
1167 /* configure the system */
1168
1169 /*
1170 * Clear the entire RAID descriptor, just to make sure
1171 * there is no stale data left in the case of a
1172 * reconfiguration
1173 */
1174 memset(raidPtr, 0, sizeof(*raidPtr));
1175 raidPtr->raidid = unit;
1176
1177 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1178
1179 if (retcode == 0) {
1180
1181 /* allow this many simultaneous IO's to
1182 this RAID device */
1183 raidPtr->openings = RAIDOUTSTANDING;
1184
1185 raidinit(raidPtr);
1186 rf_markalldirty(raidPtr);
1187 }
1188 /* free the buffers. No return code here. */
1189 if (k_cfg->layoutSpecificSize) {
1190 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1191 }
1192 RF_Free(k_cfg, sizeof(RF_Config_t));
1193
1194 return (retcode);
1195
1196 /* shutdown the system */
1197 case RAIDFRAME_SHUTDOWN:
1198
1199 part = DISKPART(dev);
1200 pmask = (1 << part);
1201
1202 if ((error = raidlock(rs)) != 0)
1203 return (error);
1204
1205 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1206 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1207 (rs->sc_dkdev.dk_copenmask & pmask)))
1208 retcode = EBUSY;
1209 else {
1210 rs->sc_flags |= RAIDF_SHUTDOWN;
1211 rs->sc_dkdev.dk_copenmask &= ~pmask;
1212 rs->sc_dkdev.dk_bopenmask &= ~pmask;
1213 rs->sc_dkdev.dk_openmask &= ~pmask;
1214 retcode = 0;
1215 }
1216
1217 raidunlock(rs);
1218
1219 if (retcode != 0)
1220 return retcode;
1221
1222 /* free the pseudo device attach bits */
1223
1224 cf = device_cfdata(rs->sc_dev);
1225 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
1226 free(cf, M_RAIDFRAME);
1227
1228 return (retcode);
1229 case RAIDFRAME_GET_COMPONENT_LABEL:
1230 clabel_ptr = (RF_ComponentLabel_t **) data;
1231 /* need to read the component label for the disk indicated
1232 by row,column in clabel */
1233
1234 /*
1235 * Perhaps there should be an option to skip the in-core
1236 * copy and hit the disk, as with disklabel(8).
1237 */
1238 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1239
1240 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
1241
1242 if (retcode) {
1243 RF_Free(clabel, sizeof(*clabel));
1244 return retcode;
1245 }
1246
1247 clabel->row = 0; /* Don't allow looking at anything else.*/
1248
1249 column = clabel->column;
1250
1251 if ((column < 0) || (column >= raidPtr->numCol +
1252 raidPtr->numSpare)) {
1253 RF_Free(clabel, sizeof(*clabel));
1254 return EINVAL;
1255 }
1256
1257 RF_Free(clabel, sizeof(*clabel));
1258
1259 clabel = raidget_component_label(raidPtr, column);
1260
1261 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
1262
1263 #if 0
1264 case RAIDFRAME_SET_COMPONENT_LABEL:
1265 clabel = (RF_ComponentLabel_t *) data;
1266
1267 /* XXX check the label for valid stuff... */
1268 /* Note that some things *should not* get modified --
1269 the user should be re-initing the labels instead of
1270 trying to patch things.
1271 */
1272
1273 raidid = raidPtr->raidid;
1274 #ifdef DEBUG
1275 printf("raid%d: Got component label:\n", raidid);
1276 printf("raid%d: Version: %d\n", raidid, clabel->version);
1277 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1278 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1279 printf("raid%d: Column: %d\n", raidid, clabel->column);
1280 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1281 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1282 printf("raid%d: Status: %d\n", raidid, clabel->status);
1283 #endif
1284 clabel->row = 0;
1285 column = clabel->column;
1286
1287 if ((column < 0) || (column >= raidPtr->numCol)) {
1288 return(EINVAL);
1289 }
1290
1291 /* XXX this isn't allowed to do anything for now :-) */
1292
1293 /* XXX and before it is, we need to fill in the rest
1294 of the fields!?!?!?! */
1295 memcpy(raidget_component_label(raidPtr, column),
1296 clabel, sizeof(*clabel));
1297 raidflush_component_label(raidPtr, column);
1298 return (0);
1299 #endif
1300
1301 case RAIDFRAME_INIT_LABELS:
1302 clabel = (RF_ComponentLabel_t *) data;
1303 /*
1304 we only want the serial number from
1305 the above. We get all the rest of the information
1306 from the config that was used to create this RAID
1307 set.
1308 */
1309
1310 raidPtr->serial_number = clabel->serial_number;
1311
1312 for(column=0;column<raidPtr->numCol;column++) {
1313 diskPtr = &raidPtr->Disks[column];
1314 if (!RF_DEAD_DISK(diskPtr->status)) {
1315 ci_label = raidget_component_label(raidPtr,
1316 column);
1317 /* Zeroing this is important. */
1318 memset(ci_label, 0, sizeof(*ci_label));
1319 raid_init_component_label(raidPtr, ci_label);
1320 ci_label->serial_number =
1321 raidPtr->serial_number;
				ci_label->row = 0; /* we don't pretend to support more */
1323 rf_component_label_set_partitionsize(ci_label,
1324 diskPtr->partitionSize);
1325 ci_label->column = column;
1326 raidflush_component_label(raidPtr, column);
1327 }
1328 /* XXXjld what about the spares? */
1329 }
1330
1331 return (retcode);
1332 case RAIDFRAME_SET_AUTOCONFIG:
1333 d = rf_set_autoconfig(raidPtr, *(int *) data);
1334 printf("raid%d: New autoconfig value is: %d\n",
1335 raidPtr->raidid, d);
1336 *(int *) data = d;
1337 return (retcode);
1338
1339 case RAIDFRAME_SET_ROOT:
1340 d = rf_set_rootpartition(raidPtr, *(int *) data);
1341 printf("raid%d: New rootpartition value is: %d\n",
1342 raidPtr->raidid, d);
1343 *(int *) data = d;
1344 return (retcode);
1345
1346 /* initialize all parity */
1347 case RAIDFRAME_REWRITEPARITY:
1348
1349 if (raidPtr->Layout.map->faultsTolerated == 0) {
1350 /* Parity for RAID 0 is trivially correct */
1351 raidPtr->parity_good = RF_RAID_CLEAN;
1352 return(0);
1353 }
1354
1355 if (raidPtr->parity_rewrite_in_progress == 1) {
1356 /* Re-write is already in progress! */
1357 return(EINVAL);
1358 }
1359
1360 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1361 rf_RewriteParityThread,
1362 raidPtr,"raid_parity");
1363 return (retcode);
1364
1365
1366 case RAIDFRAME_ADD_HOT_SPARE:
1367 sparePtr = (RF_SingleComponent_t *) data;
1368 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1369 retcode = rf_add_hot_spare(raidPtr, &component);
1370 return(retcode);
1371
1372 case RAIDFRAME_REMOVE_HOT_SPARE:
1373 return(retcode);
1374
1375 case RAIDFRAME_DELETE_COMPONENT:
1376 componentPtr = (RF_SingleComponent_t *)data;
1377 memcpy( &component, componentPtr,
1378 sizeof(RF_SingleComponent_t));
1379 retcode = rf_delete_component(raidPtr, &component);
1380 return(retcode);
1381
1382 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1383 componentPtr = (RF_SingleComponent_t *)data;
1384 memcpy( &component, componentPtr,
1385 sizeof(RF_SingleComponent_t));
1386 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1387 return(retcode);
1388
1389 case RAIDFRAME_REBUILD_IN_PLACE:
1390
1391 if (raidPtr->Layout.map->faultsTolerated == 0) {
1392 /* Can't do this on a RAID 0!! */
1393 return(EINVAL);
1394 }
1395
1396 if (raidPtr->recon_in_progress == 1) {
1397 /* a reconstruct is already in progress! */
1398 return(EINVAL);
1399 }
1400
1401 componentPtr = (RF_SingleComponent_t *) data;
1402 memcpy( &component, componentPtr,
1403 sizeof(RF_SingleComponent_t));
1404 component.row = 0; /* we don't support any more */
1405 column = component.column;
1406
1407 if ((column < 0) || (column >= raidPtr->numCol)) {
1408 return(EINVAL);
1409 }
1410
1411 rf_lock_mutex2(raidPtr->mutex);
1412 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1413 (raidPtr->numFailures > 0)) {
1414 /* XXX 0 above shouldn't be constant!!! */
1415 /* some component other than this has failed.
1416 Let's not make things worse than they already
1417 are... */
1418 printf("raid%d: Unable to reconstruct to disk at:\n",
1419 raidPtr->raidid);
1420 printf("raid%d: Col: %d Too many failures.\n",
1421 raidPtr->raidid, column);
1422 rf_unlock_mutex2(raidPtr->mutex);
1423 return (EINVAL);
1424 }
1425 if (raidPtr->Disks[column].status ==
1426 rf_ds_reconstructing) {
1427 printf("raid%d: Unable to reconstruct to disk at:\n",
1428 raidPtr->raidid);
1429 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column);
1430
1431 rf_unlock_mutex2(raidPtr->mutex);
1432 return (EINVAL);
1433 }
1434 if (raidPtr->Disks[column].status == rf_ds_spared) {
1435 rf_unlock_mutex2(raidPtr->mutex);
1436 return (EINVAL);
1437 }
1438 rf_unlock_mutex2(raidPtr->mutex);
1439
1440 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1441 if (rrcopy == NULL)
1442 return(ENOMEM);
1443
1444 rrcopy->raidPtr = (void *) raidPtr;
1445 rrcopy->col = column;
1446
1447 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1448 rf_ReconstructInPlaceThread,
1449 rrcopy,"raid_reconip");
1450 return(retcode);
1451
1452 case RAIDFRAME_GET_INFO:
1453 if (!raidPtr->valid)
1454 return (ENODEV);
1455 ucfgp = (RF_DeviceConfig_t **) data;
1456 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1457 (RF_DeviceConfig_t *));
1458 if (d_cfg == NULL)
1459 return (ENOMEM);
1460 d_cfg->rows = 1; /* there is only 1 row now */
1461 d_cfg->cols = raidPtr->numCol;
1462 d_cfg->ndevs = raidPtr->numCol;
1463 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1464 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1465 return (ENOMEM);
1466 }
1467 d_cfg->nspares = raidPtr->numSpare;
1468 if (d_cfg->nspares >= RF_MAX_DISKS) {
1469 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1470 return (ENOMEM);
1471 }
1472 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1473 d = 0;
1474 for (j = 0; j < d_cfg->cols; j++) {
1475 d_cfg->devs[d] = raidPtr->Disks[j];
1476 d++;
1477 }
1478 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1479 d_cfg->spares[i] = raidPtr->Disks[j];
1480 }
1481 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1482 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1483
1484 return (retcode);
1485
1486 case RAIDFRAME_CHECK_PARITY:
1487 *(int *) data = raidPtr->parity_good;
1488 return (0);
1489
1490 case RAIDFRAME_PARITYMAP_STATUS:
1491 if (rf_paritymap_ineligible(raidPtr))
1492 return EINVAL;
1493 rf_paritymap_status(raidPtr->parity_map,
1494 (struct rf_pmstat *)data);
1495 return 0;
1496
1497 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1498 if (rf_paritymap_ineligible(raidPtr))
1499 return EINVAL;
1500 if (raidPtr->parity_map == NULL)
1501 return ENOENT; /* ??? */
1502 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1503 (struct rf_pmparams *)data, 1))
1504 return EINVAL;
1505 return 0;
1506
1507 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1508 if (rf_paritymap_ineligible(raidPtr))
1509 return EINVAL;
1510 *(int *) data = rf_paritymap_get_disable(raidPtr);
1511 return 0;
1512
1513 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1514 if (rf_paritymap_ineligible(raidPtr))
1515 return EINVAL;
1516 rf_paritymap_set_disable(raidPtr, *(int *)data);
1517 /* XXX should errors be passed up? */
1518 return 0;
1519
1520 case RAIDFRAME_RESET_ACCTOTALS:
1521 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1522 return (0);
1523
1524 case RAIDFRAME_GET_ACCTOTALS:
1525 totals = (RF_AccTotals_t *) data;
1526 *totals = raidPtr->acc_totals;
1527 return (0);
1528
1529 case RAIDFRAME_KEEP_ACCTOTALS:
1530 raidPtr->keep_acc_totals = *(int *)data;
1531 return (0);
1532
1533 case RAIDFRAME_GET_SIZE:
1534 *(int *) data = raidPtr->totalSectors;
1535 return (0);
1536
1537 /* fail a disk & optionally start reconstruction */
1538 case RAIDFRAME_FAIL_DISK:
1539
1540 if (raidPtr->Layout.map->faultsTolerated == 0) {
1541 /* Can't do this on a RAID 0!! */
1542 return(EINVAL);
1543 }
1544
1545 rr = (struct rf_recon_req *) data;
1546 rr->row = 0;
1547 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1548 return (EINVAL);
1549
1550
1551 rf_lock_mutex2(raidPtr->mutex);
1552 if (raidPtr->status == rf_rs_reconstructing) {
1553 /* you can't fail a disk while we're reconstructing! */
1554 /* XXX wrong for RAID6 */
1555 rf_unlock_mutex2(raidPtr->mutex);
1556 return (EINVAL);
1557 }
1558 if ((raidPtr->Disks[rr->col].status ==
1559 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1560 /* some other component has failed. Let's not make
1561 things worse. XXX wrong for RAID6 */
1562 rf_unlock_mutex2(raidPtr->mutex);
1563 return (EINVAL);
1564 }
1565 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1566 /* Can't fail a spared disk! */
1567 rf_unlock_mutex2(raidPtr->mutex);
1568 return (EINVAL);
1569 }
1570 rf_unlock_mutex2(raidPtr->mutex);
1571
1572 /* make a copy of the recon request so that we don't rely on
1573 * the user's buffer */
1574 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1575 if (rrcopy == NULL)
1576 return(ENOMEM);
1577 memcpy(rrcopy, rr, sizeof(*rr));
1578 rrcopy->raidPtr = (void *) raidPtr;
1579
1580 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1581 rf_ReconThread,
1582 rrcopy,"raid_recon");
1583 return (0);
1584
1585 /* invoke a copyback operation after recon on whatever disk
1586 * needs it, if any */
1587 case RAIDFRAME_COPYBACK:
1588
1589 if (raidPtr->Layout.map->faultsTolerated == 0) {
1590 /* This makes no sense on a RAID 0!! */
1591 return(EINVAL);
1592 }
1593
1594 if (raidPtr->copyback_in_progress == 1) {
1595 /* Copyback is already in progress! */
1596 return(EINVAL);
1597 }
1598
1599 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1600 rf_CopybackThread,
1601 raidPtr,"raid_copyback");
1602 return (retcode);
1603
1604 /* return the percentage completion of reconstruction */
1605 case RAIDFRAME_CHECK_RECON_STATUS:
1606 if (raidPtr->Layout.map->faultsTolerated == 0) {
1607 /* This makes no sense on a RAID 0, so tell the
1608 user it's done. */
1609 *(int *) data = 100;
1610 return(0);
1611 }
1612 if (raidPtr->status != rf_rs_reconstructing)
1613 *(int *) data = 100;
1614 else {
1615 if (raidPtr->reconControl->numRUsTotal > 0) {
1616 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1617 } else {
1618 *(int *) data = 0;
1619 }
1620 }
1621 return (0);
1622 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1623 progressInfoPtr = (RF_ProgressInfo_t **) data;
1624 if (raidPtr->status != rf_rs_reconstructing) {
1625 progressInfo.remaining = 0;
1626 progressInfo.completed = 100;
1627 progressInfo.total = 100;
1628 } else {
1629 progressInfo.total =
1630 raidPtr->reconControl->numRUsTotal;
1631 progressInfo.completed =
1632 raidPtr->reconControl->numRUsComplete;
1633 progressInfo.remaining = progressInfo.total -
1634 progressInfo.completed;
1635 }
1636 retcode = copyout(&progressInfo, *progressInfoPtr,
1637 sizeof(RF_ProgressInfo_t));
1638 return (retcode);
1639
1640 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1641 if (raidPtr->Layout.map->faultsTolerated == 0) {
1642 /* This makes no sense on a RAID 0, so tell the
1643 user it's done. */
1644 *(int *) data = 100;
1645 return(0);
1646 }
1647 if (raidPtr->parity_rewrite_in_progress == 1) {
1648 *(int *) data = 100 *
1649 raidPtr->parity_rewrite_stripes_done /
1650 raidPtr->Layout.numStripe;
1651 } else {
1652 *(int *) data = 100;
1653 }
1654 return (0);
1655
1656 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1657 progressInfoPtr = (RF_ProgressInfo_t **) data;
1658 if (raidPtr->parity_rewrite_in_progress == 1) {
1659 progressInfo.total = raidPtr->Layout.numStripe;
1660 progressInfo.completed =
1661 raidPtr->parity_rewrite_stripes_done;
1662 progressInfo.remaining = progressInfo.total -
1663 progressInfo.completed;
1664 } else {
1665 progressInfo.remaining = 0;
1666 progressInfo.completed = 100;
1667 progressInfo.total = 100;
1668 }
1669 retcode = copyout(&progressInfo, *progressInfoPtr,
1670 sizeof(RF_ProgressInfo_t));
1671 return (retcode);
1672
1673 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1674 if (raidPtr->Layout.map->faultsTolerated == 0) {
1675 /* This makes no sense on a RAID 0 */
1676 *(int *) data = 100;
1677 return(0);
1678 }
1679 if (raidPtr->copyback_in_progress == 1) {
1680 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1681 raidPtr->Layout.numStripe;
1682 } else {
1683 *(int *) data = 100;
1684 }
1685 return (0);
1686
1687 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1688 progressInfoPtr = (RF_ProgressInfo_t **) data;
1689 if (raidPtr->copyback_in_progress == 1) {
1690 progressInfo.total = raidPtr->Layout.numStripe;
1691 progressInfo.completed =
1692 raidPtr->copyback_stripes_done;
1693 progressInfo.remaining = progressInfo.total -
1694 progressInfo.completed;
1695 } else {
1696 progressInfo.remaining = 0;
1697 progressInfo.completed = 100;
1698 progressInfo.total = 100;
1699 }
1700 retcode = copyout(&progressInfo, *progressInfoPtr,
1701 sizeof(RF_ProgressInfo_t));
1702 return (retcode);
1703
1704 /* the sparetable daemon calls this to wait for the kernel to
1705 * need a spare table. this ioctl does not return until a
1706 * spare table is needed. XXX -- calling mpsleep here in the
1707 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1708 * -- I should either compute the spare table in the kernel,
1709 * or have a different -- XXX XXX -- interface (a different
1710 * character device) for delivering the table -- XXX */
1711 #if 0
1712 case RAIDFRAME_SPARET_WAIT:
1713 rf_lock_mutex2(rf_sparet_wait_mutex);
1714 while (!rf_sparet_wait_queue)
1715 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1716 waitreq = rf_sparet_wait_queue;
1717 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1718 rf_unlock_mutex2(rf_sparet_wait_mutex);
1719
1720 /* structure assignment */
1721 *((RF_SparetWait_t *) data) = *waitreq;
1722
1723 RF_Free(waitreq, sizeof(*waitreq));
1724 return (0);
1725
	/* wakes up a process waiting on SPARET_WAIT and puts an error
	 * code in it that will cause the daemon to exit */
1728 case RAIDFRAME_ABORT_SPARET_WAIT:
1729 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1730 waitreq->fcol = -1;
1731 rf_lock_mutex2(rf_sparet_wait_mutex);
1732 waitreq->next = rf_sparet_wait_queue;
1733 rf_sparet_wait_queue = waitreq;
1734 rf_broadcast_conf2(rf_sparet_wait_cv);
1735 rf_unlock_mutex2(rf_sparet_wait_mutex);
1736 return (0);
1737
1738 /* used by the spare table daemon to deliver a spare table
1739 * into the kernel */
1740 case RAIDFRAME_SEND_SPARET:
1741
1742 /* install the spare table */
1743 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1744
1745 /* respond to the requestor. the return status of the spare
1746 * table installation is passed in the "fcol" field */
1747 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1748 waitreq->fcol = retcode;
1749 rf_lock_mutex2(rf_sparet_wait_mutex);
1750 waitreq->next = rf_sparet_resp_queue;
1751 rf_sparet_resp_queue = waitreq;
1752 rf_broadcast_cond2(rf_sparet_resp_cv);
1753 rf_unlock_mutex2(rf_sparet_wait_mutex);
1754
1755 return (retcode);
1756 #endif
1757
1758 default:
1759 break; /* fall through to the os-specific code below */
1760
1761 }
1762
1763 if (!raidPtr->valid)
1764 return (EINVAL);
1765
1766 /*
1767 * Add support for "regular" device ioctls here.
1768 */
1769
1770 error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
1771 if (error != EPASSTHROUGH)
1772 return (error);
1773
1774 switch (cmd) {
1775 case DIOCGDINFO:
1776 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1777 break;
1778 #ifdef __HAVE_OLD_DISKLABEL
1779 case ODIOCGDINFO:
1780 newlabel = *(rs->sc_dkdev.dk_label);
1781 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1782 return ENOTTY;
1783 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1784 break;
1785 #endif
1786
1787 case DIOCGPART:
1788 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1789 ((struct partinfo *) data)->part =
1790 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1791 break;
1792
1793 case DIOCWDINFO:
1794 case DIOCSDINFO:
1795 #ifdef __HAVE_OLD_DISKLABEL
1796 case ODIOCWDINFO:
1797 case ODIOCSDINFO:
1798 #endif
1799 {
1800 struct disklabel *lp;
1801 #ifdef __HAVE_OLD_DISKLABEL
1802 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1803 memset(&newlabel, 0, sizeof newlabel);
1804 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1805 lp = &newlabel;
1806 } else
1807 #endif
1808 lp = (struct disklabel *)data;
1809
1810 if ((error = raidlock(rs)) != 0)
1811 return (error);
1812
1813 rs->sc_flags |= RAIDF_LABELLING;
1814
1815 error = setdisklabel(rs->sc_dkdev.dk_label,
1816 lp, 0, rs->sc_dkdev.dk_cpulabel);
1817 if (error == 0) {
1818 if (cmd == DIOCWDINFO
1819 #ifdef __HAVE_OLD_DISKLABEL
1820 || cmd == ODIOCWDINFO
1821 #endif
1822 )
1823 error = writedisklabel(RAIDLABELDEV(dev),
1824 raidstrategy, rs->sc_dkdev.dk_label,
1825 rs->sc_dkdev.dk_cpulabel);
1826 }
1827 rs->sc_flags &= ~RAIDF_LABELLING;
1828
1829 raidunlock(rs);
1830
1831 if (error)
1832 return (error);
1833 break;
1834 }
1835
1836 case DIOCWLABEL:
1837 if (*(int *) data != 0)
1838 rs->sc_flags |= RAIDF_WLABEL;
1839 else
1840 rs->sc_flags &= ~RAIDF_WLABEL;
1841 break;
1842
1843 case DIOCGDEFLABEL:
1844 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1845 break;
1846
1847 #ifdef __HAVE_OLD_DISKLABEL
1848 case ODIOCGDEFLABEL:
1849 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1850 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1851 return ENOTTY;
1852 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1853 break;
1854 #endif
1855
1856 case DIOCAWEDGE:
1857 case DIOCDWEDGE:
1858 dkw = (void *)data;
1859
1860 /* If the ioctl happens here, the parent is us. */
1861 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1862 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1863
1864 case DIOCLWEDGES:
1865 return dkwedge_list(&rs->sc_dkdev,
1866 (struct dkwedge_list *)data, l);
1867 case DIOCCACHESYNC:
1868 return rf_sync_component_caches(raidPtr);
1869
1870 case DIOCGSTRATEGY:
1871 {
1872 struct disk_strategy *dks = (void *)data;
1873
1874 s = splbio();
1875 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1876 sizeof(dks->dks_name));
1877 splx(s);
1878 dks->dks_paramlen = 0;
1879
1880 return 0;
1881 }
1882
1883 case DIOCSSTRATEGY:
1884 {
1885 struct disk_strategy *dks = (void *)data;
1886 struct bufq_state *new;
1887 struct bufq_state *old;
1888
1889 if (dks->dks_param != NULL) {
1890 return EINVAL;
1891 }
1892 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1893 error = bufq_alloc(&new, dks->dks_name,
1894 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1895 if (error) {
1896 return error;
1897 }
1898 s = splbio();
1899 old = rs->buf_queue;
1900 bufq_move(new, old);
1901 rs->buf_queue = new;
1902 splx(s);
1903 bufq_free(old);
1904
1905 return 0;
1906 }
1907
1908 default:
1909 retcode = ENOTTY;
1910 }
1911 return (retcode);
1912
1913 }
1914
1915
/* raidinit -- complete the rest of the initialization for the
   RAIDframe device: attach the pseudo-device, register the disk with
   the kernel, and kick off wedge discovery.  Called after a successful
   rf_Configure() (see RAIDFRAME_CONFIGURE in raidioctl). */


static void
raidinit(RF_Raid_t *raidPtr)
{
	cfdata_t cf;		/* config data for the pseudo-device attach */
	struct raid_softc *rs;
	int     unit;

	unit = raidPtr->raidid;

	rs = &raid_softc[unit];

	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;	/* wildcard unit ("raid*") */

	rs->sc_dev = config_attach_pseudo(cf);

	if (rs->sc_dev == NULL) {
		/* Attach failed: undo the INITED flag and release the
		 * cfdata we allocated above; the unit stays unconfigured. */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		rs->sc_flags &= ~RAIDF_INITED;
		free(cf, M_RAIDFRAME);
		return;
	}

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
	disk_attach(&rs->sc_dkdev);
	disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Scan for wedges (GPT partitions etc.) on the new device. */
	dkwedge_discover(&rs->sc_dkdev);

	rf_set_properties(rs, raidPtr);

}
1972 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1973 /* wake up the daemon & tell it to get us a spare table
1974 * XXX
1975 * the entries in the queues should be tagged with the raidPtr
1976 * so that in the extremely rare case that two recons happen at once,
1977 * we know for which device were requesting a spare table
1978 * XXX
1979 *
1980 * XXX This code is not currently used. GO
1981 */
1982 int
1983 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1984 {
1985 int retcode;
1986
1987 rf_lock_mutex2(rf_sparet_wait_mutex);
1988 req->next = rf_sparet_wait_queue;
1989 rf_sparet_wait_queue = req;
1990 rf_broadcast_cond2(rf_sparet_wait_cv);
1991
1992 /* mpsleep unlocks the mutex */
1993 while (!rf_sparet_resp_queue) {
1994 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
1995 }
1996 req = rf_sparet_resp_queue;
1997 rf_sparet_resp_queue = req->next;
1998 rf_unlock_mutex2(rf_sparet_wait_mutex);
1999
2000 retcode = req->fcol;
2001 RF_Free(req, sizeof(*req)); /* this is not the same req as we
2002 * alloc'd */
2003 return (retcode);
2004 }
2005 #endif
2006
2007 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2008 * bp & passes it down.
2009 * any calls originating in the kernel must use non-blocking I/O
2010 * do some extra sanity checking to return "appropriate" error values for
2011 * certain conditions (to make some standard utilities work)
2012 *
2013 * Formerly known as: rf_DoAccessKernel
2014 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int     unit;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;
	int     rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Drop the mutex while updating the component labels
		 * (which may sleep), then retake it to adjust the count. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Drain the buf queue while the array still has openings.
	 * Loop invariant: raidPtr->mutex is held at the top of each
	 * iteration and released while we work on a buf. */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* convert from DEV_BSIZE units to RAID sector units */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector, if any */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Reject I/O past the end of the array; the (sum < x)
		 * comparisons also catch arithmetic wrap-around. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that aren't a multiple of the sector size */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening for this I/O */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* rf_DoAccess failed outright -- fail the buf here;
			 * NOTE(review): openings is not restored on this
			 * path -- confirm completion handling elsewhere */
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2135
2136
2137
2138
2139 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2140
2141 int
2142 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
2143 {
2144 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2145 struct buf *bp;
2146
2147 req->queue = queue;
2148 bp = req->bp;
2149
2150 switch (req->type) {
2151 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
2152 /* XXX need to do something extra here.. */
2153 /* I'm leaving this in, as I've never actually seen it used,
2154 * and I'd like folks to report it... GO */
2155 printf(("WAKEUP CALLED\n"));
2156 queue->numOutstanding++;
2157
2158 bp->b_flags = 0;
2159 bp->b_private = req;
2160
2161 KernelWakeupFunc(bp);
2162 break;
2163
2164 case RF_IO_TYPE_READ:
2165 case RF_IO_TYPE_WRITE:
2166 #if RF_ACC_TRACE > 0
2167 if (req->tracerec) {
2168 RF_ETIMER_START(req->tracerec->timer);
2169 }
2170 #endif
2171 InitBP(bp, queue->rf_cinfo->ci_vp,
2172 op, queue->rf_cinfo->ci_dev,
2173 req->sectorOffset, req->numSector,
2174 req->buf, KernelWakeupFunc, (void *) req,
2175 queue->raidPtr->logBytesPerSector, req->b_proc);
2176
2177 if (rf_debugKernelAccess) {
2178 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2179 (long) bp->b_blkno));
2180 }
2181 queue->numOutstanding++;
2182 queue->last_deq_sector = req->sectorOffset;
2183 /* acc wouldn't have been let in if there were any pending
2184 * reqs at any other priority */
2185 queue->curPriority = req->priority;
2186
2187 db1_printf(("Going for %c to unit %d col %d\n",
2188 req->type, queue->raidPtr->raidid,
2189 queue->col));
2190 db1_printf(("sector %d count %d (%d bytes) %d\n",
2191 (int) req->sectorOffset, (int) req->numSector,
2192 (int) (req->numSector <<
2193 queue->raidPtr->logBytesPerSector),
2194 (int) queue->raidPtr->logBytesPerSector));
2195
2196 /*
2197 * XXX: drop lock here since this can block at
2198 * least with backing SCSI devices. Retake it
2199 * to minimize fuss with calling interfaces.
2200 */
2201
2202 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2203 bdev_strategy(bp);
2204 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2205 break;
2206
2207 default:
2208 panic("bad req->type in rf_DispatchKernelIO");
2209 }
2210 db1_printf(("Exiting from DispatchKernelIO\n"));
2211
2212 return (0);
2213 }
/* This is the callback function associated with an I/O invoked from
   kernel code.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* The request was stashed in b_private by rf_DispatchKernelIO(). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	/* All completion bookkeeping happens under the per-set iodone_lock. */
	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2282
2283
2284 /*
2285 * initialize a buf structure for doing an I/O in the kernel.
2286 */
2287 static void
2288 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2289 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2290 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2291 struct proc *b_proc)
2292 {
2293 /* bp->b_flags = B_PHYS | rw_flag; */
2294 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2295 bp->b_oflags = 0;
2296 bp->b_cflags = 0;
2297 bp->b_bcount = numSect << logBytesPerSector;
2298 bp->b_bufsize = bp->b_bcount;
2299 bp->b_error = 0;
2300 bp->b_dev = dev;
2301 bp->b_data = bf;
2302 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2303 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2304 if (bp->b_bcount == 0) {
2305 panic("bp->b_bcount is zero in InitBP!!");
2306 }
2307 bp->b_proc = b_proc;
2308 bp->b_iodone = cbFunc;
2309 bp->b_private = cbArg;
2310 }
2311
2312 static void
2313 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2314 struct disklabel *lp)
2315 {
2316 memset(lp, 0, sizeof(*lp));
2317
2318 /* fabricate a label... */
2319 lp->d_secperunit = raidPtr->totalSectors;
2320 lp->d_secsize = raidPtr->bytesPerSector;
2321 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2322 lp->d_ntracks = 4 * raidPtr->numCol;
2323 lp->d_ncylinders = raidPtr->totalSectors /
2324 (lp->d_nsectors * lp->d_ntracks);
2325 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2326
2327 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2328 lp->d_type = DTYPE_RAID;
2329 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2330 lp->d_rpm = 3600;
2331 lp->d_interleave = 1;
2332 lp->d_flags = 0;
2333
2334 lp->d_partitions[RAW_PART].p_offset = 0;
2335 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2336 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2337 lp->d_npartitions = RAW_PART + 1;
2338
2339 lp->d_magic = DISKMAGIC;
2340 lp->d_magic2 = DISKMAGIC;
2341 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2342
2343 }
2344 /*
2345 * Read the disklabel from the raid device. If one is not present, fake one
2346 * up.
2347 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* Start from a fabricated default label... */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* ...and fall back to a made-up label if none on disk. */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%" PRIu32 ") != "
			    "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		/* Warn about (but keep) partitions running past the end. */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%" PRIu64 ")\n",
				    unit, rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
2400 /*
2401 * Take care of things one might want to take care of in the event
2402 * that a disklabel isn't present.
2403 */
static void
raidmakedisklabel(struct raid_softc *rs)
{
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	db1_printf(("Making a label..\n"));

	/*
	 * For historical reasons, if there's no disklabel present
	 * the raw partition must be marked FS_BSDFFS.
	 */

	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	/* Re-checksum the modified in-core label. */
	lp->d_checksum = dkcksum(lp);
}
2421 /*
2422 * Wait interruptibly for an exclusive lock.
2423 *
2424 * XXX
2425 * Several drivers do this; it should be abstracted and made MP-safe.
2426 * (Hmm... where have we seen this warning before :-> GO )
2427 */
2428 static int
2429 raidlock(struct raid_softc *rs)
2430 {
2431 int error;
2432
2433 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2434 rs->sc_flags |= RAIDF_WANTED;
2435 if ((error =
2436 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2437 return (error);
2438 }
2439 rs->sc_flags |= RAIDF_LOCKED;
2440 return (0);
2441 }
2442 /*
2443 * Unlock and wake up any waiters.
2444 */
2445 static void
2446 raidunlock(struct raid_softc *rs)
2447 {
2448
2449 rs->sc_flags &= ~RAIDF_LOCKED;
2450 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2451 rs->sc_flags &= ~RAIDF_WANTED;
2452 wakeup(rs);
2453 }
2454 }
2455
2456
2457 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2458 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2459 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2460
/*
 * Byte offset on each component at which the component info
 * (component label) area begins.
 */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2467
2468 static daddr_t
2469 rf_component_info_size(unsigned secsize)
2470 {
2471 daddr_t info_size;
2472
2473 KASSERT(secsize);
2474 if (secsize > RF_COMPONENT_INFO_SIZE)
2475 info_size = secsize;
2476 else
2477 info_size = RF_COMPONENT_INFO_SIZE;
2478
2479 return info_size;
2480 }
2481
2482 static daddr_t
2483 rf_parity_map_offset(RF_Raid_t *raidPtr)
2484 {
2485 daddr_t map_offset;
2486
2487 KASSERT(raidPtr->bytesPerSector);
2488 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2489 map_offset = raidPtr->bytesPerSector;
2490 else
2491 map_offset = RF_COMPONENT_INFO_SIZE;
2492 map_offset += rf_component_info_offset();
2493
2494 return map_offset;
2495 }
2496
2497 static daddr_t
2498 rf_parity_map_size(RF_Raid_t *raidPtr)
2499 {
2500 daddr_t map_size;
2501
2502 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2503 map_size = raidPtr->bytesPerSector;
2504 else
2505 map_size = RF_PARITY_MAP_SIZE;
2506
2507 return map_size;
2508 }
2509
2510 int
2511 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2512 {
2513 RF_ComponentLabel_t *clabel;
2514
2515 clabel = raidget_component_label(raidPtr, col);
2516 clabel->clean = RF_RAID_CLEAN;
2517 raidflush_component_label(raidPtr, col);
2518 return(0);
2519 }
2520
2521
2522 int
2523 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2524 {
2525 RF_ComponentLabel_t *clabel;
2526
2527 clabel = raidget_component_label(raidPtr, col);
2528 clabel->clean = RF_RAID_DIRTY;
2529 raidflush_component_label(raidPtr, col);
2530 return(0);
2531 }
2532
/*
 * (Re)read column `col's component label from disk into the in-core
 * copy kept in raid_cinfo[].  Returns 0 or an errno from the read.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2542
/*
 * Return a pointer to the in-core copy of column `col's component
 * label; no I/O is performed.
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2548
/*
 * Write the in-core component label for column `col' out to disk,
 * stamping it with the set's current mod_counter first.
 */
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	/* Keep the parity map's mod count in lockstep with the label. */
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2563
2564
/*
 * Read a component label from dev/b_vp into `clabel'.  `secsize' sizes
 * the reserved on-disk area (see rf_component_info_size()).
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2574
2575 /* ARGSUSED */
2576 static int
2577 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2578 size_t msize, daddr_t offset, daddr_t dsize)
2579 {
2580 struct buf *bp;
2581 const struct bdevsw *bdev;
2582 int error;
2583
2584 /* XXX should probably ensure that we don't try to do this if
2585 someone has changed rf_protected_sectors. */
2586
2587 if (b_vp == NULL) {
2588 /* For whatever reason, this component is not valid.
2589 Don't try to read a component label from it. */
2590 return(EINVAL);
2591 }
2592
2593 /* get a block of the appropriate size... */
2594 bp = geteblk((int)dsize);
2595 bp->b_dev = dev;
2596
2597 /* get our ducks in a row for the read */
2598 bp->b_blkno = offset / DEV_BSIZE;
2599 bp->b_bcount = dsize;
2600 bp->b_flags |= B_READ;
2601 bp->b_resid = dsize;
2602
2603 bdev = bdevsw_lookup(bp->b_dev);
2604 if (bdev == NULL)
2605 return (ENXIO);
2606 (*bdev->d_strategy)(bp);
2607
2608 error = biowait(bp);
2609
2610 if (!error) {
2611 memcpy(data, bp->b_data, msize);
2612 }
2613
2614 brelse(bp, 0);
2615 return(error);
2616 }
2617
2618
/*
 * Write a component label to dev/b_vp (synchronously).  `secsize'
 * sizes the reserved on-disk area (see rf_component_info_size()).
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2628
2629 /* ARGSUSED */
2630 static int
2631 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2632 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2633 {
2634 struct buf *bp;
2635 const struct bdevsw *bdev;
2636 int error;
2637
2638 /* get a block of the appropriate size... */
2639 bp = geteblk((int)dsize);
2640 bp->b_dev = dev;
2641
2642 /* get our ducks in a row for the write */
2643 bp->b_blkno = offset / DEV_BSIZE;
2644 bp->b_bcount = dsize;
2645 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2646 bp->b_resid = dsize;
2647
2648 memset(bp->b_data, 0, dsize);
2649 memcpy(bp->b_data, data, msize);
2650
2651 bdev = bdevsw_lookup(bp->b_dev);
2652 if (bdev == NULL)
2653 return (ENXIO);
2654 (*bdev->d_strategy)(bp);
2655 if (asyncp)
2656 return 0;
2657 error = biowait(bp);
2658 brelse(bp, 0);
2659 if (error) {
2660 #if 1
2661 printf("Failed to write RAID component info!\n");
2662 #endif
2663 }
2664
2665 return(error);
2666 }
2667
2668 void
2669 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2670 {
2671 int c;
2672
2673 for (c = 0; c < raidPtr->numCol; c++) {
2674 /* Skip dead disks. */
2675 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2676 continue;
2677 /* XXXjld: what if an error occurs here? */
2678 raidwrite_component_area(raidPtr->Disks[c].dev,
2679 raidPtr->raid_cinfo[c].ci_vp, map,
2680 RF_PARITYMAP_NBYTE,
2681 rf_parity_map_offset(raidPtr),
2682 rf_parity_map_size(raidPtr), 0);
2683 }
2684 }
2685
2686 void
2687 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2688 {
2689 struct rf_paritymap_ondisk tmp;
2690 int c,first;
2691
2692 first=1;
2693 for (c = 0; c < raidPtr->numCol; c++) {
2694 /* Skip dead disks. */
2695 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2696 continue;
2697 raidread_component_area(raidPtr->Disks[c].dev,
2698 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2699 RF_PARITYMAP_NBYTE,
2700 rf_parity_map_offset(raidPtr),
2701 rf_parity_map_size(raidPtr));
2702 if (first) {
2703 memcpy(map, &tmp, sizeof(*map));
2704 first = 0;
2705 } else {
2706 rf_paritymap_merge(map, &tmp);
2707 }
2708 }
2709 }
2710
/*
 * Bump the mod counter and mark every live component's label dirty.
 * Done when the set goes into service so that an unclean shutdown can
 * be detected later.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare is standing in for. */
			/* NOTE(review): if no column lists this spare in
			   spareCol, scol stays at -1 (or a stale value)
			   and is written into the label below -- confirm
			   that cannot happen for rf_ds_used_spare. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2770
2771
/*
 * Push fresh component labels to all optimal components and in-use
 * spares.  When `final' is RF_FINAL_COMPONENT_UPDATE and parity is
 * known good, the labels are additionally marked clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare replaced. */
			/* NOTE(review): if no column lists this spare in
			   spareCol, scol remains -1 (or stale from a
			   previous iteration) -- confirm this cannot
			   happen for rf_ds_used_spare. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2846
2847 void
2848 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2849 {
2850
2851 if (vp != NULL) {
2852 if (auto_configured == 1) {
2853 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2854 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2855 vput(vp);
2856
2857 } else {
2858 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2859 }
2860 }
2861 }
2862
2863
2864 void
2865 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2866 {
2867 int r,c;
2868 struct vnode *vp;
2869 int acd;
2870
2871
2872 /* We take this opportunity to close the vnodes like we should.. */
2873
2874 for (c = 0; c < raidPtr->numCol; c++) {
2875 vp = raidPtr->raid_cinfo[c].ci_vp;
2876 acd = raidPtr->Disks[c].auto_configured;
2877 rf_close_component(raidPtr, vp, acd);
2878 raidPtr->raid_cinfo[c].ci_vp = NULL;
2879 raidPtr->Disks[c].auto_configured = 0;
2880 }
2881
2882 for (r = 0; r < raidPtr->numSpare; r++) {
2883 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2884 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2885 rf_close_component(raidPtr, vp, acd);
2886 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2887 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2888 }
2889 }
2890
2891
/*
 * Kernel thread body: fail a disk (optionally reconstructing to a
 * spare when RF_FDFLAGS_RECON is set), free the request, and exit.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* The request was allocated by our creator; we own and free it. */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2913
/*
 * Kernel thread body: rewrite all parity, set parity_good on success,
 * notify a waiting shutdown if any, and exit.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2944
2945
/*
 * Kernel thread body: copy reconstructed data back from a spare onto a
 * replaced component, then exit.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2960
2961
/*
 * Kernel thread body: reconstruct a component in place (onto the same
 * column), free the request, and exit.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	/* We own the request and free it once the column is named. */
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2979
/*
 * Try to read a component label from (dev, vp).  If it looks sane, a
 * new RF_AutoConfig_t is prepended to `ac_list'; otherwise the vnode
 * is closed and released here.  On allocation failure the entire list
 * accumulated so far is freed and NULL is returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
	const char *cname, RF_SectorCount_t size, uint64_t numsecs,
	unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* Out of memory: throw away everything gathered so far. */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: the component is not joining the list, so both
		   the label memory and the vnode must be dropped here. */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3037
/*
 * Walk every disk-class device in the system looking for RAIDframe
 * component labels: in wedges of type RAIDFRAME, in FS_RAID disklabel
 * partitions, and (failing both) on the raw partition itself.
 * Returns a list of candidate components for autoconfiguration.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		/* Wedges ("dk") encode the whole unit in the minor. */
		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* A wedge is a single candidate: check its type. */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component takes ownership of vp. */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			/* rf_get_component takes ownership of vp. */
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3219
3220
3221 int
3222 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3223 {
3224
3225 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3226 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3227 ((clabel->clean == RF_RAID_CLEAN) ||
3228 (clabel->clean == RF_RAID_DIRTY)) &&
3229 clabel->row >=0 &&
3230 clabel->column >= 0 &&
3231 clabel->num_rows > 0 &&
3232 clabel->num_columns > 0 &&
3233 clabel->row < clabel->num_rows &&
3234 clabel->column < clabel->num_columns &&
3235 clabel->blockSize > 0 &&
3236 /*
3237 * numBlocksHi may contain garbage, but it is ok since
3238 * the type is unsigned. If it is really garbage,
3239 * rf_fix_old_label_size() will fix it.
3240 */
3241 rf_component_label_numblocks(clabel) > 0) {
3242 /*
3243 * label looks reasonable enough...
3244 * let's make sure it has no old garbage.
3245 */
3246 if (numsecs)
3247 rf_fix_old_label_size(clabel, numsecs);
3248 return(1);
3249 }
3250 return(0);
3251 }
3252
3253
3254 /*
3255 * For reasons yet unknown, some old component labels have garbage in
3256 * the newer numBlocksHi region, and this causes lossage. Since those
3257 * disks will also have numsecs set to less than 32 bits of sectors,
3258 * we can determine when this corruption has occurred, and fix it.
3259 *
3260 * The exact same problem, with the same unknown reason, happens to
3261 * the partitionSizeHi member as well.
3262 */
3263 static void
3264 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3265 {
3266
3267 if (numsecs < ((uint64_t)1 << 32)) {
3268 if (clabel->numBlocksHi) {
3269 printf("WARNING: total sectors < 32 bits, yet "
3270 "numBlocksHi set\n"
3271 "WARNING: resetting numBlocksHi to zero.\n");
3272 clabel->numBlocksHi = 0;
3273 }
3274
3275 if (clabel->partitionSizeHi) {
3276 printf("WARNING: total sectors < 32 bits, yet "
3277 "partitionSizeHi set\n"
3278 "WARNING: resetting partitionSizeHi to zero.\n");
3279 clabel->partitionSizeHi = 0;
3280 }
3281 }
3282 }
3283
3284
#ifdef DEBUG
/*
 * Dump the contents of a component label to the console.  Debug
 * builds only.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;

	numBlocks = rf_component_label_numblocks(clabel);

	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf(" Clean: %s Status: %d\n",
	    clabel->clean ? "Yes" : "No", clabel->status);
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n",
	    (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf(" Contains root partition: %s\n",
	    clabel->root_partition ? "Yes" : "No");
	printf(" Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif
3315
3316 RF_ConfigSet_t *
3317 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3318 {
3319 RF_AutoConfig_t *ac;
3320 RF_ConfigSet_t *config_sets;
3321 RF_ConfigSet_t *cset;
3322 RF_AutoConfig_t *ac_next;
3323
3324
3325 config_sets = NULL;
3326
3327 /* Go through the AutoConfig list, and figure out which components
3328 belong to what sets. */
3329 ac = ac_list;
3330 while(ac!=NULL) {
3331 /* we're going to putz with ac->next, so save it here
3332 for use at the end of the loop */
3333 ac_next = ac->next;
3334
3335 if (config_sets == NULL) {
3336 /* will need at least this one... */
3337 config_sets = (RF_ConfigSet_t *)
3338 malloc(sizeof(RF_ConfigSet_t),
3339 M_RAIDFRAME, M_NOWAIT);
3340 if (config_sets == NULL) {
3341 panic("rf_create_auto_sets: No memory!");
3342 }
3343 /* this one is easy :) */
3344 config_sets->ac = ac;
3345 config_sets->next = NULL;
3346 config_sets->rootable = 0;
3347 ac->next = NULL;
3348 } else {
3349 /* which set does this component fit into? */
3350 cset = config_sets;
3351 while(cset!=NULL) {
3352 if (rf_does_it_fit(cset, ac)) {
3353 /* looks like it matches... */
3354 ac->next = cset->ac;
3355 cset->ac = ac;
3356 break;
3357 }
3358 cset = cset->next;
3359 }
3360 if (cset==NULL) {
3361 /* didn't find a match above... new set..*/
3362 cset = (RF_ConfigSet_t *)
3363 malloc(sizeof(RF_ConfigSet_t),
3364 M_RAIDFRAME, M_NOWAIT);
3365 if (cset == NULL) {
3366 panic("rf_create_auto_sets: No memory!");
3367 }
3368 cset->ac = ac;
3369 ac->next = NULL;
3370 cset->next = config_sets;
3371 cset->rootable = 0;
3372 config_sets = cset;
3373 }
3374 }
3375 ac = ac_next;
3376 }
3377
3378
3379 return(config_sets);
3380 }
3381
3382 static int
3383 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3384 {
3385 RF_ComponentLabel_t *clabel1, *clabel2;
3386
3387 /* If this one matches the *first* one in the set, that's good
3388 enough, since the other members of the set would have been
3389 through here too... */
3390 /* note that we are not checking partitionSize here..
3391
3392 Note that we are also not checking the mod_counters here.
3393 If everything else matches except the mod_counter, that's
3394 good enough for this test. We will deal with the mod_counters
3395 a little later in the autoconfiguration process.
3396
3397 (clabel1->mod_counter == clabel2->mod_counter) &&
3398
3399 The reason we don't check for this is that failed disks
3400 will have lower modification counts. If those disks are
3401 not added to the set they used to belong to, then they will
3402 form their own set, which may result in 2 different sets,
3403 for example, competing to be configured at raid0, and
3404 perhaps competing to be the root filesystem set. If the
3405 wrong ones get configured, or both attempt to become /,
3406 weird behaviour and or serious lossage will occur. Thus we
3407 need to bring them into the fold here, and kick them out at
3408 a later point.
3409
3410 */
3411
3412 clabel1 = cset->ac->clabel;
3413 clabel2 = ac->clabel;
3414 if ((clabel1->version == clabel2->version) &&
3415 (clabel1->serial_number == clabel2->serial_number) &&
3416 (clabel1->num_rows == clabel2->num_rows) &&
3417 (clabel1->num_columns == clabel2->num_columns) &&
3418 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3419 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3420 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3421 (clabel1->parityConfig == clabel2->parityConfig) &&
3422 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3423 (clabel1->blockSize == clabel2->blockSize) &&
3424 rf_component_label_numblocks(clabel1) ==
3425 rf_component_label_numblocks(clabel2) &&
3426 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3427 (clabel1->root_partition == clabel2->root_partition) &&
3428 (clabel1->last_unit == clabel2->last_unit) &&
3429 (clabel1->config_order == clabel2->config_order)) {
3430 /* if it get's here, it almost *has* to be a match */
3431 } else {
3432 /* it's not consistent with somebody in the set..
3433 punt */
3434 return(0);
3435 }
3436 /* all was fine.. it must fit... */
3437 return(1);
3438 }
3439
/*
 * Decide whether configuration set 'cset' has enough live components
 * to be configured.  Returns 1 if the set is viable, 0 if too many
 * components are missing for its parity type.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;		/* columns the set is supposed to have */
	int num_missing;	/* columns with no current component */
	int mod_counter;	/* highest mod_counter seen in the set */
	int mod_counter_found;
	int even_pair_failed;	/* RAID 1: even half of current pair missing */
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	   Components with a stale (lower) mod_counter are treated as
	   missing below. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* For each expected column, look for a current component. */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd half of a mirror pair
			   without bailing, so this pair survives; reset
			   the flag before starting the next pair. */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate
	   one.  RAID 1 was fully handled pair-by-pair above. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3542
3543 void
3544 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3545 RF_Raid_t *raidPtr)
3546 {
3547 RF_ComponentLabel_t *clabel;
3548 int i;
3549
3550 clabel = ac->clabel;
3551
3552 /* 1. Fill in the common stuff */
3553 config->numRow = clabel->num_rows = 1;
3554 config->numCol = clabel->num_columns;
3555 config->numSpare = 0; /* XXX should this be set here? */
3556 config->sectPerSU = clabel->sectPerSU;
3557 config->SUsPerPU = clabel->SUsPerPU;
3558 config->SUsPerRU = clabel->SUsPerRU;
3559 config->parityConfig = clabel->parityConfig;
3560 /* XXX... */
3561 strcpy(config->diskQueueType,"fifo");
3562 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3563 config->layoutSpecificSize = 0; /* XXX ?? */
3564
3565 while(ac!=NULL) {
3566 /* row/col values will be in range due to the checks
3567 in reasonable_label() */
3568 strcpy(config->devnames[0][ac->clabel->column],
3569 ac->devname);
3570 ac = ac->next;
3571 }
3572
3573 for(i=0;i<RF_MAXDBGV;i++) {
3574 config->debugVars[i][0] = 0;
3575 }
3576 }
3577
3578 int
3579 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3580 {
3581 RF_ComponentLabel_t *clabel;
3582 int column;
3583 int sparecol;
3584
3585 raidPtr->autoconfigure = new_value;
3586
3587 for(column=0; column<raidPtr->numCol; column++) {
3588 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3589 clabel = raidget_component_label(raidPtr, column);
3590 clabel->autoconfigure = new_value;
3591 raidflush_component_label(raidPtr, column);
3592 }
3593 }
3594 for(column = 0; column < raidPtr->numSpare ; column++) {
3595 sparecol = raidPtr->numCol + column;
3596 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3597 clabel = raidget_component_label(raidPtr, sparecol);
3598 clabel->autoconfigure = new_value;
3599 raidflush_component_label(raidPtr, sparecol);
3600 }
3601 }
3602 return(new_value);
3603 }
3604
3605 int
3606 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3607 {
3608 RF_ComponentLabel_t *clabel;
3609 int column;
3610 int sparecol;
3611
3612 raidPtr->root_partition = new_value;
3613 for(column=0; column<raidPtr->numCol; column++) {
3614 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3615 clabel = raidget_component_label(raidPtr, column);
3616 clabel->root_partition = new_value;
3617 raidflush_component_label(raidPtr, column);
3618 }
3619 }
3620 for(column = 0; column < raidPtr->numSpare ; column++) {
3621 sparecol = raidPtr->numCol + column;
3622 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3623 clabel = raidget_component_label(raidPtr, sparecol);
3624 clabel->root_partition = new_value;
3625 raidflush_component_label(raidPtr, sparecol);
3626 }
3627 }
3628 return(new_value);
3629 }
3630
3631 void
3632 rf_release_all_vps(RF_ConfigSet_t *cset)
3633 {
3634 RF_AutoConfig_t *ac;
3635
3636 ac = cset->ac;
3637 while(ac!=NULL) {
3638 /* Close the vp, and give it back */
3639 if (ac->vp) {
3640 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3641 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3642 vput(ac->vp);
3643 ac->vp = NULL;
3644 }
3645 ac = ac->next;
3646 }
3647 }
3648
3649
3650 void
3651 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3652 {
3653 RF_AutoConfig_t *ac;
3654 RF_AutoConfig_t *next_ac;
3655
3656 ac = cset->ac;
3657 while(ac!=NULL) {
3658 next_ac = ac->next;
3659 /* nuke the label */
3660 free(ac->clabel, M_RAIDFRAME);
3661 /* cleanup the config structure */
3662 free(ac, M_RAIDFRAME);
3663 /* "next.." */
3664 ac = next_ac;
3665 }
3666 /* and, finally, nuke the config set */
3667 free(cset, M_RAIDFRAME);
3668 }
3669
3670
/*
 * Fill in a component label from the current state of the array:
 * identity (serial number, mod counter), geometry (columns, stripe
 * unit layout, block size and count), and configuration flags
 * (autoconfigure, root eligibility, last unit, config order).
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	/* stores the 64-bit count in the label's low/high words */
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3703
/*
 * Autoconfigure one configuration set: pick a raid unit (preferring
 * the unit it was last configured on), build an RF_Config_t from the
 * component labels, and configure the array.  On success *unit is set
 * to the raid unit used and 0 is returned; on failure a non-zero
 * value is returned and *unit is -1.
 */
int
rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	int retcode;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	retcode = 0;
	*unit = -1;

	/* 1. Create a config structure */

	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
				       M_RAIDFRAME,
				       M_NOWAIT);
	if (config==NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return(1);
	}

	memset(config, 0, sizeof(RF_Config_t));

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	if ((raidID < 0) || (raidID >= numraid)) {
		/* let's not wander off into lala land. */
		raidID = numraid - 1;
	}
	if (raidPtrs[raidID]->valid != 0) {

		/*
		   Nope... Go looking for an alternative...
		   Start high so we don't immediately use raid0 if that's
		   not taken.
		*/

		/* if every unit is busy this leaves raidID == -1,
		   which the check below turns into a failure */
		for(raidID = numraid - 1; raidID >= 0; raidID--) {
			if (raidPtrs[raidID]->valid == 0) {
				/* can use this one! */
				break;
			}
		}
	}

	if (raidID < 0) {
		/* punt... */
		printf("Unable to auto configure this set!\n");
		printf("(Out of RAID devs!)\n");
		free(config, M_RAIDFRAME);
		return(1);
	}

#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = raidPtrs[raidID];

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	retcode = rf_Configure(raidPtr, config, cset->ac);

	if (retcode == 0) {

		raidinit(raidPtrs[raidID]);

		rf_markalldirty(raidPtrs[raidID]);
		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
		if (cset->ac->clabel->root_partition==1) {
			/* everything configured just fine.  Make a note
			   that this set is eligible to be root. */
			cset->rootable = 1;
			/* XXX do this here? */
			raidPtrs[raidID]->root_partition = 1;
		}
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);

	*unit = raidID;
	return(retcode);
}
3804
3805 void
3806 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3807 {
3808 struct buf *bp;
3809
3810 bp = (struct buf *)desc->bp;
3811 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3812 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3813 }
3814
/*
 * Initialize a RAIDframe memory pool at IPL_BIO, primed with xmin
 * items and limited to xmax items.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);	/* upper bound on idle items kept */
	pool_prime(p, xmin);	/* preallocate xmin items up front */
	pool_setlowat(p, xmin);	/* try to keep at least xmin around */
}
3824
3825 /*
3826 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3827 * if there is IO pending and if that IO could possibly be done for a
3828 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3829 * otherwise.
3830 *
3831 */
3832
3833 int
3834 rf_buf_queue_check(int raidid)
3835 {
3836 if ((bufq_peek(raid_softc[raidid].buf_queue) != NULL) &&
3837 raidPtrs[raidid]->openings > 0) {
3838 /* there is work to do */
3839 return 0;
3840 }
3841 /* default is nothing to do */
3842 return 1;
3843 }
3844
3845 int
3846 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3847 {
3848 uint64_t numsecs;
3849 unsigned secsize;
3850 int error;
3851
3852 error = getdisksize(vp, &numsecs, &secsize);
3853 if (error == 0) {
3854 diskPtr->blockSize = secsize;
3855 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3856 diskPtr->partitionSize = numsecs;
3857 return 0;
3858 }
3859 return error;
3860 }
3861
/*
 * Autoconf match function: raid pseudo-devices always match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3867
/*
 * Autoconf attach function: intentionally empty; the real setup is
 * performed when a RAID set is configured, not at attach time.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3873
3874
3875 static int
3876 raid_detach(device_t self, int flags)
3877 {
3878 int error;
3879 struct raid_softc *rs = &raid_softc[device_unit(self)];
3880
3881 if ((error = raidlock(rs)) != 0)
3882 return (error);
3883
3884 error = raid_detach_unlocked(rs);
3885
3886 raidunlock(rs);
3887
3888 return error;
3889 }
3890
/*
 * Publish disk-info properties (a synthesized geometry, not physical
 * disk geometry) for the raid unit, and release the previously
 * installed dictionary, if any.
 */
static void
rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	prop_dictionary_t disk_info, odisk_info, geom;
	disk_info = prop_dictionary_create();
	geom = prop_dictionary_create();
	prop_dictionary_set_uint64(geom, "sectors-per-unit",
				   raidPtr->totalSectors);
	prop_dictionary_set_uint32(geom, "sector-size",
				   raidPtr->bytesPerSector);

	/* fake geometry: one "track" per data stripe, 4 "tracks" per
	   cylinder per column */
	prop_dictionary_set_uint16(geom, "sectors-per-track",
				   raidPtr->Layout.dataSectorsPerStripe);
	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
				   4 * raidPtr->numCol);

	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
	    raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
	    (4 * raidPtr->numCol)));

	/* disk_info takes its own reference on geom; drop ours */
	prop_dictionary_set(disk_info, "geometry", geom);
	prop_object_release(geom);
	prop_dictionary_set(device_properties(rs->sc_dev),
			    "disk-info", disk_info);
	/* swap in the new dictionary and release the old one */
	odisk_info = rs->sc_dkdev.dk_info;
	rs->sc_dkdev.dk_info = disk_info;
	if (odisk_info)
		prop_object_release(odisk_info);
}
3920
3921 /*
3922 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3923 * We end up returning whatever error was returned by the first cache flush
3924 * that fails.
3925 */
3926
3927 int
3928 rf_sync_component_caches(RF_Raid_t *raidPtr)
3929 {
3930 int c, sparecol;
3931 int e,error;
3932 int force = 1;
3933
3934 error = 0;
3935 for (c = 0; c < raidPtr->numCol; c++) {
3936 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3937 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3938 &force, FWRITE, NOCRED);
3939 if (e) {
3940 if (e != ENODEV)
3941 printf("raid%d: cache flush to component %s failed.\n",
3942 raidPtr->raidid, raidPtr->Disks[c].devname);
3943 if (error == 0) {
3944 error = e;
3945 }
3946 }
3947 }
3948 }
3949
3950 for( c = 0; c < raidPtr->numSpare ; c++) {
3951 sparecol = raidPtr->numCol + c;
3952 /* Need to ensure that the reconstruct actually completed! */
3953 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3954 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3955 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3956 if (e) {
3957 if (e != ENODEV)
3958 printf("raid%d: cache flush to component %s failed.\n",
3959 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3960 if (error == 0) {
3961 error = e;
3962 }
3963 }
3964 }
3965 }
3966 return error;
3967 }
3968
3969 static void
3970 raidminphys(struct buf *bp)
3971 {
3972 dev_t dev;
3973 int unit;
3974 struct raid_softc *rs;
3975 RF_Raid_t *raidPtr;
3976 long xmax;
3977
3978 dev = bp->b_dev;
3979 unit = raidunit(dev);
3980 rs = &raid_softc[unit];
3981 raidPtr = raidPtrs[unit];
3982
3983 xmax = raidPtr->Layout.numDataCol * MAXPHYS;
3984
3985 if (bp->b_bcount > xmax) {
3986 bp->b_bcount = xmax;
3987 }
3988 }
3989