rf_netbsdkintf.c revision 1.247 1 /* $NetBSD: rf_netbsdkintf.c,v 1.247 2008/06/07 17:50:34 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 /*
32 * Copyright (c) 1990, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * This code is derived from software contributed to Berkeley by
36 * the Systems Programming Group of the University of Utah Computer
37 * Science Department.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * from: Utah $Hdr: cd.c 1.6 90/11/28$
64 *
65 * @(#)cd.c 8.2 (Berkeley) 11/16/93
66 */
67
68 /*
69 * Copyright (c) 1988 University of Utah.
70 *
71 * This code is derived from software contributed to Berkeley by
72 * the Systems Programming Group of the University of Utah Computer
73 * Science Department.
74 *
75 * Redistribution and use in source and binary forms, with or without
76 * modification, are permitted provided that the following conditions
77 * are met:
78 * 1. Redistributions of source code must retain the above copyright
79 * notice, this list of conditions and the following disclaimer.
80 * 2. Redistributions in binary form must reproduce the above copyright
81 * notice, this list of conditions and the following disclaimer in the
82 * documentation and/or other materials provided with the distribution.
83 * 3. All advertising materials mentioning features or use of this software
84 * must display the following acknowledgement:
85 * This product includes software developed by the University of
86 * California, Berkeley and its contributors.
87 * 4. Neither the name of the University nor the names of its contributors
88 * may be used to endorse or promote products derived from this software
89 * without specific prior written permission.
90 *
91 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
92 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
93 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
94 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
95 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
96 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
97 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
98 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
99 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
100 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
101 * SUCH DAMAGE.
102 *
103 * from: Utah $Hdr: cd.c 1.6 90/11/28$
104 *
105 * @(#)cd.c 8.2 (Berkeley) 11/16/93
106 */
107
108 /*
109 * Copyright (c) 1995 Carnegie-Mellon University.
110 * All rights reserved.
111 *
112 * Authors: Mark Holland, Jim Zelenka
113 *
114 * Permission to use, copy, modify and distribute this software and
115 * its documentation is hereby granted, provided that both the copyright
116 * notice and this permission notice appear in all copies of the
117 * software, derivative works or modified versions, and any portions
118 * thereof, and that both notices appear in supporting documentation.
119 *
120 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
121 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
122 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
123 *
124 * Carnegie Mellon requests users of this software to return to
125 *
126 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
127 * School of Computer Science
128 * Carnegie Mellon University
129 * Pittsburgh PA 15213-3890
130 *
131 * any improvements or extensions that they make and grant Carnegie the
132 * rights to redistribute these changes.
133 */
134
135 /***********************************************************
136 *
137 * rf_kintf.c -- the kernel interface routines for RAIDframe
138 *
139 ***********************************************************/
140
141 #include <sys/cdefs.h>
142 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.247 2008/06/07 17:50:34 oster Exp $");
143
144 #include <sys/param.h>
145 #include <sys/errno.h>
146 #include <sys/pool.h>
147 #include <sys/proc.h>
148 #include <sys/queue.h>
149 #include <sys/disk.h>
150 #include <sys/device.h>
151 #include <sys/stat.h>
152 #include <sys/ioctl.h>
153 #include <sys/fcntl.h>
154 #include <sys/systm.h>
155 #include <sys/vnode.h>
156 #include <sys/disklabel.h>
157 #include <sys/conf.h>
158 #include <sys/buf.h>
159 #include <sys/bufq.h>
160 #include <sys/user.h>
161 #include <sys/reboot.h>
162 #include <sys/kauth.h>
163
164 #include <prop/proplib.h>
165
166 #include <dev/raidframe/raidframevar.h>
167 #include <dev/raidframe/raidframeio.h>
168 #include "raid.h"
169 #include "opt_raid_autoconfig.h"
170 #include "rf_raid.h"
171 #include "rf_copyback.h"
172 #include "rf_dag.h"
173 #include "rf_dagflags.h"
174 #include "rf_desc.h"
175 #include "rf_diskqueue.h"
176 #include "rf_etimer.h"
177 #include "rf_general.h"
178 #include "rf_kintf.h"
179 #include "rf_options.h"
180 #include "rf_driver.h"
181 #include "rf_parityscan.h"
182 #include "rf_threadstuff.h"
183
#ifdef DEBUG
int     rf_kdebug_level = 0;
/*
 * Debug printf: active only when DEBUG is defined and rf_kdebug_level
 * is positive.  "a" is a fully parenthesized printf argument list,
 * e.g. db1_printf(("x=%d\n", x)).  Both variants are wrapped in
 * do { } while (0) so the macro expands to exactly one statement and
 * is safe inside unbraced if/else bodies (the old "{ }" and bare-if
 * forms were dangling-else hazards).
 */
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
190
/* Per-unit RAIDframe descriptors; array of "numraid" entries allocated
   in raidattach(). */
static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/* Protects the two spare-table queues below. */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

/* Malloc type tag for all RAIDframe kernel allocations. */
MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
201
/* prototypes */
static void KernelWakeupFunc(struct buf *);
static void InitBP(struct buf *, struct vnode *, unsigned,
    dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    void *, int, struct proc *);
static void raidinit(RF_Raid_t *);

void raidattach(int);
/* autoconf(9) match/attach/detach glue for pseudo-device units */
static int raid_match(struct device *, struct cfdata *, void *);
static void raid_attach(struct device *, struct device *, void *);
static int raid_detach(struct device *, int);

/* Block/character device entry points, wired into the switches below. */
dev_type_open(raidopen);
dev_type_close(raidclose);
dev_type_read(raidread);
dev_type_write(raidwrite);
dev_type_ioctl(raidioctl);
dev_type_strategy(raidstrategy);
dev_type_dump(raiddump);
dev_type_size(raidsize);

/* Block-device switch entry. */
const struct bdevsw raid_bdevsw = {
	raidopen, raidclose, raidstrategy, raidioctl,
	raiddump, raidsize, D_DISK
};

/* Character-device switch entry. */
const struct cdevsw raid_cdevsw = {
	raidopen, raidclose, raidread, raidwrite, raidioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};

/* dk(9) driver hooks used by the generic disk framework. */
static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
234
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
*/

/* Per-unit software state; one entry per configured raid unit,
   allocated as an array in raidattach(). */
struct raid_softc {
	struct device *sc_dev;	/* autoconf device handle */
	int     sc_flags;	/* flags (RAIDF_* below) */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Extract the unit number from a dev_t minor. */
#define	raidunit(x)	DISKUNIT(x)
int     numraid = 0;		/* number of units set up by raidattach() */
257
extern struct cfdriver raid_cd;
CFATTACH_DECL_NEW(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL);

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even it if is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* dev_t of the raw partition used when reading/writing the disklabel. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

/* disklabel(9) helpers */
static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
				     struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

/* per-unit configuration lock, built on RAIDF_LOCKED/RAIDF_WANTED */
static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_properties(struct raid_softc *, RF_Raid_t *);

/* kernel-thread entry points for long-running RAID operations */
void rf_ReconThread(struct rf_recon_req *);
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
int rf_autoconfig(struct device *self);
void rf_buildroothack(RF_ConfigSet_t *);

/* component autoconfiguration helpers */
RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
static int rf_reasonable_label(RF_ComponentLabel_t *);
void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
int rf_auto_config_set(RF_ConfigSet_t *, int *);

static int raidautoconfig = 0;	/* Debugging, mostly.  Set to 0 to not
				   allow autoconfig to take place.
				   Note that this is overridden by having
				   RAID_AUTOCONFIG as an option in the
				   kernel config file. */

/* memory pools shared by all RAIDframe units */
struct RF_Pools_s rf_pools;
326
/*
 * Driver attach entry point: allocate and initialize per-unit state
 * for "num" RAID units, boot the RAIDframe core, and register the
 * autoconfiguration finalizer.  Ordering matters: the sparet mutex and
 * raidPtrs array must exist before rf_BootRaidframe() and before any
 * unit can be opened.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

	rf_mutex_init(&rf_sparet_wait_mutex);

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		aprint_normal("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		aprint_error("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
			/* NOTE(review): shrinking numraid abandons the
			   softc/bufq entries already allocated for units
			   >= raidID; presumably acceptable at attach time
			   -- confirm before relying on it. */
			numraid = raidID;
			return;
		}
	}

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	/* RAID_AUTOCONFIG in the kernel config enables the boot-time scan. */
#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
406
407 int
408 rf_autoconfig(struct device *self)
409 {
410 RF_AutoConfig_t *ac_list;
411 RF_ConfigSet_t *config_sets;
412
413 if (raidautoconfig == 0)
414 return (0);
415
416 /* XXX This code can only be run once. */
417 raidautoconfig = 0;
418
419 /* 1. locate all RAID components on the system */
420 #ifdef DEBUG
421 printf("Searching for RAID components...\n");
422 #endif
423 ac_list = rf_find_raid_components();
424
425 /* 2. Sort them into their respective sets. */
426 config_sets = rf_create_auto_sets(ac_list);
427
428 /*
429 * 3. Evaluate each set andconfigure the valid ones.
430 * This gets done in rf_buildroothack().
431 */
432 rf_buildroothack(config_sets);
433
434 return 1;
435 }
436
/*
 * Walk the config sets produced by rf_autoconfig(), configure the
 * eligible ones, and try to decide which configured set (if any)
 * should supply the root device.  booted_device/boothowto are left
 * alone when the user hardwired a root via "rootspec".
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int col;
	int num_root;
	char *devname;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while(cset != NULL ) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure==1) {
			retcode = rf_auto_config_set(cset,&raidID);
			if (!retcode) {
#ifdef DEBUG
				printf("raid%d: configured ok\n", raidID);
#endif
				if (cset->rootable) {
					rootID = raidID;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
#ifdef DEBUG
				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
#endif
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	if (num_root == 1) {
		/* exactly one rootable set: it becomes the root device */
		booted_device = raid_softc[rootID].sc_dev;
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */

		if (booted_device == NULL)
			cpu_rootconf();
		if (booted_device == NULL)
			return;

		/* Disambiguate by preferring the set that actually
		   contains the component we booted from. */
		num_root = 0;
		for (raidID = 0; raidID < numraid; raidID++) {
			if (raidPtrs[raidID]->valid == 0)
				continue;

			if (raidPtrs[raidID]->root_partition == 0)
				continue;

			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
				devname = raidPtrs[raidID]->Disks[col].devname;
				/* skip the "/dev/" prefix for the compare */
				devname += sizeof("/dev/") - 1;
				if (strncmp(devname, device_xname(booted_device),
					    strlen(device_xname(booted_device))) != 0)
					continue;
#ifdef DEBUG
				printf("raid%d includes boot device %s\n",
				       raidID, devname);
#endif
				num_root++;
				rootID = raidID;
			}
		}

		if (num_root == 1) {
			booted_device = raid_softc[rootID].sc_dev;
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
537
538
539 int
540 raidsize(dev_t dev)
541 {
542 struct raid_softc *rs;
543 struct disklabel *lp;
544 int part, unit, omask, size;
545
546 unit = raidunit(dev);
547 if (unit >= numraid)
548 return (-1);
549 rs = &raid_softc[unit];
550
551 if ((rs->sc_flags & RAIDF_INITED) == 0)
552 return (-1);
553
554 part = DISKPART(dev);
555 omask = rs->sc_dkdev.dk_openmask & (1 << part);
556 lp = rs->sc_dkdev.dk_label;
557
558 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
559 return (-1);
560
561 if (lp->d_partitions[part].p_fstype != FS_SWAP)
562 size = -1;
563 else
564 size = lp->d_partitions[part].p_size *
565 (lp->d_secsize / DEV_BSIZE);
566
567 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
568 return (-1);
569
570 return (size);
571
572 }
573
/*
 * Kernel crash-dump entry point.  Only RAID 1 sets (1 data + 1 parity
 * column) are supported as dump targets.  Picks a single live
 * component to receive the dump -- preferring the master, then a
 * used spare of the master, then the slave, then a used spare of the
 * slave -- and forwards the dump to that component's block device.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int     part, c, sparecol, j, scol, dumpto;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);

	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* the dump must be a whole number of DEV_BSIZE blocks */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* refuse to write past the end of the raid device */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
/* ARGSUSED */
/*
 * Open the raid device.  Validates the unit and partition, (re)reads
 * the disklabel on the first open of an initialized set, and marks
 * components dirty on first open so an unclean shutdown can be
 * detected later.  The per-unit lock is held for the duration.
 */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* first open of an initialized set: refresh the disklabel */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		 here... If so, we needn't do this, but then need some
		 other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * Close the raid device.  On the last close of an initialized set the
 * component labels are marked clean; if the system is shutting down,
 * the set is shut down and the device/disk detached as well.
 */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int     unit = raidunit(dev);
	struct cfdata *cf;
	struct raid_softc *rs;
	int     error = 0;
	int     part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
						 RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			error = rf_Shutdown(raidPtrs[unit]);

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* detach the device */

			cf = device_cfdata(rs->sc_dev);
			error = config_detach(rs->sc_dev, DETACH_QUIET);
			free(cf, M_RAIDFRAME);

			/* Detach the disk. */
			disk_detach(&rs->sc_dkdev);
			disk_destroy(&rs->sc_dkdev);
		}
	}

	raidunlock(rs);
	/* NOTE(review): any shutdown/detach failure collected in "error"
	   above is discarded here -- close always reports success.
	   Looks deliberate (shutdown path), but confirm before changing. */
	return (0);

}
856
857 void
858 raidstrategy(struct buf *bp)
859 {
860 int s;
861
862 unsigned int raidID = raidunit(bp->b_dev);
863 RF_Raid_t *raidPtr;
864 struct raid_softc *rs = &raid_softc[raidID];
865 int wlabel;
866
867 if ((rs->sc_flags & RAIDF_INITED) ==0) {
868 bp->b_error = ENXIO;
869 goto done;
870 }
871 if (raidID >= numraid || !raidPtrs[raidID]) {
872 bp->b_error = ENODEV;
873 goto done;
874 }
875 raidPtr = raidPtrs[raidID];
876 if (!raidPtr->valid) {
877 bp->b_error = ENODEV;
878 goto done;
879 }
880 if (bp->b_bcount == 0) {
881 db1_printf(("b_bcount is zero..\n"));
882 goto done;
883 }
884
885 /*
886 * Do bounds checking and adjust transfer. If there's an
887 * error, the bounds check will flag that for us.
888 */
889
890 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
891 if (DISKPART(bp->b_dev) == RAW_PART) {
892 uint64_t size; /* device size in DEV_BSIZE unit */
893
894 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
895 size = raidPtr->totalSectors <<
896 (raidPtr->logBytesPerSector - DEV_BSHIFT);
897 } else {
898 size = raidPtr->totalSectors >>
899 (DEV_BSHIFT - raidPtr->logBytesPerSector);
900 }
901 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
902 goto done;
903 }
904 } else {
905 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
906 db1_printf(("Bounds check failed!!:%d %d\n",
907 (int) bp->b_blkno, (int) wlabel));
908 goto done;
909 }
910 }
911 s = splbio();
912
913 bp->b_resid = 0;
914
915 /* stuff it onto our queue */
916 BUFQ_PUT(rs->buf_queue, bp);
917
918 /* scheduled the IO to happen at the next convenient time */
919 wakeup(&(raidPtrs[raidID]->iodone));
920
921 splx(s);
922 return;
923
924 done:
925 bp->b_resid = bp->b_bcount;
926 biodone(bp);
927 }
928 /* ARGSUSED */
929 int
930 raidread(dev_t dev, struct uio *uio, int flags)
931 {
932 int unit = raidunit(dev);
933 struct raid_softc *rs;
934
935 if (unit >= numraid)
936 return (ENXIO);
937 rs = &raid_softc[unit];
938
939 if ((rs->sc_flags & RAIDF_INITED) == 0)
940 return (ENXIO);
941
942 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
943
944 }
945 /* ARGSUSED */
946 int
947 raidwrite(dev_t dev, struct uio *uio, int flags)
948 {
949 int unit = raidunit(dev);
950 struct raid_softc *rs;
951
952 if (unit >= numraid)
953 return (ENXIO);
954 rs = &raid_softc[unit];
955
956 if ((rs->sc_flags & RAIDF_INITED) == 0)
957 return (ENXIO);
958
959 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
960
961 }
962
963 int
964 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
965 {
966 int unit = raidunit(dev);
967 int error = 0;
968 int part, pmask;
969 struct cfdata *cf;
970 struct raid_softc *rs;
971 RF_Config_t *k_cfg, *u_cfg;
972 RF_Raid_t *raidPtr;
973 RF_RaidDisk_t *diskPtr;
974 RF_AccTotals_t *totals;
975 RF_DeviceConfig_t *d_cfg, **ucfgp;
976 u_char *specific_buf;
977 int retcode = 0;
978 int column;
979 int raidid;
980 struct rf_recon_req *rrcopy, *rr;
981 RF_ComponentLabel_t *clabel;
982 RF_ComponentLabel_t *ci_label;
983 RF_ComponentLabel_t **clabel_ptr;
984 RF_SingleComponent_t *sparePtr,*componentPtr;
985 RF_SingleComponent_t component;
986 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
987 int i, j, d;
988 #ifdef __HAVE_OLD_DISKLABEL
989 struct disklabel newlabel;
990 #endif
991 struct dkwedge_info *dkw;
992
993 if (unit >= numraid)
994 return (ENXIO);
995 rs = &raid_softc[unit];
996 raidPtr = raidPtrs[unit];
997
998 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
999 (int) DISKPART(dev), (int) unit, (int) cmd));
1000
1001 /* Must be open for writes for these commands... */
1002 switch (cmd) {
1003 #ifdef DIOCGSECTORSIZE
1004 case DIOCGSECTORSIZE:
1005 *(u_int *)data = raidPtr->bytesPerSector;
1006 return 0;
1007 case DIOCGMEDIASIZE:
1008 *(off_t *)data =
1009 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1010 return 0;
1011 #endif
1012 case DIOCSDINFO:
1013 case DIOCWDINFO:
1014 #ifdef __HAVE_OLD_DISKLABEL
1015 case ODIOCWDINFO:
1016 case ODIOCSDINFO:
1017 #endif
1018 case DIOCWLABEL:
1019 case DIOCAWEDGE:
1020 case DIOCDWEDGE:
1021 if ((flag & FWRITE) == 0)
1022 return (EBADF);
1023 }
1024
1025 /* Must be initialized for these... */
1026 switch (cmd) {
1027 case DIOCGDINFO:
1028 case DIOCSDINFO:
1029 case DIOCWDINFO:
1030 #ifdef __HAVE_OLD_DISKLABEL
1031 case ODIOCGDINFO:
1032 case ODIOCWDINFO:
1033 case ODIOCSDINFO:
1034 case ODIOCGDEFLABEL:
1035 #endif
1036 case DIOCGPART:
1037 case DIOCWLABEL:
1038 case DIOCGDEFLABEL:
1039 case DIOCAWEDGE:
1040 case DIOCDWEDGE:
1041 case DIOCLWEDGES:
1042 case RAIDFRAME_SHUTDOWN:
1043 case RAIDFRAME_REWRITEPARITY:
1044 case RAIDFRAME_GET_INFO:
1045 case RAIDFRAME_RESET_ACCTOTALS:
1046 case RAIDFRAME_GET_ACCTOTALS:
1047 case RAIDFRAME_KEEP_ACCTOTALS:
1048 case RAIDFRAME_GET_SIZE:
1049 case RAIDFRAME_FAIL_DISK:
1050 case RAIDFRAME_COPYBACK:
1051 case RAIDFRAME_CHECK_RECON_STATUS:
1052 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1053 case RAIDFRAME_GET_COMPONENT_LABEL:
1054 case RAIDFRAME_SET_COMPONENT_LABEL:
1055 case RAIDFRAME_ADD_HOT_SPARE:
1056 case RAIDFRAME_REMOVE_HOT_SPARE:
1057 case RAIDFRAME_INIT_LABELS:
1058 case RAIDFRAME_REBUILD_IN_PLACE:
1059 case RAIDFRAME_CHECK_PARITY:
1060 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1061 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1062 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1063 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1064 case RAIDFRAME_SET_AUTOCONFIG:
1065 case RAIDFRAME_SET_ROOT:
1066 case RAIDFRAME_DELETE_COMPONENT:
1067 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1068 if ((rs->sc_flags & RAIDF_INITED) == 0)
1069 return (ENXIO);
1070 }
1071
1072 switch (cmd) {
1073
1074 /* configure the system */
1075 case RAIDFRAME_CONFIGURE:
1076
1077 if (raidPtr->valid) {
1078 /* There is a valid RAID set running on this unit! */
1079 printf("raid%d: Device already configured!\n",unit);
1080 return(EINVAL);
1081 }
1082
1083 /* copy-in the configuration information */
1084 /* data points to a pointer to the configuration structure */
1085
1086 u_cfg = *((RF_Config_t **) data);
1087 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1088 if (k_cfg == NULL) {
1089 return (ENOMEM);
1090 }
1091 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1092 if (retcode) {
1093 RF_Free(k_cfg, sizeof(RF_Config_t));
1094 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1095 retcode));
1096 return (retcode);
1097 }
1098 /* allocate a buffer for the layout-specific data, and copy it
1099 * in */
1100 if (k_cfg->layoutSpecificSize) {
1101 if (k_cfg->layoutSpecificSize > 10000) {
1102 /* sanity check */
1103 RF_Free(k_cfg, sizeof(RF_Config_t));
1104 return (EINVAL);
1105 }
1106 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1107 (u_char *));
1108 if (specific_buf == NULL) {
1109 RF_Free(k_cfg, sizeof(RF_Config_t));
1110 return (ENOMEM);
1111 }
1112 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1113 k_cfg->layoutSpecificSize);
1114 if (retcode) {
1115 RF_Free(k_cfg, sizeof(RF_Config_t));
1116 RF_Free(specific_buf,
1117 k_cfg->layoutSpecificSize);
1118 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1119 retcode));
1120 return (retcode);
1121 }
1122 } else
1123 specific_buf = NULL;
1124 k_cfg->layoutSpecific = specific_buf;
1125
1126 /* should do some kind of sanity check on the configuration.
1127 * Store the sum of all the bytes in the last byte? */
1128
1129 /* configure the system */
1130
1131 /*
1132 * Clear the entire RAID descriptor, just to make sure
1133 * there is no stale data left in the case of a
1134 * reconfiguration
1135 */
1136 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
1137 raidPtr->raidid = unit;
1138
1139 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1140
1141 if (retcode == 0) {
1142
1143 /* allow this many simultaneous IO's to
1144 this RAID device */
1145 raidPtr->openings = RAIDOUTSTANDING;
1146
1147 raidinit(raidPtr);
1148 rf_markalldirty(raidPtr);
1149 }
1150 /* free the buffers. No return code here. */
1151 if (k_cfg->layoutSpecificSize) {
1152 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1153 }
1154 RF_Free(k_cfg, sizeof(RF_Config_t));
1155
1156 return (retcode);
1157
1158 /* shutdown the system */
1159 case RAIDFRAME_SHUTDOWN:
1160
1161 if ((error = raidlock(rs)) != 0)
1162 return (error);
1163
1164 /*
1165 * If somebody has a partition mounted, we shouldn't
1166 * shutdown.
1167 */
1168
1169 part = DISKPART(dev);
1170 pmask = (1 << part);
1171 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1172 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1173 (rs->sc_dkdev.dk_copenmask & pmask))) {
1174 raidunlock(rs);
1175 return (EBUSY);
1176 }
1177
1178 retcode = rf_Shutdown(raidPtr);
1179
1180 /* It's no longer initialized... */
1181 rs->sc_flags &= ~RAIDF_INITED;
1182
1183 /* free the pseudo device attach bits */
1184
1185 cf = device_cfdata(rs->sc_dev);
1186 /* XXX this causes us to not return any errors
1187 from the above call to rf_Shutdown() */
1188 retcode = config_detach(rs->sc_dev, DETACH_QUIET);
1189 free(cf, M_RAIDFRAME);
1190
1191 /* Detach the disk. */
1192 disk_detach(&rs->sc_dkdev);
1193 disk_destroy(&rs->sc_dkdev);
1194
1195 raidunlock(rs);
1196
1197 return (retcode);
1198 case RAIDFRAME_GET_COMPONENT_LABEL:
1199 clabel_ptr = (RF_ComponentLabel_t **) data;
1200 /* need to read the component label for the disk indicated
1201 by row,column in clabel */
1202
		/* For practice, let's get it directly from disk, rather
		   than from the in-core copy */
1205 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1206 (RF_ComponentLabel_t *));
1207 if (clabel == NULL)
1208 return (ENOMEM);
1209
1210 retcode = copyin( *clabel_ptr, clabel,
1211 sizeof(RF_ComponentLabel_t));
1212
1213 if (retcode) {
1214 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1215 return(retcode);
1216 }
1217
1218 clabel->row = 0; /* Don't allow looking at anything else.*/
1219
1220 column = clabel->column;
1221
1222 if ((column < 0) || (column >= raidPtr->numCol +
1223 raidPtr->numSpare)) {
1224 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1225 return(EINVAL);
1226 }
1227
1228 retcode = raidread_component_label(raidPtr->Disks[column].dev,
1229 raidPtr->raid_cinfo[column].ci_vp,
1230 clabel );
1231
1232 if (retcode == 0) {
1233 retcode = copyout(clabel, *clabel_ptr,
1234 sizeof(RF_ComponentLabel_t));
1235 }
1236 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1237 return (retcode);
1238
1239 case RAIDFRAME_SET_COMPONENT_LABEL:
1240 clabel = (RF_ComponentLabel_t *) data;
1241
1242 /* XXX check the label for valid stuff... */
1243 /* Note that some things *should not* get modified --
1244 the user should be re-initing the labels instead of
1245 trying to patch things.
1246 */
1247
1248 raidid = raidPtr->raidid;
1249 #ifdef DEBUG
1250 printf("raid%d: Got component label:\n", raidid);
1251 printf("raid%d: Version: %d\n", raidid, clabel->version);
1252 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1253 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1254 printf("raid%d: Column: %d\n", raidid, clabel->column);
1255 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1256 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1257 printf("raid%d: Status: %d\n", raidid, clabel->status);
1258 #endif
1259 clabel->row = 0;
1260 column = clabel->column;
1261
1262 if ((column < 0) || (column >= raidPtr->numCol)) {
1263 return(EINVAL);
1264 }
1265
1266 /* XXX this isn't allowed to do anything for now :-) */
1267
1268 /* XXX and before it is, we need to fill in the rest
1269 of the fields!?!?!?! */
1270 #if 0
1271 raidwrite_component_label(
1272 raidPtr->Disks[column].dev,
1273 raidPtr->raid_cinfo[column].ci_vp,
1274 clabel );
1275 #endif
1276 return (0);
1277
1278 case RAIDFRAME_INIT_LABELS:
1279 clabel = (RF_ComponentLabel_t *) data;
1280 /*
1281 we only want the serial number from
1282 the above. We get all the rest of the information
1283 from the config that was used to create this RAID
1284 set.
1285 */
1286
1287 raidPtr->serial_number = clabel->serial_number;
1288
1289 RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
1290 (RF_ComponentLabel_t *));
1291 if (ci_label == NULL)
1292 return (ENOMEM);
1293
1294 raid_init_component_label(raidPtr, ci_label);
1295 ci_label->serial_number = clabel->serial_number;
1296 ci_label->row = 0; /* we dont' pretend to support more */
1297
1298 for(column=0;column<raidPtr->numCol;column++) {
1299 diskPtr = &raidPtr->Disks[column];
1300 if (!RF_DEAD_DISK(diskPtr->status)) {
1301 ci_label->partitionSize = diskPtr->partitionSize;
1302 ci_label->column = column;
1303 raidwrite_component_label(
1304 raidPtr->Disks[column].dev,
1305 raidPtr->raid_cinfo[column].ci_vp,
1306 ci_label );
1307 }
1308 }
1309 RF_Free(ci_label, sizeof(RF_ComponentLabel_t));
1310
1311 return (retcode);
1312 case RAIDFRAME_SET_AUTOCONFIG:
1313 d = rf_set_autoconfig(raidPtr, *(int *) data);
1314 printf("raid%d: New autoconfig value is: %d\n",
1315 raidPtr->raidid, d);
1316 *(int *) data = d;
1317 return (retcode);
1318
1319 case RAIDFRAME_SET_ROOT:
1320 d = rf_set_rootpartition(raidPtr, *(int *) data);
1321 printf("raid%d: New rootpartition value is: %d\n",
1322 raidPtr->raidid, d);
1323 *(int *) data = d;
1324 return (retcode);
1325
1326 /* initialize all parity */
1327 case RAIDFRAME_REWRITEPARITY:
1328
1329 if (raidPtr->Layout.map->faultsTolerated == 0) {
1330 /* Parity for RAID 0 is trivially correct */
1331 raidPtr->parity_good = RF_RAID_CLEAN;
1332 return(0);
1333 }
1334
1335 if (raidPtr->parity_rewrite_in_progress == 1) {
1336 /* Re-write is already in progress! */
1337 return(EINVAL);
1338 }
1339
1340 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1341 rf_RewriteParityThread,
1342 raidPtr,"raid_parity");
1343 return (retcode);
1344
1345
1346 case RAIDFRAME_ADD_HOT_SPARE:
1347 sparePtr = (RF_SingleComponent_t *) data;
1348 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1349 retcode = rf_add_hot_spare(raidPtr, &component);
1350 return(retcode);
1351
1352 case RAIDFRAME_REMOVE_HOT_SPARE:
1353 return(retcode);
1354
1355 case RAIDFRAME_DELETE_COMPONENT:
1356 componentPtr = (RF_SingleComponent_t *)data;
1357 memcpy( &component, componentPtr,
1358 sizeof(RF_SingleComponent_t));
1359 retcode = rf_delete_component(raidPtr, &component);
1360 return(retcode);
1361
1362 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1363 componentPtr = (RF_SingleComponent_t *)data;
1364 memcpy( &component, componentPtr,
1365 sizeof(RF_SingleComponent_t));
1366 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1367 return(retcode);
1368
1369 case RAIDFRAME_REBUILD_IN_PLACE:
1370
1371 if (raidPtr->Layout.map->faultsTolerated == 0) {
1372 /* Can't do this on a RAID 0!! */
1373 return(EINVAL);
1374 }
1375
1376 if (raidPtr->recon_in_progress == 1) {
1377 /* a reconstruct is already in progress! */
1378 return(EINVAL);
1379 }
1380
1381 componentPtr = (RF_SingleComponent_t *) data;
1382 memcpy( &component, componentPtr,
1383 sizeof(RF_SingleComponent_t));
1384 component.row = 0; /* we don't support any more */
1385 column = component.column;
1386
1387 if ((column < 0) || (column >= raidPtr->numCol)) {
1388 return(EINVAL);
1389 }
1390
1391 RF_LOCK_MUTEX(raidPtr->mutex);
1392 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1393 (raidPtr->numFailures > 0)) {
1394 /* XXX 0 above shouldn't be constant!!! */
1395 /* some component other than this has failed.
1396 Let's not make things worse than they already
1397 are... */
1398 printf("raid%d: Unable to reconstruct to disk at:\n",
1399 raidPtr->raidid);
1400 printf("raid%d: Col: %d Too many failures.\n",
1401 raidPtr->raidid, column);
1402 RF_UNLOCK_MUTEX(raidPtr->mutex);
1403 return (EINVAL);
1404 }
1405 if (raidPtr->Disks[column].status ==
1406 rf_ds_reconstructing) {
1407 printf("raid%d: Unable to reconstruct to disk at:\n",
1408 raidPtr->raidid);
1409 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1410
1411 RF_UNLOCK_MUTEX(raidPtr->mutex);
1412 return (EINVAL);
1413 }
1414 if (raidPtr->Disks[column].status == rf_ds_spared) {
1415 RF_UNLOCK_MUTEX(raidPtr->mutex);
1416 return (EINVAL);
1417 }
1418 RF_UNLOCK_MUTEX(raidPtr->mutex);
1419
1420 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1421 if (rrcopy == NULL)
1422 return(ENOMEM);
1423
1424 rrcopy->raidPtr = (void *) raidPtr;
1425 rrcopy->col = column;
1426
1427 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1428 rf_ReconstructInPlaceThread,
1429 rrcopy,"raid_reconip");
1430 return(retcode);
1431
1432 case RAIDFRAME_GET_INFO:
1433 if (!raidPtr->valid)
1434 return (ENODEV);
1435 ucfgp = (RF_DeviceConfig_t **) data;
1436 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1437 (RF_DeviceConfig_t *));
1438 if (d_cfg == NULL)
1439 return (ENOMEM);
1440 d_cfg->rows = 1; /* there is only 1 row now */
1441 d_cfg->cols = raidPtr->numCol;
1442 d_cfg->ndevs = raidPtr->numCol;
1443 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1444 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1445 return (ENOMEM);
1446 }
1447 d_cfg->nspares = raidPtr->numSpare;
1448 if (d_cfg->nspares >= RF_MAX_DISKS) {
1449 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1450 return (ENOMEM);
1451 }
1452 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1453 d = 0;
1454 for (j = 0; j < d_cfg->cols; j++) {
1455 d_cfg->devs[d] = raidPtr->Disks[j];
1456 d++;
1457 }
1458 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1459 d_cfg->spares[i] = raidPtr->Disks[j];
1460 }
1461 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1462 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1463
1464 return (retcode);
1465
1466 case RAIDFRAME_CHECK_PARITY:
1467 *(int *) data = raidPtr->parity_good;
1468 return (0);
1469
1470 case RAIDFRAME_RESET_ACCTOTALS:
1471 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1472 return (0);
1473
1474 case RAIDFRAME_GET_ACCTOTALS:
1475 totals = (RF_AccTotals_t *) data;
1476 *totals = raidPtr->acc_totals;
1477 return (0);
1478
1479 case RAIDFRAME_KEEP_ACCTOTALS:
1480 raidPtr->keep_acc_totals = *(int *)data;
1481 return (0);
1482
1483 case RAIDFRAME_GET_SIZE:
1484 *(int *) data = raidPtr->totalSectors;
1485 return (0);
1486
1487 /* fail a disk & optionally start reconstruction */
1488 case RAIDFRAME_FAIL_DISK:
1489
1490 if (raidPtr->Layout.map->faultsTolerated == 0) {
1491 /* Can't do this on a RAID 0!! */
1492 return(EINVAL);
1493 }
1494
1495 rr = (struct rf_recon_req *) data;
1496 rr->row = 0;
1497 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1498 return (EINVAL);
1499
1500
1501 RF_LOCK_MUTEX(raidPtr->mutex);
1502 if (raidPtr->status == rf_rs_reconstructing) {
1503 /* you can't fail a disk while we're reconstructing! */
1504 /* XXX wrong for RAID6 */
1505 RF_UNLOCK_MUTEX(raidPtr->mutex);
1506 return (EINVAL);
1507 }
1508 if ((raidPtr->Disks[rr->col].status ==
1509 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1510 /* some other component has failed. Let's not make
1511 things worse. XXX wrong for RAID6 */
1512 RF_UNLOCK_MUTEX(raidPtr->mutex);
1513 return (EINVAL);
1514 }
1515 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1516 /* Can't fail a spared disk! */
1517 RF_UNLOCK_MUTEX(raidPtr->mutex);
1518 return (EINVAL);
1519 }
1520 RF_UNLOCK_MUTEX(raidPtr->mutex);
1521
1522 /* make a copy of the recon request so that we don't rely on
1523 * the user's buffer */
1524 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1525 if (rrcopy == NULL)
1526 return(ENOMEM);
1527 memcpy(rrcopy, rr, sizeof(*rr));
1528 rrcopy->raidPtr = (void *) raidPtr;
1529
1530 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1531 rf_ReconThread,
1532 rrcopy,"raid_recon");
1533 return (0);
1534
1535 /* invoke a copyback operation after recon on whatever disk
1536 * needs it, if any */
1537 case RAIDFRAME_COPYBACK:
1538
1539 if (raidPtr->Layout.map->faultsTolerated == 0) {
1540 /* This makes no sense on a RAID 0!! */
1541 return(EINVAL);
1542 }
1543
1544 if (raidPtr->copyback_in_progress == 1) {
1545 /* Copyback is already in progress! */
1546 return(EINVAL);
1547 }
1548
1549 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1550 rf_CopybackThread,
1551 raidPtr,"raid_copyback");
1552 return (retcode);
1553
1554 /* return the percentage completion of reconstruction */
1555 case RAIDFRAME_CHECK_RECON_STATUS:
1556 if (raidPtr->Layout.map->faultsTolerated == 0) {
1557 /* This makes no sense on a RAID 0, so tell the
1558 user it's done. */
1559 *(int *) data = 100;
1560 return(0);
1561 }
1562 if (raidPtr->status != rf_rs_reconstructing)
1563 *(int *) data = 100;
1564 else {
1565 if (raidPtr->reconControl->numRUsTotal > 0) {
1566 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1567 } else {
1568 *(int *) data = 0;
1569 }
1570 }
1571 return (0);
1572 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1573 progressInfoPtr = (RF_ProgressInfo_t **) data;
1574 if (raidPtr->status != rf_rs_reconstructing) {
1575 progressInfo.remaining = 0;
1576 progressInfo.completed = 100;
1577 progressInfo.total = 100;
1578 } else {
1579 progressInfo.total =
1580 raidPtr->reconControl->numRUsTotal;
1581 progressInfo.completed =
1582 raidPtr->reconControl->numRUsComplete;
1583 progressInfo.remaining = progressInfo.total -
1584 progressInfo.completed;
1585 }
1586 retcode = copyout(&progressInfo, *progressInfoPtr,
1587 sizeof(RF_ProgressInfo_t));
1588 return (retcode);
1589
1590 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1591 if (raidPtr->Layout.map->faultsTolerated == 0) {
1592 /* This makes no sense on a RAID 0, so tell the
1593 user it's done. */
1594 *(int *) data = 100;
1595 return(0);
1596 }
1597 if (raidPtr->parity_rewrite_in_progress == 1) {
1598 *(int *) data = 100 *
1599 raidPtr->parity_rewrite_stripes_done /
1600 raidPtr->Layout.numStripe;
1601 } else {
1602 *(int *) data = 100;
1603 }
1604 return (0);
1605
1606 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1607 progressInfoPtr = (RF_ProgressInfo_t **) data;
1608 if (raidPtr->parity_rewrite_in_progress == 1) {
1609 progressInfo.total = raidPtr->Layout.numStripe;
1610 progressInfo.completed =
1611 raidPtr->parity_rewrite_stripes_done;
1612 progressInfo.remaining = progressInfo.total -
1613 progressInfo.completed;
1614 } else {
1615 progressInfo.remaining = 0;
1616 progressInfo.completed = 100;
1617 progressInfo.total = 100;
1618 }
1619 retcode = copyout(&progressInfo, *progressInfoPtr,
1620 sizeof(RF_ProgressInfo_t));
1621 return (retcode);
1622
1623 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1624 if (raidPtr->Layout.map->faultsTolerated == 0) {
1625 /* This makes no sense on a RAID 0 */
1626 *(int *) data = 100;
1627 return(0);
1628 }
1629 if (raidPtr->copyback_in_progress == 1) {
1630 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1631 raidPtr->Layout.numStripe;
1632 } else {
1633 *(int *) data = 100;
1634 }
1635 return (0);
1636
1637 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1638 progressInfoPtr = (RF_ProgressInfo_t **) data;
1639 if (raidPtr->copyback_in_progress == 1) {
1640 progressInfo.total = raidPtr->Layout.numStripe;
1641 progressInfo.completed =
1642 raidPtr->copyback_stripes_done;
1643 progressInfo.remaining = progressInfo.total -
1644 progressInfo.completed;
1645 } else {
1646 progressInfo.remaining = 0;
1647 progressInfo.completed = 100;
1648 progressInfo.total = 100;
1649 }
1650 retcode = copyout(&progressInfo, *progressInfoPtr,
1651 sizeof(RF_ProgressInfo_t));
1652 return (retcode);
1653
1654 /* the sparetable daemon calls this to wait for the kernel to
1655 * need a spare table. this ioctl does not return until a
1656 * spare table is needed. XXX -- calling mpsleep here in the
1657 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1658 * -- I should either compute the spare table in the kernel,
1659 * or have a different -- XXX XXX -- interface (a different
1660 * character device) for delivering the table -- XXX */
1661 #if 0
1662 case RAIDFRAME_SPARET_WAIT:
1663 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1664 while (!rf_sparet_wait_queue)
1665 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1666 waitreq = rf_sparet_wait_queue;
1667 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1668 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1669
1670 /* structure assignment */
1671 *((RF_SparetWait_t *) data) = *waitreq;
1672
1673 RF_Free(waitreq, sizeof(*waitreq));
1674 return (0);
1675
		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
1678 case RAIDFRAME_ABORT_SPARET_WAIT:
1679 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1680 waitreq->fcol = -1;
1681 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1682 waitreq->next = rf_sparet_wait_queue;
1683 rf_sparet_wait_queue = waitreq;
1684 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1685 wakeup(&rf_sparet_wait_queue);
1686 return (0);
1687
1688 /* used by the spare table daemon to deliver a spare table
1689 * into the kernel */
1690 case RAIDFRAME_SEND_SPARET:
1691
1692 /* install the spare table */
1693 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1694
1695 /* respond to the requestor. the return status of the spare
1696 * table installation is passed in the "fcol" field */
1697 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1698 waitreq->fcol = retcode;
1699 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1700 waitreq->next = rf_sparet_resp_queue;
1701 rf_sparet_resp_queue = waitreq;
1702 wakeup(&rf_sparet_resp_queue);
1703 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1704
1705 return (retcode);
1706 #endif
1707
1708 default:
1709 break; /* fall through to the os-specific code below */
1710
1711 }
1712
1713 if (!raidPtr->valid)
1714 return (EINVAL);
1715
1716 /*
1717 * Add support for "regular" device ioctls here.
1718 */
1719
1720 switch (cmd) {
1721 case DIOCGDINFO:
1722 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1723 break;
1724 #ifdef __HAVE_OLD_DISKLABEL
1725 case ODIOCGDINFO:
1726 newlabel = *(rs->sc_dkdev.dk_label);
1727 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1728 return ENOTTY;
1729 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1730 break;
1731 #endif
1732
1733 case DIOCGPART:
1734 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1735 ((struct partinfo *) data)->part =
1736 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1737 break;
1738
1739 case DIOCWDINFO:
1740 case DIOCSDINFO:
1741 #ifdef __HAVE_OLD_DISKLABEL
1742 case ODIOCWDINFO:
1743 case ODIOCSDINFO:
1744 #endif
1745 {
1746 struct disklabel *lp;
1747 #ifdef __HAVE_OLD_DISKLABEL
1748 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1749 memset(&newlabel, 0, sizeof newlabel);
1750 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1751 lp = &newlabel;
1752 } else
1753 #endif
1754 lp = (struct disklabel *)data;
1755
1756 if ((error = raidlock(rs)) != 0)
1757 return (error);
1758
1759 rs->sc_flags |= RAIDF_LABELLING;
1760
1761 error = setdisklabel(rs->sc_dkdev.dk_label,
1762 lp, 0, rs->sc_dkdev.dk_cpulabel);
1763 if (error == 0) {
1764 if (cmd == DIOCWDINFO
1765 #ifdef __HAVE_OLD_DISKLABEL
1766 || cmd == ODIOCWDINFO
1767 #endif
1768 )
1769 error = writedisklabel(RAIDLABELDEV(dev),
1770 raidstrategy, rs->sc_dkdev.dk_label,
1771 rs->sc_dkdev.dk_cpulabel);
1772 }
1773 rs->sc_flags &= ~RAIDF_LABELLING;
1774
1775 raidunlock(rs);
1776
1777 if (error)
1778 return (error);
1779 break;
1780 }
1781
1782 case DIOCWLABEL:
1783 if (*(int *) data != 0)
1784 rs->sc_flags |= RAIDF_WLABEL;
1785 else
1786 rs->sc_flags &= ~RAIDF_WLABEL;
1787 break;
1788
1789 case DIOCGDEFLABEL:
1790 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1791 break;
1792
1793 #ifdef __HAVE_OLD_DISKLABEL
1794 case ODIOCGDEFLABEL:
1795 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1796 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1797 return ENOTTY;
1798 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1799 break;
1800 #endif
1801
1802 case DIOCAWEDGE:
1803 case DIOCDWEDGE:
1804 dkw = (void *)data;
1805
1806 /* If the ioctl happens here, the parent is us. */
1807 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1808 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1809
1810 case DIOCLWEDGES:
1811 return dkwedge_list(&rs->sc_dkdev,
1812 (struct dkwedge_list *)data, l);
1813
1814 default:
1815 retcode = ENOTTY;
1816 }
1817 return (retcode);
1818
1819 }
1820
1821
/* raidinit -- complete the rest of the initialization for the
   RAIDframe device: mark the unit initialized, attach the pseudo
   device, and register the disk with the disk(9) framework so that
   disklabels and wedges can be used. */


static void
raidinit(RF_Raid_t *raidPtr)
{
	struct cfdata *cf;
	struct raid_softc *rs;
	int unit;

	unit = raidPtr->raidid;

	rs = &raid_softc[unit];

	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	rs->sc_dev = config_attach_pseudo(cf);

	if (rs->sc_dev==NULL) {
		/* NOTE(review): on failure we only log; sc_dev stays NULL
		   and initialization continues — later users of sc_dev
		   (e.g. the SHUTDOWN ioctl) presumably must cope.  Verify. */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
	}

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe. */

	/* size of the whole RAID set, in sectors */
	rs->sc_size = raidPtr->totalSectors;

	/* probe for any wedges (GPT partitions, etc.) on the new device */
	dkwedge_discover(&rs->sc_dkdev);

	rf_set_properties(rs, raidPtr);

}
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* hand our request to the sparetable daemon: queue it on the
	 * wait queue and wake any daemon blocked in SPARET_WAIT */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): the comment above refers to the original mpsleep
	 * call; the tsleep() used here does not take the mutex — confirm
	 * that RF_LOCK_MUTEX here cannot deadlock against the daemon. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		       "raidframe getsparetable", 0);
	}
	/* a response is available: pop it off the response queue.  Note
	 * that 'req' is re-used to point at the daemon's response. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* the daemon passes its status back in the "fcol" field */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
#endif
1909
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Drains the softc's buffer queue, dispatching up to raidPtr->openings
 * simultaneous accesses into RAIDframe.  Buffers that fail validation
 * are completed immediately with an error via biodone().
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* drop the mutex around the label update, then reacquire
		 * it before touching the counter again */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* invariant: raidPtr->mutex is held when the loop condition is
	 * evaluated, and reacquired on every 'continue' path below */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			/* NOTE: returns with the mutex released */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			/* add the partition's start offset; the raw
			 * partition addresses the whole device already */
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* sector count of the request; pb is 1 iff the byte count
		 * is not an exact multiple of the sector size */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			/* NOTE(review): the "1 ||" forces this branch;
			 * db1_printf is presumably compiled out unless
			 * debugging is enabled — confirm before removing */
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* reject accesses past the end of the set; the extra
		 * "sum < ..." comparisons catch arithmetic wrap-around */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that aren't a whole number of sectors */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening for this access */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* rf_DoAccess refused the request; fail the buf
			 * here, but keep draining the queue */
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
2038
2039
2040
2041
/* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
/* Translates a RAIDframe disk-queue request (req) into a struct buf and
 * hands it to the underlying component via bdev_strategy().  Completion
 * is signalled asynchronously through KernelWakeupFunc(). */

int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	/* remember which queue this request belongs to, so the
	 * completion callback can find it again */
	req->queue = queue;

#if DIAGNOSTIC
	if (queue->raidPtr->raidid >= numraid) {
		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
		       numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO");
	}
#endif

	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* complete immediately, without touching the hardware */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		/* start timing the physical I/O for access tracing */
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* fill in the buf from the request; KernelWakeupFunc will
		 * be called with 'req' when the I/O completes */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		       op, queue->rf_cinfo->ci_dev,
		       req->sectorOffset, req->numSector,
		       req->buf, KernelWakeupFunc, (void *) req,
		       queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			    (int) req->sectorOffset, (int) req->numSector,
			    (int) (req->numSector <<
				   queue->raidPtr->logBytesPerSector),
			    (int) queue->raidPtr->logBytesPerSector));
		/* hand the I/O to the component's strategy routine */
		bdev_strategy(bp);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.

   Runs at biodone time (hence splbio): records the error status of the
   completed component I/O, marks the component failed if appropriate,
   and hands the request to the raidio thread via the iodone queue.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	int s;

	/* block other biodone activity while we manipulate the queues */
	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	/* rf_DispatchKernelIO stashed the request in b_private */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

#if RF_ACC_TRACE > 0
	/* stop the physical-I/O timer started at dispatch time and
	 * accumulate the elapsed time into the trace record */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures is what triggers the component
			 * label update in raidstart() */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */

	req->error = bp->b_error;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}
2190
2191
2192
2193 /*
2194 * initialize a buf structure for doing an I/O in the kernel.
2195 */
2196 static void
2197 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2198 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2199 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2200 struct proc *b_proc)
2201 {
2202 /* bp->b_flags = B_PHYS | rw_flag; */
2203 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2204 bp->b_oflags = 0;
2205 bp->b_cflags = 0;
2206 bp->b_bcount = numSect << logBytesPerSector;
2207 bp->b_bufsize = bp->b_bcount;
2208 bp->b_error = 0;
2209 bp->b_dev = dev;
2210 bp->b_data = bf;
2211 bp->b_blkno = startSect;
2212 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2213 if (bp->b_bcount == 0) {
2214 panic("bp->b_bcount is zero in InitBP!!");
2215 }
2216 bp->b_proc = b_proc;
2217 bp->b_iodone = cbFunc;
2218 bp->b_private = cbArg;
2219 }
2220
2221 static void
2222 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2223 struct disklabel *lp)
2224 {
2225 memset(lp, 0, sizeof(*lp));
2226
2227 /* fabricate a label... */
2228 lp->d_secperunit = raidPtr->totalSectors;
2229 lp->d_secsize = raidPtr->bytesPerSector;
2230 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2231 lp->d_ntracks = 4 * raidPtr->numCol;
2232 lp->d_ncylinders = raidPtr->totalSectors /
2233 (lp->d_nsectors * lp->d_ntracks);
2234 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2235
2236 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2237 lp->d_type = DTYPE_RAID;
2238 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2239 lp->d_rpm = 3600;
2240 lp->d_interleave = 1;
2241 lp->d_flags = 0;
2242
2243 lp->d_partitions[RAW_PART].p_offset = 0;
2244 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2245 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2246 lp->d_npartitions = RAW_PART + 1;
2247
2248 lp->d_magic = DISKMAGIC;
2249 lp->d_magic2 = DISKMAGIC;
2250 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2251
2252 }
/*
 * Read the disklabel from the raid device.  If one is not present, fake one
 * up.  Also sanity-checks an on-disk label against the current size of
 * the set, since a stale label may be found after a reconfiguration.
 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated default label... */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no usable on-disk label: synthesize one in-core */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", unit, rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
2309 /*
2310 * Take care of things one might want to take care of in the event
2311 * that a disklabel isn't present.
2312 */
2313 static void
2314 raidmakedisklabel(struct raid_softc *rs)
2315 {
2316 struct disklabel *lp = rs->sc_dkdev.dk_label;
2317 db1_printf(("Making a label..\n"));
2318
2319 /*
2320 * For historical reasons, if there's no disklabel present
2321 * the raw partition must be marked FS_BSDFFS.
2322 */
2323
2324 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2325
2326 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2327
2328 lp->d_checksum = dkcksum(lp);
2329 }
2330 /*
2331 * Wait interruptibly for an exclusive lock.
2332 *
2333 * XXX
2334 * Several drivers do this; it should be abstracted and made MP-safe.
2335 * (Hmm... where have we seen this warning before :-> GO )
2336 */
2337 static int
2338 raidlock(struct raid_softc *rs)
2339 {
2340 int error;
2341
2342 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2343 rs->sc_flags |= RAIDF_WANTED;
2344 if ((error =
2345 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2346 return (error);
2347 }
2348 rs->sc_flags |= RAIDF_LOCKED;
2349 return (0);
2350 }
2351 /*
2352 * Unlock and wake up any waiters.
2353 */
2354 static void
2355 raidunlock(struct raid_softc *rs)
2356 {
2357
2358 rs->sc_flags &= ~RAIDF_LOCKED;
2359 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2360 rs->sc_flags &= ~RAIDF_WANTED;
2361 wakeup(rs);
2362 }
2363 }
2364
2365
2366 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2367 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2368
2369 int
2370 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2371 {
2372 RF_ComponentLabel_t clabel;
2373 raidread_component_label(dev, b_vp, &clabel);
2374 clabel.mod_counter = mod_counter;
2375 clabel.clean = RF_RAID_CLEAN;
2376 raidwrite_component_label(dev, b_vp, &clabel);
2377 return(0);
2378 }
2379
2380
2381 int
2382 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2383 {
2384 RF_ComponentLabel_t clabel;
2385 raidread_component_label(dev, b_vp, &clabel);
2386 clabel.mod_counter = mod_counter;
2387 clabel.clean = RF_RAID_DIRTY;
2388 raidwrite_component_label(dev, b_vp, &clabel);
2389 return(0);
2390 }
2391
2392 /* ARGSUSED */
2393 int
2394 raidread_component_label(dev_t dev, struct vnode *b_vp,
2395 RF_ComponentLabel_t *clabel)
2396 {
2397 struct buf *bp;
2398 const struct bdevsw *bdev;
2399 int error;
2400
2401 /* XXX should probably ensure that we don't try to do this if
2402 someone has changed rf_protected_sectors. */
2403
2404 if (b_vp == NULL) {
2405 /* For whatever reason, this component is not valid.
2406 Don't try to read a component label from it. */
2407 return(EINVAL);
2408 }
2409
2410 /* get a block of the appropriate size... */
2411 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2412 bp->b_dev = dev;
2413
2414 /* get our ducks in a row for the read */
2415 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2416 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2417 bp->b_flags |= B_READ;
2418 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2419
2420 bdev = bdevsw_lookup(bp->b_dev);
2421 if (bdev == NULL)
2422 return (ENXIO);
2423 (*bdev->d_strategy)(bp);
2424
2425 error = biowait(bp);
2426
2427 if (!error) {
2428 memcpy(clabel, bp->b_data,
2429 sizeof(RF_ComponentLabel_t));
2430 }
2431
2432 brelse(bp, 0);
2433 return(error);
2434 }
2435 /* ARGSUSED */
2436 int
2437 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2438 RF_ComponentLabel_t *clabel)
2439 {
2440 struct buf *bp;
2441 const struct bdevsw *bdev;
2442 int error;
2443
2444 /* get a block of the appropriate size... */
2445 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2446 bp->b_dev = dev;
2447
2448 /* get our ducks in a row for the write */
2449 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2450 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2451 bp->b_flags |= B_WRITE;
2452 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2453
2454 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2455
2456 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2457
2458 bdev = bdevsw_lookup(bp->b_dev);
2459 if (bdev == NULL)
2460 return (ENXIO);
2461 (*bdev->d_strategy)(bp);
2462 error = biowait(bp);
2463 brelse(bp, 0);
2464 if (error) {
2465 #if 1
2466 printf("Failed to write RAID component info!\n");
2467 #endif
2468 }
2469
2470 return(error);
2471 }
2472
/*
 * Bump the set's modification counter and mark the component label of
 * every non-failed component (and every in-use spare) dirty on disk,
 * so an unclean shutdown can be detected later.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (clabel.status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(
				    raidPtr->Disks[c].dev,
				    raidPtr->raid_cinfo[c].ci_vp,
				    raidPtr->mod_counter);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare stands in for;
			   scol stays -1 if nothing matches (unexpected
			   for a used spare) */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			clabel.row = 0;
			clabel.column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			/* NOTE(review): the clabel edits above appear to
			   be discarded -- raidmarkdirty() re-reads the
			   label from disk before writing; confirm this
			   is intended. */
			raidmarkdirty(raidPtr->Disks[sparecol].dev,
				      raidPtr->raid_cinfo[sparecol].ci_vp,
				      raidPtr->mod_counter);
		}
	}
}
2543
2544
/*
 * Rewrite the component label of every optimal component and every
 * in-use spare: bump the mod counter, record current status and the
 * raid unit we're configured as.  When final == RF_FINAL_COMPONENT_UPDATE
 * and the in-core parity state is clean, also set the clean bit in
 * the labels (via raidmarkclean()).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			/* make sure status is noted */
			clabel.status = rf_ds_optimal;

			/* bump the counter */
			clabel.mod_counter = raidPtr->mod_counter;

			/* note what unit we are configured as */
			clabel.last_unit = raidPtr->raidid;

			raidwrite_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						raidPtr->Disks[c].dev,
						raidPtr->raid_cinfo[c].ci_vp,
						raidPtr->mod_counter);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			clabel.mod_counter = raidPtr->mod_counter;
			clabel.column = scol;
			clabel.status = rf_ds_optimal;
			clabel.last_unit = raidPtr->raidid;

			raidwrite_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( raidPtr->Disks[sparecol].dev,
						       raidPtr->raid_cinfo[sparecol].ci_vp,
						       raidPtr->mod_counter);
				}
			}
		}
	}
}
2640
2641 void
2642 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2643 {
2644
2645 if (vp != NULL) {
2646 if (auto_configured == 1) {
2647 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2648 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2649 vput(vp);
2650
2651 } else {
2652 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2653 }
2654 }
2655 }
2656
2657
2658 void
2659 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2660 {
2661 int r,c;
2662 struct vnode *vp;
2663 int acd;
2664
2665
2666 /* We take this opportunity to close the vnodes like we should.. */
2667
2668 for (c = 0; c < raidPtr->numCol; c++) {
2669 vp = raidPtr->raid_cinfo[c].ci_vp;
2670 acd = raidPtr->Disks[c].auto_configured;
2671 rf_close_component(raidPtr, vp, acd);
2672 raidPtr->raid_cinfo[c].ci_vp = NULL;
2673 raidPtr->Disks[c].auto_configured = 0;
2674 }
2675
2676 for (r = 0; r < raidPtr->numSpare; r++) {
2677 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2678 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2679 rf_close_component(raidPtr, vp, acd);
2680 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2681 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2682 }
2683 }
2684
2685
/*
 * Kernel thread body: fail component req->col via rf_FailDisk(),
 * reconstructing to a spare when RF_FDFLAGS_RECON is set.  Frees the
 * request and exits the thread when done.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();		/* run the whole operation at splbio */
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;	/* flag: a recon is running */

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* the request was allocated by our creator; we own it now */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2707
/*
 * Kernel thread body: rewrite all parity on the set.  On success the
 * in-core parity state is marked clean (component labels get their
 * clean bits at proper shutdown).  Wakes any thread blocked in
 * shutdown waiting for us, then exits.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;	/* progress flag */
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2737
2738
/*
 * Kernel thread body: copy reconstructed data back from the spare to
 * a replaced component via rf_CopybackReconstructedData(), then exit.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;	/* progress flag */
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2753
2754
/*
 * Kernel thread body: reconstruct the contents of column req->col in
 * place (onto the same component).  Frees the request and exits.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();		/* run the whole operation at splbio */
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;	/* flag: a recon is running */
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));	/* we own the request now */
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2772
/*
 * Examine one candidate component (open vnode vp on dev, partition of
 * "size" sectors): read its component label and, if the label looks
 * reasonable and fits within the partition, prepend an RF_AutoConfig_t
 * entry to ac_list.  On rejection the vnode is closed and released.
 * Returns the (possibly extended) list, or NULL after freeing the
 * entire list if memory ran out.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* out of memory: tear down everything collected so far.
		   NOTE(review): vp is not closed on this path -- looks
		   like a vnode leak; confirm. */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel) &&
		    (clabel->partitionSize <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;	/* list entry takes over the vnode */
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
2829
/*
 * Walk every disk-class device in the system looking for RAIDframe
 * components: wedges whose partition type is DKW_PTYPE_RAIDFRAME, and
 * disklabel partitions marked FS_RAID.  Floppy and CD drivers are
 * skipped.  Each candidate is handed to rf_get_component(); the
 * resulting RF_AutoConfig_t list (entries hold open vnodes) is
 * returned.
 */
RF_AutoConfig_t *
rf_find_raid_components()
{
	struct vnode *vp;
	struct disklabel label;
	struct device *dv;
	dev_t dev;
	int bmajor, bminor, wedge;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;


	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = alldevs.tqh_first; dv != NULL;
	     dv = dv->dv_list.tqe_next) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		if (wedge) {
			/* a wedge is itself the single candidate on
			   this device; check its partition type */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component() takes over the vnode */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size);
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		/* each FS_RAID partition is a candidate; each gets its
		   own vnode for rf_get_component() */
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size);
		}
	}
	return ac_list;
}
2967
2968
2969 static int
2970 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2971 {
2972
2973 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2974 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2975 ((clabel->clean == RF_RAID_CLEAN) ||
2976 (clabel->clean == RF_RAID_DIRTY)) &&
2977 clabel->row >=0 &&
2978 clabel->column >= 0 &&
2979 clabel->num_rows > 0 &&
2980 clabel->num_columns > 0 &&
2981 clabel->row < clabel->num_rows &&
2982 clabel->column < clabel->num_columns &&
2983 clabel->blockSize > 0 &&
2984 clabel->numBlocks > 0) {
2985 /* label looks reasonable enough... */
2986 return(1);
2987 }
2988 return(0);
2989 }
2990
2991
#ifdef DEBUG
/*
 * Dump the fields of a component label to the console (DEBUG kernels
 * only).  Purely informational; no side effects.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf(" Clean: %s Status: %d\n",
	    clabel->clean ? "Yes" : "No", clabel->status );
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);
	printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf(" Contains root partition: %s\n",
	    clabel->root_partition ? "Yes" : "No" );
	printf(" Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif
3019
3020 RF_ConfigSet_t *
3021 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3022 {
3023 RF_AutoConfig_t *ac;
3024 RF_ConfigSet_t *config_sets;
3025 RF_ConfigSet_t *cset;
3026 RF_AutoConfig_t *ac_next;
3027
3028
3029 config_sets = NULL;
3030
3031 /* Go through the AutoConfig list, and figure out which components
3032 belong to what sets. */
3033 ac = ac_list;
3034 while(ac!=NULL) {
3035 /* we're going to putz with ac->next, so save it here
3036 for use at the end of the loop */
3037 ac_next = ac->next;
3038
3039 if (config_sets == NULL) {
3040 /* will need at least this one... */
3041 config_sets = (RF_ConfigSet_t *)
3042 malloc(sizeof(RF_ConfigSet_t),
3043 M_RAIDFRAME, M_NOWAIT);
3044 if (config_sets == NULL) {
3045 panic("rf_create_auto_sets: No memory!");
3046 }
3047 /* this one is easy :) */
3048 config_sets->ac = ac;
3049 config_sets->next = NULL;
3050 config_sets->rootable = 0;
3051 ac->next = NULL;
3052 } else {
3053 /* which set does this component fit into? */
3054 cset = config_sets;
3055 while(cset!=NULL) {
3056 if (rf_does_it_fit(cset, ac)) {
3057 /* looks like it matches... */
3058 ac->next = cset->ac;
3059 cset->ac = ac;
3060 break;
3061 }
3062 cset = cset->next;
3063 }
3064 if (cset==NULL) {
3065 /* didn't find a match above... new set..*/
3066 cset = (RF_ConfigSet_t *)
3067 malloc(sizeof(RF_ConfigSet_t),
3068 M_RAIDFRAME, M_NOWAIT);
3069 if (cset == NULL) {
3070 panic("rf_create_auto_sets: No memory!");
3071 }
3072 cset->ac = ac;
3073 ac->next = NULL;
3074 cset->next = config_sets;
3075 cset->rootable = 0;
3076 config_sets = cset;
3077 }
3078 }
3079 ac = ac_next;
3080 }
3081
3082
3083 return(config_sets);
3084 }
3085
3086 static int
3087 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3088 {
3089 RF_ComponentLabel_t *clabel1, *clabel2;
3090
3091 /* If this one matches the *first* one in the set, that's good
3092 enough, since the other members of the set would have been
3093 through here too... */
3094 /* note that we are not checking partitionSize here..
3095
3096 Note that we are also not checking the mod_counters here.
3097 If everything else matches execpt the mod_counter, that's
3098 good enough for this test. We will deal with the mod_counters
3099 a little later in the autoconfiguration process.
3100
3101 (clabel1->mod_counter == clabel2->mod_counter) &&
3102
3103 The reason we don't check for this is that failed disks
3104 will have lower modification counts. If those disks are
3105 not added to the set they used to belong to, then they will
3106 form their own set, which may result in 2 different sets,
3107 for example, competing to be configured at raid0, and
3108 perhaps competing to be the root filesystem set. If the
3109 wrong ones get configured, or both attempt to become /,
3110 weird behaviour and or serious lossage will occur. Thus we
3111 need to bring them into the fold here, and kick them out at
3112 a later point.
3113
3114 */
3115
3116 clabel1 = cset->ac->clabel;
3117 clabel2 = ac->clabel;
3118 if ((clabel1->version == clabel2->version) &&
3119 (clabel1->serial_number == clabel2->serial_number) &&
3120 (clabel1->num_rows == clabel2->num_rows) &&
3121 (clabel1->num_columns == clabel2->num_columns) &&
3122 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3123 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3124 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3125 (clabel1->parityConfig == clabel2->parityConfig) &&
3126 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3127 (clabel1->blockSize == clabel2->blockSize) &&
3128 (clabel1->numBlocks == clabel2->numBlocks) &&
3129 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3130 (clabel1->root_partition == clabel2->root_partition) &&
3131 (clabel1->last_unit == clabel2->last_unit) &&
3132 (clabel1->config_order == clabel2->config_order)) {
3133 /* if it get's here, it almost *has* to be a match */
3134 } else {
3135 /* it's not consistent with somebody in the set..
3136 punt */
3137 return(0);
3138 }
3139 /* all was fine.. it must fit... */
3140 return(1);
3141 }
3142
/*
 * Decide whether config set cset has enough live components to be
 * configured.  A component only "counts" if its mod_counter equals
 * the highest mod_counter in the set (stale components are treated as
 * missing).  RAID 1 is special-cased: components are taken in
 * (even, odd) pairs and the set is only unusable when both halves of
 * some pair are missing.  For parityConfig '0'/'4'/'5' the usual
 * missing-component limits (0 resp. 1) apply.  Returns 1 if the set
 * can be configured, 0 if not.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set. If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	   (the maximum over all members; failed components lag behind) */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a current (mod_counter matches) component
		   for column c */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				    ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3245
3246 void
3247 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3248 RF_Raid_t *raidPtr)
3249 {
3250 RF_ComponentLabel_t *clabel;
3251 int i;
3252
3253 clabel = ac->clabel;
3254
3255 /* 1. Fill in the common stuff */
3256 config->numRow = clabel->num_rows = 1;
3257 config->numCol = clabel->num_columns;
3258 config->numSpare = 0; /* XXX should this be set here? */
3259 config->sectPerSU = clabel->sectPerSU;
3260 config->SUsPerPU = clabel->SUsPerPU;
3261 config->SUsPerRU = clabel->SUsPerRU;
3262 config->parityConfig = clabel->parityConfig;
3263 /* XXX... */
3264 strcpy(config->diskQueueType,"fifo");
3265 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3266 config->layoutSpecificSize = 0; /* XXX ?? */
3267
3268 while(ac!=NULL) {
3269 /* row/col values will be in range due to the checks
3270 in reasonable_label() */
3271 strcpy(config->devnames[0][ac->clabel->column],
3272 ac->devname);
3273 ac = ac->next;
3274 }
3275
3276 for(i=0;i<RF_MAXDBGV;i++) {
3277 config->debugVars[i][0] = 0;
3278 }
3279 }
3280
3281 int
3282 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3283 {
3284 RF_ComponentLabel_t clabel;
3285 struct vnode *vp;
3286 dev_t dev;
3287 int column;
3288 int sparecol;
3289
3290 raidPtr->autoconfigure = new_value;
3291
3292 for(column=0; column<raidPtr->numCol; column++) {
3293 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3294 dev = raidPtr->Disks[column].dev;
3295 vp = raidPtr->raid_cinfo[column].ci_vp;
3296 raidread_component_label(dev, vp, &clabel);
3297 clabel.autoconfigure = new_value;
3298 raidwrite_component_label(dev, vp, &clabel);
3299 }
3300 }
3301 for(column = 0; column < raidPtr->numSpare ; column++) {
3302 sparecol = raidPtr->numCol + column;
3303 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3304 dev = raidPtr->Disks[sparecol].dev;
3305 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3306 raidread_component_label(dev, vp, &clabel);
3307 clabel.autoconfigure = new_value;
3308 raidwrite_component_label(dev, vp, &clabel);
3309 }
3310 }
3311 return(new_value);
3312 }
3313
3314 int
3315 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3316 {
3317 RF_ComponentLabel_t clabel;
3318 struct vnode *vp;
3319 dev_t dev;
3320 int column;
3321 int sparecol;
3322
3323 raidPtr->root_partition = new_value;
3324 for(column=0; column<raidPtr->numCol; column++) {
3325 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3326 dev = raidPtr->Disks[column].dev;
3327 vp = raidPtr->raid_cinfo[column].ci_vp;
3328 raidread_component_label(dev, vp, &clabel);
3329 clabel.root_partition = new_value;
3330 raidwrite_component_label(dev, vp, &clabel);
3331 }
3332 }
3333 for(column = 0; column < raidPtr->numSpare ; column++) {
3334 sparecol = raidPtr->numCol + column;
3335 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3336 dev = raidPtr->Disks[sparecol].dev;
3337 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3338 raidread_component_label(dev, vp, &clabel);
3339 clabel.root_partition = new_value;
3340 raidwrite_component_label(dev, vp, &clabel);
3341 }
3342 }
3343 return(new_value);
3344 }
3345
3346 void
3347 rf_release_all_vps(RF_ConfigSet_t *cset)
3348 {
3349 RF_AutoConfig_t *ac;
3350
3351 ac = cset->ac;
3352 while(ac!=NULL) {
3353 /* Close the vp, and give it back */
3354 if (ac->vp) {
3355 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3356 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3357 vput(ac->vp);
3358 ac->vp = NULL;
3359 }
3360 ac = ac->next;
3361 }
3362 }
3363
3364
3365 void
3366 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3367 {
3368 RF_AutoConfig_t *ac;
3369 RF_AutoConfig_t *next_ac;
3370
3371 ac = cset->ac;
3372 while(ac!=NULL) {
3373 next_ac = ac->next;
3374 /* nuke the label */
3375 free(ac->clabel, M_RAIDFRAME);
3376 /* cleanup the config structure */
3377 free(ac, M_RAIDFRAME);
3378 /* "next.." */
3379 ac = next_ac;
3380 }
3381 /* and, finally, nuke the config set */
3382 free(cset, M_RAIDFRAME);
3383 }
3384
3385
3386 void
3387 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3388 {
3389 /* current version number */
3390 clabel->version = RF_COMPONENT_LABEL_VERSION;
3391 clabel->serial_number = raidPtr->serial_number;
3392 clabel->mod_counter = raidPtr->mod_counter;
3393 clabel->num_rows = 1;
3394 clabel->num_columns = raidPtr->numCol;
3395 clabel->clean = RF_RAID_DIRTY; /* not clean */
3396 clabel->status = rf_ds_optimal; /* "It's good!" */
3397
3398 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3399 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3400 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3401
3402 clabel->blockSize = raidPtr->bytesPerSector;
3403 clabel->numBlocks = raidPtr->sectorsPerDisk;
3404
3405 /* XXX not portable */
3406 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3407 clabel->maxOutstanding = raidPtr->maxOutstanding;
3408 clabel->autoconfigure = raidPtr->autoconfigure;
3409 clabel->root_partition = raidPtr->root_partition;
3410 clabel->last_unit = raidPtr->raidid;
3411 clabel->config_order = raidPtr->config_order;
3412 }
3413
3414 int
3415 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3416 {
3417 RF_Raid_t *raidPtr;
3418 RF_Config_t *config;
3419 int raidID;
3420 int retcode;
3421
3422 #ifdef DEBUG
3423 printf("RAID autoconfigure\n");
3424 #endif
3425
3426 retcode = 0;
3427 *unit = -1;
3428
3429 /* 1. Create a config structure */
3430
3431 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3432 M_RAIDFRAME,
3433 M_NOWAIT);
3434 if (config==NULL) {
3435 printf("Out of mem!?!?\n");
3436 /* XXX do something more intelligent here. */
3437 return(1);
3438 }
3439
3440 memset(config, 0, sizeof(RF_Config_t));
3441
3442 /*
3443 2. Figure out what RAID ID this one is supposed to live at
3444 See if we can get the same RAID dev that it was configured
3445 on last time..
3446 */
3447
3448 raidID = cset->ac->clabel->last_unit;
3449 if ((raidID < 0) || (raidID >= numraid)) {
3450 /* let's not wander off into lala land. */
3451 raidID = numraid - 1;
3452 }
3453 if (raidPtrs[raidID]->valid != 0) {
3454
3455 /*
3456 Nope... Go looking for an alternative...
3457 Start high so we don't immediately use raid0 if that's
3458 not taken.
3459 */
3460
3461 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3462 if (raidPtrs[raidID]->valid == 0) {
3463 /* can use this one! */
3464 break;
3465 }
3466 }
3467 }
3468
3469 if (raidID < 0) {
3470 /* punt... */
3471 printf("Unable to auto configure this set!\n");
3472 printf("(Out of RAID devs!)\n");
3473 free(config, M_RAIDFRAME);
3474 return(1);
3475 }
3476
3477 #ifdef DEBUG
3478 printf("Configuring raid%d:\n",raidID);
3479 #endif
3480
3481 raidPtr = raidPtrs[raidID];
3482
3483 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3484 raidPtr->raidid = raidID;
3485 raidPtr->openings = RAIDOUTSTANDING;
3486
3487 /* 3. Build the configuration structure */
3488 rf_create_configuration(cset->ac, config, raidPtr);
3489
3490 /* 4. Do the configuration */
3491 retcode = rf_Configure(raidPtr, config, cset->ac);
3492
3493 if (retcode == 0) {
3494
3495 raidinit(raidPtrs[raidID]);
3496
3497 rf_markalldirty(raidPtrs[raidID]);
3498 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3499 if (cset->ac->clabel->root_partition==1) {
3500 /* everything configured just fine. Make a note
3501 that this set is eligible to be root. */
3502 cset->rootable = 1;
3503 /* XXX do this here? */
3504 raidPtrs[raidID]->root_partition = 1;
3505 }
3506 }
3507
3508 /* 5. Cleanup */
3509 free(config, M_RAIDFRAME);
3510
3511 *unit = raidID;
3512 return(retcode);
3513 }
3514
3515 void
3516 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3517 {
3518 struct buf *bp;
3519
3520 bp = (struct buf *)desc->bp;
3521 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3522 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3523 }
3524
/*
 * Initialize one of the driver's resource pools: create the pool at
 * IPL_BIO, cap it at xmax items, and pre-allocate/retain xmin items
 * so allocations from interrupt context are unlikely to fail.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);		/* never cache more than xmax */
	pool_prime(p, xmin);		/* pre-allocate xmin items now */
	pool_setlowat(p, xmin);		/* keep at least xmin around */
}
3534
3535 /*
3536 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3537 * if there is IO pending and if that IO could possibly be done for a
3538 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3539 * otherwise.
3540 *
3541 */
3542
3543 int
3544 rf_buf_queue_check(int raidid)
3545 {
3546 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
3547 raidPtrs[raidid]->openings > 0) {
3548 /* there is work to do */
3549 return 0;
3550 }
3551 /* default is nothing to do */
3552 return 1;
3553 }
3554
3555 int
3556 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
3557 {
3558 struct partinfo dpart;
3559 struct dkwedge_info dkw;
3560 int error;
3561
3562 error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred);
3563 if (error == 0) {
3564 diskPtr->blockSize = dpart.disklab->d_secsize;
3565 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
3566 diskPtr->partitionSize = dpart.part->p_size;
3567 return 0;
3568 }
3569
3570 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred);
3571 if (error == 0) {
3572 diskPtr->blockSize = 512; /* XXX */
3573 diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
3574 diskPtr->partitionSize = dkw.dkw_size;
3575 return 0;
3576 }
3577 return error;
3578 }
3579
static int
raid_match(struct device *self, struct cfdata *cfdata,
    void *aux)
{

	/* pseudo-device; always matches */
	return 1;
}
3586
/*
 * Attach routine for the raid pseudo-device.  Intentionally empty:
 * all real setup happens elsewhere (configuration via ioctl or
 * autoconfiguration).
 */
static void
raid_attach(struct device *parent, struct device *self,
	    void *aux)
{

}
3593
3594
3595 static int
3596 raid_detach(struct device *self, int flags)
3597 {
3598 struct raid_softc *rs = (struct raid_softc *)self;
3599
3600 if (rs->sc_flags & RAIDF_INITED)
3601 return EBUSY;
3602
3603 return 0;
3604 }
3605
/*
 * Publish a synthetic disk geometry for the RAID device via the
 * device-properties dictionary, so userland tools see plausible
 * sectors/tracks/cylinders values.  The track/cylinder figures are
 * fabricated from the stripe layout (4 tracks per data column);
 * only sectors-per-unit and sector-size are real.
 */
static void
rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	prop_dictionary_t disk_info, odisk_info, geom;
	disk_info = prop_dictionary_create();
	geom = prop_dictionary_create();
	prop_dictionary_set_uint64(geom, "sectors-per-unit",
				   raidPtr->totalSectors);
	prop_dictionary_set_uint32(geom, "sector-size",
				   raidPtr->bytesPerSector);

	/* Invented geometry: one "track" is a stripe's worth of data,
	   and each column contributes 4 "tracks" per cylinder. */
	prop_dictionary_set_uint16(geom, "sectors-per-track",
				   raidPtr->Layout.dataSectorsPerStripe);
	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
				   4 * raidPtr->numCol);

	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
	    raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
	    (4 * raidPtr->numCol)));

	/* disk_info takes its own reference to geom; drop ours. */
	prop_dictionary_set(disk_info, "geometry", geom);
	prop_object_release(geom);
	prop_dictionary_set(device_properties(rs->sc_dev),
			    "disk-info", disk_info);
	/* Swap in the new dictionary and release the previous one,
	   if any, so its reference is not leaked. */
	odisk_info = rs->sc_dkdev.dk_info;
	rs->sc_dkdev.dk_info = disk_info;
	if (odisk_info)
		prop_object_release(odisk_info);
}
3635