rf_netbsdkintf.c revision 1.197 1 /* $NetBSD: rf_netbsdkintf.c,v 1.197 2006/01/08 21:53:26 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.197 2006/01/08 21:53:26 oster Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/bufq.h>
169 #include <sys/user.h>
170 #include <sys/reboot.h>
171
172 #include <dev/raidframe/raidframevar.h>
173 #include <dev/raidframe/raidframeio.h>
174 #include "raid.h"
175 #include "opt_raid_autoconfig.h"
176 #include "rf_raid.h"
177 #include "rf_copyback.h"
178 #include "rf_dag.h"
179 #include "rf_dagflags.h"
180 #include "rf_desc.h"
181 #include "rf_diskqueue.h"
182 #include "rf_etimer.h"
183 #include "rf_general.h"
184 #include "rf_kintf.h"
185 #include "rf_options.h"
186 #include "rf_driver.h"
187 #include "rf_parityscan.h"
188 #include "rf_threadstuff.h"
189
190 #ifdef DEBUG
191 int rf_kdebug_level = 0;
192 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
193 #else /* DEBUG */
194 #define db1_printf(a) { }
195 #endif /* DEBUG */
196
197 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
198
199 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
200
201 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
202 * spare table */
203 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
204 * installation process */
205
206 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
207
208 /* prototypes */
209 static void KernelWakeupFunc(struct buf *);
210 static void InitBP(struct buf *, struct vnode *, unsigned,
211 dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *),
212 void *, int, struct proc *);
213 static void raidinit(RF_Raid_t *);
214
215 void raidattach(int);
216
217 dev_type_open(raidopen);
218 dev_type_close(raidclose);
219 dev_type_read(raidread);
220 dev_type_write(raidwrite);
221 dev_type_ioctl(raidioctl);
222 dev_type_strategy(raidstrategy);
223 dev_type_dump(raiddump);
224 dev_type_size(raidsize);
225
226 const struct bdevsw raid_bdevsw = {
227 raidopen, raidclose, raidstrategy, raidioctl,
228 raiddump, raidsize, D_DISK
229 };
230
231 const struct cdevsw raid_cdevsw = {
232 raidopen, raidclose, raidread, raidwrite, raidioctl,
233 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
234 };
235
236 /* XXX Not sure if the following should be replacing the raidPtrs above,
237 or if it should be used in conjunction with that...
238 */
239
/*
 * Per-unit software state for a RAID pseudo-disk.  One of these is
 * allocated per configured unit in raidattach(); see the raid_softc
 * array below.
 */
struct raid_softc {
	int     sc_flags;	/* flags (RAIDF_* values below) */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked (see raidlock()/raidunlock()) */
254
255 #define raidunit(x) DISKUNIT(x)
256 int numraid = 0;
257
258 /*
259 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
260 * Be aware that large numbers can allow the driver to consume a lot of
261 * kernel memory, especially on writes, and in degraded mode reads.
262 *
263 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
264 * a single 64K write will typically require 64K for the old data,
265 * 64K for the old parity, and 64K for the new parity, for a total
266 * of 192K (if the parity buffer is not re-used immediately).
267 * Even it if is used immediately, that's still 128K, which when multiplied
268 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
269 *
270 * Now in degraded mode, for example, a 64K read on the above setup may
271 * require data reconstruction, which will require *all* of the 4 remaining
272 * disks to participate -- 4 * 32K/disk == 128K again.
273 */
274
275 #ifndef RAIDOUTSTANDING
276 #define RAIDOUTSTANDING 6
277 #endif
278
279 #define RAIDLABELDEV(dev) \
280 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
281
282 /* declared here, and made public, for the benefit of KVM stuff.. */
283 struct raid_softc *raid_softc;
284
285 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
286 struct disklabel *);
287 static void raidgetdisklabel(dev_t);
288 static void raidmakedisklabel(struct raid_softc *);
289
290 static int raidlock(struct raid_softc *);
291 static void raidunlock(struct raid_softc *);
292
293 static void rf_markalldirty(RF_Raid_t *);
294
295 struct device *raidrootdev;
296
297 void rf_ReconThread(struct rf_recon_req *);
298 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
299 void rf_CopybackThread(RF_Raid_t *raidPtr);
300 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
301 int rf_autoconfig(struct device *self);
302 void rf_buildroothack(RF_ConfigSet_t *);
303
304 RF_AutoConfig_t *rf_find_raid_components(void);
305 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
306 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
307 static int rf_reasonable_label(RF_ComponentLabel_t *);
308 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
309 int rf_set_autoconfig(RF_Raid_t *, int);
310 int rf_set_rootpartition(RF_Raid_t *, int);
311 void rf_release_all_vps(RF_ConfigSet_t *);
312 void rf_cleanup_config_set(RF_ConfigSet_t *);
313 int rf_have_enough_components(RF_ConfigSet_t *);
314 int rf_auto_config_set(RF_ConfigSet_t *, int *);
315
316 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
317 allow autoconfig to take place.
318 Note that this is overridden by having
319 RAID_AUTOCONFIG as an option in the
320 kernel config file. */
321
322 struct RF_Pools_s rf_pools;
323
/*
 * raidattach: pseudo-device attach routine, called once at boot with
 * the number of units ("raid%d") to provide.  Allocates the global
 * raidPtrs / raid_softc / raidrootdev arrays, boots the RAIDframe
 * core, and registers a config finalizer so autoconfiguration runs
 * after all real hardware has attached.
 *
 * NOTE(review): on a partial allocation failure inside the per-unit
 * loop, numraid is simply truncated and the function returns; the
 * memory already allocated for the failed tail is not released.
 * Acceptable at boot time, but worth confirming.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

	rf_mutex_init(&rf_sparet_wait_mutex);

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		printf("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	/* Fake struct device entries used only as root-device handles
	 * by rf_buildroothack(); the fields assigned below are set
	 * explicitly.  NOTE(review): this buffer is not zeroed --
	 * presumably no other struct device field is consulted, but
	 * confirm before relying on it. */
	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
					      M_RAIDFRAME, M_NOWAIT);
	if (raidrootdev == NULL) {
		panic("No memory for RAIDframe driver!!?!?!");
	}

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
		pseudo_disk_init(&raid_softc[raidID].sc_dkdev);

		raidrootdev[raidID].dv_class = DV_DISK;
		raidrootdev[raidID].dv_cfdata = NULL;
		raidrootdev[raidID].dv_unit = raidID;
		raidrootdev[raidID].dv_parent = NULL;
		raidrootdev[raidID].dv_flags = 0;
		snprintf(raidrootdev[raidID].dv_xname,
		    sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			/* Shrink the visible unit count to what we
			 * actually managed to allocate. */
			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
			numraid = raidID;
			return;
		}
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		printf("WARNING: unable to register RAIDframe finalizer\n");
}
414
415 int
416 rf_autoconfig(struct device *self)
417 {
418 RF_AutoConfig_t *ac_list;
419 RF_ConfigSet_t *config_sets;
420
421 if (raidautoconfig == 0)
422 return (0);
423
424 /* XXX This code can only be run once. */
425 raidautoconfig = 0;
426
427 /* 1. locate all RAID components on the system */
428 #ifdef DEBUG
429 printf("Searching for RAID components...\n");
430 #endif
431 ac_list = rf_find_raid_components();
432
433 /* 2. Sort them into their respective sets. */
434 config_sets = rf_create_auto_sets(ac_list);
435
436 /*
437 * 3. Evaluate each set andconfigure the valid ones.
438 * This gets done in rf_buildroothack().
439 */
440 rf_buildroothack(config_sets);
441
442 return (1);
443 }
444
445 void
446 rf_buildroothack(RF_ConfigSet_t *config_sets)
447 {
448 RF_ConfigSet_t *cset;
449 RF_ConfigSet_t *next_cset;
450 int retcode;
451 int raidID;
452 int rootID;
453 int num_root;
454
455 rootID = 0;
456 num_root = 0;
457 cset = config_sets;
458 while(cset != NULL ) {
459 next_cset = cset->next;
460 if (rf_have_enough_components(cset) &&
461 cset->ac->clabel->autoconfigure==1) {
462 retcode = rf_auto_config_set(cset,&raidID);
463 if (!retcode) {
464 if (cset->rootable) {
465 rootID = raidID;
466 num_root++;
467 }
468 } else {
469 /* The autoconfig didn't work :( */
470 #if DEBUG
471 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
472 #endif
473 rf_release_all_vps(cset);
474 }
475 } else {
476 /* we're not autoconfiguring this set...
477 release the associated resources */
478 rf_release_all_vps(cset);
479 }
480 /* cleanup */
481 rf_cleanup_config_set(cset);
482 cset = next_cset;
483 }
484
485 /* we found something bootable... */
486
487 if (num_root == 1) {
488 booted_device = &raidrootdev[rootID];
489 } else if (num_root > 1) {
490 /* we can't guess.. require the user to answer... */
491 boothowto |= RB_ASKNAME;
492 }
493 }
494
495
496 int
497 raidsize(dev_t dev)
498 {
499 struct raid_softc *rs;
500 struct disklabel *lp;
501 int part, unit, omask, size;
502
503 unit = raidunit(dev);
504 if (unit >= numraid)
505 return (-1);
506 rs = &raid_softc[unit];
507
508 if ((rs->sc_flags & RAIDF_INITED) == 0)
509 return (-1);
510
511 part = DISKPART(dev);
512 omask = rs->sc_dkdev.dk_openmask & (1 << part);
513 lp = rs->sc_dkdev.dk_label;
514
515 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
516 return (-1);
517
518 if (lp->d_partitions[part].p_fstype != FS_SWAP)
519 size = -1;
520 else
521 size = lp->d_partitions[part].p_size *
522 (lp->d_secsize / DEV_BSIZE);
523
524 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
525 return (-1);
526
527 return (size);
528
529 }
530
531 int
532 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
533 {
534 /* Not implemented. */
535 return ENXIO;
536 }
/* ARGSUSED */
/*
 * raidopen: open a partition of a RAID pseudo-disk.
 *
 * Serializes against configuration changes with raidlock(), re-reads
 * the disklabel on the first open of a configured unit, validates the
 * requested partition, records the open in the char/block open masks
 * (which prevents unconfiguration while open), and marks the
 * components dirty on the first open of a configured set.
 */
int
raidopen(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Lock out concurrent (un)configuration for the duration. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/* First opener of a configured unit: refresh the label. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	/* dk_openmask is still the pre-open value here, so this fires
	 * exactly once: on the very first open of the set. */
	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * raidclose: close a partition of a RAID pseudo-disk.
 *
 * Clears the appropriate open-mask bit under raidlock().  When the
 * last partition closes on a configured set, the component labels are
 * given a final update; if the system is shutting down the set is
 * also shut down and the pseudo-disk detached.
 *
 * NOTE(review): the rf_Shutdown() return value is captured in
 * `error' but the function unconditionally returns 0 -- presumably
 * intentional (close cannot usefully fail here), but confirm.
 */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Lock out concurrent (un)configuration for the duration. */
	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
		    RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			error = rf_Shutdown(raidPtrs[unit]);

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* Detach the disk. */
			pseudo_disk_detach(&rs->sc_dkdev);
		}
	}

	raidunlock(rs);
	return (0);

}
664
665 void
666 raidstrategy(struct buf *bp)
667 {
668 int s;
669
670 unsigned int raidID = raidunit(bp->b_dev);
671 RF_Raid_t *raidPtr;
672 struct raid_softc *rs = &raid_softc[raidID];
673 int wlabel;
674
675 if ((rs->sc_flags & RAIDF_INITED) ==0) {
676 bp->b_error = ENXIO;
677 bp->b_flags |= B_ERROR;
678 goto done;
679 }
680 if (raidID >= numraid || !raidPtrs[raidID]) {
681 bp->b_error = ENODEV;
682 bp->b_flags |= B_ERROR;
683 goto done;
684 }
685 raidPtr = raidPtrs[raidID];
686 if (!raidPtr->valid) {
687 bp->b_error = ENODEV;
688 bp->b_flags |= B_ERROR;
689 goto done;
690 }
691 if (bp->b_bcount == 0) {
692 db1_printf(("b_bcount is zero..\n"));
693 goto done;
694 }
695
696 /*
697 * Do bounds checking and adjust transfer. If there's an
698 * error, the bounds check will flag that for us.
699 */
700
701 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
702 if (DISKPART(bp->b_dev) == RAW_PART) {
703 uint64_t size; /* device size in DEV_BSIZE unit */
704
705 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
706 size = raidPtr->totalSectors <<
707 (raidPtr->logBytesPerSector - DEV_BSHIFT);
708 } else {
709 size = raidPtr->totalSectors >>
710 (DEV_BSHIFT - raidPtr->logBytesPerSector);
711 }
712 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
713 goto done;
714 }
715 } else {
716 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
717 db1_printf(("Bounds check failed!!:%d %d\n",
718 (int) bp->b_blkno, (int) wlabel));
719 goto done;
720 }
721 }
722 s = splbio();
723
724 bp->b_resid = 0;
725
726 /* stuff it onto our queue */
727 BUFQ_PUT(rs->buf_queue, bp);
728
729 /* scheduled the IO to happen at the next convenient time */
730 wakeup(&(raidPtrs[raidID]->iodone));
731
732 splx(s);
733 return;
734
735 done:
736 bp->b_resid = bp->b_bcount;
737 biodone(bp);
738 }
739 /* ARGSUSED */
740 int
741 raidread(dev_t dev, struct uio *uio, int flags)
742 {
743 int unit = raidunit(dev);
744 struct raid_softc *rs;
745
746 if (unit >= numraid)
747 return (ENXIO);
748 rs = &raid_softc[unit];
749
750 if ((rs->sc_flags & RAIDF_INITED) == 0)
751 return (ENXIO);
752
753 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
754
755 }
756 /* ARGSUSED */
757 int
758 raidwrite(dev_t dev, struct uio *uio, int flags)
759 {
760 int unit = raidunit(dev);
761 struct raid_softc *rs;
762
763 if (unit >= numraid)
764 return (ENXIO);
765 rs = &raid_softc[unit];
766
767 if ((rs->sc_flags & RAIDF_INITED) == 0)
768 return (ENXIO);
769
770 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
771
772 }
773
774 int
775 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
776 {
777 int unit = raidunit(dev);
778 int error = 0;
779 int part, pmask;
780 struct raid_softc *rs;
781 RF_Config_t *k_cfg, *u_cfg;
782 RF_Raid_t *raidPtr;
783 RF_RaidDisk_t *diskPtr;
784 RF_AccTotals_t *totals;
785 RF_DeviceConfig_t *d_cfg, **ucfgp;
786 u_char *specific_buf;
787 int retcode = 0;
788 int column;
789 int raidid;
790 struct rf_recon_req *rrcopy, *rr;
791 RF_ComponentLabel_t *clabel;
792 RF_ComponentLabel_t ci_label;
793 RF_ComponentLabel_t **clabel_ptr;
794 RF_SingleComponent_t *sparePtr,*componentPtr;
795 RF_SingleComponent_t hot_spare;
796 RF_SingleComponent_t component;
797 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
798 int i, j, d;
799 #ifdef __HAVE_OLD_DISKLABEL
800 struct disklabel newlabel;
801 #endif
802
803 if (unit >= numraid)
804 return (ENXIO);
805 rs = &raid_softc[unit];
806 raidPtr = raidPtrs[unit];
807
808 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
809 (int) DISKPART(dev), (int) unit, (int) cmd));
810
811 /* Must be open for writes for these commands... */
812 switch (cmd) {
813 case DIOCSDINFO:
814 case DIOCWDINFO:
815 #ifdef __HAVE_OLD_DISKLABEL
816 case ODIOCWDINFO:
817 case ODIOCSDINFO:
818 #endif
819 case DIOCWLABEL:
820 if ((flag & FWRITE) == 0)
821 return (EBADF);
822 }
823
824 /* Must be initialized for these... */
825 switch (cmd) {
826 case DIOCGDINFO:
827 case DIOCSDINFO:
828 case DIOCWDINFO:
829 #ifdef __HAVE_OLD_DISKLABEL
830 case ODIOCGDINFO:
831 case ODIOCWDINFO:
832 case ODIOCSDINFO:
833 case ODIOCGDEFLABEL:
834 #endif
835 case DIOCGPART:
836 case DIOCWLABEL:
837 case DIOCGDEFLABEL:
838 case RAIDFRAME_SHUTDOWN:
839 case RAIDFRAME_REWRITEPARITY:
840 case RAIDFRAME_GET_INFO:
841 case RAIDFRAME_RESET_ACCTOTALS:
842 case RAIDFRAME_GET_ACCTOTALS:
843 case RAIDFRAME_KEEP_ACCTOTALS:
844 case RAIDFRAME_GET_SIZE:
845 case RAIDFRAME_FAIL_DISK:
846 case RAIDFRAME_COPYBACK:
847 case RAIDFRAME_CHECK_RECON_STATUS:
848 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
849 case RAIDFRAME_GET_COMPONENT_LABEL:
850 case RAIDFRAME_SET_COMPONENT_LABEL:
851 case RAIDFRAME_ADD_HOT_SPARE:
852 case RAIDFRAME_REMOVE_HOT_SPARE:
853 case RAIDFRAME_INIT_LABELS:
854 case RAIDFRAME_REBUILD_IN_PLACE:
855 case RAIDFRAME_CHECK_PARITY:
856 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
857 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
858 case RAIDFRAME_CHECK_COPYBACK_STATUS:
859 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
860 case RAIDFRAME_SET_AUTOCONFIG:
861 case RAIDFRAME_SET_ROOT:
862 case RAIDFRAME_DELETE_COMPONENT:
863 case RAIDFRAME_INCORPORATE_HOT_SPARE:
864 if ((rs->sc_flags & RAIDF_INITED) == 0)
865 return (ENXIO);
866 }
867
868 switch (cmd) {
869
870 /* configure the system */
871 case RAIDFRAME_CONFIGURE:
872
873 if (raidPtr->valid) {
874 /* There is a valid RAID set running on this unit! */
875 printf("raid%d: Device already configured!\n",unit);
876 return(EINVAL);
877 }
878
879 /* copy-in the configuration information */
880 /* data points to a pointer to the configuration structure */
881
882 u_cfg = *((RF_Config_t **) data);
883 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
884 if (k_cfg == NULL) {
885 return (ENOMEM);
886 }
887 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
888 if (retcode) {
889 RF_Free(k_cfg, sizeof(RF_Config_t));
890 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
891 retcode));
892 return (retcode);
893 }
894 /* allocate a buffer for the layout-specific data, and copy it
895 * in */
896 if (k_cfg->layoutSpecificSize) {
897 if (k_cfg->layoutSpecificSize > 10000) {
898 /* sanity check */
899 RF_Free(k_cfg, sizeof(RF_Config_t));
900 return (EINVAL);
901 }
902 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
903 (u_char *));
904 if (specific_buf == NULL) {
905 RF_Free(k_cfg, sizeof(RF_Config_t));
906 return (ENOMEM);
907 }
908 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
909 k_cfg->layoutSpecificSize);
910 if (retcode) {
911 RF_Free(k_cfg, sizeof(RF_Config_t));
912 RF_Free(specific_buf,
913 k_cfg->layoutSpecificSize);
914 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
915 retcode));
916 return (retcode);
917 }
918 } else
919 specific_buf = NULL;
920 k_cfg->layoutSpecific = specific_buf;
921
922 /* should do some kind of sanity check on the configuration.
923 * Store the sum of all the bytes in the last byte? */
924
925 /* configure the system */
926
927 /*
928 * Clear the entire RAID descriptor, just to make sure
929 * there is no stale data left in the case of a
930 * reconfiguration
931 */
932 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
933 raidPtr->raidid = unit;
934
935 retcode = rf_Configure(raidPtr, k_cfg, NULL);
936
937 if (retcode == 0) {
938
939 /* allow this many simultaneous IO's to
940 this RAID device */
941 raidPtr->openings = RAIDOUTSTANDING;
942
943 raidinit(raidPtr);
944 rf_markalldirty(raidPtr);
945 }
946 /* free the buffers. No return code here. */
947 if (k_cfg->layoutSpecificSize) {
948 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
949 }
950 RF_Free(k_cfg, sizeof(RF_Config_t));
951
952 return (retcode);
953
954 /* shutdown the system */
955 case RAIDFRAME_SHUTDOWN:
956
957 if ((error = raidlock(rs)) != 0)
958 return (error);
959
960 /*
961 * If somebody has a partition mounted, we shouldn't
962 * shutdown.
963 */
964
965 part = DISKPART(dev);
966 pmask = (1 << part);
967 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
968 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
969 (rs->sc_dkdev.dk_copenmask & pmask))) {
970 raidunlock(rs);
971 return (EBUSY);
972 }
973
974 retcode = rf_Shutdown(raidPtr);
975
976 /* It's no longer initialized... */
977 rs->sc_flags &= ~RAIDF_INITED;
978
979 /* Detach the disk. */
980 pseudo_disk_detach(&rs->sc_dkdev);
981
982 raidunlock(rs);
983
984 return (retcode);
985 case RAIDFRAME_GET_COMPONENT_LABEL:
986 clabel_ptr = (RF_ComponentLabel_t **) data;
987 /* need to read the component label for the disk indicated
988 by row,column in clabel */
989
990 /* For practice, let's get it directly fromdisk, rather
991 than from the in-core copy */
992 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
993 (RF_ComponentLabel_t *));
994 if (clabel == NULL)
995 return (ENOMEM);
996
997 retcode = copyin( *clabel_ptr, clabel,
998 sizeof(RF_ComponentLabel_t));
999
1000 if (retcode) {
1001 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1002 return(retcode);
1003 }
1004
1005 clabel->row = 0; /* Don't allow looking at anything else.*/
1006
1007 column = clabel->column;
1008
1009 if ((column < 0) || (column >= raidPtr->numCol +
1010 raidPtr->numSpare)) {
1011 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1012 return(EINVAL);
1013 }
1014
1015 retcode = raidread_component_label(raidPtr->Disks[column].dev,
1016 raidPtr->raid_cinfo[column].ci_vp,
1017 clabel );
1018
1019 if (retcode == 0) {
1020 retcode = copyout(clabel, *clabel_ptr,
1021 sizeof(RF_ComponentLabel_t));
1022 }
1023 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1024 return (retcode);
1025
1026 case RAIDFRAME_SET_COMPONENT_LABEL:
1027 clabel = (RF_ComponentLabel_t *) data;
1028
1029 /* XXX check the label for valid stuff... */
1030 /* Note that some things *should not* get modified --
1031 the user should be re-initing the labels instead of
1032 trying to patch things.
1033 */
1034
1035 raidid = raidPtr->raidid;
1036 #if DEBUG
1037 printf("raid%d: Got component label:\n", raidid);
1038 printf("raid%d: Version: %d\n", raidid, clabel->version);
1039 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1040 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1041 printf("raid%d: Column: %d\n", raidid, clabel->column);
1042 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1043 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1044 printf("raid%d: Status: %d\n", raidid, clabel->status);
1045 #endif
1046 clabel->row = 0;
1047 column = clabel->column;
1048
1049 if ((column < 0) || (column >= raidPtr->numCol)) {
1050 return(EINVAL);
1051 }
1052
1053 /* XXX this isn't allowed to do anything for now :-) */
1054
1055 /* XXX and before it is, we need to fill in the rest
1056 of the fields!?!?!?! */
1057 #if 0
1058 raidwrite_component_label(
1059 raidPtr->Disks[column].dev,
1060 raidPtr->raid_cinfo[column].ci_vp,
1061 clabel );
1062 #endif
1063 return (0);
1064
1065 case RAIDFRAME_INIT_LABELS:
1066 clabel = (RF_ComponentLabel_t *) data;
1067 /*
1068 we only want the serial number from
1069 the above. We get all the rest of the information
1070 from the config that was used to create this RAID
1071 set.
1072 */
1073
1074 raidPtr->serial_number = clabel->serial_number;
1075
1076 raid_init_component_label(raidPtr, &ci_label);
1077 ci_label.serial_number = clabel->serial_number;
1078 ci_label.row = 0; /* we dont' pretend to support more */
1079
1080 for(column=0;column<raidPtr->numCol;column++) {
1081 diskPtr = &raidPtr->Disks[column];
1082 if (!RF_DEAD_DISK(diskPtr->status)) {
1083 ci_label.partitionSize = diskPtr->partitionSize;
1084 ci_label.column = column;
1085 raidwrite_component_label(
1086 raidPtr->Disks[column].dev,
1087 raidPtr->raid_cinfo[column].ci_vp,
1088 &ci_label );
1089 }
1090 }
1091
1092 return (retcode);
1093 case RAIDFRAME_SET_AUTOCONFIG:
1094 d = rf_set_autoconfig(raidPtr, *(int *) data);
1095 printf("raid%d: New autoconfig value is: %d\n",
1096 raidPtr->raidid, d);
1097 *(int *) data = d;
1098 return (retcode);
1099
1100 case RAIDFRAME_SET_ROOT:
1101 d = rf_set_rootpartition(raidPtr, *(int *) data);
1102 printf("raid%d: New rootpartition value is: %d\n",
1103 raidPtr->raidid, d);
1104 *(int *) data = d;
1105 return (retcode);
1106
1107 /* initialize all parity */
1108 case RAIDFRAME_REWRITEPARITY:
1109
1110 if (raidPtr->Layout.map->faultsTolerated == 0) {
1111 /* Parity for RAID 0 is trivially correct */
1112 raidPtr->parity_good = RF_RAID_CLEAN;
1113 return(0);
1114 }
1115
1116 if (raidPtr->parity_rewrite_in_progress == 1) {
1117 /* Re-write is already in progress! */
1118 return(EINVAL);
1119 }
1120
1121 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1122 rf_RewriteParityThread,
1123 raidPtr,"raid_parity");
1124 return (retcode);
1125
1126
1127 case RAIDFRAME_ADD_HOT_SPARE:
1128 sparePtr = (RF_SingleComponent_t *) data;
1129 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1130 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1131 return(retcode);
1132
1133 case RAIDFRAME_REMOVE_HOT_SPARE:
1134 return(retcode);
1135
1136 case RAIDFRAME_DELETE_COMPONENT:
1137 componentPtr = (RF_SingleComponent_t *)data;
1138 memcpy( &component, componentPtr,
1139 sizeof(RF_SingleComponent_t));
1140 retcode = rf_delete_component(raidPtr, &component);
1141 return(retcode);
1142
1143 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1144 componentPtr = (RF_SingleComponent_t *)data;
1145 memcpy( &component, componentPtr,
1146 sizeof(RF_SingleComponent_t));
1147 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1148 return(retcode);
1149
1150 case RAIDFRAME_REBUILD_IN_PLACE:
1151
1152 if (raidPtr->Layout.map->faultsTolerated == 0) {
1153 /* Can't do this on a RAID 0!! */
1154 return(EINVAL);
1155 }
1156
1157 if (raidPtr->recon_in_progress == 1) {
1158 /* a reconstruct is already in progress! */
1159 return(EINVAL);
1160 }
1161
1162 componentPtr = (RF_SingleComponent_t *) data;
1163 memcpy( &component, componentPtr,
1164 sizeof(RF_SingleComponent_t));
1165 component.row = 0; /* we don't support any more */
1166 column = component.column;
1167
1168 if ((column < 0) || (column >= raidPtr->numCol)) {
1169 return(EINVAL);
1170 }
1171
1172 RF_LOCK_MUTEX(raidPtr->mutex);
1173 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1174 (raidPtr->numFailures > 0)) {
1175 /* XXX 0 above shouldn't be constant!!! */
1176 /* some component other than this has failed.
1177 Let's not make things worse than they already
1178 are... */
1179 printf("raid%d: Unable to reconstruct to disk at:\n",
1180 raidPtr->raidid);
1181 printf("raid%d: Col: %d Too many failures.\n",
1182 raidPtr->raidid, column);
1183 RF_UNLOCK_MUTEX(raidPtr->mutex);
1184 return (EINVAL);
1185 }
1186 if (raidPtr->Disks[column].status ==
1187 rf_ds_reconstructing) {
1188 printf("raid%d: Unable to reconstruct to disk at:\n",
1189 raidPtr->raidid);
1190 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1191
1192 RF_UNLOCK_MUTEX(raidPtr->mutex);
1193 return (EINVAL);
1194 }
1195 if (raidPtr->Disks[column].status == rf_ds_spared) {
1196 RF_UNLOCK_MUTEX(raidPtr->mutex);
1197 return (EINVAL);
1198 }
1199 RF_UNLOCK_MUTEX(raidPtr->mutex);
1200
1201 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1202 if (rrcopy == NULL)
1203 return(ENOMEM);
1204
1205 rrcopy->raidPtr = (void *) raidPtr;
1206 rrcopy->col = column;
1207
1208 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1209 rf_ReconstructInPlaceThread,
1210 rrcopy,"raid_reconip");
1211 return(retcode);
1212
1213 case RAIDFRAME_GET_INFO:
1214 if (!raidPtr->valid)
1215 return (ENODEV);
1216 ucfgp = (RF_DeviceConfig_t **) data;
1217 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1218 (RF_DeviceConfig_t *));
1219 if (d_cfg == NULL)
1220 return (ENOMEM);
1221 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1222 d_cfg->rows = 1; /* there is only 1 row now */
1223 d_cfg->cols = raidPtr->numCol;
1224 d_cfg->ndevs = raidPtr->numCol;
1225 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1226 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1227 return (ENOMEM);
1228 }
1229 d_cfg->nspares = raidPtr->numSpare;
1230 if (d_cfg->nspares >= RF_MAX_DISKS) {
1231 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1232 return (ENOMEM);
1233 }
1234 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1235 d = 0;
1236 for (j = 0; j < d_cfg->cols; j++) {
1237 d_cfg->devs[d] = raidPtr->Disks[j];
1238 d++;
1239 }
1240 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1241 d_cfg->spares[i] = raidPtr->Disks[j];
1242 }
1243 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1244 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1245
1246 return (retcode);
1247
1248 case RAIDFRAME_CHECK_PARITY:
1249 *(int *) data = raidPtr->parity_good;
1250 return (0);
1251
1252 case RAIDFRAME_RESET_ACCTOTALS:
1253 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1254 return (0);
1255
1256 case RAIDFRAME_GET_ACCTOTALS:
1257 totals = (RF_AccTotals_t *) data;
1258 *totals = raidPtr->acc_totals;
1259 return (0);
1260
1261 case RAIDFRAME_KEEP_ACCTOTALS:
1262 raidPtr->keep_acc_totals = *(int *)data;
1263 return (0);
1264
1265 case RAIDFRAME_GET_SIZE:
1266 *(int *) data = raidPtr->totalSectors;
1267 return (0);
1268
1269 /* fail a disk & optionally start reconstruction */
1270 case RAIDFRAME_FAIL_DISK:
1271
1272 if (raidPtr->Layout.map->faultsTolerated == 0) {
1273 /* Can't do this on a RAID 0!! */
1274 return(EINVAL);
1275 }
1276
1277 rr = (struct rf_recon_req *) data;
1278 rr->row = 0;
1279 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1280 return (EINVAL);
1281
1282
1283 RF_LOCK_MUTEX(raidPtr->mutex);
1284 if (raidPtr->status == rf_rs_reconstructing) {
1285 /* you can't fail a disk while we're reconstructing! */
1286 /* XXX wrong for RAID6 */
1287 RF_UNLOCK_MUTEX(raidPtr->mutex);
1288 return (EINVAL);
1289 }
1290 if ((raidPtr->Disks[rr->col].status ==
1291 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1292 /* some other component has failed. Let's not make
1293 things worse. XXX wrong for RAID6 */
1294 RF_UNLOCK_MUTEX(raidPtr->mutex);
1295 return (EINVAL);
1296 }
1297 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1298 /* Can't fail a spared disk! */
1299 RF_UNLOCK_MUTEX(raidPtr->mutex);
1300 return (EINVAL);
1301 }
1302 RF_UNLOCK_MUTEX(raidPtr->mutex);
1303
1304 /* make a copy of the recon request so that we don't rely on
1305 * the user's buffer */
1306 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1307 if (rrcopy == NULL)
1308 return(ENOMEM);
1309 memcpy(rrcopy, rr, sizeof(*rr));
1310 rrcopy->raidPtr = (void *) raidPtr;
1311
1312 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1313 rf_ReconThread,
1314 rrcopy,"raid_recon");
1315 return (0);
1316
1317 /* invoke a copyback operation after recon on whatever disk
1318 * needs it, if any */
1319 case RAIDFRAME_COPYBACK:
1320
1321 if (raidPtr->Layout.map->faultsTolerated == 0) {
1322 /* This makes no sense on a RAID 0!! */
1323 return(EINVAL);
1324 }
1325
1326 if (raidPtr->copyback_in_progress == 1) {
1327 /* Copyback is already in progress! */
1328 return(EINVAL);
1329 }
1330
1331 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1332 rf_CopybackThread,
1333 raidPtr,"raid_copyback");
1334 return (retcode);
1335
1336 /* return the percentage completion of reconstruction */
1337 case RAIDFRAME_CHECK_RECON_STATUS:
1338 if (raidPtr->Layout.map->faultsTolerated == 0) {
1339 /* This makes no sense on a RAID 0, so tell the
1340 user it's done. */
1341 *(int *) data = 100;
1342 return(0);
1343 }
1344 if (raidPtr->status != rf_rs_reconstructing)
1345 *(int *) data = 100;
1346 else {
1347 if (raidPtr->reconControl->numRUsTotal > 0) {
1348 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1349 } else {
1350 *(int *) data = 0;
1351 }
1352 }
1353 return (0);
1354 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1355 progressInfoPtr = (RF_ProgressInfo_t **) data;
1356 if (raidPtr->status != rf_rs_reconstructing) {
1357 progressInfo.remaining = 0;
1358 progressInfo.completed = 100;
1359 progressInfo.total = 100;
1360 } else {
1361 progressInfo.total =
1362 raidPtr->reconControl->numRUsTotal;
1363 progressInfo.completed =
1364 raidPtr->reconControl->numRUsComplete;
1365 progressInfo.remaining = progressInfo.total -
1366 progressInfo.completed;
1367 }
1368 retcode = copyout(&progressInfo, *progressInfoPtr,
1369 sizeof(RF_ProgressInfo_t));
1370 return (retcode);
1371
1372 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1373 if (raidPtr->Layout.map->faultsTolerated == 0) {
1374 /* This makes no sense on a RAID 0, so tell the
1375 user it's done. */
1376 *(int *) data = 100;
1377 return(0);
1378 }
1379 if (raidPtr->parity_rewrite_in_progress == 1) {
1380 *(int *) data = 100 *
1381 raidPtr->parity_rewrite_stripes_done /
1382 raidPtr->Layout.numStripe;
1383 } else {
1384 *(int *) data = 100;
1385 }
1386 return (0);
1387
1388 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1389 progressInfoPtr = (RF_ProgressInfo_t **) data;
1390 if (raidPtr->parity_rewrite_in_progress == 1) {
1391 progressInfo.total = raidPtr->Layout.numStripe;
1392 progressInfo.completed =
1393 raidPtr->parity_rewrite_stripes_done;
1394 progressInfo.remaining = progressInfo.total -
1395 progressInfo.completed;
1396 } else {
1397 progressInfo.remaining = 0;
1398 progressInfo.completed = 100;
1399 progressInfo.total = 100;
1400 }
1401 retcode = copyout(&progressInfo, *progressInfoPtr,
1402 sizeof(RF_ProgressInfo_t));
1403 return (retcode);
1404
1405 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1406 if (raidPtr->Layout.map->faultsTolerated == 0) {
1407 /* This makes no sense on a RAID 0 */
1408 *(int *) data = 100;
1409 return(0);
1410 }
1411 if (raidPtr->copyback_in_progress == 1) {
1412 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1413 raidPtr->Layout.numStripe;
1414 } else {
1415 *(int *) data = 100;
1416 }
1417 return (0);
1418
1419 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1420 progressInfoPtr = (RF_ProgressInfo_t **) data;
1421 if (raidPtr->copyback_in_progress == 1) {
1422 progressInfo.total = raidPtr->Layout.numStripe;
1423 progressInfo.completed =
1424 raidPtr->copyback_stripes_done;
1425 progressInfo.remaining = progressInfo.total -
1426 progressInfo.completed;
1427 } else {
1428 progressInfo.remaining = 0;
1429 progressInfo.completed = 100;
1430 progressInfo.total = 100;
1431 }
1432 retcode = copyout(&progressInfo, *progressInfoPtr,
1433 sizeof(RF_ProgressInfo_t));
1434 return (retcode);
1435
1436 /* the sparetable daemon calls this to wait for the kernel to
1437 * need a spare table. this ioctl does not return until a
1438 * spare table is needed. XXX -- calling mpsleep here in the
1439 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1440 * -- I should either compute the spare table in the kernel,
1441 * or have a different -- XXX XXX -- interface (a different
1442 * character device) for delivering the table -- XXX */
1443 #if 0
1444 case RAIDFRAME_SPARET_WAIT:
1445 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1446 while (!rf_sparet_wait_queue)
1447 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1448 waitreq = rf_sparet_wait_queue;
1449 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1450 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1451
1452 /* structure assignment */
1453 *((RF_SparetWait_t *) data) = *waitreq;
1454
1455 RF_Free(waitreq, sizeof(*waitreq));
1456 return (0);
1457
	/* wakes up a process waiting on SPARET_WAIT and puts an error
	 * code in it that will cause the daemon to exit */
1460 case RAIDFRAME_ABORT_SPARET_WAIT:
1461 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1462 waitreq->fcol = -1;
1463 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1464 waitreq->next = rf_sparet_wait_queue;
1465 rf_sparet_wait_queue = waitreq;
1466 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1467 wakeup(&rf_sparet_wait_queue);
1468 return (0);
1469
1470 /* used by the spare table daemon to deliver a spare table
1471 * into the kernel */
1472 case RAIDFRAME_SEND_SPARET:
1473
1474 /* install the spare table */
1475 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1476
1477 /* respond to the requestor. the return status of the spare
1478 * table installation is passed in the "fcol" field */
1479 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1480 waitreq->fcol = retcode;
1481 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1482 waitreq->next = rf_sparet_resp_queue;
1483 rf_sparet_resp_queue = waitreq;
1484 wakeup(&rf_sparet_resp_queue);
1485 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1486
1487 return (retcode);
1488 #endif
1489
1490 default:
1491 break; /* fall through to the os-specific code below */
1492
1493 }
1494
1495 if (!raidPtr->valid)
1496 return (EINVAL);
1497
1498 /*
1499 * Add support for "regular" device ioctls here.
1500 */
1501
1502 switch (cmd) {
1503 case DIOCGDINFO:
1504 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1505 break;
1506 #ifdef __HAVE_OLD_DISKLABEL
1507 case ODIOCGDINFO:
1508 newlabel = *(rs->sc_dkdev.dk_label);
1509 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1510 return ENOTTY;
1511 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1512 break;
1513 #endif
1514
1515 case DIOCGPART:
1516 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1517 ((struct partinfo *) data)->part =
1518 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1519 break;
1520
1521 case DIOCWDINFO:
1522 case DIOCSDINFO:
1523 #ifdef __HAVE_OLD_DISKLABEL
1524 case ODIOCWDINFO:
1525 case ODIOCSDINFO:
1526 #endif
1527 {
1528 struct disklabel *lp;
1529 #ifdef __HAVE_OLD_DISKLABEL
1530 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1531 memset(&newlabel, 0, sizeof newlabel);
1532 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1533 lp = &newlabel;
1534 } else
1535 #endif
1536 lp = (struct disklabel *)data;
1537
1538 if ((error = raidlock(rs)) != 0)
1539 return (error);
1540
1541 rs->sc_flags |= RAIDF_LABELLING;
1542
1543 error = setdisklabel(rs->sc_dkdev.dk_label,
1544 lp, 0, rs->sc_dkdev.dk_cpulabel);
1545 if (error == 0) {
1546 if (cmd == DIOCWDINFO
1547 #ifdef __HAVE_OLD_DISKLABEL
1548 || cmd == ODIOCWDINFO
1549 #endif
1550 )
1551 error = writedisklabel(RAIDLABELDEV(dev),
1552 raidstrategy, rs->sc_dkdev.dk_label,
1553 rs->sc_dkdev.dk_cpulabel);
1554 }
1555 rs->sc_flags &= ~RAIDF_LABELLING;
1556
1557 raidunlock(rs);
1558
1559 if (error)
1560 return (error);
1561 break;
1562 }
1563
1564 case DIOCWLABEL:
1565 if (*(int *) data != 0)
1566 rs->sc_flags |= RAIDF_WLABEL;
1567 else
1568 rs->sc_flags &= ~RAIDF_WLABEL;
1569 break;
1570
1571 case DIOCGDEFLABEL:
1572 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1573 break;
1574
1575 #ifdef __HAVE_OLD_DISKLABEL
1576 case ODIOCGDEFLABEL:
1577 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1578 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1579 return ENOTTY;
1580 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1581 break;
1582 #endif
1583
1584 default:
1585 retcode = ENOTTY;
1586 }
1587 return (retcode);
1588
1589 }
1590
1591
1592 /* raidinit -- complete the rest of the initialization for the
1593 RAIDframe device. */
1594
1595
1596 static void
1597 raidinit(RF_Raid_t *raidPtr)
1598 {
1599 struct raid_softc *rs;
1600 int unit;
1601
1602 unit = raidPtr->raidid;
1603
1604 rs = &raid_softc[unit];
1605
1606 /* XXX should check return code first... */
1607 rs->sc_flags |= RAIDF_INITED;
1608
1609 /* XXX doesn't check bounds. */
1610 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1611
1612 rs->sc_dkdev.dk_name = rs->sc_xname;
1613
1614 /* disk_attach actually creates space for the CPU disklabel, among
1615 * other things, so it's critical to call this *BEFORE* we try putzing
1616 * with disklabels. */
1617
1618 pseudo_disk_attach(&rs->sc_dkdev);
1619
1620 /* XXX There may be a weird interaction here between this, and
1621 * protectedSectors, as used in RAIDframe. */
1622
1623 rs->sc_size = raidPtr->totalSectors;
1624 }
1625 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1626 /* wake up the daemon & tell it to get us a spare table
1627 * XXX
1628 * the entries in the queues should be tagged with the raidPtr
1629 * so that in the extremely rare case that two recons happen at once,
1630 * we know for which device were requesting a spare table
1631 * XXX
1632 *
1633 * XXX This code is not currently used. GO
1634 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* Queue our request where the daemon (blocked in
	 * RAIDFRAME_SPARET_WAIT) will find it, and wake it up. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): the comment above refers to mpsleep, but tsleep(9)
	 * is what is actually called here and it does not know about
	 * rf_sparet_wait_mutex -- confirm RF_LOCK_MUTEX semantics make
	 * sleeping with it "held" safe. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}

	/* Pop the daemon's response off the response queue; its fcol
	 * field carries the installation status. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1659 #endif
1660
1661 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1662 * bp & passes it down.
1663 * any calls originating in the kernel must use non-blocking I/O
1664 * do some extra sanity checking to return "appropriate" error values for
1665 * certain conditions (to make some standard utilities work)
1666 *
1667 * Formerly known as: rf_DoAccessKernel
1668 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* label update is done with the mutex dropped; the
		 * counter is decremented once we hold it again */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Loop invariant: raidPtr->mutex is held each time the condition
	 * is evaluated, and dropped for the body's real work. */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* Reject requests that run past the end of the array.
		 * pb accounts for a trailing partial sector; the (sum < x)
		 * comparisons catch wrap-around of the unsigned sum. */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Requests must be a whole number of sectors. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));

		/* Consume one opening for this access. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		/* If submission failed, complete the buf with the error;
		 * otherwise completion happens via the I/O done path. */
		if (rc) {
			bp->b_error = rc;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1792
1793
1794
1795
1796 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1797
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	/* Remember which queue this request came from so the completion
	 * path (KernelWakeupFunc) can find it again. */
	req->queue = queue;

#if DIAGNOSTIC
	if (queue->raidPtr->raidid >= numraid) {
		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
		       numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO");
	}
#endif

	bp = req->bp;

	/*
	 * context for raidiodone
	 */

	bp->b_fspriv.bf_private = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;

		/* Complete the NOP immediately; KernelWakeupFunc will
		 * account for the numOutstanding bump above. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up the buf for the component device and hand it to
		 * the component's strategy routine. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		       op, queue->rf_cinfo->ci_dev,
		       req->sectorOffset, req->numSector,
		       req->buf, KernelWakeupFunc, (void *) req,
		       queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			    (int) req->sectorOffset, (int) req->numSector,
			    (int) (req->numSector <<
				   queue->raidPtr->logBytesPerSector),
			    (int) queue->raidPtr->logBytesPerSector));
		/* Writes must bump the vnode's output counter. */
		if ((bp->b_flags & B_READ) == 0) {
			bp->b_vp->v_numoutput++;
		}
		VOP_STRATEGY(bp->b_vp, bp);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
1880 /* this is the callback function associated with a I/O invoked from
1881 kernel code.
1882 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));

	/* Recover the request stashed by rf_DispatchKernelIO(). */
	req = bp->b_fspriv.bf_private;

	queue = (RF_DiskQueue_t *) req->queue;

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures is noticed by raidstart(), which
			 * triggers a component-label update. */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */

	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}
1952
1953
1954
1955 /*
1956 * initialize a buf structure for doing an I/O in the kernel.
1957 */
1958 static void
1959 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1960 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf,
1961 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
1962 struct proc *b_proc)
1963 {
1964 /* bp->b_flags = B_PHYS | rw_flag; */
1965 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1966 bp->b_bcount = numSect << logBytesPerSector;
1967 bp->b_bufsize = bp->b_bcount;
1968 bp->b_error = 0;
1969 bp->b_dev = dev;
1970 bp->b_data = bf;
1971 bp->b_blkno = startSect;
1972 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1973 if (bp->b_bcount == 0) {
1974 panic("bp->b_bcount is zero in InitBP!!");
1975 }
1976 bp->b_proc = b_proc;
1977 bp->b_iodone = cbFunc;
1978 bp->b_vp = b_vp;
1979
1980 }
1981
1982 static void
1983 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
1984 struct disklabel *lp)
1985 {
1986 memset(lp, 0, sizeof(*lp));
1987
1988 /* fabricate a label... */
1989 lp->d_secperunit = raidPtr->totalSectors;
1990 lp->d_secsize = raidPtr->bytesPerSector;
1991 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1992 lp->d_ntracks = 4 * raidPtr->numCol;
1993 lp->d_ncylinders = raidPtr->totalSectors /
1994 (lp->d_nsectors * lp->d_ntracks);
1995 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1996
1997 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1998 lp->d_type = DTYPE_RAID;
1999 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2000 lp->d_rpm = 3600;
2001 lp->d_interleave = 1;
2002 lp->d_flags = 0;
2003
2004 lp->d_partitions[RAW_PART].p_offset = 0;
2005 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2006 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2007 lp->d_npartitions = RAW_PART + 1;
2008
2009 lp->d_magic = DISKMAGIC;
2010 lp->d_magic2 = DISKMAGIC;
2011 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2012
2013 }
2014 /*
2015 * Read the disklabel from the raid device. If one is not present, fake one
2016 * up.
2017 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* Start from a fabricated label; readdisklabel() below may
	 * replace it with one found on the device. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may be used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", unit, rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		/* Warn about (but don't reject) partitions that extend
		 * past the end of the RAID set. */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				       "exceeds the size of raid (%ld)\n",
				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
2070 /*
2071 * Take care of things one might want to take care of in the event
2072 * that a disklabel isn't present.
2073 */
2074 static void
2075 raidmakedisklabel(struct raid_softc *rs)
2076 {
2077 struct disklabel *lp = rs->sc_dkdev.dk_label;
2078 db1_printf(("Making a label..\n"));
2079
2080 /*
2081 * For historical reasons, if there's no disklabel present
2082 * the raw partition must be marked FS_BSDFFS.
2083 */
2084
2085 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2086
2087 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2088
2089 lp->d_checksum = dkcksum(lp);
2090 }
2091 /*
2092 * Lookup the provided name in the filesystem. If the file exists,
2093 * is a valid block device, and isn't being used by anyone else,
2094 * set *vpp to the file's vnode.
2095 * You'll find the original of this in ccd.c
2096 */
int
raidlookup(char *path, struct lwp *l, struct vnode **vpp)
{
	/*
	 * Look up `path', open it read/write, and return its vnode in
	 * *vpp if it is an unshared block device.  Returns 0 on success,
	 * or an errno (EBUSY if some other opener holds it, ENOTBLK if
	 * it isn't a block device, or the vn_open/VOP_GETATTR error).
	 * On success the vnode is returned unlocked but open.
	 */
	struct nameidata nd;
	struct vnode *vp;
	struct proc *p;
	struct vattr va;
	int error;

	/*
	 * NOTE(review): if l is NULL, p is NULL here, yet p->p_ucred is
	 * dereferenced on the error paths below -- callers apparently
	 * always pass a valid lwp; confirm before relying on l == NULL.
	 */
	p = l ? l->l_proc : NULL;
	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, l);
	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
		return (error);
	}
	vp = nd.ni_vp;
	/* Refuse a device that anyone else already has open. */
	if (vp->v_usecount > 1) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
		return (EBUSY);
	}
	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, l)) != 0) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
		return (error);
	}
	/* XXX: eventually we should handle VREG, too. */
	if (va.va_type != VBLK) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
		return (ENOTBLK);
	}
	/* Hand back the vnode unlocked; the component stays open. */
	VOP_UNLOCK(vp, 0);
	*vpp = vp;
	return (0);
}
2132 /*
2133 * Wait interruptibly for an exclusive lock.
2134 *
2135 * XXX
2136 * Several drivers do this; it should be abstracted and made MP-safe.
2137 * (Hmm... where have we seen this warning before :-> GO )
2138 */
2139 static int
2140 raidlock(struct raid_softc *rs)
2141 {
2142 int error;
2143
2144 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2145 rs->sc_flags |= RAIDF_WANTED;
2146 if ((error =
2147 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2148 return (error);
2149 }
2150 rs->sc_flags |= RAIDF_LOCKED;
2151 return (0);
2152 }
2153 /*
2154 * Unlock and wake up any waiters.
2155 */
2156 static void
2157 raidunlock(struct raid_softc *rs)
2158 {
2159
2160 rs->sc_flags &= ~RAIDF_LOCKED;
2161 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2162 rs->sc_flags &= ~RAIDF_WANTED;
2163 wakeup(rs);
2164 }
2165 }
2166
2167
2168 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2169 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2170
2171 int
2172 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2173 {
2174 RF_ComponentLabel_t clabel;
2175 raidread_component_label(dev, b_vp, &clabel);
2176 clabel.mod_counter = mod_counter;
2177 clabel.clean = RF_RAID_CLEAN;
2178 raidwrite_component_label(dev, b_vp, &clabel);
2179 return(0);
2180 }
2181
2182
2183 int
2184 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2185 {
2186 RF_ComponentLabel_t clabel;
2187 raidread_component_label(dev, b_vp, &clabel);
2188 clabel.mod_counter = mod_counter;
2189 clabel.clean = RF_RAID_DIRTY;
2190 raidwrite_component_label(dev, b_vp, &clabel);
2191 return(0);
2192 }
2193
2194 /* ARGSUSED */
2195 int
2196 raidread_component_label(dev_t dev, struct vnode *b_vp,
2197 RF_ComponentLabel_t *clabel)
2198 {
2199 struct buf *bp;
2200 const struct bdevsw *bdev;
2201 int error;
2202
2203 /* XXX should probably ensure that we don't try to do this if
2204 someone has changed rf_protected_sectors. */
2205
2206 if (b_vp == NULL) {
2207 /* For whatever reason, this component is not valid.
2208 Don't try to read a component label from it. */
2209 return(EINVAL);
2210 }
2211
2212 /* get a block of the appropriate size... */
2213 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2214 bp->b_dev = dev;
2215
2216 /* get our ducks in a row for the read */
2217 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2218 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2219 bp->b_flags |= B_READ;
2220 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2221
2222 bdev = bdevsw_lookup(bp->b_dev);
2223 if (bdev == NULL)
2224 return (ENXIO);
2225 (*bdev->d_strategy)(bp);
2226
2227 error = biowait(bp);
2228
2229 if (!error) {
2230 memcpy(clabel, bp->b_data,
2231 sizeof(RF_ComponentLabel_t));
2232 }
2233
2234 brelse(bp);
2235 return(error);
2236 }
2237 /* ARGSUSED */
2238 int
2239 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2240 RF_ComponentLabel_t *clabel)
2241 {
2242 struct buf *bp;
2243 const struct bdevsw *bdev;
2244 int error;
2245
2246 /* get a block of the appropriate size... */
2247 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2248 bp->b_dev = dev;
2249
2250 /* get our ducks in a row for the write */
2251 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2252 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2253 bp->b_flags |= B_WRITE;
2254 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2255
2256 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2257
2258 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2259
2260 bdev = bdevsw_lookup(bp->b_dev);
2261 if (bdev == NULL)
2262 return (ENXIO);
2263 (*bdev->d_strategy)(bp);
2264 error = biowait(bp);
2265 brelse(bp);
2266 if (error) {
2267 #if 1
2268 printf("Failed to write RAID component info!\n");
2269 #endif
2270 }
2271
2272 return(error);
2273 }
2274
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	/*
	 * Bump the set's mod_counter and mark every live component
	 * (and every in-use spare) dirty on disk.  Called at
	 * configuration time so an unclean shutdown is detectable.
	 */
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (clabel.status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(
					raidPtr->Disks[c].dev,
					raidPtr->raid_cinfo[c].ci_vp,
					raidPtr->mod_counter);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which data column this spare replaced. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			clabel.row = 0;
			/*
			 * NOTE(review): if no column claims this spare,
			 * scol keeps its previous value (-1 initially) --
			 * confirm this case cannot happen for a
			 * rf_ds_used_spare disk.
			 */
			clabel.column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr->Disks[sparecol].dev,
				      raidPtr->raid_cinfo[sparecol].ci_vp,
				      raidPtr->mod_counter);
		}
	}
}
2345
2346
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	/*
	 * Rewrite the component labels of all optimal components and
	 * in-use spares with the bumped mod_counter.  If this is the
	 * final update (RF_FINAL_COMPONENT_UPDATE) and parity is known
	 * good, also mark each component clean.
	 */
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			/* make sure status is noted */
			clabel.status = rf_ds_optimal;
			/* bump the counter */
			clabel.mod_counter = raidPtr->mod_counter;

			raidwrite_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						raidPtr->Disks[c].dev,
						raidPtr->raid_cinfo[c].ci_vp,
						raidPtr->mod_counter);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which data column this spare replaced. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			clabel.mod_counter = raidPtr->mod_counter;
			/* NOTE(review): scol stays -1 if no column claims
			   this spare -- presumed impossible here. */
			clabel.column = scol;
			clabel.status = rf_ds_optimal;

			raidwrite_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( raidPtr->Disks[sparecol].dev,
						       raidPtr->raid_cinfo[sparecol].ci_vp,
						       raidPtr->mod_counter);
				}
			}
		}
	}
}
2437
2438 void
2439 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2440 {
2441 struct proc *p;
2442 struct lwp *l;
2443
2444 p = raidPtr->engine_thread;
2445 l = LIST_FIRST(&p->p_lwps);
2446
2447 if (vp != NULL) {
2448 if (auto_configured == 1) {
2449 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2450 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2451 vput(vp);
2452
2453 } else {
2454 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
2455 }
2456 }
2457 }
2458
2459
2460 void
2461 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2462 {
2463 int r,c;
2464 struct vnode *vp;
2465 int acd;
2466
2467
2468 /* We take this opportunity to close the vnodes like we should.. */
2469
2470 for (c = 0; c < raidPtr->numCol; c++) {
2471 vp = raidPtr->raid_cinfo[c].ci_vp;
2472 acd = raidPtr->Disks[c].auto_configured;
2473 rf_close_component(raidPtr, vp, acd);
2474 raidPtr->raid_cinfo[c].ci_vp = NULL;
2475 raidPtr->Disks[c].auto_configured = 0;
2476 }
2477
2478 for (r = 0; r < raidPtr->numSpare; r++) {
2479 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2480 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2481 rf_close_component(raidPtr, vp, acd);
2482 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2483 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2484 }
2485 }
2486
2487
2488 void
2489 rf_ReconThread(struct rf_recon_req *req)
2490 {
2491 int s;
2492 RF_Raid_t *raidPtr;
2493
2494 s = splbio();
2495 raidPtr = (RF_Raid_t *) req->raidPtr;
2496 raidPtr->recon_in_progress = 1;
2497
2498 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2499 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2500
2501 RF_Free(req, sizeof(*req));
2502
2503 raidPtr->recon_in_progress = 0;
2504 splx(s);
2505
2506 /* That's all... */
2507 kthread_exit(0); /* does not return */
2508 }
2509
2510 void
2511 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2512 {
2513 int retcode;
2514 int s;
2515
2516 raidPtr->parity_rewrite_stripes_done = 0;
2517 raidPtr->parity_rewrite_in_progress = 1;
2518 s = splbio();
2519 retcode = rf_RewriteParity(raidPtr);
2520 splx(s);
2521 if (retcode) {
2522 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2523 } else {
2524 /* set the clean bit! If we shutdown correctly,
2525 the clean bit on each component label will get
2526 set */
2527 raidPtr->parity_good = RF_RAID_CLEAN;
2528 }
2529 raidPtr->parity_rewrite_in_progress = 0;
2530
2531 /* Anyone waiting for us to stop? If so, inform them... */
2532 if (raidPtr->waitShutdown) {
2533 wakeup(&raidPtr->parity_rewrite_in_progress);
2534 }
2535
2536 /* That's all... */
2537 kthread_exit(0); /* does not return */
2538 }
2539
2540
2541 void
2542 rf_CopybackThread(RF_Raid_t *raidPtr)
2543 {
2544 int s;
2545
2546 raidPtr->copyback_in_progress = 1;
2547 s = splbio();
2548 rf_CopybackReconstructedData(raidPtr);
2549 splx(s);
2550 raidPtr->copyback_in_progress = 0;
2551
2552 /* That's all... */
2553 kthread_exit(0); /* does not return */
2554 }
2555
2556
2557 void
2558 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2559 {
2560 int s;
2561 RF_Raid_t *raidPtr;
2562
2563 s = splbio();
2564 raidPtr = req->raidPtr;
2565 raidPtr->recon_in_progress = 1;
2566 rf_ReconstructInPlace(raidPtr, req->col);
2567 RF_Free(req, sizeof(*req));
2568 raidPtr->recon_in_progress = 0;
2569 splx(s);
2570
2571 /* That's all... */
2572 kthread_exit(0); /* does not return */
2573 }
2574
RF_AutoConfig_t *
rf_find_raid_components()
{
	/*
	 * Scan every disk device in the system for partitions marked
	 * FS_RAID, read the RAIDframe component label from each, and
	 * return a linked list of RF_AutoConfig_t describing every
	 * plausible component found.  Components that pass keep their
	 * vnode open (released later by rf_release_all_vps or claimed
	 * by configuration); rejects are closed here.
	 */
	struct vnode *vp;
	struct disklabel label;
	struct device *dv;
	dev_t dev;
	int bmajor;
	int error;
	int i;
	int good_one;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac_list;
	RF_AutoConfig_t *ac;


	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = alldevs.tqh_first; dv != NULL;
	     dv = dv->dv_list.tqe_next) {

		/* we are only interested in disks... */
		if (dv->dv_class != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
			continue;
		}
		/* fdisa is the Atari/Milan floppy driver */
		if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);

		/* get a vnode for the raw partition of this disk */

		dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD, NOCRED, 0);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				       "%s (%d)\n", dv->dv_xname, error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		/* NOTE(review): closed with FREAD|FWRITE although it was
		   opened FREAD only -- confirm this is intentional. */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
		vput(vp);

		if (error)
			continue;

		/* Walk the disklabel looking for RAID partitions. */
		for (i=0; i < label.d_npartitions; i++) {
			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}

			good_one = 0;

			clabel = (RF_ComponentLabel_t *)
				malloc(sizeof(RF_ComponentLabel_t),
				       M_RAIDFRAME, M_NOWAIT);
			if (clabel == NULL) {
				/* XXX CLEANUP HERE */
				printf("RAID auto config: out of memory!\n");
				return(NULL); /* XXX probably should panic? */
			}

			if (!raidread_component_label(dev, vp, clabel)) {
				/* Got the label.  Does it look reasonable? */
				if (rf_reasonable_label(clabel) &&
				    (clabel->partitionSize <=
				     label.d_partitions[i].p_size)) {
#if DEBUG
					printf("Component on: %s%c: %d\n",
					       dv->dv_xname, 'a'+i,
					       label.d_partitions[i].p_size);
					rf_print_component_label(clabel);
#endif
					/* if it's reasonable, add it,
					   else ignore it. */
					ac = (RF_AutoConfig_t *)
						malloc(sizeof(RF_AutoConfig_t),
						       M_RAIDFRAME,
						       M_NOWAIT);
					if (ac == NULL) {
						/* XXX should panic?? */
						return(NULL);
					}

					snprintf(ac->devname,
					    sizeof(ac->devname), "%s%c",
					    dv->dv_xname, 'a'+i);
					ac->dev = dev;
					ac->vp = vp;
					ac->clabel = clabel;
					/* prepend to the running list */
					ac->next = ac_list;
					ac_list = ac;
					good_one = 1;
				}
			}
			if (!good_one) {
				/* cleanup */
				free(clabel, M_RAIDFRAME);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
				vput(vp);
			}
		}
	}
	return(ac_list);
}
2732
2733 static int
2734 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2735 {
2736
2737 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2738 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2739 ((clabel->clean == RF_RAID_CLEAN) ||
2740 (clabel->clean == RF_RAID_DIRTY)) &&
2741 clabel->row >=0 &&
2742 clabel->column >= 0 &&
2743 clabel->num_rows > 0 &&
2744 clabel->num_columns > 0 &&
2745 clabel->row < clabel->num_rows &&
2746 clabel->column < clabel->num_columns &&
2747 clabel->blockSize > 0 &&
2748 clabel->numBlocks > 0) {
2749 /* label looks reasonable enough... */
2750 return(1);
2751 }
2752 return(0);
2753 }
2754
2755
#if DEBUG
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	/* Debug aid: dump every interesting field of a component label. */
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status );
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No" );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
2783
RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t *ac_list)
{
	/*
	 * Partition the flat list of discovered components into config
	 * sets: components whose labels match (per rf_does_it_fit) end
	 * up chained off the same RF_ConfigSet_t.  The input list is
	 * consumed -- each ac's next pointer is re-spliced into its
	 * set's chain.  Returns the list of sets (panics on OOM).
	 */
	RF_AutoConfig_t *ac;
	RF_ConfigSet_t *config_sets;
	RF_ConfigSet_t *cset;
	RF_AutoConfig_t *ac_next;


	config_sets = NULL;

	/* Go through the AutoConfig list, and figure out which components
	   belong to what sets.  */
	ac = ac_list;
	while(ac!=NULL) {
		/* we're going to putz with ac->next, so save it here
		   for use at the end of the loop */
		ac_next = ac->next;

		if (config_sets == NULL) {
			/* will need at least this one... */
			config_sets = (RF_ConfigSet_t *)
				malloc(sizeof(RF_ConfigSet_t),
				       M_RAIDFRAME, M_NOWAIT);
			if (config_sets == NULL) {
				panic("rf_create_auto_sets: No memory!");
			}
			/* this one is easy :) */
			config_sets->ac = ac;
			config_sets->next = NULL;
			config_sets->rootable = 0;
			ac->next = NULL;
		} else {
			/* which set does this component fit into? */
			cset = config_sets;
			while(cset!=NULL) {
				if (rf_does_it_fit(cset, ac)) {
					/* looks like it matches... */
					ac->next = cset->ac;
					cset->ac = ac;
					break;
				}
				cset = cset->next;
			}
			if (cset==NULL) {
				/* didn't find a match above... new set..*/
				cset = (RF_ConfigSet_t *)
					malloc(sizeof(RF_ConfigSet_t),
					       M_RAIDFRAME, M_NOWAIT);
				if (cset == NULL) {
					panic("rf_create_auto_sets: No memory!");
				}
				cset->ac = ac;
				ac->next = NULL;
				cset->next = config_sets;
				cset->rootable = 0;
				config_sets = cset;
			}
		}
		ac = ac_next;
	}


	return(config_sets);
}
2849
2850 static int
2851 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2852 {
2853 RF_ComponentLabel_t *clabel1, *clabel2;
2854
2855 /* If this one matches the *first* one in the set, that's good
2856 enough, since the other members of the set would have been
2857 through here too... */
2858 /* note that we are not checking partitionSize here..
2859
2860 Note that we are also not checking the mod_counters here.
2861 If everything else matches execpt the mod_counter, that's
2862 good enough for this test. We will deal with the mod_counters
2863 a little later in the autoconfiguration process.
2864
2865 (clabel1->mod_counter == clabel2->mod_counter) &&
2866
2867 The reason we don't check for this is that failed disks
2868 will have lower modification counts. If those disks are
2869 not added to the set they used to belong to, then they will
2870 form their own set, which may result in 2 different sets,
2871 for example, competing to be configured at raid0, and
2872 perhaps competing to be the root filesystem set. If the
2873 wrong ones get configured, or both attempt to become /,
2874 weird behaviour and or serious lossage will occur. Thus we
2875 need to bring them into the fold here, and kick them out at
2876 a later point.
2877
2878 */
2879
2880 clabel1 = cset->ac->clabel;
2881 clabel2 = ac->clabel;
2882 if ((clabel1->version == clabel2->version) &&
2883 (clabel1->serial_number == clabel2->serial_number) &&
2884 (clabel1->num_rows == clabel2->num_rows) &&
2885 (clabel1->num_columns == clabel2->num_columns) &&
2886 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2887 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2888 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2889 (clabel1->parityConfig == clabel2->parityConfig) &&
2890 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2891 (clabel1->blockSize == clabel2->blockSize) &&
2892 (clabel1->numBlocks == clabel2->numBlocks) &&
2893 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2894 (clabel1->root_partition == clabel2->root_partition) &&
2895 (clabel1->last_unit == clabel2->last_unit) &&
2896 (clabel1->config_order == clabel2->config_order)) {
2897 /* if it get's here, it almost *has* to be a match */
2898 } else {
2899 /* it's not consistent with somebody in the set..
2900 punt */
2901 return(0);
2902 }
2903 /* all was fine.. it must fit... */
2904 return(1);
2905 }
2906
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	/*
	 * Decide whether this config set has enough live components to
	 * be configured.  Returns 1 if so, 0 if too many components
	 * are missing for the set's parity type.
	 */
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* The authoritative mod_counter is the highest one present;
	   components with lower counters are stale. */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	/* For each column, look for a component with the current
	   mod_counter; anything else counts as missing. */
	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#if DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						/* both halves of a
						   mirror pair gone --
						   set is unusable */
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate
	   at most one. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3009
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
			RF_Raid_t *raidPtr)
{
	/*
	 * Build an RF_Config_t for rf_Configure() from the component
	 * labels of an auto-detected set.  Geometry and queueing
	 * parameters come from the first component's label; each
	 * component's device name is placed in its column slot.
	 */
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		/* NOTE(review): unbounded strcpy -- presumably devname
		   always fits in devnames[0][col]; confirm the field
		   sizes in RF_Config_t. */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	/* No debug variables for auto-configured sets. */
	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
3044
3045 int
3046 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3047 {
3048 RF_ComponentLabel_t clabel;
3049 struct vnode *vp;
3050 dev_t dev;
3051 int column;
3052 int sparecol;
3053
3054 raidPtr->autoconfigure = new_value;
3055
3056 for(column=0; column<raidPtr->numCol; column++) {
3057 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3058 dev = raidPtr->Disks[column].dev;
3059 vp = raidPtr->raid_cinfo[column].ci_vp;
3060 raidread_component_label(dev, vp, &clabel);
3061 clabel.autoconfigure = new_value;
3062 raidwrite_component_label(dev, vp, &clabel);
3063 }
3064 }
3065 for(column = 0; column < raidPtr->numSpare ; column++) {
3066 sparecol = raidPtr->numCol + column;
3067 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3068 dev = raidPtr->Disks[sparecol].dev;
3069 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3070 raidread_component_label(dev, vp, &clabel);
3071 clabel.autoconfigure = new_value;
3072 raidwrite_component_label(dev, vp, &clabel);
3073 }
3074 }
3075 return(new_value);
3076 }
3077
3078 int
3079 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3080 {
3081 RF_ComponentLabel_t clabel;
3082 struct vnode *vp;
3083 dev_t dev;
3084 int column;
3085 int sparecol;
3086
3087 raidPtr->root_partition = new_value;
3088 for(column=0; column<raidPtr->numCol; column++) {
3089 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3090 dev = raidPtr->Disks[column].dev;
3091 vp = raidPtr->raid_cinfo[column].ci_vp;
3092 raidread_component_label(dev, vp, &clabel);
3093 clabel.root_partition = new_value;
3094 raidwrite_component_label(dev, vp, &clabel);
3095 }
3096 }
3097 for(column = 0; column < raidPtr->numSpare ; column++) {
3098 sparecol = raidPtr->numCol + column;
3099 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3100 dev = raidPtr->Disks[sparecol].dev;
3101 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3102 raidread_component_label(dev, vp, &clabel);
3103 clabel.root_partition = new_value;
3104 raidwrite_component_label(dev, vp, &clabel);
3105 }
3106 }
3107 return(new_value);
3108 }
3109
3110 void
3111 rf_release_all_vps(RF_ConfigSet_t *cset)
3112 {
3113 RF_AutoConfig_t *ac;
3114
3115 ac = cset->ac;
3116 while(ac!=NULL) {
3117 /* Close the vp, and give it back */
3118 if (ac->vp) {
3119 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3120 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3121 vput(ac->vp);
3122 ac->vp = NULL;
3123 }
3124 ac = ac->next;
3125 }
3126 }
3127
3128
3129 void
3130 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3131 {
3132 RF_AutoConfig_t *ac;
3133 RF_AutoConfig_t *next_ac;
3134
3135 ac = cset->ac;
3136 while(ac!=NULL) {
3137 next_ac = ac->next;
3138 /* nuke the label */
3139 free(ac->clabel, M_RAIDFRAME);
3140 /* cleanup the config structure */
3141 free(ac, M_RAIDFRAME);
3142 /* "next.." */
3143 ac = next_ac;
3144 }
3145 /* and, finally, nuke the config set */
3146 free(cset, M_RAIDFRAME);
3147 }
3148
3149
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/*
	 * Fill in *clabel from the current in-core raid state.  Only
	 * the set-wide fields are set here; per-component fields such
	 * as row/column/partitionSize are the caller's responsibility.
	 */
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	clabel->numBlocks = raidPtr->sectorsPerDisk;

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;
}
3177
3178 int
3179 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3180 {
3181 RF_Raid_t *raidPtr;
3182 RF_Config_t *config;
3183 int raidID;
3184 int retcode;
3185
3186 #if DEBUG
3187 printf("RAID autoconfigure\n");
3188 #endif
3189
3190 retcode = 0;
3191 *unit = -1;
3192
3193 /* 1. Create a config structure */
3194
3195 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3196 M_RAIDFRAME,
3197 M_NOWAIT);
3198 if (config==NULL) {
3199 printf("Out of mem!?!?\n");
3200 /* XXX do something more intelligent here. */
3201 return(1);
3202 }
3203
3204 memset(config, 0, sizeof(RF_Config_t));
3205
3206 /*
3207 2. Figure out what RAID ID this one is supposed to live at
3208 See if we can get the same RAID dev that it was configured
3209 on last time..
3210 */
3211
3212 raidID = cset->ac->clabel->last_unit;
3213 if ((raidID < 0) || (raidID >= numraid)) {
3214 /* let's not wander off into lala land. */
3215 raidID = numraid - 1;
3216 }
3217 if (raidPtrs[raidID]->valid != 0) {
3218
3219 /*
3220 Nope... Go looking for an alternative...
3221 Start high so we don't immediately use raid0 if that's
3222 not taken.
3223 */
3224
3225 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3226 if (raidPtrs[raidID]->valid == 0) {
3227 /* can use this one! */
3228 break;
3229 }
3230 }
3231 }
3232
3233 if (raidID < 0) {
3234 /* punt... */
3235 printf("Unable to auto configure this set!\n");
3236 printf("(Out of RAID devs!)\n");
3237 return(1);
3238 }
3239
3240 #if DEBUG
3241 printf("Configuring raid%d:\n",raidID);
3242 #endif
3243
3244 raidPtr = raidPtrs[raidID];
3245
3246 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3247 raidPtr->raidid = raidID;
3248 raidPtr->openings = RAIDOUTSTANDING;
3249
3250 /* 3. Build the configuration structure */
3251 rf_create_configuration(cset->ac, config, raidPtr);
3252
3253 /* 4. Do the configuration */
3254 retcode = rf_Configure(raidPtr, config, cset->ac);
3255
3256 if (retcode == 0) {
3257
3258 raidinit(raidPtrs[raidID]);
3259
3260 rf_markalldirty(raidPtrs[raidID]);
3261 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3262 if (cset->ac->clabel->root_partition==1) {
3263 /* everything configured just fine. Make a note
3264 that this set is eligible to be root. */
3265 cset->rootable = 1;
3266 /* XXX do this here? */
3267 raidPtrs[raidID]->root_partition = 1;
3268 }
3269 }
3270
3271 /* 5. Cleanup */
3272 free(config, M_RAIDFRAME);
3273
3274 *unit = raidID;
3275 return(retcode);
3276 }
3277
void
rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
{
	/*
	 * Account a completed I/O against the raid device's disk
	 * statistics (bytes actually transferred, read vs. write).
	 */
	struct buf *bp;

	bp = (struct buf *)desc->bp;
	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
}
3287
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	/*
	 * Initialize a pool of `size'-byte items, pre-allocate `xmin'
	 * of them, and set the low/high watermarks to xmin/xmax.
	 */
	pool_init(p, size, 0, 0, 0, w_chan, NULL);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3297
3298 /*
3299 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3300 * if there is IO pending and if that IO could possibly be done for a
3301 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3302 * otherwise.
3303 *
3304 */
3305
3306 int
3307 rf_buf_queue_check(int raidid)
3308 {
3309 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
3310 raidPtrs[raidid]->openings > 0) {
3311 /* there is work to do */
3312 return 0;
3313 }
3314 /* default is nothing to do */
3315 return 1;
3316 }
3317