rf_netbsdkintf.c revision 1.195 1 /* $NetBSD: rf_netbsdkintf.c,v 1.195 2006/01/08 09:09:53 yamt Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.195 2006/01/08 09:09:53 yamt Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/bufq.h>
169 #include <sys/user.h>
170 #include <sys/reboot.h>
171
172 #include <dev/raidframe/raidframevar.h>
173 #include <dev/raidframe/raidframeio.h>
174 #include "raid.h"
175 #include "opt_raid_autoconfig.h"
176 #include "rf_raid.h"
177 #include "rf_copyback.h"
178 #include "rf_dag.h"
179 #include "rf_dagflags.h"
180 #include "rf_desc.h"
181 #include "rf_diskqueue.h"
182 #include "rf_etimer.h"
183 #include "rf_general.h"
184 #include "rf_kintf.h"
185 #include "rf_options.h"
186 #include "rf_driver.h"
187 #include "rf_parityscan.h"
188 #include "rf_threadstuff.h"
189
190 #ifdef DEBUG
191 int rf_kdebug_level = 0;
192 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
193 #else /* DEBUG */
194 #define db1_printf(a) { }
195 #endif /* DEBUG */
196
197 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
198
199 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
200
201 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
202 * spare table */
203 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
204 * installation process */
205
206 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
207
208 /* prototypes */
209 static void KernelWakeupFunc(struct buf *);
210 static void InitBP(struct buf *, struct vnode *, unsigned,
211 dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *),
212 void *, int, struct proc *);
213 static void raidinit(RF_Raid_t *);
214
215 void raidattach(int);
216
217 dev_type_open(raidopen);
218 dev_type_close(raidclose);
219 dev_type_read(raidread);
220 dev_type_write(raidwrite);
221 dev_type_ioctl(raidioctl);
222 dev_type_strategy(raidstrategy);
223 dev_type_dump(raiddump);
224 dev_type_size(raidsize);
225
226 const struct bdevsw raid_bdevsw = {
227 raidopen, raidclose, raidstrategy, raidioctl,
228 raiddump, raidsize, D_DISK
229 };
230
231 const struct cdevsw raid_cdevsw = {
232 raidopen, raidclose, raidread, raidwrite, raidioctl,
233 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
234 };
235
236 /* XXX Not sure if the following should be replacing the raidPtrs above,
237 or if it should be used in conjunction with that...
238 */
239
/* Per-unit software state for a RAID pseudo-disk. */
struct raid_softc {
	int     sc_flags;	/* state flags (RAIDF_* below) */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
};
248 /* sc_flags */
249 #define RAIDF_INITED 0x01 /* unit has been initialized */
250 #define RAIDF_WLABEL 0x02 /* label area is writable */
251 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
252 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
253 #define RAIDF_LOCKED 0x80 /* unit is locked */
254
255 #define raidunit(x) DISKUNIT(x)
256 int numraid = 0;
257
258 /*
259 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
260 * Be aware that large numbers can allow the driver to consume a lot of
261 * kernel memory, especially on writes, and in degraded mode reads.
262 *
263 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
264 * a single 64K write will typically require 64K for the old data,
265 * 64K for the old parity, and 64K for the new parity, for a total
266 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
268 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
269 *
270 * Now in degraded mode, for example, a 64K read on the above setup may
271 * require data reconstruction, which will require *all* of the 4 remaining
272 * disks to participate -- 4 * 32K/disk == 128K again.
273 */
274
275 #ifndef RAIDOUTSTANDING
276 #define RAIDOUTSTANDING 6
277 #endif
278
279 #define RAIDLABELDEV(dev) \
280 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
281
282 /* declared here, and made public, for the benefit of KVM stuff.. */
283 struct raid_softc *raid_softc;
284
285 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
286 struct disklabel *);
287 static void raidgetdisklabel(dev_t);
288 static void raidmakedisklabel(struct raid_softc *);
289
290 static int raidlock(struct raid_softc *);
291 static void raidunlock(struct raid_softc *);
292
293 static void rf_markalldirty(RF_Raid_t *);
294
295 struct device *raidrootdev;
296
297 void rf_ReconThread(struct rf_recon_req *);
298 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
299 void rf_CopybackThread(RF_Raid_t *raidPtr);
300 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
301 int rf_autoconfig(struct device *self);
302 void rf_buildroothack(RF_ConfigSet_t *);
303
304 RF_AutoConfig_t *rf_find_raid_components(void);
305 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
306 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
307 static int rf_reasonable_label(RF_ComponentLabel_t *);
308 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
309 int rf_set_autoconfig(RF_Raid_t *, int);
310 int rf_set_rootpartition(RF_Raid_t *, int);
311 void rf_release_all_vps(RF_ConfigSet_t *);
312 void rf_cleanup_config_set(RF_ConfigSet_t *);
313 int rf_have_enough_components(RF_ConfigSet_t *);
314 int rf_auto_config_set(RF_ConfigSet_t *, int *);
315
316 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
317 allow autoconfig to take place.
318 Note that this is overridden by having
319 RAID_AUTOCONFIG as an option in the
320 kernel config file. */
321
322 struct RF_Pools_s rf_pools;
323
/*
 * raidattach: pseudo-device attach routine, called once at boot with
 * the number of RAID units configured ("pseudo-device raid N").
 *
 * Allocates the per-unit descriptor (raidPtrs[]) and softc
 * (raid_softc[]) arrays, boots the RAIDframe core, sets up fake
 * struct device entries so an autoconfigured set can become the root
 * device, and registers a config finalizer that runs component
 * auto-detection after all real hardware has attached.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

	rf_mutex_init(&rf_sparet_wait_mutex);

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		printf("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
	    M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	/* Fake struct device entries, used only so rf_buildroothack()
	   can nominate a raid unit as booted_device. */
	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
	    M_RAIDFRAME, M_NOWAIT);
	if (raidrootdev == NULL) {
		panic("No memory for RAIDframe driver!!?!?!");
	}

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
		pseudo_disk_init(&raid_softc[raidID].sc_dkdev);

		raidrootdev[raidID].dv_class = DV_DISK;
		raidrootdev[raidID].dv_cfdata = NULL;
		raidrootdev[raidID].dv_unit = raidID;
		raidrootdev[raidID].dv_parent = NULL;
		raidrootdev[raidID].dv_flags = 0;
		snprintf(raidrootdev[raidID].dv_xname,
		    sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
			/* Shrink numraid so only the units that got a
			   descriptor are usable from here on. */
			numraid = raidID;
			return;
		}
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		printf("WARNING: unable to register RAIDframe finalizer\n");
}
414
415 int
416 rf_autoconfig(struct device *self)
417 {
418 RF_AutoConfig_t *ac_list;
419 RF_ConfigSet_t *config_sets;
420
421 if (raidautoconfig == 0)
422 return (0);
423
424 /* XXX This code can only be run once. */
425 raidautoconfig = 0;
426
427 /* 1. locate all RAID components on the system */
428 #ifdef DEBUG
429 printf("Searching for RAID components...\n");
430 #endif
431 ac_list = rf_find_raid_components();
432
433 /* 2. Sort them into their respective sets. */
434 config_sets = rf_create_auto_sets(ac_list);
435
436 /*
437 * 3. Evaluate each set andconfigure the valid ones.
438 * This gets done in rf_buildroothack().
439 */
440 rf_buildroothack(config_sets);
441
442 return (1);
443 }
444
445 void
446 rf_buildroothack(RF_ConfigSet_t *config_sets)
447 {
448 RF_ConfigSet_t *cset;
449 RF_ConfigSet_t *next_cset;
450 int retcode;
451 int raidID;
452 int rootID;
453 int num_root;
454
455 rootID = 0;
456 num_root = 0;
457 cset = config_sets;
458 while(cset != NULL ) {
459 next_cset = cset->next;
460 if (rf_have_enough_components(cset) &&
461 cset->ac->clabel->autoconfigure==1) {
462 retcode = rf_auto_config_set(cset,&raidID);
463 if (!retcode) {
464 if (cset->rootable) {
465 rootID = raidID;
466 num_root++;
467 }
468 } else {
469 /* The autoconfig didn't work :( */
470 #if DEBUG
471 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
472 #endif
473 rf_release_all_vps(cset);
474 }
475 } else {
476 /* we're not autoconfiguring this set...
477 release the associated resources */
478 rf_release_all_vps(cset);
479 }
480 /* cleanup */
481 rf_cleanup_config_set(cset);
482 cset = next_cset;
483 }
484
485 /* we found something bootable... */
486
487 if (num_root == 1) {
488 booted_device = &raidrootdev[rootID];
489 } else if (num_root > 1) {
490 /* we can't guess.. require the user to answer... */
491 boothowto |= RB_ASKNAME;
492 }
493 }
494
495
496 int
497 raidsize(dev_t dev)
498 {
499 struct raid_softc *rs;
500 struct disklabel *lp;
501 int part, unit, omask, size;
502
503 unit = raidunit(dev);
504 if (unit >= numraid)
505 return (-1);
506 rs = &raid_softc[unit];
507
508 if ((rs->sc_flags & RAIDF_INITED) == 0)
509 return (-1);
510
511 part = DISKPART(dev);
512 omask = rs->sc_dkdev.dk_openmask & (1 << part);
513 lp = rs->sc_dkdev.dk_label;
514
515 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
516 return (-1);
517
518 if (lp->d_partitions[part].p_fstype != FS_SWAP)
519 size = -1;
520 else
521 size = lp->d_partitions[part].p_size *
522 (lp->d_secsize / DEV_BSIZE);
523
524 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
525 return (-1);
526
527 return (size);
528
529 }
530
/*
 * raiddump: crash-dump entry point for the raid block device.
 * Dumping to a RAID set is not supported; all arguments are ignored
 * and the call always fails with ENXIO.
 */
int
raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
{
	/* Not implemented. */
	return ENXIO;
}
/* ARGSUSED */
/*
 * raidopen: open a partition of a RAID unit.
 *
 * Serializes against open/close/configure via raidlock(), re-reads
 * the disklabel on the first open of a configured set, verifies that
 * a non-raw partition actually exists, then records the open in the
 * char/block open masks.  On the very first open of a configured set
 * the component labels are marked dirty so that an unclean shutdown
 * can be detected later.
 */
int
raidopen(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize with other open/close/configure operations. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/* First opener of a configured set: refresh the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	/* Combined open mask is recomputed after this open. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * raidclose: close a partition of a RAID unit.
 *
 * Clears the partition's bit from the char/block open mask; on the
 * last close of a configured set the component labels are updated to
 * record a clean state, and during system shutdown the set is also
 * shut down and the pseudo-disk detached.
 */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize with other open/close/configure operations. */
	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
		    RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			/* NOTE(review): rf_Shutdown's return value is
			   stored in 'error' but this function returns 0
			   unconditionally below — confirm intentional. */
			error = rf_Shutdown(raidPtrs[unit]);

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* Detach the disk. */
			pseudo_disk_detach(&rs->sc_dkdev);
		}
	}

	raidunlock(rs);
	return (0);

}
664
665 void
666 raidstrategy(struct buf *bp)
667 {
668 int s;
669
670 unsigned int raidID = raidunit(bp->b_dev);
671 RF_Raid_t *raidPtr;
672 struct raid_softc *rs = &raid_softc[raidID];
673 int wlabel;
674
675 if ((rs->sc_flags & RAIDF_INITED) ==0) {
676 bp->b_error = ENXIO;
677 bp->b_flags |= B_ERROR;
678 bp->b_resid = bp->b_bcount;
679 biodone(bp);
680 return;
681 }
682 if (raidID >= numraid || !raidPtrs[raidID]) {
683 bp->b_error = ENODEV;
684 bp->b_flags |= B_ERROR;
685 bp->b_resid = bp->b_bcount;
686 biodone(bp);
687 return;
688 }
689 raidPtr = raidPtrs[raidID];
690 if (!raidPtr->valid) {
691 bp->b_error = ENODEV;
692 bp->b_flags |= B_ERROR;
693 bp->b_resid = bp->b_bcount;
694 biodone(bp);
695 return;
696 }
697 if (bp->b_bcount == 0) {
698 db1_printf(("b_bcount is zero..\n"));
699 biodone(bp);
700 return;
701 }
702
703 /*
704 * Do bounds checking and adjust transfer. If there's an
705 * error, the bounds check will flag that for us.
706 */
707
708 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
709 if (DISKPART(bp->b_dev) != RAW_PART)
710 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
711 db1_printf(("Bounds check failed!!:%d %d\n",
712 (int) bp->b_blkno, (int) wlabel));
713 biodone(bp);
714 return;
715 }
716 s = splbio();
717
718 bp->b_resid = 0;
719
720 /* stuff it onto our queue */
721 BUFQ_PUT(rs->buf_queue, bp);
722
723 /* scheduled the IO to happen at the next convenient time */
724 wakeup(&(raidPtrs[raidID]->iodone));
725
726 splx(s);
727 }
728 /* ARGSUSED */
729 int
730 raidread(dev_t dev, struct uio *uio, int flags)
731 {
732 int unit = raidunit(dev);
733 struct raid_softc *rs;
734
735 if (unit >= numraid)
736 return (ENXIO);
737 rs = &raid_softc[unit];
738
739 if ((rs->sc_flags & RAIDF_INITED) == 0)
740 return (ENXIO);
741
742 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
743
744 }
745 /* ARGSUSED */
746 int
747 raidwrite(dev_t dev, struct uio *uio, int flags)
748 {
749 int unit = raidunit(dev);
750 struct raid_softc *rs;
751
752 if (unit >= numraid)
753 return (ENXIO);
754 rs = &raid_softc[unit];
755
756 if ((rs->sc_flags & RAIDF_INITED) == 0)
757 return (ENXIO);
758
759 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
760
761 }
762
763 int
764 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
765 {
766 int unit = raidunit(dev);
767 int error = 0;
768 int part, pmask;
769 struct raid_softc *rs;
770 RF_Config_t *k_cfg, *u_cfg;
771 RF_Raid_t *raidPtr;
772 RF_RaidDisk_t *diskPtr;
773 RF_AccTotals_t *totals;
774 RF_DeviceConfig_t *d_cfg, **ucfgp;
775 u_char *specific_buf;
776 int retcode = 0;
777 int column;
778 int raidid;
779 struct rf_recon_req *rrcopy, *rr;
780 RF_ComponentLabel_t *clabel;
781 RF_ComponentLabel_t ci_label;
782 RF_ComponentLabel_t **clabel_ptr;
783 RF_SingleComponent_t *sparePtr,*componentPtr;
784 RF_SingleComponent_t hot_spare;
785 RF_SingleComponent_t component;
786 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
787 int i, j, d;
788 #ifdef __HAVE_OLD_DISKLABEL
789 struct disklabel newlabel;
790 #endif
791
792 if (unit >= numraid)
793 return (ENXIO);
794 rs = &raid_softc[unit];
795 raidPtr = raidPtrs[unit];
796
797 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
798 (int) DISKPART(dev), (int) unit, (int) cmd));
799
800 /* Must be open for writes for these commands... */
801 switch (cmd) {
802 case DIOCSDINFO:
803 case DIOCWDINFO:
804 #ifdef __HAVE_OLD_DISKLABEL
805 case ODIOCWDINFO:
806 case ODIOCSDINFO:
807 #endif
808 case DIOCWLABEL:
809 if ((flag & FWRITE) == 0)
810 return (EBADF);
811 }
812
813 /* Must be initialized for these... */
814 switch (cmd) {
815 case DIOCGDINFO:
816 case DIOCSDINFO:
817 case DIOCWDINFO:
818 #ifdef __HAVE_OLD_DISKLABEL
819 case ODIOCGDINFO:
820 case ODIOCWDINFO:
821 case ODIOCSDINFO:
822 case ODIOCGDEFLABEL:
823 #endif
824 case DIOCGPART:
825 case DIOCWLABEL:
826 case DIOCGDEFLABEL:
827 case RAIDFRAME_SHUTDOWN:
828 case RAIDFRAME_REWRITEPARITY:
829 case RAIDFRAME_GET_INFO:
830 case RAIDFRAME_RESET_ACCTOTALS:
831 case RAIDFRAME_GET_ACCTOTALS:
832 case RAIDFRAME_KEEP_ACCTOTALS:
833 case RAIDFRAME_GET_SIZE:
834 case RAIDFRAME_FAIL_DISK:
835 case RAIDFRAME_COPYBACK:
836 case RAIDFRAME_CHECK_RECON_STATUS:
837 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
838 case RAIDFRAME_GET_COMPONENT_LABEL:
839 case RAIDFRAME_SET_COMPONENT_LABEL:
840 case RAIDFRAME_ADD_HOT_SPARE:
841 case RAIDFRAME_REMOVE_HOT_SPARE:
842 case RAIDFRAME_INIT_LABELS:
843 case RAIDFRAME_REBUILD_IN_PLACE:
844 case RAIDFRAME_CHECK_PARITY:
845 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
846 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
847 case RAIDFRAME_CHECK_COPYBACK_STATUS:
848 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
849 case RAIDFRAME_SET_AUTOCONFIG:
850 case RAIDFRAME_SET_ROOT:
851 case RAIDFRAME_DELETE_COMPONENT:
852 case RAIDFRAME_INCORPORATE_HOT_SPARE:
853 if ((rs->sc_flags & RAIDF_INITED) == 0)
854 return (ENXIO);
855 }
856
857 switch (cmd) {
858
859 /* configure the system */
860 case RAIDFRAME_CONFIGURE:
861
862 if (raidPtr->valid) {
863 /* There is a valid RAID set running on this unit! */
864 printf("raid%d: Device already configured!\n",unit);
865 return(EINVAL);
866 }
867
868 /* copy-in the configuration information */
869 /* data points to a pointer to the configuration structure */
870
871 u_cfg = *((RF_Config_t **) data);
872 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
873 if (k_cfg == NULL) {
874 return (ENOMEM);
875 }
876 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
877 if (retcode) {
878 RF_Free(k_cfg, sizeof(RF_Config_t));
879 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
880 retcode));
881 return (retcode);
882 }
883 /* allocate a buffer for the layout-specific data, and copy it
884 * in */
885 if (k_cfg->layoutSpecificSize) {
886 if (k_cfg->layoutSpecificSize > 10000) {
887 /* sanity check */
888 RF_Free(k_cfg, sizeof(RF_Config_t));
889 return (EINVAL);
890 }
891 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
892 (u_char *));
893 if (specific_buf == NULL) {
894 RF_Free(k_cfg, sizeof(RF_Config_t));
895 return (ENOMEM);
896 }
897 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
898 k_cfg->layoutSpecificSize);
899 if (retcode) {
900 RF_Free(k_cfg, sizeof(RF_Config_t));
901 RF_Free(specific_buf,
902 k_cfg->layoutSpecificSize);
903 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
904 retcode));
905 return (retcode);
906 }
907 } else
908 specific_buf = NULL;
909 k_cfg->layoutSpecific = specific_buf;
910
911 /* should do some kind of sanity check on the configuration.
912 * Store the sum of all the bytes in the last byte? */
913
914 /* configure the system */
915
916 /*
917 * Clear the entire RAID descriptor, just to make sure
918 * there is no stale data left in the case of a
919 * reconfiguration
920 */
921 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
922 raidPtr->raidid = unit;
923
924 retcode = rf_Configure(raidPtr, k_cfg, NULL);
925
926 if (retcode == 0) {
927
928 /* allow this many simultaneous IO's to
929 this RAID device */
930 raidPtr->openings = RAIDOUTSTANDING;
931
932 raidinit(raidPtr);
933 rf_markalldirty(raidPtr);
934 }
935 /* free the buffers. No return code here. */
936 if (k_cfg->layoutSpecificSize) {
937 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
938 }
939 RF_Free(k_cfg, sizeof(RF_Config_t));
940
941 return (retcode);
942
943 /* shutdown the system */
944 case RAIDFRAME_SHUTDOWN:
945
946 if ((error = raidlock(rs)) != 0)
947 return (error);
948
949 /*
950 * If somebody has a partition mounted, we shouldn't
951 * shutdown.
952 */
953
954 part = DISKPART(dev);
955 pmask = (1 << part);
956 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
957 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
958 (rs->sc_dkdev.dk_copenmask & pmask))) {
959 raidunlock(rs);
960 return (EBUSY);
961 }
962
963 retcode = rf_Shutdown(raidPtr);
964
965 /* It's no longer initialized... */
966 rs->sc_flags &= ~RAIDF_INITED;
967
968 /* Detach the disk. */
969 pseudo_disk_detach(&rs->sc_dkdev);
970
971 raidunlock(rs);
972
973 return (retcode);
974 case RAIDFRAME_GET_COMPONENT_LABEL:
975 clabel_ptr = (RF_ComponentLabel_t **) data;
976 /* need to read the component label for the disk indicated
977 by row,column in clabel */
978
979 /* For practice, let's get it directly fromdisk, rather
980 than from the in-core copy */
981 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
982 (RF_ComponentLabel_t *));
983 if (clabel == NULL)
984 return (ENOMEM);
985
986 retcode = copyin( *clabel_ptr, clabel,
987 sizeof(RF_ComponentLabel_t));
988
989 if (retcode) {
990 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
991 return(retcode);
992 }
993
994 clabel->row = 0; /* Don't allow looking at anything else.*/
995
996 column = clabel->column;
997
998 if ((column < 0) || (column >= raidPtr->numCol +
999 raidPtr->numSpare)) {
1000 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1001 return(EINVAL);
1002 }
1003
1004 retcode = raidread_component_label(raidPtr->Disks[column].dev,
1005 raidPtr->raid_cinfo[column].ci_vp,
1006 clabel );
1007
1008 if (retcode == 0) {
1009 retcode = copyout(clabel, *clabel_ptr,
1010 sizeof(RF_ComponentLabel_t));
1011 }
1012 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1013 return (retcode);
1014
1015 case RAIDFRAME_SET_COMPONENT_LABEL:
1016 clabel = (RF_ComponentLabel_t *) data;
1017
1018 /* XXX check the label for valid stuff... */
1019 /* Note that some things *should not* get modified --
1020 the user should be re-initing the labels instead of
1021 trying to patch things.
1022 */
1023
1024 raidid = raidPtr->raidid;
1025 #if DEBUG
1026 printf("raid%d: Got component label:\n", raidid);
1027 printf("raid%d: Version: %d\n", raidid, clabel->version);
1028 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1029 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1030 printf("raid%d: Column: %d\n", raidid, clabel->column);
1031 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1032 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1033 printf("raid%d: Status: %d\n", raidid, clabel->status);
1034 #endif
1035 clabel->row = 0;
1036 column = clabel->column;
1037
1038 if ((column < 0) || (column >= raidPtr->numCol)) {
1039 return(EINVAL);
1040 }
1041
1042 /* XXX this isn't allowed to do anything for now :-) */
1043
1044 /* XXX and before it is, we need to fill in the rest
1045 of the fields!?!?!?! */
1046 #if 0
1047 raidwrite_component_label(
1048 raidPtr->Disks[column].dev,
1049 raidPtr->raid_cinfo[column].ci_vp,
1050 clabel );
1051 #endif
1052 return (0);
1053
1054 case RAIDFRAME_INIT_LABELS:
1055 clabel = (RF_ComponentLabel_t *) data;
1056 /*
1057 we only want the serial number from
1058 the above. We get all the rest of the information
1059 from the config that was used to create this RAID
1060 set.
1061 */
1062
1063 raidPtr->serial_number = clabel->serial_number;
1064
1065 raid_init_component_label(raidPtr, &ci_label);
1066 ci_label.serial_number = clabel->serial_number;
1067 ci_label.row = 0; /* we dont' pretend to support more */
1068
1069 for(column=0;column<raidPtr->numCol;column++) {
1070 diskPtr = &raidPtr->Disks[column];
1071 if (!RF_DEAD_DISK(diskPtr->status)) {
1072 ci_label.partitionSize = diskPtr->partitionSize;
1073 ci_label.column = column;
1074 raidwrite_component_label(
1075 raidPtr->Disks[column].dev,
1076 raidPtr->raid_cinfo[column].ci_vp,
1077 &ci_label );
1078 }
1079 }
1080
1081 return (retcode);
1082 case RAIDFRAME_SET_AUTOCONFIG:
1083 d = rf_set_autoconfig(raidPtr, *(int *) data);
1084 printf("raid%d: New autoconfig value is: %d\n",
1085 raidPtr->raidid, d);
1086 *(int *) data = d;
1087 return (retcode);
1088
1089 case RAIDFRAME_SET_ROOT:
1090 d = rf_set_rootpartition(raidPtr, *(int *) data);
1091 printf("raid%d: New rootpartition value is: %d\n",
1092 raidPtr->raidid, d);
1093 *(int *) data = d;
1094 return (retcode);
1095
1096 /* initialize all parity */
1097 case RAIDFRAME_REWRITEPARITY:
1098
1099 if (raidPtr->Layout.map->faultsTolerated == 0) {
1100 /* Parity for RAID 0 is trivially correct */
1101 raidPtr->parity_good = RF_RAID_CLEAN;
1102 return(0);
1103 }
1104
1105 if (raidPtr->parity_rewrite_in_progress == 1) {
1106 /* Re-write is already in progress! */
1107 return(EINVAL);
1108 }
1109
1110 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1111 rf_RewriteParityThread,
1112 raidPtr,"raid_parity");
1113 return (retcode);
1114
1115
1116 case RAIDFRAME_ADD_HOT_SPARE:
1117 sparePtr = (RF_SingleComponent_t *) data;
1118 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1119 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1120 return(retcode);
1121
1122 case RAIDFRAME_REMOVE_HOT_SPARE:
1123 return(retcode);
1124
1125 case RAIDFRAME_DELETE_COMPONENT:
1126 componentPtr = (RF_SingleComponent_t *)data;
1127 memcpy( &component, componentPtr,
1128 sizeof(RF_SingleComponent_t));
1129 retcode = rf_delete_component(raidPtr, &component);
1130 return(retcode);
1131
1132 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1133 componentPtr = (RF_SingleComponent_t *)data;
1134 memcpy( &component, componentPtr,
1135 sizeof(RF_SingleComponent_t));
1136 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1137 return(retcode);
1138
1139 case RAIDFRAME_REBUILD_IN_PLACE:
1140
1141 if (raidPtr->Layout.map->faultsTolerated == 0) {
1142 /* Can't do this on a RAID 0!! */
1143 return(EINVAL);
1144 }
1145
1146 if (raidPtr->recon_in_progress == 1) {
1147 /* a reconstruct is already in progress! */
1148 return(EINVAL);
1149 }
1150
1151 componentPtr = (RF_SingleComponent_t *) data;
1152 memcpy( &component, componentPtr,
1153 sizeof(RF_SingleComponent_t));
1154 component.row = 0; /* we don't support any more */
1155 column = component.column;
1156
1157 if ((column < 0) || (column >= raidPtr->numCol)) {
1158 return(EINVAL);
1159 }
1160
1161 RF_LOCK_MUTEX(raidPtr->mutex);
1162 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1163 (raidPtr->numFailures > 0)) {
1164 /* XXX 0 above shouldn't be constant!!! */
1165 /* some component other than this has failed.
1166 Let's not make things worse than they already
1167 are... */
1168 printf("raid%d: Unable to reconstruct to disk at:\n",
1169 raidPtr->raidid);
1170 printf("raid%d: Col: %d Too many failures.\n",
1171 raidPtr->raidid, column);
1172 RF_UNLOCK_MUTEX(raidPtr->mutex);
1173 return (EINVAL);
1174 }
1175 if (raidPtr->Disks[column].status ==
1176 rf_ds_reconstructing) {
1177 printf("raid%d: Unable to reconstruct to disk at:\n",
1178 raidPtr->raidid);
1179 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1180
1181 RF_UNLOCK_MUTEX(raidPtr->mutex);
1182 return (EINVAL);
1183 }
1184 if (raidPtr->Disks[column].status == rf_ds_spared) {
1185 RF_UNLOCK_MUTEX(raidPtr->mutex);
1186 return (EINVAL);
1187 }
1188 RF_UNLOCK_MUTEX(raidPtr->mutex);
1189
1190 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1191 if (rrcopy == NULL)
1192 return(ENOMEM);
1193
1194 rrcopy->raidPtr = (void *) raidPtr;
1195 rrcopy->col = column;
1196
1197 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1198 rf_ReconstructInPlaceThread,
1199 rrcopy,"raid_reconip");
1200 return(retcode);
1201
1202 case RAIDFRAME_GET_INFO:
1203 if (!raidPtr->valid)
1204 return (ENODEV);
1205 ucfgp = (RF_DeviceConfig_t **) data;
1206 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1207 (RF_DeviceConfig_t *));
1208 if (d_cfg == NULL)
1209 return (ENOMEM);
1210 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1211 d_cfg->rows = 1; /* there is only 1 row now */
1212 d_cfg->cols = raidPtr->numCol;
1213 d_cfg->ndevs = raidPtr->numCol;
1214 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1215 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1216 return (ENOMEM);
1217 }
1218 d_cfg->nspares = raidPtr->numSpare;
1219 if (d_cfg->nspares >= RF_MAX_DISKS) {
1220 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1221 return (ENOMEM);
1222 }
1223 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1224 d = 0;
1225 for (j = 0; j < d_cfg->cols; j++) {
1226 d_cfg->devs[d] = raidPtr->Disks[j];
1227 d++;
1228 }
1229 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1230 d_cfg->spares[i] = raidPtr->Disks[j];
1231 }
1232 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1233 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1234
1235 return (retcode);
1236
1237 case RAIDFRAME_CHECK_PARITY:
1238 *(int *) data = raidPtr->parity_good;
1239 return (0);
1240
1241 case RAIDFRAME_RESET_ACCTOTALS:
1242 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1243 return (0);
1244
1245 case RAIDFRAME_GET_ACCTOTALS:
1246 totals = (RF_AccTotals_t *) data;
1247 *totals = raidPtr->acc_totals;
1248 return (0);
1249
1250 case RAIDFRAME_KEEP_ACCTOTALS:
1251 raidPtr->keep_acc_totals = *(int *)data;
1252 return (0);
1253
1254 case RAIDFRAME_GET_SIZE:
1255 *(int *) data = raidPtr->totalSectors;
1256 return (0);
1257
1258 /* fail a disk & optionally start reconstruction */
1259 case RAIDFRAME_FAIL_DISK:
1260
1261 if (raidPtr->Layout.map->faultsTolerated == 0) {
1262 /* Can't do this on a RAID 0!! */
1263 return(EINVAL);
1264 }
1265
1266 rr = (struct rf_recon_req *) data;
1267 rr->row = 0;
1268 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1269 return (EINVAL);
1270
1271
1272 RF_LOCK_MUTEX(raidPtr->mutex);
1273 if (raidPtr->status == rf_rs_reconstructing) {
1274 /* you can't fail a disk while we're reconstructing! */
1275 /* XXX wrong for RAID6 */
1276 RF_UNLOCK_MUTEX(raidPtr->mutex);
1277 return (EINVAL);
1278 }
1279 if ((raidPtr->Disks[rr->col].status ==
1280 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1281 /* some other component has failed. Let's not make
1282 things worse. XXX wrong for RAID6 */
1283 RF_UNLOCK_MUTEX(raidPtr->mutex);
1284 return (EINVAL);
1285 }
1286 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1287 /* Can't fail a spared disk! */
1288 RF_UNLOCK_MUTEX(raidPtr->mutex);
1289 return (EINVAL);
1290 }
1291 RF_UNLOCK_MUTEX(raidPtr->mutex);
1292
1293 /* make a copy of the recon request so that we don't rely on
1294 * the user's buffer */
1295 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1296 if (rrcopy == NULL)
1297 return(ENOMEM);
1298 memcpy(rrcopy, rr, sizeof(*rr));
1299 rrcopy->raidPtr = (void *) raidPtr;
1300
1301 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1302 rf_ReconThread,
1303 rrcopy,"raid_recon");
1304 return (0);
1305
1306 /* invoke a copyback operation after recon on whatever disk
1307 * needs it, if any */
1308 case RAIDFRAME_COPYBACK:
1309
1310 if (raidPtr->Layout.map->faultsTolerated == 0) {
1311 /* This makes no sense on a RAID 0!! */
1312 return(EINVAL);
1313 }
1314
1315 if (raidPtr->copyback_in_progress == 1) {
1316 /* Copyback is already in progress! */
1317 return(EINVAL);
1318 }
1319
1320 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1321 rf_CopybackThread,
1322 raidPtr,"raid_copyback");
1323 return (retcode);
1324
1325 /* return the percentage completion of reconstruction */
1326 case RAIDFRAME_CHECK_RECON_STATUS:
1327 if (raidPtr->Layout.map->faultsTolerated == 0) {
1328 /* This makes no sense on a RAID 0, so tell the
1329 user it's done. */
1330 *(int *) data = 100;
1331 return(0);
1332 }
1333 if (raidPtr->status != rf_rs_reconstructing)
1334 *(int *) data = 100;
1335 else {
1336 if (raidPtr->reconControl->numRUsTotal > 0) {
1337 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1338 } else {
1339 *(int *) data = 0;
1340 }
1341 }
1342 return (0);
1343 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1344 progressInfoPtr = (RF_ProgressInfo_t **) data;
1345 if (raidPtr->status != rf_rs_reconstructing) {
1346 progressInfo.remaining = 0;
1347 progressInfo.completed = 100;
1348 progressInfo.total = 100;
1349 } else {
1350 progressInfo.total =
1351 raidPtr->reconControl->numRUsTotal;
1352 progressInfo.completed =
1353 raidPtr->reconControl->numRUsComplete;
1354 progressInfo.remaining = progressInfo.total -
1355 progressInfo.completed;
1356 }
1357 retcode = copyout(&progressInfo, *progressInfoPtr,
1358 sizeof(RF_ProgressInfo_t));
1359 return (retcode);
1360
1361 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1362 if (raidPtr->Layout.map->faultsTolerated == 0) {
1363 /* This makes no sense on a RAID 0, so tell the
1364 user it's done. */
1365 *(int *) data = 100;
1366 return(0);
1367 }
1368 if (raidPtr->parity_rewrite_in_progress == 1) {
1369 *(int *) data = 100 *
1370 raidPtr->parity_rewrite_stripes_done /
1371 raidPtr->Layout.numStripe;
1372 } else {
1373 *(int *) data = 100;
1374 }
1375 return (0);
1376
1377 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1378 progressInfoPtr = (RF_ProgressInfo_t **) data;
1379 if (raidPtr->parity_rewrite_in_progress == 1) {
1380 progressInfo.total = raidPtr->Layout.numStripe;
1381 progressInfo.completed =
1382 raidPtr->parity_rewrite_stripes_done;
1383 progressInfo.remaining = progressInfo.total -
1384 progressInfo.completed;
1385 } else {
1386 progressInfo.remaining = 0;
1387 progressInfo.completed = 100;
1388 progressInfo.total = 100;
1389 }
1390 retcode = copyout(&progressInfo, *progressInfoPtr,
1391 sizeof(RF_ProgressInfo_t));
1392 return (retcode);
1393
1394 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1395 if (raidPtr->Layout.map->faultsTolerated == 0) {
1396 /* This makes no sense on a RAID 0 */
1397 *(int *) data = 100;
1398 return(0);
1399 }
1400 if (raidPtr->copyback_in_progress == 1) {
1401 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1402 raidPtr->Layout.numStripe;
1403 } else {
1404 *(int *) data = 100;
1405 }
1406 return (0);
1407
1408 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1409 progressInfoPtr = (RF_ProgressInfo_t **) data;
1410 if (raidPtr->copyback_in_progress == 1) {
1411 progressInfo.total = raidPtr->Layout.numStripe;
1412 progressInfo.completed =
1413 raidPtr->copyback_stripes_done;
1414 progressInfo.remaining = progressInfo.total -
1415 progressInfo.completed;
1416 } else {
1417 progressInfo.remaining = 0;
1418 progressInfo.completed = 100;
1419 progressInfo.total = 100;
1420 }
1421 retcode = copyout(&progressInfo, *progressInfoPtr,
1422 sizeof(RF_ProgressInfo_t));
1423 return (retcode);
1424
1425 /* the sparetable daemon calls this to wait for the kernel to
1426 * need a spare table. this ioctl does not return until a
1427 * spare table is needed. XXX -- calling mpsleep here in the
1428 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1429 * -- I should either compute the spare table in the kernel,
1430 * or have a different -- XXX XXX -- interface (a different
1431 * character device) for delivering the table -- XXX */
1432 #if 0
1433 case RAIDFRAME_SPARET_WAIT:
1434 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1435 while (!rf_sparet_wait_queue)
1436 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1437 waitreq = rf_sparet_wait_queue;
1438 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1439 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1440
1441 /* structure assignment */
1442 *((RF_SparetWait_t *) data) = *waitreq;
1443
1444 RF_Free(waitreq, sizeof(*waitreq));
1445 return (0);
1446
1447 /* wakes up a process waiting on SPARET_WAIT and puts an error
1448 * code in it that will cause the dameon to exit */
1449 case RAIDFRAME_ABORT_SPARET_WAIT:
1450 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1451 waitreq->fcol = -1;
1452 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1453 waitreq->next = rf_sparet_wait_queue;
1454 rf_sparet_wait_queue = waitreq;
1455 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1456 wakeup(&rf_sparet_wait_queue);
1457 return (0);
1458
1459 /* used by the spare table daemon to deliver a spare table
1460 * into the kernel */
1461 case RAIDFRAME_SEND_SPARET:
1462
1463 /* install the spare table */
1464 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1465
1466 /* respond to the requestor. the return status of the spare
1467 * table installation is passed in the "fcol" field */
1468 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1469 waitreq->fcol = retcode;
1470 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1471 waitreq->next = rf_sparet_resp_queue;
1472 rf_sparet_resp_queue = waitreq;
1473 wakeup(&rf_sparet_resp_queue);
1474 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1475
1476 return (retcode);
1477 #endif
1478
1479 default:
1480 break; /* fall through to the os-specific code below */
1481
1482 }
1483
1484 if (!raidPtr->valid)
1485 return (EINVAL);
1486
1487 /*
1488 * Add support for "regular" device ioctls here.
1489 */
1490
1491 switch (cmd) {
1492 case DIOCGDINFO:
1493 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1494 break;
1495 #ifdef __HAVE_OLD_DISKLABEL
1496 case ODIOCGDINFO:
1497 newlabel = *(rs->sc_dkdev.dk_label);
1498 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1499 return ENOTTY;
1500 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1501 break;
1502 #endif
1503
1504 case DIOCGPART:
1505 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1506 ((struct partinfo *) data)->part =
1507 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1508 break;
1509
1510 case DIOCWDINFO:
1511 case DIOCSDINFO:
1512 #ifdef __HAVE_OLD_DISKLABEL
1513 case ODIOCWDINFO:
1514 case ODIOCSDINFO:
1515 #endif
1516 {
1517 struct disklabel *lp;
1518 #ifdef __HAVE_OLD_DISKLABEL
1519 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1520 memset(&newlabel, 0, sizeof newlabel);
1521 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1522 lp = &newlabel;
1523 } else
1524 #endif
1525 lp = (struct disklabel *)data;
1526
1527 if ((error = raidlock(rs)) != 0)
1528 return (error);
1529
1530 rs->sc_flags |= RAIDF_LABELLING;
1531
1532 error = setdisklabel(rs->sc_dkdev.dk_label,
1533 lp, 0, rs->sc_dkdev.dk_cpulabel);
1534 if (error == 0) {
1535 if (cmd == DIOCWDINFO
1536 #ifdef __HAVE_OLD_DISKLABEL
1537 || cmd == ODIOCWDINFO
1538 #endif
1539 )
1540 error = writedisklabel(RAIDLABELDEV(dev),
1541 raidstrategy, rs->sc_dkdev.dk_label,
1542 rs->sc_dkdev.dk_cpulabel);
1543 }
1544 rs->sc_flags &= ~RAIDF_LABELLING;
1545
1546 raidunlock(rs);
1547
1548 if (error)
1549 return (error);
1550 break;
1551 }
1552
1553 case DIOCWLABEL:
1554 if (*(int *) data != 0)
1555 rs->sc_flags |= RAIDF_WLABEL;
1556 else
1557 rs->sc_flags &= ~RAIDF_WLABEL;
1558 break;
1559
1560 case DIOCGDEFLABEL:
1561 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1562 break;
1563
1564 #ifdef __HAVE_OLD_DISKLABEL
1565 case ODIOCGDEFLABEL:
1566 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1567 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1568 return ENOTTY;
1569 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1570 break;
1571 #endif
1572
1573 default:
1574 retcode = ENOTTY;
1575 }
1576 return (retcode);
1577
1578 }
1579
1580
1581 /* raidinit -- complete the rest of the initialization for the
1582 RAIDframe device. */
1583
1584
1585 static void
1586 raidinit(RF_Raid_t *raidPtr)
1587 {
1588 struct raid_softc *rs;
1589 int unit;
1590
1591 unit = raidPtr->raidid;
1592
1593 rs = &raid_softc[unit];
1594
1595 /* XXX should check return code first... */
1596 rs->sc_flags |= RAIDF_INITED;
1597
1598 /* XXX doesn't check bounds. */
1599 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1600
1601 rs->sc_dkdev.dk_name = rs->sc_xname;
1602
1603 /* disk_attach actually creates space for the CPU disklabel, among
1604 * other things, so it's critical to call this *BEFORE* we try putzing
1605 * with disklabels. */
1606
1607 pseudo_disk_attach(&rs->sc_dkdev);
1608
1609 /* XXX There may be a weird interaction here between this, and
1610 * protectedSectors, as used in RAIDframe. */
1611
1612 rs->sc_size = raidPtr->totalSectors;
1613 }
1614 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1615 /* wake up the daemon & tell it to get us a spare table
1616 * XXX
1617 * the entries in the queues should be tagged with the raidPtr
1618 * so that in the extremely rare case that two recons happen at once,
1619 * we know for which device were requesting a spare table
1620 * XXX
1621 *
1622 * XXX This code is not currently used. GO
1623 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* Queue the request and poke whoever is waiting on the
	 * spare-table wait queue. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* NOTE(review): a previous comment here claimed "mpsleep unlocks
	 * the mutex", but tsleep() does not release rf_sparet_wait_mutex,
	 * so the mutex is held across this sleep -- confirm (this code is
	 * not currently used; see the XXX above). */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		       "raidframe getsparetable", 0);
	}
	/* Pop the daemon's response off the response queue. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The return status is delivered in the "fcol" field. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1648 #endif
1649
1650 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1651 * bp & passes it down.
1652 * any calls originating in the kernel must use non-blocking I/O
1653 * do some extra sanity checking to return "appropriate" error values for
1654 * certain conditions (to make some standard utilities work)
1655 *
1656 * Formerly known as: rf_DoAccessKernel
1657 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* A component failed since we last ran; push updated
		 * component labels out first.  The mutex is dropped
		 * around the label update. */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Loop invariant: raidPtr->mutex is held when the openings test
	 * is made and released immediately afterwards. */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		/* sum detects requests running past the end of the device;
		 * the extra comparisons below catch arithmetic wrap. */
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			/* Out of range (or overflowed): fail the buf. */
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		if (bp->b_bcount & raidPtr->sectorMask) {
			/* Not a multiple of the sector size: reject. */
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Claim one of the per-set openings for this request. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* rf_DoAccess() refused the request outright:
			 * complete the buf with the error and keep
			 * draining the queue. */
			bp->b_error = rc;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1781
1782
1783
1784
1785 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1786
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;

#if DIAGNOSTIC
	if (queue->raidPtr->raidid >= numraid) {
		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
		       numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO");
	}
#endif

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* Clear any stale error state left over in the (reused) buf. */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif

	/*
	 * context for raidiodone
	 */

	bp->b_fspriv.bf_private = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): the doubled parentheses suggest this was
		 * once a db1_printf(); as written it is an unconditional
		 * printf -- confirm intent. */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Fill in the buf for the component I/O; KernelWakeupFunc
		 * will be called from biodone() when it completes. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		       op | bp->b_flags, queue->rf_cinfo->ci_dev,
		       req->sectorOffset, req->numSector,
		       req->buf, KernelWakeupFunc, (void *) req,
		       queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			    (int) req->sectorOffset, (int) req->numSector,
			    (int) (req->numSector <<
				   queue->raidPtr->logBytesPerSector),
			    (int) queue->raidPtr->logBytesPerSector));
		/* Writes must bump the vnode's output counter before
		 * going down to the strategy routine. */
		if ((bp->b_flags & B_READ) == 0) {
			bp->b_vp->v_numoutput++;
		}
		VOP_STRATEGY(bp->b_vp, bp);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
1882 /* this is the callback function associated with a I/O invoked from
1883 kernel code.
1884 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	/* InitBP() stashed the request pointer in bf_private for us. */
	req = bp->b_fspriv.bf_private;

	queue = (RF_DiskQueue_t *) req->queue;

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */

	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}
1954
1955
1956
1957 /*
1958 * initialize a buf structure for doing an I/O in the kernel.
1959 */
1960 static void
1961 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1962 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf,
1963 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
1964 struct proc *b_proc)
1965 {
1966 /* bp->b_flags = B_PHYS | rw_flag; */
1967 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1968 bp->b_bcount = numSect << logBytesPerSector;
1969 bp->b_bufsize = bp->b_bcount;
1970 bp->b_error = 0;
1971 bp->b_dev = dev;
1972 bp->b_data = bf;
1973 bp->b_blkno = startSect;
1974 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1975 if (bp->b_bcount == 0) {
1976 panic("bp->b_bcount is zero in InitBP!!");
1977 }
1978 bp->b_proc = b_proc;
1979 bp->b_iodone = cbFunc;
1980 bp->b_vp = b_vp;
1981
1982 }
1983
1984 static void
1985 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
1986 struct disklabel *lp)
1987 {
1988 memset(lp, 0, sizeof(*lp));
1989
1990 /* fabricate a label... */
1991 lp->d_secperunit = raidPtr->totalSectors;
1992 lp->d_secsize = raidPtr->bytesPerSector;
1993 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1994 lp->d_ntracks = 4 * raidPtr->numCol;
1995 lp->d_ncylinders = raidPtr->totalSectors /
1996 (lp->d_nsectors * lp->d_ntracks);
1997 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1998
1999 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2000 lp->d_type = DTYPE_RAID;
2001 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2002 lp->d_rpm = 3600;
2003 lp->d_interleave = 1;
2004 lp->d_flags = 0;
2005
2006 lp->d_partitions[RAW_PART].p_offset = 0;
2007 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2008 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2009 lp->d_npartitions = RAW_PART + 1;
2010
2011 lp->d_magic = DISKMAGIC;
2012 lp->d_magic2 = DISKMAGIC;
2013 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2014
2015 }
2016 /*
2017 * Read the disklabel from the raid device. If one is not present, fake one
2018 * up.
2019 */
2020 static void
2021 raidgetdisklabel(dev_t dev)
2022 {
2023 int unit = raidunit(dev);
2024 struct raid_softc *rs = &raid_softc[unit];
2025 const char *errstring;
2026 struct disklabel *lp = rs->sc_dkdev.dk_label;
2027 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2028 RF_Raid_t *raidPtr;
2029
2030 db1_printf(("Getting the disklabel...\n"));
2031
2032 memset(clp, 0, sizeof(*clp));
2033
2034 raidPtr = raidPtrs[unit];
2035
2036 raidgetdefaultlabel(raidPtr, rs, lp);
2037
2038 /*
2039 * Call the generic disklabel extraction routine.
2040 */
2041 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2042 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2043 if (errstring)
2044 raidmakedisklabel(rs);
2045 else {
2046 int i;
2047 struct partition *pp;
2048
2049 /*
2050 * Sanity check whether the found disklabel is valid.
2051 *
2052 * This is necessary since total size of the raid device
2053 * may vary when an interleave is changed even though exactly
2054 * same componets are used, and old disklabel may used
2055 * if that is found.
2056 */
2057 if (lp->d_secperunit != rs->sc_size)
2058 printf("raid%d: WARNING: %s: "
2059 "total sector size in disklabel (%d) != "
2060 "the size of raid (%ld)\n", unit, rs->sc_xname,
2061 lp->d_secperunit, (long) rs->sc_size);
2062 for (i = 0; i < lp->d_npartitions; i++) {
2063 pp = &lp->d_partitions[i];
2064 if (pp->p_offset + pp->p_size > rs->sc_size)
2065 printf("raid%d: WARNING: %s: end of partition `%c' "
2066 "exceeds the size of raid (%ld)\n",
2067 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2068 }
2069 }
2070
2071 }
2072 /*
2073 * Take care of things one might want to take care of in the event
2074 * that a disklabel isn't present.
2075 */
2076 static void
2077 raidmakedisklabel(struct raid_softc *rs)
2078 {
2079 struct disklabel *lp = rs->sc_dkdev.dk_label;
2080 db1_printf(("Making a label..\n"));
2081
2082 /*
2083 * For historical reasons, if there's no disklabel present
2084 * the raw partition must be marked FS_BSDFFS.
2085 */
2086
2087 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2088
2089 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2090
2091 lp->d_checksum = dkcksum(lp);
2092 }
2093 /*
2094 * Lookup the provided name in the filesystem. If the file exists,
2095 * is a valid block device, and isn't being used by anyone else,
2096 * set *vpp to the file's vnode.
2097 * You'll find the original of this in ccd.c
2098 */
2099 int
2100 raidlookup(char *path, struct lwp *l, struct vnode **vpp)
2101 {
2102 struct nameidata nd;
2103 struct vnode *vp;
2104 struct proc *p;
2105 struct vattr va;
2106 int error;
2107
2108 p = l ? l->l_proc : NULL;
2109 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, l);
2110 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2111 return (error);
2112 }
2113 vp = nd.ni_vp;
2114 if (vp->v_usecount > 1) {
2115 VOP_UNLOCK(vp, 0);
2116 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
2117 return (EBUSY);
2118 }
2119 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, l)) != 0) {
2120 VOP_UNLOCK(vp, 0);
2121 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
2122 return (error);
2123 }
2124 /* XXX: eventually we should handle VREG, too. */
2125 if (va.va_type != VBLK) {
2126 VOP_UNLOCK(vp, 0);
2127 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
2128 return (ENOTBLK);
2129 }
2130 VOP_UNLOCK(vp, 0);
2131 *vpp = vp;
2132 return (0);
2133 }
2134 /*
2135 * Wait interruptibly for an exclusive lock.
2136 *
2137 * XXX
2138 * Several drivers do this; it should be abstracted and made MP-safe.
2139 * (Hmm... where have we seen this warning before :-> GO )
2140 */
2141 static int
2142 raidlock(struct raid_softc *rs)
2143 {
2144 int error;
2145
2146 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2147 rs->sc_flags |= RAIDF_WANTED;
2148 if ((error =
2149 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2150 return (error);
2151 }
2152 rs->sc_flags |= RAIDF_LOCKED;
2153 return (0);
2154 }
2155 /*
2156 * Unlock and wake up any waiters.
2157 */
2158 static void
2159 raidunlock(struct raid_softc *rs)
2160 {
2161
2162 rs->sc_flags &= ~RAIDF_LOCKED;
2163 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2164 rs->sc_flags &= ~RAIDF_WANTED;
2165 wakeup(rs);
2166 }
2167 }
2168
2169
2170 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2171 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2172
2173 int
2174 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2175 {
2176 RF_ComponentLabel_t clabel;
2177 raidread_component_label(dev, b_vp, &clabel);
2178 clabel.mod_counter = mod_counter;
2179 clabel.clean = RF_RAID_CLEAN;
2180 raidwrite_component_label(dev, b_vp, &clabel);
2181 return(0);
2182 }
2183
2184
2185 int
2186 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2187 {
2188 RF_ComponentLabel_t clabel;
2189 raidread_component_label(dev, b_vp, &clabel);
2190 clabel.mod_counter = mod_counter;
2191 clabel.clean = RF_RAID_DIRTY;
2192 raidwrite_component_label(dev, b_vp, &clabel);
2193 return(0);
2194 }
2195
2196 /* ARGSUSED */
2197 int
2198 raidread_component_label(dev_t dev, struct vnode *b_vp,
2199 RF_ComponentLabel_t *clabel)
2200 {
2201 struct buf *bp;
2202 const struct bdevsw *bdev;
2203 int error;
2204
2205 /* XXX should probably ensure that we don't try to do this if
2206 someone has changed rf_protected_sectors. */
2207
2208 if (b_vp == NULL) {
2209 /* For whatever reason, this component is not valid.
2210 Don't try to read a component label from it. */
2211 return(EINVAL);
2212 }
2213
2214 /* get a block of the appropriate size... */
2215 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2216 bp->b_dev = dev;
2217
2218 /* get our ducks in a row for the read */
2219 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2220 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2221 bp->b_flags |= B_READ;
2222 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2223
2224 bdev = bdevsw_lookup(bp->b_dev);
2225 if (bdev == NULL)
2226 return (ENXIO);
2227 (*bdev->d_strategy)(bp);
2228
2229 error = biowait(bp);
2230
2231 if (!error) {
2232 memcpy(clabel, bp->b_data,
2233 sizeof(RF_ComponentLabel_t));
2234 }
2235
2236 brelse(bp);
2237 return(error);
2238 }
2239 /* ARGSUSED */
2240 int
2241 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2242 RF_ComponentLabel_t *clabel)
2243 {
2244 struct buf *bp;
2245 const struct bdevsw *bdev;
2246 int error;
2247
2248 /* get a block of the appropriate size... */
2249 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2250 bp->b_dev = dev;
2251
2252 /* get our ducks in a row for the write */
2253 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2254 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2255 bp->b_flags |= B_WRITE;
2256 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2257
2258 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2259
2260 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2261
2262 bdev = bdevsw_lookup(bp->b_dev);
2263 if (bdev == NULL)
2264 return (ENXIO);
2265 (*bdev->d_strategy)(bp);
2266 error = biowait(bp);
2267 brelse(bp);
2268 if (error) {
2269 #if 1
2270 printf("Failed to write RAID component info!\n");
2271 #endif
2272 }
2273
2274 return(error);
2275 }
2276
/*
 * rf_markalldirty: bump the array's mod counter and mark the component
 * label of every non-failed component (and every in-service spare)
 * dirty, so an unclean shutdown can be detected later.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (clabel.status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(
				    raidPtr->Disks[c].dev,
				    raidPtr->raid_cinfo[c].ci_vp,
				    raidPtr->mod_counter);
			}
		}
	}

	/* Spares that are in active use get a freshly initialized
	   label recording which column they stand in for. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			clabel.row = 0;
			clabel.column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr->Disks[sparecol].dev,
				      raidPtr->raid_cinfo[sparecol].ci_vp,
				      raidPtr->mod_counter);
		}
	}
}
2347
2348
/*
 * rf_update_component_labels: bump the mod counter and rewrite the
 * component label of every optimal component and every in-service
 * spare.  When 'final' is RF_FINAL_COMPONENT_UPDATE and parity is
 * known good, the labels are additionally marked clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			/* make sure status is noted */
			clabel.status = rf_ds_optimal;
			/* bump the counter */
			clabel.mod_counter = raidPtr->mod_counter;

			raidwrite_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						      raidPtr->Disks[c].dev,
						      raidPtr->raid_cinfo[c].ci_vp,
						      raidPtr->mod_counter);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			clabel.mod_counter = raidPtr->mod_counter;
			clabel.column = scol;
			clabel.status = rf_ds_optimal;

			raidwrite_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( raidPtr->Disks[sparecol].dev,
						       raidPtr->raid_cinfo[sparecol].ci_vp,
						       raidPtr->mod_counter);
				}
			}
		}
	}
}
2439
/*
 * rf_close_component: release the vnode of a single component.
 * Auto-configured components were opened directly with VOP_OPEN, so
 * they are closed with VOP_CLOSE + vput; all others were opened via
 * vn_close()'s counterpart and are closed with vn_close() using the
 * engine thread's credentials.
 */
void
rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
{
	struct proc *p;
	struct lwp *l;

	/* credentials for vn_close() come from the engine thread */
	p = raidPtr->engine_thread;
	l = LIST_FIRST(&p->p_lwps);

	if (vp != NULL) {
		if (auto_configured == 1) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
			vput(vp);

		} else {
			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
		}
	}
}
2460
2461
2462 void
2463 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2464 {
2465 int r,c;
2466 struct vnode *vp;
2467 int acd;
2468
2469
2470 /* We take this opportunity to close the vnodes like we should.. */
2471
2472 for (c = 0; c < raidPtr->numCol; c++) {
2473 vp = raidPtr->raid_cinfo[c].ci_vp;
2474 acd = raidPtr->Disks[c].auto_configured;
2475 rf_close_component(raidPtr, vp, acd);
2476 raidPtr->raid_cinfo[c].ci_vp = NULL;
2477 raidPtr->Disks[c].auto_configured = 0;
2478 }
2479
2480 for (r = 0; r < raidPtr->numSpare; r++) {
2481 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2482 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2483 rf_close_component(raidPtr, vp, acd);
2484 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2485 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2486 }
2487 }
2488
2489
/*
 * rf_ReconThread: kernel-thread body that fails the component named in
 * the request and, if RF_FDFLAGS_RECON is set, initiates reconstruction.
 * The request structure is freed here; the thread exits when done.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	/* block disk interrupts while the recon flag is flipped */
	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* the request was ours to free */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2511
/*
 * rf_RewriteParityThread: kernel-thread body that rewrites all parity
 * for the array.  On success the array is flagged parity-clean; anyone
 * blocked in shutdown waiting for the rewrite is woken.  The thread
 * exits when done.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2541
2542
/*
 * rf_CopybackThread: kernel-thread body that copies reconstructed data
 * back from the spare to a replaced component, then exits.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	/* block disk interrupts for the duration of the copyback */
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2557
2558
/*
 * rf_ReconstructInPlaceThread: kernel-thread body that reconstructs
 * the requested column in place (onto the same device).  The request
 * structure is freed here; the thread exits when done.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	/* the request was ours to free */
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2576
2577 RF_AutoConfig_t *
2578 rf_find_raid_components()
2579 {
2580 struct vnode *vp;
2581 struct disklabel label;
2582 struct device *dv;
2583 dev_t dev;
2584 int bmajor;
2585 int error;
2586 int i;
2587 int good_one;
2588 RF_ComponentLabel_t *clabel;
2589 RF_AutoConfig_t *ac_list;
2590 RF_AutoConfig_t *ac;
2591
2592
2593 /* initialize the AutoConfig list */
2594 ac_list = NULL;
2595
2596 /* we begin by trolling through *all* the devices on the system */
2597
2598 for (dv = alldevs.tqh_first; dv != NULL;
2599 dv = dv->dv_list.tqe_next) {
2600
2601 /* we are only interested in disks... */
2602 if (dv->dv_class != DV_DISK)
2603 continue;
2604
2605 /* we don't care about floppies... */
2606 if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
2607 continue;
2608 }
2609
2610 /* we don't care about CD's... */
2611 if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
2612 continue;
2613 }
2614
2615 /* hdfd is the Atari/Hades floppy driver */
2616 if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
2617 continue;
2618 }
2619 /* fdisa is the Atari/Milan floppy driver */
2620 if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
2621 continue;
2622 }
2623
2624 /* need to find the device_name_to_block_device_major stuff */
2625 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2626
2627 /* get a vnode for the raw partition of this disk */
2628
2629 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2630 if (bdevvp(dev, &vp))
2631 panic("RAID can't alloc vnode");
2632
2633 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2634
2635 if (error) {
2636 /* "Who cares." Continue looking
2637 for something that exists*/
2638 vput(vp);
2639 continue;
2640 }
2641
2642 /* Ok, the disk exists. Go get the disklabel. */
2643 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
2644 if (error) {
2645 /*
2646 * XXX can't happen - open() would
2647 * have errored out (or faked up one)
2648 */
2649 if (error != ENOTTY)
2650 printf("RAIDframe: can't get label for dev "
2651 "%s (%d)\n", dv->dv_xname, error);
2652 }
2653
2654 /* don't need this any more. We'll allocate it again
2655 a little later if we really do... */
2656 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2657 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2658 vput(vp);
2659
2660 if (error)
2661 continue;
2662
2663 for (i=0; i < label.d_npartitions; i++) {
2664 /* We only support partitions marked as RAID */
2665 if (label.d_partitions[i].p_fstype != FS_RAID)
2666 continue;
2667
2668 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2669 if (bdevvp(dev, &vp))
2670 panic("RAID can't alloc vnode");
2671
2672 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2673 if (error) {
2674 /* Whatever... */
2675 vput(vp);
2676 continue;
2677 }
2678
2679 good_one = 0;
2680
2681 clabel = (RF_ComponentLabel_t *)
2682 malloc(sizeof(RF_ComponentLabel_t),
2683 M_RAIDFRAME, M_NOWAIT);
2684 if (clabel == NULL) {
2685 /* XXX CLEANUP HERE */
2686 printf("RAID auto config: out of memory!\n");
2687 return(NULL); /* XXX probably should panic? */
2688 }
2689
2690 if (!raidread_component_label(dev, vp, clabel)) {
2691 /* Got the label. Does it look reasonable? */
2692 if (rf_reasonable_label(clabel) &&
2693 (clabel->partitionSize <=
2694 label.d_partitions[i].p_size)) {
2695 #if DEBUG
2696 printf("Component on: %s%c: %d\n",
2697 dv->dv_xname, 'a'+i,
2698 label.d_partitions[i].p_size);
2699 rf_print_component_label(clabel);
2700 #endif
2701 /* if it's reasonable, add it,
2702 else ignore it. */
2703 ac = (RF_AutoConfig_t *)
2704 malloc(sizeof(RF_AutoConfig_t),
2705 M_RAIDFRAME,
2706 M_NOWAIT);
2707 if (ac == NULL) {
2708 /* XXX should panic?? */
2709 return(NULL);
2710 }
2711
2712 snprintf(ac->devname,
2713 sizeof(ac->devname), "%s%c",
2714 dv->dv_xname, 'a'+i);
2715 ac->dev = dev;
2716 ac->vp = vp;
2717 ac->clabel = clabel;
2718 ac->next = ac_list;
2719 ac_list = ac;
2720 good_one = 1;
2721 }
2722 }
2723 if (!good_one) {
2724 /* cleanup */
2725 free(clabel, M_RAIDFRAME);
2726 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2727 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2728 vput(vp);
2729 }
2730 }
2731 }
2732 return(ac_list);
2733 }
2734
2735 static int
2736 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2737 {
2738
2739 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2740 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2741 ((clabel->clean == RF_RAID_CLEAN) ||
2742 (clabel->clean == RF_RAID_DIRTY)) &&
2743 clabel->row >=0 &&
2744 clabel->column >= 0 &&
2745 clabel->num_rows > 0 &&
2746 clabel->num_columns > 0 &&
2747 clabel->row < clabel->num_rows &&
2748 clabel->column < clabel->num_columns &&
2749 clabel->blockSize > 0 &&
2750 clabel->numBlocks > 0) {
2751 /* label looks reasonable enough... */
2752 return(1);
2753 }
2754 return(0);
2755 }
2756
2757
#if DEBUG
/*
 * rf_print_component_label: dump the interesting fields of a component
 * label to the console.  Debug builds only.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status );
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No" );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
2785
2786 RF_ConfigSet_t *
2787 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2788 {
2789 RF_AutoConfig_t *ac;
2790 RF_ConfigSet_t *config_sets;
2791 RF_ConfigSet_t *cset;
2792 RF_AutoConfig_t *ac_next;
2793
2794
2795 config_sets = NULL;
2796
2797 /* Go through the AutoConfig list, and figure out which components
2798 belong to what sets. */
2799 ac = ac_list;
2800 while(ac!=NULL) {
2801 /* we're going to putz with ac->next, so save it here
2802 for use at the end of the loop */
2803 ac_next = ac->next;
2804
2805 if (config_sets == NULL) {
2806 /* will need at least this one... */
2807 config_sets = (RF_ConfigSet_t *)
2808 malloc(sizeof(RF_ConfigSet_t),
2809 M_RAIDFRAME, M_NOWAIT);
2810 if (config_sets == NULL) {
2811 panic("rf_create_auto_sets: No memory!");
2812 }
2813 /* this one is easy :) */
2814 config_sets->ac = ac;
2815 config_sets->next = NULL;
2816 config_sets->rootable = 0;
2817 ac->next = NULL;
2818 } else {
2819 /* which set does this component fit into? */
2820 cset = config_sets;
2821 while(cset!=NULL) {
2822 if (rf_does_it_fit(cset, ac)) {
2823 /* looks like it matches... */
2824 ac->next = cset->ac;
2825 cset->ac = ac;
2826 break;
2827 }
2828 cset = cset->next;
2829 }
2830 if (cset==NULL) {
2831 /* didn't find a match above... new set..*/
2832 cset = (RF_ConfigSet_t *)
2833 malloc(sizeof(RF_ConfigSet_t),
2834 M_RAIDFRAME, M_NOWAIT);
2835 if (cset == NULL) {
2836 panic("rf_create_auto_sets: No memory!");
2837 }
2838 cset->ac = ac;
2839 ac->next = NULL;
2840 cset->next = config_sets;
2841 cset->rootable = 0;
2842 config_sets = cset;
2843 }
2844 }
2845 ac = ac_next;
2846 }
2847
2848
2849 return(config_sets);
2850 }
2851
2852 static int
2853 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2854 {
2855 RF_ComponentLabel_t *clabel1, *clabel2;
2856
2857 /* If this one matches the *first* one in the set, that's good
2858 enough, since the other members of the set would have been
2859 through here too... */
2860 /* note that we are not checking partitionSize here..
2861
2862 Note that we are also not checking the mod_counters here.
2863 If everything else matches execpt the mod_counter, that's
2864 good enough for this test. We will deal with the mod_counters
2865 a little later in the autoconfiguration process.
2866
2867 (clabel1->mod_counter == clabel2->mod_counter) &&
2868
2869 The reason we don't check for this is that failed disks
2870 will have lower modification counts. If those disks are
2871 not added to the set they used to belong to, then they will
2872 form their own set, which may result in 2 different sets,
2873 for example, competing to be configured at raid0, and
2874 perhaps competing to be the root filesystem set. If the
2875 wrong ones get configured, or both attempt to become /,
2876 weird behaviour and or serious lossage will occur. Thus we
2877 need to bring them into the fold here, and kick them out at
2878 a later point.
2879
2880 */
2881
2882 clabel1 = cset->ac->clabel;
2883 clabel2 = ac->clabel;
2884 if ((clabel1->version == clabel2->version) &&
2885 (clabel1->serial_number == clabel2->serial_number) &&
2886 (clabel1->num_rows == clabel2->num_rows) &&
2887 (clabel1->num_columns == clabel2->num_columns) &&
2888 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2889 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2890 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2891 (clabel1->parityConfig == clabel2->parityConfig) &&
2892 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2893 (clabel1->blockSize == clabel2->blockSize) &&
2894 (clabel1->numBlocks == clabel2->numBlocks) &&
2895 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2896 (clabel1->root_partition == clabel2->root_partition) &&
2897 (clabel1->last_unit == clabel2->last_unit) &&
2898 (clabel1->config_order == clabel2->config_order)) {
2899 /* if it get's here, it almost *has* to be a match */
2900 } else {
2901 /* it's not consistent with somebody in the set..
2902 punt */
2903 return(0);
2904 }
2905 /* all was fine.. it must fit... */
2906 return(1);
2907 }
2908
/*
 * rf_have_enough_components: decide whether the config set has enough
 * live components (at the newest mod_counter seen in the set) to be
 * configured.  Returns 1 if configuration may proceed, 0 otherwise.
 * RAID 1 is special-cased: losing both members of an even/odd pair is
 * fatal, but losing one of each pair is tolerated.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* the set's mod_counter is the maximum over its members;
	   components with an older counter are treated as missing */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a component claiming this column at the
		   current mod_counter */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#if DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no failures; RAID 4/5 tolerate one */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3011
/*
 * rf_create_configuration: build an RF_Config_t for the given
 * auto-config component list, taking the geometry from the first
 * component's label and filling in the device name of every column.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
			RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	/* note: this also forces the label's num_rows to 1 */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	/* NOTE(review): strcpy assumes ac->devname fits in
	   config->devnames[0][col] -- presumably both are sized the
	   same; verify against the struct definitions. */
	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	/* clear out the debug-variable strings */
	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
3046
3047 int
3048 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3049 {
3050 RF_ComponentLabel_t clabel;
3051 struct vnode *vp;
3052 dev_t dev;
3053 int column;
3054 int sparecol;
3055
3056 raidPtr->autoconfigure = new_value;
3057
3058 for(column=0; column<raidPtr->numCol; column++) {
3059 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3060 dev = raidPtr->Disks[column].dev;
3061 vp = raidPtr->raid_cinfo[column].ci_vp;
3062 raidread_component_label(dev, vp, &clabel);
3063 clabel.autoconfigure = new_value;
3064 raidwrite_component_label(dev, vp, &clabel);
3065 }
3066 }
3067 for(column = 0; column < raidPtr->numSpare ; column++) {
3068 sparecol = raidPtr->numCol + column;
3069 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3070 dev = raidPtr->Disks[sparecol].dev;
3071 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3072 raidread_component_label(dev, vp, &clabel);
3073 clabel.autoconfigure = new_value;
3074 raidwrite_component_label(dev, vp, &clabel);
3075 }
3076 }
3077 return(new_value);
3078 }
3079
3080 int
3081 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3082 {
3083 RF_ComponentLabel_t clabel;
3084 struct vnode *vp;
3085 dev_t dev;
3086 int column;
3087 int sparecol;
3088
3089 raidPtr->root_partition = new_value;
3090 for(column=0; column<raidPtr->numCol; column++) {
3091 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3092 dev = raidPtr->Disks[column].dev;
3093 vp = raidPtr->raid_cinfo[column].ci_vp;
3094 raidread_component_label(dev, vp, &clabel);
3095 clabel.root_partition = new_value;
3096 raidwrite_component_label(dev, vp, &clabel);
3097 }
3098 }
3099 for(column = 0; column < raidPtr->numSpare ; column++) {
3100 sparecol = raidPtr->numCol + column;
3101 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3102 dev = raidPtr->Disks[sparecol].dev;
3103 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3104 raidread_component_label(dev, vp, &clabel);
3105 clabel.root_partition = new_value;
3106 raidwrite_component_label(dev, vp, &clabel);
3107 }
3108 }
3109 return(new_value);
3110 }
3111
3112 void
3113 rf_release_all_vps(RF_ConfigSet_t *cset)
3114 {
3115 RF_AutoConfig_t *ac;
3116
3117 ac = cset->ac;
3118 while(ac!=NULL) {
3119 /* Close the vp, and give it back */
3120 if (ac->vp) {
3121 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3122 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3123 vput(ac->vp);
3124 ac->vp = NULL;
3125 }
3126 ac = ac->next;
3127 }
3128 }
3129
3130
3131 void
3132 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3133 {
3134 RF_AutoConfig_t *ac;
3135 RF_AutoConfig_t *next_ac;
3136
3137 ac = cset->ac;
3138 while(ac!=NULL) {
3139 next_ac = ac->next;
3140 /* nuke the label */
3141 free(ac->clabel, M_RAIDFRAME);
3142 /* cleanup the config structure */
3143 free(ac, M_RAIDFRAME);
3144 /* "next.." */
3145 ac = next_ac;
3146 }
3147 /* and, finally, nuke the config set */
3148 free(cset, M_RAIDFRAME);
3149 }
3150
3151
/*
 * raid_init_component_label: fill *clabel with values derived from the
 * current in-core array state.  Per-component fields (row, column,
 * partitionSize) are left for the caller to set.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	clabel->numBlocks = raidPtr->sectorsPerDisk;

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;
}
3179
3180 int
3181 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3182 {
3183 RF_Raid_t *raidPtr;
3184 RF_Config_t *config;
3185 int raidID;
3186 int retcode;
3187
3188 #if DEBUG
3189 printf("RAID autoconfigure\n");
3190 #endif
3191
3192 retcode = 0;
3193 *unit = -1;
3194
3195 /* 1. Create a config structure */
3196
3197 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3198 M_RAIDFRAME,
3199 M_NOWAIT);
3200 if (config==NULL) {
3201 printf("Out of mem!?!?\n");
3202 /* XXX do something more intelligent here. */
3203 return(1);
3204 }
3205
3206 memset(config, 0, sizeof(RF_Config_t));
3207
3208 /*
3209 2. Figure out what RAID ID this one is supposed to live at
3210 See if we can get the same RAID dev that it was configured
3211 on last time..
3212 */
3213
3214 raidID = cset->ac->clabel->last_unit;
3215 if ((raidID < 0) || (raidID >= numraid)) {
3216 /* let's not wander off into lala land. */
3217 raidID = numraid - 1;
3218 }
3219 if (raidPtrs[raidID]->valid != 0) {
3220
3221 /*
3222 Nope... Go looking for an alternative...
3223 Start high so we don't immediately use raid0 if that's
3224 not taken.
3225 */
3226
3227 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3228 if (raidPtrs[raidID]->valid == 0) {
3229 /* can use this one! */
3230 break;
3231 }
3232 }
3233 }
3234
3235 if (raidID < 0) {
3236 /* punt... */
3237 printf("Unable to auto configure this set!\n");
3238 printf("(Out of RAID devs!)\n");
3239 return(1);
3240 }
3241
3242 #if DEBUG
3243 printf("Configuring raid%d:\n",raidID);
3244 #endif
3245
3246 raidPtr = raidPtrs[raidID];
3247
3248 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3249 raidPtr->raidid = raidID;
3250 raidPtr->openings = RAIDOUTSTANDING;
3251
3252 /* 3. Build the configuration structure */
3253 rf_create_configuration(cset->ac, config, raidPtr);
3254
3255 /* 4. Do the configuration */
3256 retcode = rf_Configure(raidPtr, config, cset->ac);
3257
3258 if (retcode == 0) {
3259
3260 raidinit(raidPtrs[raidID]);
3261
3262 rf_markalldirty(raidPtrs[raidID]);
3263 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3264 if (cset->ac->clabel->root_partition==1) {
3265 /* everything configured just fine. Make a note
3266 that this set is eligible to be root. */
3267 cset->rootable = 1;
3268 /* XXX do this here? */
3269 raidPtrs[raidID]->root_partition = 1;
3270 }
3271 }
3272
3273 /* 5. Cleanup */
3274 free(config, M_RAIDFRAME);
3275
3276 *unit = raidID;
3277 return(retcode);
3278 }
3279
/*
 * rf_disk_unbusy: credit the completed transfer described by the
 * access descriptor to the RAID device's disk statistics.
 */
void
rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
{
	struct buf *bp;

	bp = (struct buf *)desc->bp;
	/* bcount - resid is the number of bytes actually transferred */
	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
		    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
}
3289
/*
 * rf_pool_init: initialize a pool of 'size'-byte items, pre-allocate
 * xmin items, and set the low/high watermarks to xmin/xmax.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3299
3300 /*
3301 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3302 * if there is IO pending and if that IO could possibly be done for a
3303 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3304 * otherwise.
3305 *
3306 */
3307
3308 int
3309 rf_buf_queue_check(int raidid)
3310 {
3311 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
3312 raidPtrs[raidid]->openings > 0) {
3313 /* there is work to do */
3314 return 0;
3315 }
3316 /* default is nothing to do */
3317 return 1;
3318 }
3319