rf_netbsdkintf.c revision 1.202 1 /* $NetBSD: rf_netbsdkintf.c,v 1.202 2006/03/01 21:41:49 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.202 2006/03/01 21:41:49 oster Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/bufq.h>
169 #include <sys/user.h>
170 #include <sys/reboot.h>
171
172 #include <dev/raidframe/raidframevar.h>
173 #include <dev/raidframe/raidframeio.h>
174 #include "raid.h"
175 #include "opt_raid_autoconfig.h"
176 #include "rf_raid.h"
177 #include "rf_copyback.h"
178 #include "rf_dag.h"
179 #include "rf_dagflags.h"
180 #include "rf_desc.h"
181 #include "rf_diskqueue.h"
182 #include "rf_etimer.h"
183 #include "rf_general.h"
184 #include "rf_kintf.h"
185 #include "rf_options.h"
186 #include "rf_driver.h"
187 #include "rf_parityscan.h"
188 #include "rf_threadstuff.h"
189
/*
 * db1_printf((fmt, args...)) -- level-1 debug printf.
 *
 * The argument is a complete, parenthesised printf argument list.  With
 * DEBUG defined the message is printed when rf_kdebug_level > 0; without
 * DEBUG the macro expands to an empty statement.
 *
 * Both forms are wrapped in do { } while (0) so that the macro behaves as
 * exactly one statement: it needs the trailing semicolon, it cannot capture
 * a following `else', and it is safe as the body of an unbraced if/else.
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
196
197 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
198
199 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
200
201 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
202 * spare table */
203 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
204 * installation process */
205
206 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
207
208 /* prototypes */
209 static void KernelWakeupFunc(struct buf *);
210 static void InitBP(struct buf *, struct vnode *, unsigned,
211 dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *),
212 void *, int, struct proc *);
213 static void raidinit(RF_Raid_t *);
214
215 void raidattach(int);
216
217 dev_type_open(raidopen);
218 dev_type_close(raidclose);
219 dev_type_read(raidread);
220 dev_type_write(raidwrite);
221 dev_type_ioctl(raidioctl);
222 dev_type_strategy(raidstrategy);
223 dev_type_dump(raiddump);
224 dev_type_size(raidsize);
225
226 const struct bdevsw raid_bdevsw = {
227 raidopen, raidclose, raidstrategy, raidioctl,
228 raiddump, raidsize, D_DISK
229 };
230
231 const struct cdevsw raid_cdevsw = {
232 raidopen, raidclose, raidread, raidwrite, raidioctl,
233 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
234 };
235
236 /* XXX Not sure if the following should be replacing the raidPtrs above,
237 or if it should be used in conjunction with that...
238 */
239
240 struct raid_softc {
241 int sc_flags; /* flags */
242 int sc_cflags; /* configuration flags */
243 size_t sc_size; /* size of the raid device */
244 char sc_xname[20]; /* XXX external name */
245 struct disk sc_dkdev; /* generic disk device info */
246 struct bufq_state *buf_queue; /* used for the device queue */
247 };
248 /* sc_flags */
249 #define RAIDF_INITED 0x01 /* unit has been initialized */
250 #define RAIDF_WLABEL 0x02 /* label area is writable */
251 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
252 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
253 #define RAIDF_LOCKED 0x80 /* unit is locked */
254
255 #define raidunit(x) DISKUNIT(x)
256 int numraid = 0;
257
258 extern struct cfdriver raid_cd;
259
260 /*
261 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
262 * Be aware that large numbers can allow the driver to consume a lot of
263 * kernel memory, especially on writes, and in degraded mode reads.
264 *
265 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
266 * a single 64K write will typically require 64K for the old data,
267 * 64K for the old parity, and 64K for the new parity, for a total
268 * of 192K (if the parity buffer is not re-used immediately).
269 * Even if it is used immediately, that's still 128K, which when multiplied
270 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
271 *
272 * Now in degraded mode, for example, a 64K read on the above setup may
273 * require data reconstruction, which will require *all* of the 4 remaining
274 * disks to participate -- 4 * 32K/disk == 128K again.
275 */
276
277 #ifndef RAIDOUTSTANDING
278 #define RAIDOUTSTANDING 6
279 #endif
280
281 #define RAIDLABELDEV(dev) \
282 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
283
284 /* declared here, and made public, for the benefit of KVM stuff.. */
285 struct raid_softc *raid_softc;
286
287 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
288 struct disklabel *);
289 static void raidgetdisklabel(dev_t);
290 static void raidmakedisklabel(struct raid_softc *);
291
292 static int raidlock(struct raid_softc *);
293 static void raidunlock(struct raid_softc *);
294
295 static void rf_markalldirty(RF_Raid_t *);
296
297 struct device *raidrootdev;
298
299 void rf_ReconThread(struct rf_recon_req *);
300 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
301 void rf_CopybackThread(RF_Raid_t *raidPtr);
302 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
303 int rf_autoconfig(struct device *self);
304 void rf_buildroothack(RF_ConfigSet_t *);
305
306 RF_AutoConfig_t *rf_find_raid_components(void);
307 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
308 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
309 static int rf_reasonable_label(RF_ComponentLabel_t *);
310 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
311 int rf_set_autoconfig(RF_Raid_t *, int);
312 int rf_set_rootpartition(RF_Raid_t *, int);
313 void rf_release_all_vps(RF_ConfigSet_t *);
314 void rf_cleanup_config_set(RF_ConfigSet_t *);
315 int rf_have_enough_components(RF_ConfigSet_t *);
316 int rf_auto_config_set(RF_ConfigSet_t *, int *);
317
318 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
319 allow autoconfig to take place.
320 Note that this is overridden by having
321 RAID_AUTOCONFIG as an option in the
322 kernel config file. */
323
324 struct RF_Pools_s rf_pools;
325
326 void
327 raidattach(int num)
328 {
329 int raidID;
330 int i, rc;
331
332 #ifdef DEBUG
333 printf("raidattach: Asked for %d units\n", num);
334 #endif
335
336 if (num <= 0) {
337 #ifdef DIAGNOSTIC
338 panic("raidattach: count <= 0");
339 #endif
340 return;
341 }
342 /* This is where all the initialization stuff gets done. */
343
344 numraid = num;
345
346 /* Make some space for requested number of units... */
347
348 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
349 if (raidPtrs == NULL) {
350 panic("raidPtrs is NULL!!");
351 }
352
353 rf_mutex_init(&rf_sparet_wait_mutex);
354
355 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
356
357 for (i = 0; i < num; i++)
358 raidPtrs[i] = NULL;
359 rc = rf_BootRaidframe();
360 if (rc == 0)
361 printf("Kernelized RAIDframe activated\n");
362 else
363 panic("Serious error booting RAID!!");
364
365 /* put together some datastructures like the CCD device does.. This
366 * lets us lock the device and what-not when it gets opened. */
367
368 raid_softc = (struct raid_softc *)
369 malloc(num * sizeof(struct raid_softc),
370 M_RAIDFRAME, M_NOWAIT);
371 if (raid_softc == NULL) {
372 printf("WARNING: no memory for RAIDframe driver\n");
373 return;
374 }
375
376 memset(raid_softc, 0, num * sizeof(struct raid_softc));
377
378 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
379 M_RAIDFRAME, M_NOWAIT);
380 if (raidrootdev == NULL) {
381 panic("No memory for RAIDframe driver!!?!?!");
382 }
383
384 for (raidID = 0; raidID < num; raidID++) {
385 bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
386 pseudo_disk_init(&raid_softc[raidID].sc_dkdev);
387
388 /* XXXJRT Should use config_attach_pseudo() */
389
390 raidrootdev[raidID].dv_class = DV_DISK;
391 raidrootdev[raidID].dv_cfdata = NULL;
392 raidrootdev[raidID].dv_unit = raidID;
393 raidrootdev[raidID].dv_parent = NULL;
394 raidrootdev[raidID].dv_flags = 0;
395 raidrootdev[raidID].dv_cfdriver = &raid_cd;
396 snprintf(raidrootdev[raidID].dv_xname,
397 sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID);
398
399 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
400 (RF_Raid_t *));
401 if (raidPtrs[raidID] == NULL) {
402 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
403 numraid = raidID;
404 return;
405 }
406 }
407
408 #ifdef RAID_AUTOCONFIG
409 raidautoconfig = 1;
410 #endif
411
412 /*
413 * Register a finalizer which will be used to auto-config RAID
414 * sets once all real hardware devices have been found.
415 */
416 if (config_finalize_register(NULL, rf_autoconfig) != 0)
417 printf("WARNING: unable to register RAIDframe finalizer\n");
418 }
419
420 int
421 rf_autoconfig(struct device *self)
422 {
423 RF_AutoConfig_t *ac_list;
424 RF_ConfigSet_t *config_sets;
425
426 if (raidautoconfig == 0)
427 return (0);
428
429 /* XXX This code can only be run once. */
430 raidautoconfig = 0;
431
432 /* 1. locate all RAID components on the system */
433 #ifdef DEBUG
434 printf("Searching for RAID components...\n");
435 #endif
436 ac_list = rf_find_raid_components();
437
438 /* 2. Sort them into their respective sets. */
439 config_sets = rf_create_auto_sets(ac_list);
440
441 /*
442 * 3. Evaluate each set andconfigure the valid ones.
443 * This gets done in rf_buildroothack().
444 */
445 rf_buildroothack(config_sets);
446
447 return (1);
448 }
449
450 void
451 rf_buildroothack(RF_ConfigSet_t *config_sets)
452 {
453 RF_ConfigSet_t *cset;
454 RF_ConfigSet_t *next_cset;
455 int retcode;
456 int raidID;
457 int rootID;
458 int num_root;
459
460 rootID = 0;
461 num_root = 0;
462 cset = config_sets;
463 while(cset != NULL ) {
464 next_cset = cset->next;
465 if (rf_have_enough_components(cset) &&
466 cset->ac->clabel->autoconfigure==1) {
467 retcode = rf_auto_config_set(cset,&raidID);
468 if (!retcode) {
469 if (cset->rootable) {
470 rootID = raidID;
471 num_root++;
472 }
473 } else {
474 /* The autoconfig didn't work :( */
475 #if DEBUG
476 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
477 #endif
478 rf_release_all_vps(cset);
479 }
480 } else {
481 /* we're not autoconfiguring this set...
482 release the associated resources */
483 rf_release_all_vps(cset);
484 }
485 /* cleanup */
486 rf_cleanup_config_set(cset);
487 cset = next_cset;
488 }
489
490 /* we found something bootable... */
491
492 if (num_root == 1) {
493 booted_device = &raidrootdev[rootID];
494 } else if (num_root > 1) {
495 /* we can't guess.. require the user to answer... */
496 boothowto |= RB_ASKNAME;
497 }
498 }
499
500
501 int
502 raidsize(dev_t dev)
503 {
504 struct raid_softc *rs;
505 struct disklabel *lp;
506 int part, unit, omask, size;
507
508 unit = raidunit(dev);
509 if (unit >= numraid)
510 return (-1);
511 rs = &raid_softc[unit];
512
513 if ((rs->sc_flags & RAIDF_INITED) == 0)
514 return (-1);
515
516 part = DISKPART(dev);
517 omask = rs->sc_dkdev.dk_openmask & (1 << part);
518 lp = rs->sc_dkdev.dk_label;
519
520 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
521 return (-1);
522
523 if (lp->d_partitions[part].p_fstype != FS_SWAP)
524 size = -1;
525 else
526 size = lp->d_partitions[part].p_size *
527 (lp->d_secsize / DEV_BSIZE);
528
529 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
530 return (-1);
531
532 return (size);
533
534 }
535
536 int
537 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
538 {
539 /* Not implemented. */
540 return ENXIO;
541 }
542 /* ARGSUSED */
543 int
544 raidopen(dev_t dev, int flags, int fmt, struct lwp *l)
545 {
546 int unit = raidunit(dev);
547 struct raid_softc *rs;
548 struct disklabel *lp;
549 int part, pmask;
550 int error = 0;
551
552 if (unit >= numraid)
553 return (ENXIO);
554 rs = &raid_softc[unit];
555
556 if ((error = raidlock(rs)) != 0)
557 return (error);
558 lp = rs->sc_dkdev.dk_label;
559
560 part = DISKPART(dev);
561 pmask = (1 << part);
562
563 if ((rs->sc_flags & RAIDF_INITED) &&
564 (rs->sc_dkdev.dk_openmask == 0))
565 raidgetdisklabel(dev);
566
567 /* make sure that this partition exists */
568
569 if (part != RAW_PART) {
570 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
571 ((part >= lp->d_npartitions) ||
572 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
573 error = ENXIO;
574 raidunlock(rs);
575 return (error);
576 }
577 }
578 /* Prevent this unit from being unconfigured while open. */
579 switch (fmt) {
580 case S_IFCHR:
581 rs->sc_dkdev.dk_copenmask |= pmask;
582 break;
583
584 case S_IFBLK:
585 rs->sc_dkdev.dk_bopenmask |= pmask;
586 break;
587 }
588
589 if ((rs->sc_dkdev.dk_openmask == 0) &&
590 ((rs->sc_flags & RAIDF_INITED) != 0)) {
591 /* First one... mark things as dirty... Note that we *MUST*
592 have done a configure before this. I DO NOT WANT TO BE
593 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
594 THAT THEY BELONG TOGETHER!!!!! */
595 /* XXX should check to see if we're only open for reading
596 here... If so, we needn't do this, but then need some
597 other way of keeping track of what's happened.. */
598
599 rf_markalldirty( raidPtrs[unit] );
600 }
601
602
603 rs->sc_dkdev.dk_openmask =
604 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
605
606 raidunlock(rs);
607
608 return (error);
609
610
611 }
612 /* ARGSUSED */
613 int
614 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
615 {
616 int unit = raidunit(dev);
617 struct raid_softc *rs;
618 int error = 0;
619 int part;
620
621 if (unit >= numraid)
622 return (ENXIO);
623 rs = &raid_softc[unit];
624
625 if ((error = raidlock(rs)) != 0)
626 return (error);
627
628 part = DISKPART(dev);
629
630 /* ...that much closer to allowing unconfiguration... */
631 switch (fmt) {
632 case S_IFCHR:
633 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
634 break;
635
636 case S_IFBLK:
637 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
638 break;
639 }
640 rs->sc_dkdev.dk_openmask =
641 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
642
643 if ((rs->sc_dkdev.dk_openmask == 0) &&
644 ((rs->sc_flags & RAIDF_INITED) != 0)) {
645 /* Last one... device is not unconfigured yet.
646 Device shutdown has taken care of setting the
647 clean bits if RAIDF_INITED is not set
648 mark things as clean... */
649
650 rf_update_component_labels(raidPtrs[unit],
651 RF_FINAL_COMPONENT_UPDATE);
652 if (doing_shutdown) {
653 /* last one, and we're going down, so
654 lights out for this RAID set too. */
655 error = rf_Shutdown(raidPtrs[unit]);
656
657 /* It's no longer initialized... */
658 rs->sc_flags &= ~RAIDF_INITED;
659
660 /* Detach the disk. */
661 pseudo_disk_detach(&rs->sc_dkdev);
662 }
663 }
664
665 raidunlock(rs);
666 return (0);
667
668 }
669
670 void
671 raidstrategy(struct buf *bp)
672 {
673 int s;
674
675 unsigned int raidID = raidunit(bp->b_dev);
676 RF_Raid_t *raidPtr;
677 struct raid_softc *rs = &raid_softc[raidID];
678 int wlabel;
679
680 if ((rs->sc_flags & RAIDF_INITED) ==0) {
681 bp->b_error = ENXIO;
682 bp->b_flags |= B_ERROR;
683 goto done;
684 }
685 if (raidID >= numraid || !raidPtrs[raidID]) {
686 bp->b_error = ENODEV;
687 bp->b_flags |= B_ERROR;
688 goto done;
689 }
690 raidPtr = raidPtrs[raidID];
691 if (!raidPtr->valid) {
692 bp->b_error = ENODEV;
693 bp->b_flags |= B_ERROR;
694 goto done;
695 }
696 if (bp->b_bcount == 0) {
697 db1_printf(("b_bcount is zero..\n"));
698 goto done;
699 }
700
701 /*
702 * Do bounds checking and adjust transfer. If there's an
703 * error, the bounds check will flag that for us.
704 */
705
706 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
707 if (DISKPART(bp->b_dev) == RAW_PART) {
708 uint64_t size; /* device size in DEV_BSIZE unit */
709
710 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
711 size = raidPtr->totalSectors <<
712 (raidPtr->logBytesPerSector - DEV_BSHIFT);
713 } else {
714 size = raidPtr->totalSectors >>
715 (DEV_BSHIFT - raidPtr->logBytesPerSector);
716 }
717 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
718 goto done;
719 }
720 } else {
721 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
722 db1_printf(("Bounds check failed!!:%d %d\n",
723 (int) bp->b_blkno, (int) wlabel));
724 goto done;
725 }
726 }
727 s = splbio();
728
729 bp->b_resid = 0;
730
731 /* stuff it onto our queue */
732 BUFQ_PUT(rs->buf_queue, bp);
733
734 /* scheduled the IO to happen at the next convenient time */
735 wakeup(&(raidPtrs[raidID]->iodone));
736
737 splx(s);
738 return;
739
740 done:
741 bp->b_resid = bp->b_bcount;
742 biodone(bp);
743 }
744 /* ARGSUSED */
745 int
746 raidread(dev_t dev, struct uio *uio, int flags)
747 {
748 int unit = raidunit(dev);
749 struct raid_softc *rs;
750
751 if (unit >= numraid)
752 return (ENXIO);
753 rs = &raid_softc[unit];
754
755 if ((rs->sc_flags & RAIDF_INITED) == 0)
756 return (ENXIO);
757
758 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
759
760 }
761 /* ARGSUSED */
762 int
763 raidwrite(dev_t dev, struct uio *uio, int flags)
764 {
765 int unit = raidunit(dev);
766 struct raid_softc *rs;
767
768 if (unit >= numraid)
769 return (ENXIO);
770 rs = &raid_softc[unit];
771
772 if ((rs->sc_flags & RAIDF_INITED) == 0)
773 return (ENXIO);
774
775 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
776
777 }
778
779 int
780 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
781 {
782 int unit = raidunit(dev);
783 int error = 0;
784 int part, pmask;
785 struct raid_softc *rs;
786 RF_Config_t *k_cfg, *u_cfg;
787 RF_Raid_t *raidPtr;
788 RF_RaidDisk_t *diskPtr;
789 RF_AccTotals_t *totals;
790 RF_DeviceConfig_t *d_cfg, **ucfgp;
791 u_char *specific_buf;
792 int retcode = 0;
793 int column;
794 int raidid;
795 struct rf_recon_req *rrcopy, *rr;
796 RF_ComponentLabel_t *clabel;
797 RF_ComponentLabel_t ci_label;
798 RF_ComponentLabel_t **clabel_ptr;
799 RF_SingleComponent_t *sparePtr,*componentPtr;
800 RF_SingleComponent_t hot_spare;
801 RF_SingleComponent_t component;
802 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
803 int i, j, d;
804 #ifdef __HAVE_OLD_DISKLABEL
805 struct disklabel newlabel;
806 #endif
807
808 if (unit >= numraid)
809 return (ENXIO);
810 rs = &raid_softc[unit];
811 raidPtr = raidPtrs[unit];
812
813 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
814 (int) DISKPART(dev), (int) unit, (int) cmd));
815
816 /* Must be open for writes for these commands... */
817 switch (cmd) {
818 case DIOCSDINFO:
819 case DIOCWDINFO:
820 #ifdef __HAVE_OLD_DISKLABEL
821 case ODIOCWDINFO:
822 case ODIOCSDINFO:
823 #endif
824 case DIOCWLABEL:
825 if ((flag & FWRITE) == 0)
826 return (EBADF);
827 }
828
829 /* Must be initialized for these... */
830 switch (cmd) {
831 case DIOCGDINFO:
832 case DIOCSDINFO:
833 case DIOCWDINFO:
834 #ifdef __HAVE_OLD_DISKLABEL
835 case ODIOCGDINFO:
836 case ODIOCWDINFO:
837 case ODIOCSDINFO:
838 case ODIOCGDEFLABEL:
839 #endif
840 case DIOCGPART:
841 case DIOCWLABEL:
842 case DIOCGDEFLABEL:
843 case RAIDFRAME_SHUTDOWN:
844 case RAIDFRAME_REWRITEPARITY:
845 case RAIDFRAME_GET_INFO:
846 case RAIDFRAME_RESET_ACCTOTALS:
847 case RAIDFRAME_GET_ACCTOTALS:
848 case RAIDFRAME_KEEP_ACCTOTALS:
849 case RAIDFRAME_GET_SIZE:
850 case RAIDFRAME_FAIL_DISK:
851 case RAIDFRAME_COPYBACK:
852 case RAIDFRAME_CHECK_RECON_STATUS:
853 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
854 case RAIDFRAME_GET_COMPONENT_LABEL:
855 case RAIDFRAME_SET_COMPONENT_LABEL:
856 case RAIDFRAME_ADD_HOT_SPARE:
857 case RAIDFRAME_REMOVE_HOT_SPARE:
858 case RAIDFRAME_INIT_LABELS:
859 case RAIDFRAME_REBUILD_IN_PLACE:
860 case RAIDFRAME_CHECK_PARITY:
861 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
862 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
863 case RAIDFRAME_CHECK_COPYBACK_STATUS:
864 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
865 case RAIDFRAME_SET_AUTOCONFIG:
866 case RAIDFRAME_SET_ROOT:
867 case RAIDFRAME_DELETE_COMPONENT:
868 case RAIDFRAME_INCORPORATE_HOT_SPARE:
869 if ((rs->sc_flags & RAIDF_INITED) == 0)
870 return (ENXIO);
871 }
872
873 switch (cmd) {
874
875 /* configure the system */
876 case RAIDFRAME_CONFIGURE:
877
878 if (raidPtr->valid) {
879 /* There is a valid RAID set running on this unit! */
880 printf("raid%d: Device already configured!\n",unit);
881 return(EINVAL);
882 }
883
884 /* copy-in the configuration information */
885 /* data points to a pointer to the configuration structure */
886
887 u_cfg = *((RF_Config_t **) data);
888 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
889 if (k_cfg == NULL) {
890 return (ENOMEM);
891 }
892 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
893 if (retcode) {
894 RF_Free(k_cfg, sizeof(RF_Config_t));
895 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
896 retcode));
897 return (retcode);
898 }
899 /* allocate a buffer for the layout-specific data, and copy it
900 * in */
901 if (k_cfg->layoutSpecificSize) {
902 if (k_cfg->layoutSpecificSize > 10000) {
903 /* sanity check */
904 RF_Free(k_cfg, sizeof(RF_Config_t));
905 return (EINVAL);
906 }
907 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
908 (u_char *));
909 if (specific_buf == NULL) {
910 RF_Free(k_cfg, sizeof(RF_Config_t));
911 return (ENOMEM);
912 }
913 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
914 k_cfg->layoutSpecificSize);
915 if (retcode) {
916 RF_Free(k_cfg, sizeof(RF_Config_t));
917 RF_Free(specific_buf,
918 k_cfg->layoutSpecificSize);
919 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
920 retcode));
921 return (retcode);
922 }
923 } else
924 specific_buf = NULL;
925 k_cfg->layoutSpecific = specific_buf;
926
927 /* should do some kind of sanity check on the configuration.
928 * Store the sum of all the bytes in the last byte? */
929
930 /* configure the system */
931
932 /*
933 * Clear the entire RAID descriptor, just to make sure
934 * there is no stale data left in the case of a
935 * reconfiguration
936 */
937 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
938 raidPtr->raidid = unit;
939
940 retcode = rf_Configure(raidPtr, k_cfg, NULL);
941
942 if (retcode == 0) {
943
944 /* allow this many simultaneous IO's to
945 this RAID device */
946 raidPtr->openings = RAIDOUTSTANDING;
947
948 raidinit(raidPtr);
949 rf_markalldirty(raidPtr);
950 }
951 /* free the buffers. No return code here. */
952 if (k_cfg->layoutSpecificSize) {
953 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
954 }
955 RF_Free(k_cfg, sizeof(RF_Config_t));
956
957 return (retcode);
958
959 /* shutdown the system */
960 case RAIDFRAME_SHUTDOWN:
961
962 if ((error = raidlock(rs)) != 0)
963 return (error);
964
965 /*
966 * If somebody has a partition mounted, we shouldn't
967 * shutdown.
968 */
969
970 part = DISKPART(dev);
971 pmask = (1 << part);
972 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
973 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
974 (rs->sc_dkdev.dk_copenmask & pmask))) {
975 raidunlock(rs);
976 return (EBUSY);
977 }
978
979 retcode = rf_Shutdown(raidPtr);
980
981 /* It's no longer initialized... */
982 rs->sc_flags &= ~RAIDF_INITED;
983
984 /* Detach the disk. */
985 pseudo_disk_detach(&rs->sc_dkdev);
986
987 raidunlock(rs);
988
989 return (retcode);
990 case RAIDFRAME_GET_COMPONENT_LABEL:
991 clabel_ptr = (RF_ComponentLabel_t **) data;
992 /* need to read the component label for the disk indicated
993 by row,column in clabel */
994
995 /* For practice, let's get it directly from disk, rather
996 than from the in-core copy */
997 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
998 (RF_ComponentLabel_t *));
999 if (clabel == NULL)
1000 return (ENOMEM);
1001
1002 retcode = copyin( *clabel_ptr, clabel,
1003 sizeof(RF_ComponentLabel_t));
1004
1005 if (retcode) {
1006 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1007 return(retcode);
1008 }
1009
1010 clabel->row = 0; /* Don't allow looking at anything else.*/
1011
1012 column = clabel->column;
1013
1014 if ((column < 0) || (column >= raidPtr->numCol +
1015 raidPtr->numSpare)) {
1016 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1017 return(EINVAL);
1018 }
1019
1020 retcode = raidread_component_label(raidPtr->Disks[column].dev,
1021 raidPtr->raid_cinfo[column].ci_vp,
1022 clabel );
1023
1024 if (retcode == 0) {
1025 retcode = copyout(clabel, *clabel_ptr,
1026 sizeof(RF_ComponentLabel_t));
1027 }
1028 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1029 return (retcode);
1030
1031 case RAIDFRAME_SET_COMPONENT_LABEL:
1032 clabel = (RF_ComponentLabel_t *) data;
1033
1034 /* XXX check the label for valid stuff... */
1035 /* Note that some things *should not* get modified --
1036 the user should be re-initing the labels instead of
1037 trying to patch things.
1038 */
1039
1040 raidid = raidPtr->raidid;
1041 #if DEBUG
1042 printf("raid%d: Got component label:\n", raidid);
1043 printf("raid%d: Version: %d\n", raidid, clabel->version);
1044 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1045 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1046 printf("raid%d: Column: %d\n", raidid, clabel->column);
1047 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1048 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1049 printf("raid%d: Status: %d\n", raidid, clabel->status);
1050 #endif
1051 clabel->row = 0;
1052 column = clabel->column;
1053
1054 if ((column < 0) || (column >= raidPtr->numCol)) {
1055 return(EINVAL);
1056 }
1057
1058 /* XXX this isn't allowed to do anything for now :-) */
1059
1060 /* XXX and before it is, we need to fill in the rest
1061 of the fields!?!?!?! */
1062 #if 0
1063 raidwrite_component_label(
1064 raidPtr->Disks[column].dev,
1065 raidPtr->raid_cinfo[column].ci_vp,
1066 clabel );
1067 #endif
1068 return (0);
1069
1070 case RAIDFRAME_INIT_LABELS:
1071 clabel = (RF_ComponentLabel_t *) data;
1072 /*
1073 we only want the serial number from
1074 the above. We get all the rest of the information
1075 from the config that was used to create this RAID
1076 set.
1077 */
1078
1079 raidPtr->serial_number = clabel->serial_number;
1080
1081 raid_init_component_label(raidPtr, &ci_label);
1082 ci_label.serial_number = clabel->serial_number;
1083 ci_label.row = 0; /* we dont' pretend to support more */
1084
1085 for(column=0;column<raidPtr->numCol;column++) {
1086 diskPtr = &raidPtr->Disks[column];
1087 if (!RF_DEAD_DISK(diskPtr->status)) {
1088 ci_label.partitionSize = diskPtr->partitionSize;
1089 ci_label.column = column;
1090 raidwrite_component_label(
1091 raidPtr->Disks[column].dev,
1092 raidPtr->raid_cinfo[column].ci_vp,
1093 &ci_label );
1094 }
1095 }
1096
1097 return (retcode);
1098 case RAIDFRAME_SET_AUTOCONFIG:
1099 d = rf_set_autoconfig(raidPtr, *(int *) data);
1100 printf("raid%d: New autoconfig value is: %d\n",
1101 raidPtr->raidid, d);
1102 *(int *) data = d;
1103 return (retcode);
1104
1105 case RAIDFRAME_SET_ROOT:
1106 d = rf_set_rootpartition(raidPtr, *(int *) data);
1107 printf("raid%d: New rootpartition value is: %d\n",
1108 raidPtr->raidid, d);
1109 *(int *) data = d;
1110 return (retcode);
1111
1112 /* initialize all parity */
1113 case RAIDFRAME_REWRITEPARITY:
1114
1115 if (raidPtr->Layout.map->faultsTolerated == 0) {
1116 /* Parity for RAID 0 is trivially correct */
1117 raidPtr->parity_good = RF_RAID_CLEAN;
1118 return(0);
1119 }
1120
1121 if (raidPtr->parity_rewrite_in_progress == 1) {
1122 /* Re-write is already in progress! */
1123 return(EINVAL);
1124 }
1125
1126 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1127 rf_RewriteParityThread,
1128 raidPtr,"raid_parity");
1129 return (retcode);
1130
1131
1132 case RAIDFRAME_ADD_HOT_SPARE:
1133 sparePtr = (RF_SingleComponent_t *) data;
1134 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1135 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1136 return(retcode);
1137
1138 case RAIDFRAME_REMOVE_HOT_SPARE:
1139 return(retcode);
1140
1141 case RAIDFRAME_DELETE_COMPONENT:
1142 componentPtr = (RF_SingleComponent_t *)data;
1143 memcpy( &component, componentPtr,
1144 sizeof(RF_SingleComponent_t));
1145 retcode = rf_delete_component(raidPtr, &component);
1146 return(retcode);
1147
1148 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1149 componentPtr = (RF_SingleComponent_t *)data;
1150 memcpy( &component, componentPtr,
1151 sizeof(RF_SingleComponent_t));
1152 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1153 return(retcode);
1154
1155 case RAIDFRAME_REBUILD_IN_PLACE:
1156
1157 if (raidPtr->Layout.map->faultsTolerated == 0) {
1158 /* Can't do this on a RAID 0!! */
1159 return(EINVAL);
1160 }
1161
1162 if (raidPtr->recon_in_progress == 1) {
1163 /* a reconstruct is already in progress! */
1164 return(EINVAL);
1165 }
1166
1167 componentPtr = (RF_SingleComponent_t *) data;
1168 memcpy( &component, componentPtr,
1169 sizeof(RF_SingleComponent_t));
1170 component.row = 0; /* we don't support any more */
1171 column = component.column;
1172
1173 if ((column < 0) || (column >= raidPtr->numCol)) {
1174 return(EINVAL);
1175 }
1176
1177 RF_LOCK_MUTEX(raidPtr->mutex);
1178 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1179 (raidPtr->numFailures > 0)) {
1180 /* XXX 0 above shouldn't be constant!!! */
1181 /* some component other than this has failed.
1182 Let's not make things worse than they already
1183 are... */
1184 printf("raid%d: Unable to reconstruct to disk at:\n",
1185 raidPtr->raidid);
1186 printf("raid%d: Col: %d Too many failures.\n",
1187 raidPtr->raidid, column);
1188 RF_UNLOCK_MUTEX(raidPtr->mutex);
1189 return (EINVAL);
1190 }
1191 if (raidPtr->Disks[column].status ==
1192 rf_ds_reconstructing) {
1193 printf("raid%d: Unable to reconstruct to disk at:\n",
1194 raidPtr->raidid);
1195 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1196
1197 RF_UNLOCK_MUTEX(raidPtr->mutex);
1198 return (EINVAL);
1199 }
1200 if (raidPtr->Disks[column].status == rf_ds_spared) {
1201 RF_UNLOCK_MUTEX(raidPtr->mutex);
1202 return (EINVAL);
1203 }
1204 RF_UNLOCK_MUTEX(raidPtr->mutex);
1205
1206 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1207 if (rrcopy == NULL)
1208 return(ENOMEM);
1209
1210 rrcopy->raidPtr = (void *) raidPtr;
1211 rrcopy->col = column;
1212
1213 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1214 rf_ReconstructInPlaceThread,
1215 rrcopy,"raid_reconip");
1216 return(retcode);
1217
1218 case RAIDFRAME_GET_INFO:
1219 if (!raidPtr->valid)
1220 return (ENODEV);
1221 ucfgp = (RF_DeviceConfig_t **) data;
1222 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1223 (RF_DeviceConfig_t *));
1224 if (d_cfg == NULL)
1225 return (ENOMEM);
1226 d_cfg->rows = 1; /* there is only 1 row now */
1227 d_cfg->cols = raidPtr->numCol;
1228 d_cfg->ndevs = raidPtr->numCol;
1229 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1230 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1231 return (ENOMEM);
1232 }
1233 d_cfg->nspares = raidPtr->numSpare;
1234 if (d_cfg->nspares >= RF_MAX_DISKS) {
1235 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1236 return (ENOMEM);
1237 }
1238 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1239 d = 0;
1240 for (j = 0; j < d_cfg->cols; j++) {
1241 d_cfg->devs[d] = raidPtr->Disks[j];
1242 d++;
1243 }
1244 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1245 d_cfg->spares[i] = raidPtr->Disks[j];
1246 }
1247 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1248 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1249
1250 return (retcode);
1251
1252 case RAIDFRAME_CHECK_PARITY:
1253 *(int *) data = raidPtr->parity_good;
1254 return (0);
1255
1256 case RAIDFRAME_RESET_ACCTOTALS:
1257 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1258 return (0);
1259
1260 case RAIDFRAME_GET_ACCTOTALS:
1261 totals = (RF_AccTotals_t *) data;
1262 *totals = raidPtr->acc_totals;
1263 return (0);
1264
1265 case RAIDFRAME_KEEP_ACCTOTALS:
1266 raidPtr->keep_acc_totals = *(int *)data;
1267 return (0);
1268
1269 case RAIDFRAME_GET_SIZE:
1270 *(int *) data = raidPtr->totalSectors;
1271 return (0);
1272
1273 /* fail a disk & optionally start reconstruction */
1274 case RAIDFRAME_FAIL_DISK:
1275
1276 if (raidPtr->Layout.map->faultsTolerated == 0) {
1277 /* Can't do this on a RAID 0!! */
1278 return(EINVAL);
1279 }
1280
1281 rr = (struct rf_recon_req *) data;
1282 rr->row = 0;
1283 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1284 return (EINVAL);
1285
1286
1287 RF_LOCK_MUTEX(raidPtr->mutex);
1288 if (raidPtr->status == rf_rs_reconstructing) {
1289 /* you can't fail a disk while we're reconstructing! */
1290 /* XXX wrong for RAID6 */
1291 RF_UNLOCK_MUTEX(raidPtr->mutex);
1292 return (EINVAL);
1293 }
1294 if ((raidPtr->Disks[rr->col].status ==
1295 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1296 /* some other component has failed. Let's not make
1297 things worse. XXX wrong for RAID6 */
1298 RF_UNLOCK_MUTEX(raidPtr->mutex);
1299 return (EINVAL);
1300 }
1301 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1302 /* Can't fail a spared disk! */
1303 RF_UNLOCK_MUTEX(raidPtr->mutex);
1304 return (EINVAL);
1305 }
1306 RF_UNLOCK_MUTEX(raidPtr->mutex);
1307
1308 /* make a copy of the recon request so that we don't rely on
1309 * the user's buffer */
1310 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1311 if (rrcopy == NULL)
1312 return(ENOMEM);
1313 memcpy(rrcopy, rr, sizeof(*rr));
1314 rrcopy->raidPtr = (void *) raidPtr;
1315
1316 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1317 rf_ReconThread,
1318 rrcopy,"raid_recon");
1319 return (0);
1320
1321 /* invoke a copyback operation after recon on whatever disk
1322 * needs it, if any */
1323 case RAIDFRAME_COPYBACK:
1324
1325 if (raidPtr->Layout.map->faultsTolerated == 0) {
1326 /* This makes no sense on a RAID 0!! */
1327 return(EINVAL);
1328 }
1329
1330 if (raidPtr->copyback_in_progress == 1) {
1331 /* Copyback is already in progress! */
1332 return(EINVAL);
1333 }
1334
1335 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1336 rf_CopybackThread,
1337 raidPtr,"raid_copyback");
1338 return (retcode);
1339
1340 /* return the percentage completion of reconstruction */
1341 case RAIDFRAME_CHECK_RECON_STATUS:
1342 if (raidPtr->Layout.map->faultsTolerated == 0) {
1343 /* This makes no sense on a RAID 0, so tell the
1344 user it's done. */
1345 *(int *) data = 100;
1346 return(0);
1347 }
1348 if (raidPtr->status != rf_rs_reconstructing)
1349 *(int *) data = 100;
1350 else {
1351 if (raidPtr->reconControl->numRUsTotal > 0) {
1352 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1353 } else {
1354 *(int *) data = 0;
1355 }
1356 }
1357 return (0);
1358 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1359 progressInfoPtr = (RF_ProgressInfo_t **) data;
1360 if (raidPtr->status != rf_rs_reconstructing) {
1361 progressInfo.remaining = 0;
1362 progressInfo.completed = 100;
1363 progressInfo.total = 100;
1364 } else {
1365 progressInfo.total =
1366 raidPtr->reconControl->numRUsTotal;
1367 progressInfo.completed =
1368 raidPtr->reconControl->numRUsComplete;
1369 progressInfo.remaining = progressInfo.total -
1370 progressInfo.completed;
1371 }
1372 retcode = copyout(&progressInfo, *progressInfoPtr,
1373 sizeof(RF_ProgressInfo_t));
1374 return (retcode);
1375
1376 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1377 if (raidPtr->Layout.map->faultsTolerated == 0) {
1378 /* This makes no sense on a RAID 0, so tell the
1379 user it's done. */
1380 *(int *) data = 100;
1381 return(0);
1382 }
1383 if (raidPtr->parity_rewrite_in_progress == 1) {
1384 *(int *) data = 100 *
1385 raidPtr->parity_rewrite_stripes_done /
1386 raidPtr->Layout.numStripe;
1387 } else {
1388 *(int *) data = 100;
1389 }
1390 return (0);
1391
1392 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1393 progressInfoPtr = (RF_ProgressInfo_t **) data;
1394 if (raidPtr->parity_rewrite_in_progress == 1) {
1395 progressInfo.total = raidPtr->Layout.numStripe;
1396 progressInfo.completed =
1397 raidPtr->parity_rewrite_stripes_done;
1398 progressInfo.remaining = progressInfo.total -
1399 progressInfo.completed;
1400 } else {
1401 progressInfo.remaining = 0;
1402 progressInfo.completed = 100;
1403 progressInfo.total = 100;
1404 }
1405 retcode = copyout(&progressInfo, *progressInfoPtr,
1406 sizeof(RF_ProgressInfo_t));
1407 return (retcode);
1408
1409 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1410 if (raidPtr->Layout.map->faultsTolerated == 0) {
1411 /* This makes no sense on a RAID 0 */
1412 *(int *) data = 100;
1413 return(0);
1414 }
1415 if (raidPtr->copyback_in_progress == 1) {
1416 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1417 raidPtr->Layout.numStripe;
1418 } else {
1419 *(int *) data = 100;
1420 }
1421 return (0);
1422
1423 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1424 progressInfoPtr = (RF_ProgressInfo_t **) data;
1425 if (raidPtr->copyback_in_progress == 1) {
1426 progressInfo.total = raidPtr->Layout.numStripe;
1427 progressInfo.completed =
1428 raidPtr->copyback_stripes_done;
1429 progressInfo.remaining = progressInfo.total -
1430 progressInfo.completed;
1431 } else {
1432 progressInfo.remaining = 0;
1433 progressInfo.completed = 100;
1434 progressInfo.total = 100;
1435 }
1436 retcode = copyout(&progressInfo, *progressInfoPtr,
1437 sizeof(RF_ProgressInfo_t));
1438 return (retcode);
1439
1440 /* the sparetable daemon calls this to wait for the kernel to
1441 * need a spare table. this ioctl does not return until a
1442 * spare table is needed. XXX -- calling mpsleep here in the
1443 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1444 * -- I should either compute the spare table in the kernel,
1445 * or have a different -- XXX XXX -- interface (a different
1446 * character device) for delivering the table -- XXX */
1447 #if 0
1448 case RAIDFRAME_SPARET_WAIT:
1449 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1450 while (!rf_sparet_wait_queue)
1451 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1452 waitreq = rf_sparet_wait_queue;
1453 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1454 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1455
1456 /* structure assignment */
1457 *((RF_SparetWait_t *) data) = *waitreq;
1458
1459 RF_Free(waitreq, sizeof(*waitreq));
1460 return (0);
1461
1462 /* wakes up a process waiting on SPARET_WAIT and puts an error
1463 * code in it that will cause the daemon to exit */
1464 case RAIDFRAME_ABORT_SPARET_WAIT:
1465 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1466 waitreq->fcol = -1;
1467 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1468 waitreq->next = rf_sparet_wait_queue;
1469 rf_sparet_wait_queue = waitreq;
1470 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1471 wakeup(&rf_sparet_wait_queue);
1472 return (0);
1473
1474 /* used by the spare table daemon to deliver a spare table
1475 * into the kernel */
1476 case RAIDFRAME_SEND_SPARET:
1477
1478 /* install the spare table */
1479 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1480
1481 /* respond to the requestor. the return status of the spare
1482 * table installation is passed in the "fcol" field */
1483 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1484 waitreq->fcol = retcode;
1485 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1486 waitreq->next = rf_sparet_resp_queue;
1487 rf_sparet_resp_queue = waitreq;
1488 wakeup(&rf_sparet_resp_queue);
1489 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1490
1491 return (retcode);
1492 #endif
1493
1494 default:
1495 break; /* fall through to the os-specific code below */
1496
1497 }
1498
1499 if (!raidPtr->valid)
1500 return (EINVAL);
1501
1502 /*
1503 * Add support for "regular" device ioctls here.
1504 */
1505
1506 switch (cmd) {
1507 case DIOCGDINFO:
1508 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1509 break;
1510 #ifdef __HAVE_OLD_DISKLABEL
1511 case ODIOCGDINFO:
1512 newlabel = *(rs->sc_dkdev.dk_label);
1513 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1514 return ENOTTY;
1515 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1516 break;
1517 #endif
1518
1519 case DIOCGPART:
1520 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1521 ((struct partinfo *) data)->part =
1522 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1523 break;
1524
1525 case DIOCWDINFO:
1526 case DIOCSDINFO:
1527 #ifdef __HAVE_OLD_DISKLABEL
1528 case ODIOCWDINFO:
1529 case ODIOCSDINFO:
1530 #endif
1531 {
1532 struct disklabel *lp;
1533 #ifdef __HAVE_OLD_DISKLABEL
1534 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1535 memset(&newlabel, 0, sizeof newlabel);
1536 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1537 lp = &newlabel;
1538 } else
1539 #endif
1540 lp = (struct disklabel *)data;
1541
1542 if ((error = raidlock(rs)) != 0)
1543 return (error);
1544
1545 rs->sc_flags |= RAIDF_LABELLING;
1546
1547 error = setdisklabel(rs->sc_dkdev.dk_label,
1548 lp, 0, rs->sc_dkdev.dk_cpulabel);
1549 if (error == 0) {
1550 if (cmd == DIOCWDINFO
1551 #ifdef __HAVE_OLD_DISKLABEL
1552 || cmd == ODIOCWDINFO
1553 #endif
1554 )
1555 error = writedisklabel(RAIDLABELDEV(dev),
1556 raidstrategy, rs->sc_dkdev.dk_label,
1557 rs->sc_dkdev.dk_cpulabel);
1558 }
1559 rs->sc_flags &= ~RAIDF_LABELLING;
1560
1561 raidunlock(rs);
1562
1563 if (error)
1564 return (error);
1565 break;
1566 }
1567
1568 case DIOCWLABEL:
1569 if (*(int *) data != 0)
1570 rs->sc_flags |= RAIDF_WLABEL;
1571 else
1572 rs->sc_flags &= ~RAIDF_WLABEL;
1573 break;
1574
1575 case DIOCGDEFLABEL:
1576 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1577 break;
1578
1579 #ifdef __HAVE_OLD_DISKLABEL
1580 case ODIOCGDEFLABEL:
1581 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1582 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1583 return ENOTTY;
1584 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1585 break;
1586 #endif
1587
1588 default:
1589 retcode = ENOTTY;
1590 }
1591 return (retcode);
1592
1593 }
1594
1595
1596 /* raidinit -- complete the rest of the initialization for the
1597 RAIDframe device. */
1598
1599
1600 static void
1601 raidinit(RF_Raid_t *raidPtr)
1602 {
1603 struct raid_softc *rs;
1604 int unit;
1605
1606 unit = raidPtr->raidid;
1607
1608 rs = &raid_softc[unit];
1609
1610 /* XXX should check return code first... */
1611 rs->sc_flags |= RAIDF_INITED;
1612
1613 /* XXX doesn't check bounds. */
1614 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1615
1616 rs->sc_dkdev.dk_name = rs->sc_xname;
1617
1618 /* disk_attach actually creates space for the CPU disklabel, among
1619 * other things, so it's critical to call this *BEFORE* we try putzing
1620 * with disklabels. */
1621
1622 pseudo_disk_attach(&rs->sc_dkdev);
1623
1624 /* XXX There may be a weird interaction here between this, and
1625 * protectedSectors, as used in RAIDframe. */
1626
1627 rs->sc_size = raidPtr->totalSectors;
1628 }
1629 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1630 /* wake up the daemon & tell it to get us a spare table
1631 * XXX
1632 * the entries in the queues should be tagged with the raidPtr
1633 * so that in the extremely rare case that two recons happen at once,
1634 * we know for which device were requesting a spare table
1635 * XXX
1636 *
1637 * XXX This code is not currently used. GO
1638 */
1639 int
1640 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1641 {
1642 int retcode;
1643
1644 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1645 req->next = rf_sparet_wait_queue;
1646 rf_sparet_wait_queue = req;
1647 wakeup(&rf_sparet_wait_queue);
1648
1649 /* mpsleep unlocks the mutex */
1650 while (!rf_sparet_resp_queue) {
1651 tsleep(&rf_sparet_resp_queue, PRIBIO,
1652 "raidframe getsparetable", 0);
1653 }
1654 req = rf_sparet_resp_queue;
1655 rf_sparet_resp_queue = req->next;
1656 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1657
1658 retcode = req->fcol;
1659 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1660 * alloc'd */
1661 return (retcode);
1662 }
1663 #endif
1664
1665 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1666 * bp & passes it down.
1667 * any calls originating in the kernel must use non-blocking I/O
1668 * do some extra sanity checking to return "appropriate" error values for
1669 * certain conditions (to make some standard utilities work)
1670 *
1671 * Formerly known as: rf_DoAccessKernel
1672 */
1673 void
1674 raidstart(RF_Raid_t *raidPtr)
1675 {
1676 RF_SectorCount_t num_blocks, pb, sum;
1677 RF_RaidAddr_t raid_addr;
1678 struct partition *pp;
1679 daddr_t blocknum;
1680 int unit;
1681 struct raid_softc *rs;
1682 int do_async;
1683 struct buf *bp;
1684 int rc;
1685
1686 unit = raidPtr->raidid;
1687 rs = &raid_softc[unit];
1688
1689 /* quick check to see if anything has died recently */
1690 RF_LOCK_MUTEX(raidPtr->mutex);
1691 if (raidPtr->numNewFailures > 0) {
1692 RF_UNLOCK_MUTEX(raidPtr->mutex);
1693 rf_update_component_labels(raidPtr,
1694 RF_NORMAL_COMPONENT_UPDATE);
1695 RF_LOCK_MUTEX(raidPtr->mutex);
1696 raidPtr->numNewFailures--;
1697 }
1698
1699 /* Check to see if we're at the limit... */
1700 while (raidPtr->openings > 0) {
1701 RF_UNLOCK_MUTEX(raidPtr->mutex);
1702
1703 /* get the next item, if any, from the queue */
1704 if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
1705 /* nothing more to do */
1706 return;
1707 }
1708
1709 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1710 * partition.. Need to make it absolute to the underlying
1711 * device.. */
1712
1713 blocknum = bp->b_blkno;
1714 if (DISKPART(bp->b_dev) != RAW_PART) {
1715 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1716 blocknum += pp->p_offset;
1717 }
1718
1719 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1720 (int) blocknum));
1721
1722 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1723 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1724
1725 /* *THIS* is where we adjust what block we're going to...
1726 * but DO NOT TOUCH bp->b_blkno!!! */
1727 raid_addr = blocknum;
1728
1729 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1730 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1731 sum = raid_addr + num_blocks + pb;
1732 if (1 || rf_debugKernelAccess) {
1733 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1734 (int) raid_addr, (int) sum, (int) num_blocks,
1735 (int) pb, (int) bp->b_resid));
1736 }
1737 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1738 || (sum < num_blocks) || (sum < pb)) {
1739 bp->b_error = ENOSPC;
1740 bp->b_flags |= B_ERROR;
1741 bp->b_resid = bp->b_bcount;
1742 biodone(bp);
1743 RF_LOCK_MUTEX(raidPtr->mutex);
1744 continue;
1745 }
1746 /*
1747 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1748 */
1749
1750 if (bp->b_bcount & raidPtr->sectorMask) {
1751 bp->b_error = EINVAL;
1752 bp->b_flags |= B_ERROR;
1753 bp->b_resid = bp->b_bcount;
1754 biodone(bp);
1755 RF_LOCK_MUTEX(raidPtr->mutex);
1756 continue;
1757
1758 }
1759 db1_printf(("Calling DoAccess..\n"));
1760
1761
1762 RF_LOCK_MUTEX(raidPtr->mutex);
1763 raidPtr->openings--;
1764 RF_UNLOCK_MUTEX(raidPtr->mutex);
1765
1766 /*
1767 * Everything is async.
1768 */
1769 do_async = 1;
1770
1771 disk_busy(&rs->sc_dkdev);
1772
1773 /* XXX we're still at splbio() here... do we *really*
1774 need to be? */
1775
1776 /* don't ever condition on bp->b_flags & B_WRITE.
1777 * always condition on B_READ instead */
1778
1779 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1780 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1781 do_async, raid_addr, num_blocks,
1782 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1783
1784 if (rc) {
1785 bp->b_error = rc;
1786 bp->b_flags |= B_ERROR;
1787 bp->b_resid = bp->b_bcount;
1788 biodone(bp);
1789 /* continue loop */
1790 }
1791
1792 RF_LOCK_MUTEX(raidPtr->mutex);
1793 }
1794 RF_UNLOCK_MUTEX(raidPtr->mutex);
1795 }
1796
1797
1798
1799
1800 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1801
1802 int
1803 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1804 {
1805 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1806 struct buf *bp;
1807
1808 req->queue = queue;
1809
1810 #if DIAGNOSTIC
1811 if (queue->raidPtr->raidid >= numraid) {
1812 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
1813 numraid);
1814 panic("Invalid Unit number in rf_DispatchKernelIO");
1815 }
1816 #endif
1817
1818 bp = req->bp;
1819
1820 switch (req->type) {
1821 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1822 /* XXX need to do something extra here.. */
1823 /* I'm leaving this in, as I've never actually seen it used,
1824 * and I'd like folks to report it... GO */
1825 printf(("WAKEUP CALLED\n"));
1826 queue->numOutstanding++;
1827
1828 bp->b_flags = 0;
1829 bp->b_fspriv.bf_private = req;
1830
1831 KernelWakeupFunc(bp);
1832 break;
1833
1834 case RF_IO_TYPE_READ:
1835 case RF_IO_TYPE_WRITE:
1836 #if RF_ACC_TRACE > 0
1837 if (req->tracerec) {
1838 RF_ETIMER_START(req->tracerec->timer);
1839 }
1840 #endif
1841 InitBP(bp, queue->rf_cinfo->ci_vp,
1842 op, queue->rf_cinfo->ci_dev,
1843 req->sectorOffset, req->numSector,
1844 req->buf, KernelWakeupFunc, (void *) req,
1845 queue->raidPtr->logBytesPerSector, req->b_proc);
1846
1847 if (rf_debugKernelAccess) {
1848 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1849 (long) bp->b_blkno));
1850 }
1851 queue->numOutstanding++;
1852 queue->last_deq_sector = req->sectorOffset;
1853 /* acc wouldn't have been let in if there were any pending
1854 * reqs at any other priority */
1855 queue->curPriority = req->priority;
1856
1857 db1_printf(("Going for %c to unit %d col %d\n",
1858 req->type, queue->raidPtr->raidid,
1859 queue->col));
1860 db1_printf(("sector %d count %d (%d bytes) %d\n",
1861 (int) req->sectorOffset, (int) req->numSector,
1862 (int) (req->numSector <<
1863 queue->raidPtr->logBytesPerSector),
1864 (int) queue->raidPtr->logBytesPerSector));
1865 VOP_STRATEGY(bp->b_vp, bp);
1866
1867 break;
1868
1869 default:
1870 panic("bad req->type in rf_DispatchKernelIO");
1871 }
1872 db1_printf(("Exiting from DispatchKernelIO\n"));
1873
1874 return (0);
1875 }
1876 /* this is the callback function associated with a I/O invoked from
1877 kernel code.
1878 */
1879 static void
1880 KernelWakeupFunc(struct buf *bp)
1881 {
1882 RF_DiskQueueData_t *req = NULL;
1883 RF_DiskQueue_t *queue;
1884 int s;
1885
1886 s = splbio();
1887 db1_printf(("recovering the request queue:\n"));
1888 req = bp->b_fspriv.bf_private;
1889
1890 queue = (RF_DiskQueue_t *) req->queue;
1891
1892 #if RF_ACC_TRACE > 0
1893 if (req->tracerec) {
1894 RF_ETIMER_STOP(req->tracerec->timer);
1895 RF_ETIMER_EVAL(req->tracerec->timer);
1896 RF_LOCK_MUTEX(rf_tracing_mutex);
1897 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1898 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1899 req->tracerec->num_phys_ios++;
1900 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1901 }
1902 #endif
1903
1904 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1905 * ballistic, and mark the component as hosed... */
1906
1907 if (bp->b_flags & B_ERROR) {
1908 /* Mark the disk as dead */
1909 /* but only mark it once... */
1910 /* and only if it wouldn't leave this RAID set
1911 completely broken */
1912 if (((queue->raidPtr->Disks[queue->col].status ==
1913 rf_ds_optimal) ||
1914 (queue->raidPtr->Disks[queue->col].status ==
1915 rf_ds_used_spare)) &&
1916 (queue->raidPtr->numFailures <
1917 queue->raidPtr->Layout.map->faultsTolerated)) {
1918 printf("raid%d: IO Error. Marking %s as failed.\n",
1919 queue->raidPtr->raidid,
1920 queue->raidPtr->Disks[queue->col].devname);
1921 queue->raidPtr->Disks[queue->col].status =
1922 rf_ds_failed;
1923 queue->raidPtr->status = rf_rs_degraded;
1924 queue->raidPtr->numFailures++;
1925 queue->raidPtr->numNewFailures++;
1926 } else { /* Disk is already dead... */
1927 /* printf("Disk already marked as dead!\n"); */
1928 }
1929
1930 }
1931
1932 /* Fill in the error value */
1933
1934 req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
1935
1936 simple_lock(&queue->raidPtr->iodone_lock);
1937
1938 /* Drop this one on the "finished" queue... */
1939 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
1940
1941 /* Let the raidio thread know there is work to be done. */
1942 wakeup(&(queue->raidPtr->iodone));
1943
1944 simple_unlock(&queue->raidPtr->iodone_lock);
1945
1946 splx(s);
1947 }
1948
1949
1950
1951 /*
1952 * initialize a buf structure for doing an I/O in the kernel.
1953 */
1954 static void
1955 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1956 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf,
1957 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
1958 struct proc *b_proc)
1959 {
1960 /* bp->b_flags = B_PHYS | rw_flag; */
1961 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1962 bp->b_bcount = numSect << logBytesPerSector;
1963 bp->b_bufsize = bp->b_bcount;
1964 bp->b_error = 0;
1965 bp->b_dev = dev;
1966 bp->b_data = bf;
1967 bp->b_blkno = startSect;
1968 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1969 if (bp->b_bcount == 0) {
1970 panic("bp->b_bcount is zero in InitBP!!");
1971 }
1972 bp->b_proc = b_proc;
1973 bp->b_iodone = cbFunc;
1974 bp->b_fspriv.bf_private = cbArg;
1975 bp->b_vp = b_vp;
1976 if ((bp->b_flags & B_READ) == 0) {
1977 bp->b_vp->v_numoutput++;
1978 }
1979
1980 }
1981
1982 static void
1983 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
1984 struct disklabel *lp)
1985 {
1986 memset(lp, 0, sizeof(*lp));
1987
1988 /* fabricate a label... */
1989 lp->d_secperunit = raidPtr->totalSectors;
1990 lp->d_secsize = raidPtr->bytesPerSector;
1991 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1992 lp->d_ntracks = 4 * raidPtr->numCol;
1993 lp->d_ncylinders = raidPtr->totalSectors /
1994 (lp->d_nsectors * lp->d_ntracks);
1995 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1996
1997 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1998 lp->d_type = DTYPE_RAID;
1999 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2000 lp->d_rpm = 3600;
2001 lp->d_interleave = 1;
2002 lp->d_flags = 0;
2003
2004 lp->d_partitions[RAW_PART].p_offset = 0;
2005 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2006 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2007 lp->d_npartitions = RAW_PART + 1;
2008
2009 lp->d_magic = DISKMAGIC;
2010 lp->d_magic2 = DISKMAGIC;
2011 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2012
2013 }
2014 /*
2015 * Read the disklabel from the raid device. If one is not present, fake one
2016 * up.
2017 */
2018 static void
2019 raidgetdisklabel(dev_t dev)
2020 {
2021 int unit = raidunit(dev);
2022 struct raid_softc *rs = &raid_softc[unit];
2023 const char *errstring;
2024 struct disklabel *lp = rs->sc_dkdev.dk_label;
2025 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2026 RF_Raid_t *raidPtr;
2027
2028 db1_printf(("Getting the disklabel...\n"));
2029
2030 memset(clp, 0, sizeof(*clp));
2031
2032 raidPtr = raidPtrs[unit];
2033
2034 raidgetdefaultlabel(raidPtr, rs, lp);
2035
2036 /*
2037 * Call the generic disklabel extraction routine.
2038 */
2039 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2040 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2041 if (errstring)
2042 raidmakedisklabel(rs);
2043 else {
2044 int i;
2045 struct partition *pp;
2046
2047 /*
2048 * Sanity check whether the found disklabel is valid.
2049 *
2050 * This is necessary since total size of the raid device
2051 * may vary when an interleave is changed even though exactly
2052 * same componets are used, and old disklabel may used
2053 * if that is found.
2054 */
2055 if (lp->d_secperunit != rs->sc_size)
2056 printf("raid%d: WARNING: %s: "
2057 "total sector size in disklabel (%d) != "
2058 "the size of raid (%ld)\n", unit, rs->sc_xname,
2059 lp->d_secperunit, (long) rs->sc_size);
2060 for (i = 0; i < lp->d_npartitions; i++) {
2061 pp = &lp->d_partitions[i];
2062 if (pp->p_offset + pp->p_size > rs->sc_size)
2063 printf("raid%d: WARNING: %s: end of partition `%c' "
2064 "exceeds the size of raid (%ld)\n",
2065 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2066 }
2067 }
2068
2069 }
2070 /*
2071 * Take care of things one might want to take care of in the event
2072 * that a disklabel isn't present.
2073 */
2074 static void
2075 raidmakedisklabel(struct raid_softc *rs)
2076 {
2077 struct disklabel *lp = rs->sc_dkdev.dk_label;
2078 db1_printf(("Making a label..\n"));
2079
2080 /*
2081 * For historical reasons, if there's no disklabel present
2082 * the raw partition must be marked FS_BSDFFS.
2083 */
2084
2085 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2086
2087 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2088
2089 lp->d_checksum = dkcksum(lp);
2090 }
2091 /*
2092 * Lookup the provided name in the filesystem. If the file exists,
2093 * is a valid block device, and isn't being used by anyone else,
2094 * set *vpp to the file's vnode.
2095 * You'll find the original of this in ccd.c
2096 */
2097 int
2098 raidlookup(char *path, struct lwp *l, struct vnode **vpp)
2099 {
2100 struct nameidata nd;
2101 struct vnode *vp;
2102 struct proc *p;
2103 struct vattr va;
2104 int error;
2105
2106 p = l ? l->l_proc : NULL;
2107 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, l);
2108 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2109 return (error);
2110 }
2111 vp = nd.ni_vp;
2112 if (vp->v_usecount > 1) {
2113 VOP_UNLOCK(vp, 0);
2114 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
2115 return (EBUSY);
2116 }
2117 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, l)) != 0) {
2118 VOP_UNLOCK(vp, 0);
2119 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
2120 return (error);
2121 }
2122 /* XXX: eventually we should handle VREG, too. */
2123 if (va.va_type != VBLK) {
2124 VOP_UNLOCK(vp, 0);
2125 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
2126 return (ENOTBLK);
2127 }
2128 VOP_UNLOCK(vp, 0);
2129 *vpp = vp;
2130 return (0);
2131 }
2132 /*
2133 * Wait interruptibly for an exclusive lock.
2134 *
2135 * XXX
2136 * Several drivers do this; it should be abstracted and made MP-safe.
2137 * (Hmm... where have we seen this warning before :-> GO )
2138 */
2139 static int
2140 raidlock(struct raid_softc *rs)
2141 {
2142 int error;
2143
2144 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2145 rs->sc_flags |= RAIDF_WANTED;
2146 if ((error =
2147 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2148 return (error);
2149 }
2150 rs->sc_flags |= RAIDF_LOCKED;
2151 return (0);
2152 }
2153 /*
2154 * Unlock and wake up any waiters.
2155 */
2156 static void
2157 raidunlock(struct raid_softc *rs)
2158 {
2159
2160 rs->sc_flags &= ~RAIDF_LOCKED;
2161 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2162 rs->sc_flags &= ~RAIDF_WANTED;
2163 wakeup(rs);
2164 }
2165 }
2166
2167
2168 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2169 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2170
2171 int
2172 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2173 {
2174 RF_ComponentLabel_t clabel;
2175 raidread_component_label(dev, b_vp, &clabel);
2176 clabel.mod_counter = mod_counter;
2177 clabel.clean = RF_RAID_CLEAN;
2178 raidwrite_component_label(dev, b_vp, &clabel);
2179 return(0);
2180 }
2181
2182
2183 int
2184 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2185 {
2186 RF_ComponentLabel_t clabel;
2187 raidread_component_label(dev, b_vp, &clabel);
2188 clabel.mod_counter = mod_counter;
2189 clabel.clean = RF_RAID_DIRTY;
2190 raidwrite_component_label(dev, b_vp, &clabel);
2191 return(0);
2192 }
2193
2194 /* ARGSUSED */
2195 int
2196 raidread_component_label(dev_t dev, struct vnode *b_vp,
2197 RF_ComponentLabel_t *clabel)
2198 {
2199 struct buf *bp;
2200 const struct bdevsw *bdev;
2201 int error;
2202
2203 /* XXX should probably ensure that we don't try to do this if
2204 someone has changed rf_protected_sectors. */
2205
2206 if (b_vp == NULL) {
2207 /* For whatever reason, this component is not valid.
2208 Don't try to read a component label from it. */
2209 return(EINVAL);
2210 }
2211
2212 /* get a block of the appropriate size... */
2213 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2214 bp->b_dev = dev;
2215
2216 /* get our ducks in a row for the read */
2217 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2218 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2219 bp->b_flags |= B_READ;
2220 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2221
2222 bdev = bdevsw_lookup(bp->b_dev);
2223 if (bdev == NULL)
2224 return (ENXIO);
2225 (*bdev->d_strategy)(bp);
2226
2227 error = biowait(bp);
2228
2229 if (!error) {
2230 memcpy(clabel, bp->b_data,
2231 sizeof(RF_ComponentLabel_t));
2232 }
2233
2234 brelse(bp);
2235 return(error);
2236 }
2237 /* ARGSUSED */
2238 int
2239 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2240 RF_ComponentLabel_t *clabel)
2241 {
2242 struct buf *bp;
2243 const struct bdevsw *bdev;
2244 int error;
2245
2246 /* get a block of the appropriate size... */
2247 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2248 bp->b_dev = dev;
2249
2250 /* get our ducks in a row for the write */
2251 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2252 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2253 bp->b_flags |= B_WRITE;
2254 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2255
2256 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2257
2258 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2259
2260 bdev = bdevsw_lookup(bp->b_dev);
2261 if (bdev == NULL)
2262 return (ENXIO);
2263 (*bdev->d_strategy)(bp);
2264 error = biowait(bp);
2265 brelse(bp);
2266 if (error) {
2267 #if 1
2268 printf("Failed to write RAID component info!\n");
2269 #endif
2270 }
2271
2272 return(error);
2273 }
2274
2275 void
2276 rf_markalldirty(RF_Raid_t *raidPtr)
2277 {
2278 RF_ComponentLabel_t clabel;
2279 int sparecol;
2280 int c;
2281 int j;
2282 int scol = -1;
2283
2284 raidPtr->mod_counter++;
2285 for (c = 0; c < raidPtr->numCol; c++) {
2286 /* we don't want to touch (at all) a disk that has
2287 failed */
2288 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
2289 raidread_component_label(
2290 raidPtr->Disks[c].dev,
2291 raidPtr->raid_cinfo[c].ci_vp,
2292 &clabel);
2293 if (clabel.status == rf_ds_spared) {
2294 /* XXX do something special...
2295 but whatever you do, don't
2296 try to access it!! */
2297 } else {
2298 raidmarkdirty(
2299 raidPtr->Disks[c].dev,
2300 raidPtr->raid_cinfo[c].ci_vp,
2301 raidPtr->mod_counter);
2302 }
2303 }
2304 }
2305
2306 for( c = 0; c < raidPtr->numSpare ; c++) {
2307 sparecol = raidPtr->numCol + c;
2308 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2309 /*
2310
2311 we claim this disk is "optimal" if it's
2312 rf_ds_used_spare, as that means it should be
2313 directly substitutable for the disk it replaced.
2314 We note that too...
2315
2316 */
2317
2318 for(j=0;j<raidPtr->numCol;j++) {
2319 if (raidPtr->Disks[j].spareCol == sparecol) {
2320 scol = j;
2321 break;
2322 }
2323 }
2324
2325 raidread_component_label(
2326 raidPtr->Disks[sparecol].dev,
2327 raidPtr->raid_cinfo[sparecol].ci_vp,
2328 &clabel);
2329 /* make sure status is noted */
2330
2331 raid_init_component_label(raidPtr, &clabel);
2332
2333 clabel.row = 0;
2334 clabel.column = scol;
2335 /* Note: we *don't* change status from rf_ds_used_spare
2336 to rf_ds_optimal */
2337 /* clabel.status = rf_ds_optimal; */
2338
2339 raidmarkdirty(raidPtr->Disks[sparecol].dev,
2340 raidPtr->raid_cinfo[sparecol].ci_vp,
2341 raidPtr->mod_counter);
2342 }
2343 }
2344 }
2345
2346
2347 void
2348 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2349 {
2350 RF_ComponentLabel_t clabel;
2351 int sparecol;
2352 int c;
2353 int j;
2354 int scol;
2355
2356 scol = -1;
2357
2358 /* XXX should do extra checks to make sure things really are clean,
2359 rather than blindly setting the clean bit... */
2360
2361 raidPtr->mod_counter++;
2362
2363 for (c = 0; c < raidPtr->numCol; c++) {
2364 if (raidPtr->Disks[c].status == rf_ds_optimal) {
2365 raidread_component_label(
2366 raidPtr->Disks[c].dev,
2367 raidPtr->raid_cinfo[c].ci_vp,
2368 &clabel);
2369 /* make sure status is noted */
2370 clabel.status = rf_ds_optimal;
2371
2372 /* bump the counter */
2373 clabel.mod_counter = raidPtr->mod_counter;
2374
2375 raidwrite_component_label(
2376 raidPtr->Disks[c].dev,
2377 raidPtr->raid_cinfo[c].ci_vp,
2378 &clabel);
2379 if (final == RF_FINAL_COMPONENT_UPDATE) {
2380 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2381 raidmarkclean(
2382 raidPtr->Disks[c].dev,
2383 raidPtr->raid_cinfo[c].ci_vp,
2384 raidPtr->mod_counter);
2385 }
2386 }
2387 }
2388 /* else we don't touch it.. */
2389 }
2390
2391 for( c = 0; c < raidPtr->numSpare ; c++) {
2392 sparecol = raidPtr->numCol + c;
2393 /* Need to ensure that the reconstruct actually completed! */
2394 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2395 /*
2396
2397 we claim this disk is "optimal" if it's
2398 rf_ds_used_spare, as that means it should be
2399 directly substitutable for the disk it replaced.
2400 We note that too...
2401
2402 */
2403
2404 for(j=0;j<raidPtr->numCol;j++) {
2405 if (raidPtr->Disks[j].spareCol == sparecol) {
2406 scol = j;
2407 break;
2408 }
2409 }
2410
2411 /* XXX shouldn't *really* need this... */
2412 raidread_component_label(
2413 raidPtr->Disks[sparecol].dev,
2414 raidPtr->raid_cinfo[sparecol].ci_vp,
2415 &clabel);
2416 /* make sure status is noted */
2417
2418 raid_init_component_label(raidPtr, &clabel);
2419
2420 clabel.mod_counter = raidPtr->mod_counter;
2421 clabel.column = scol;
2422 clabel.status = rf_ds_optimal;
2423
2424 raidwrite_component_label(
2425 raidPtr->Disks[sparecol].dev,
2426 raidPtr->raid_cinfo[sparecol].ci_vp,
2427 &clabel);
2428 if (final == RF_FINAL_COMPONENT_UPDATE) {
2429 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2430 raidmarkclean( raidPtr->Disks[sparecol].dev,
2431 raidPtr->raid_cinfo[sparecol].ci_vp,
2432 raidPtr->mod_counter);
2433 }
2434 }
2435 }
2436 }
2437 }
2438
2439 void
2440 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2441 {
2442 struct proc *p;
2443 struct lwp *l;
2444
2445 p = raidPtr->engine_thread;
2446 l = LIST_FIRST(&p->p_lwps);
2447
2448 if (vp != NULL) {
2449 if (auto_configured == 1) {
2450 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2451 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2452 vput(vp);
2453
2454 } else {
2455 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
2456 }
2457 }
2458 }
2459
2460
2461 void
2462 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2463 {
2464 int r,c;
2465 struct vnode *vp;
2466 int acd;
2467
2468
2469 /* We take this opportunity to close the vnodes like we should.. */
2470
2471 for (c = 0; c < raidPtr->numCol; c++) {
2472 vp = raidPtr->raid_cinfo[c].ci_vp;
2473 acd = raidPtr->Disks[c].auto_configured;
2474 rf_close_component(raidPtr, vp, acd);
2475 raidPtr->raid_cinfo[c].ci_vp = NULL;
2476 raidPtr->Disks[c].auto_configured = 0;
2477 }
2478
2479 for (r = 0; r < raidPtr->numSpare; r++) {
2480 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2481 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2482 rf_close_component(raidPtr, vp, acd);
2483 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2484 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2485 }
2486 }
2487
2488
2489 void
2490 rf_ReconThread(struct rf_recon_req *req)
2491 {
2492 int s;
2493 RF_Raid_t *raidPtr;
2494
2495 s = splbio();
2496 raidPtr = (RF_Raid_t *) req->raidPtr;
2497 raidPtr->recon_in_progress = 1;
2498
2499 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2500 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2501
2502 RF_Free(req, sizeof(*req));
2503
2504 raidPtr->recon_in_progress = 0;
2505 splx(s);
2506
2507 /* That's all... */
2508 kthread_exit(0); /* does not return */
2509 }
2510
2511 void
2512 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2513 {
2514 int retcode;
2515 int s;
2516
2517 raidPtr->parity_rewrite_stripes_done = 0;
2518 raidPtr->parity_rewrite_in_progress = 1;
2519 s = splbio();
2520 retcode = rf_RewriteParity(raidPtr);
2521 splx(s);
2522 if (retcode) {
2523 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2524 } else {
2525 /* set the clean bit! If we shutdown correctly,
2526 the clean bit on each component label will get
2527 set */
2528 raidPtr->parity_good = RF_RAID_CLEAN;
2529 }
2530 raidPtr->parity_rewrite_in_progress = 0;
2531
2532 /* Anyone waiting for us to stop? If so, inform them... */
2533 if (raidPtr->waitShutdown) {
2534 wakeup(&raidPtr->parity_rewrite_in_progress);
2535 }
2536
2537 /* That's all... */
2538 kthread_exit(0); /* does not return */
2539 }
2540
2541
2542 void
2543 rf_CopybackThread(RF_Raid_t *raidPtr)
2544 {
2545 int s;
2546
2547 raidPtr->copyback_in_progress = 1;
2548 s = splbio();
2549 rf_CopybackReconstructedData(raidPtr);
2550 splx(s);
2551 raidPtr->copyback_in_progress = 0;
2552
2553 /* That's all... */
2554 kthread_exit(0); /* does not return */
2555 }
2556
2557
2558 void
2559 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2560 {
2561 int s;
2562 RF_Raid_t *raidPtr;
2563
2564 s = splbio();
2565 raidPtr = req->raidPtr;
2566 raidPtr->recon_in_progress = 1;
2567 rf_ReconstructInPlace(raidPtr, req->col);
2568 RF_Free(req, sizeof(*req));
2569 raidPtr->recon_in_progress = 0;
2570 splx(s);
2571
2572 /* That's all... */
2573 kthread_exit(0); /* does not return */
2574 }
2575
2576 RF_AutoConfig_t *
2577 rf_find_raid_components()
2578 {
2579 struct vnode *vp;
2580 struct disklabel label;
2581 struct device *dv;
2582 dev_t dev;
2583 int bmajor;
2584 int error;
2585 int i;
2586 int good_one;
2587 RF_ComponentLabel_t *clabel;
2588 RF_AutoConfig_t *ac_list;
2589 RF_AutoConfig_t *ac;
2590
2591
2592 /* initialize the AutoConfig list */
2593 ac_list = NULL;
2594
2595 /* we begin by trolling through *all* the devices on the system */
2596
2597 for (dv = alldevs.tqh_first; dv != NULL;
2598 dv = dv->dv_list.tqe_next) {
2599
2600 /* we are only interested in disks... */
2601 if (device_class(dv) != DV_DISK)
2602 continue;
2603
2604 /* we don't care about floppies... */
2605 if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
2606 continue;
2607 }
2608
2609 /* we don't care about CD's... */
2610 if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
2611 continue;
2612 }
2613
2614 /* hdfd is the Atari/Hades floppy driver */
2615 if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
2616 continue;
2617 }
2618 /* fdisa is the Atari/Milan floppy driver */
2619 if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
2620 continue;
2621 }
2622
2623 /* need to find the device_name_to_block_device_major stuff */
2624 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2625
2626 /* get a vnode for the raw partition of this disk */
2627
2628 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2629 if (bdevvp(dev, &vp))
2630 panic("RAID can't alloc vnode");
2631
2632 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2633
2634 if (error) {
2635 /* "Who cares." Continue looking
2636 for something that exists*/
2637 vput(vp);
2638 continue;
2639 }
2640
2641 /* Ok, the disk exists. Go get the disklabel. */
2642 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
2643 if (error) {
2644 /*
2645 * XXX can't happen - open() would
2646 * have errored out (or faked up one)
2647 */
2648 if (error != ENOTTY)
2649 printf("RAIDframe: can't get label for dev "
2650 "%s (%d)\n", dv->dv_xname, error);
2651 }
2652
2653 /* don't need this any more. We'll allocate it again
2654 a little later if we really do... */
2655 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2656 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2657 vput(vp);
2658
2659 if (error)
2660 continue;
2661
2662 for (i=0; i < label.d_npartitions; i++) {
2663 /* We only support partitions marked as RAID */
2664 if (label.d_partitions[i].p_fstype != FS_RAID)
2665 continue;
2666
2667 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2668 if (bdevvp(dev, &vp))
2669 panic("RAID can't alloc vnode");
2670
2671 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2672 if (error) {
2673 /* Whatever... */
2674 vput(vp);
2675 continue;
2676 }
2677
2678 good_one = 0;
2679
2680 clabel = (RF_ComponentLabel_t *)
2681 malloc(sizeof(RF_ComponentLabel_t),
2682 M_RAIDFRAME, M_NOWAIT);
2683 if (clabel == NULL) {
2684 /* XXX CLEANUP HERE */
2685 printf("RAID auto config: out of memory!\n");
2686 return(NULL); /* XXX probably should panic? */
2687 }
2688
2689 if (!raidread_component_label(dev, vp, clabel)) {
2690 /* Got the label. Does it look reasonable? */
2691 if (rf_reasonable_label(clabel) &&
2692 (clabel->partitionSize <=
2693 label.d_partitions[i].p_size)) {
2694 #if DEBUG
2695 printf("Component on: %s%c: %d\n",
2696 dv->dv_xname, 'a'+i,
2697 label.d_partitions[i].p_size);
2698 rf_print_component_label(clabel);
2699 #endif
2700 /* if it's reasonable, add it,
2701 else ignore it. */
2702 ac = (RF_AutoConfig_t *)
2703 malloc(sizeof(RF_AutoConfig_t),
2704 M_RAIDFRAME,
2705 M_NOWAIT);
2706 if (ac == NULL) {
2707 /* XXX should panic?? */
2708 return(NULL);
2709 }
2710
2711 snprintf(ac->devname,
2712 sizeof(ac->devname), "%s%c",
2713 dv->dv_xname, 'a'+i);
2714 ac->dev = dev;
2715 ac->vp = vp;
2716 ac->clabel = clabel;
2717 ac->next = ac_list;
2718 ac_list = ac;
2719 good_one = 1;
2720 }
2721 }
2722 if (!good_one) {
2723 /* cleanup */
2724 free(clabel, M_RAIDFRAME);
2725 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2726 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2727 vput(vp);
2728 }
2729 }
2730 }
2731 return(ac_list);
2732 }
2733
2734 static int
2735 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2736 {
2737
2738 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2739 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2740 ((clabel->clean == RF_RAID_CLEAN) ||
2741 (clabel->clean == RF_RAID_DIRTY)) &&
2742 clabel->row >=0 &&
2743 clabel->column >= 0 &&
2744 clabel->num_rows > 0 &&
2745 clabel->num_columns > 0 &&
2746 clabel->row < clabel->num_rows &&
2747 clabel->column < clabel->num_columns &&
2748 clabel->blockSize > 0 &&
2749 clabel->numBlocks > 0) {
2750 /* label looks reasonable enough... */
2751 return(1);
2752 }
2753 return(0);
2754 }
2755
2756
#if DEBUG
/*
 * Dump the fields of a component label on the console.
 * Only compiled when DEBUG is set.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	const char *is_clean;
	const char *is_autoconf;
	const char *is_root;

	is_clean = clabel->clean ? "Yes" : "No";
	is_autoconf = clabel->autoconfigure ? "Yes" : "No";
	is_root = clabel->root_partition ? "Yes" : "No";

	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf(" Clean: %s Status: %d\n", is_clean, clabel->status);
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);
	printf(" Autoconfig: %s\n", is_autoconf);
	printf(" Contains root partition: %s\n", is_root);
	printf(" Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif
}
#endif
2784
2785 RF_ConfigSet_t *
2786 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2787 {
2788 RF_AutoConfig_t *ac;
2789 RF_ConfigSet_t *config_sets;
2790 RF_ConfigSet_t *cset;
2791 RF_AutoConfig_t *ac_next;
2792
2793
2794 config_sets = NULL;
2795
2796 /* Go through the AutoConfig list, and figure out which components
2797 belong to what sets. */
2798 ac = ac_list;
2799 while(ac!=NULL) {
2800 /* we're going to putz with ac->next, so save it here
2801 for use at the end of the loop */
2802 ac_next = ac->next;
2803
2804 if (config_sets == NULL) {
2805 /* will need at least this one... */
2806 config_sets = (RF_ConfigSet_t *)
2807 malloc(sizeof(RF_ConfigSet_t),
2808 M_RAIDFRAME, M_NOWAIT);
2809 if (config_sets == NULL) {
2810 panic("rf_create_auto_sets: No memory!");
2811 }
2812 /* this one is easy :) */
2813 config_sets->ac = ac;
2814 config_sets->next = NULL;
2815 config_sets->rootable = 0;
2816 ac->next = NULL;
2817 } else {
2818 /* which set does this component fit into? */
2819 cset = config_sets;
2820 while(cset!=NULL) {
2821 if (rf_does_it_fit(cset, ac)) {
2822 /* looks like it matches... */
2823 ac->next = cset->ac;
2824 cset->ac = ac;
2825 break;
2826 }
2827 cset = cset->next;
2828 }
2829 if (cset==NULL) {
2830 /* didn't find a match above... new set..*/
2831 cset = (RF_ConfigSet_t *)
2832 malloc(sizeof(RF_ConfigSet_t),
2833 M_RAIDFRAME, M_NOWAIT);
2834 if (cset == NULL) {
2835 panic("rf_create_auto_sets: No memory!");
2836 }
2837 cset->ac = ac;
2838 ac->next = NULL;
2839 cset->next = config_sets;
2840 cset->rootable = 0;
2841 config_sets = cset;
2842 }
2843 }
2844 ac = ac_next;
2845 }
2846
2847
2848 return(config_sets);
2849 }
2850
2851 static int
2852 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2853 {
2854 RF_ComponentLabel_t *clabel1, *clabel2;
2855
2856 /* If this one matches the *first* one in the set, that's good
2857 enough, since the other members of the set would have been
2858 through here too... */
2859 /* note that we are not checking partitionSize here..
2860
2861 Note that we are also not checking the mod_counters here.
2862 If everything else matches execpt the mod_counter, that's
2863 good enough for this test. We will deal with the mod_counters
2864 a little later in the autoconfiguration process.
2865
2866 (clabel1->mod_counter == clabel2->mod_counter) &&
2867
2868 The reason we don't check for this is that failed disks
2869 will have lower modification counts. If those disks are
2870 not added to the set they used to belong to, then they will
2871 form their own set, which may result in 2 different sets,
2872 for example, competing to be configured at raid0, and
2873 perhaps competing to be the root filesystem set. If the
2874 wrong ones get configured, or both attempt to become /,
2875 weird behaviour and or serious lossage will occur. Thus we
2876 need to bring them into the fold here, and kick them out at
2877 a later point.
2878
2879 */
2880
2881 clabel1 = cset->ac->clabel;
2882 clabel2 = ac->clabel;
2883 if ((clabel1->version == clabel2->version) &&
2884 (clabel1->serial_number == clabel2->serial_number) &&
2885 (clabel1->num_rows == clabel2->num_rows) &&
2886 (clabel1->num_columns == clabel2->num_columns) &&
2887 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2888 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2889 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2890 (clabel1->parityConfig == clabel2->parityConfig) &&
2891 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2892 (clabel1->blockSize == clabel2->blockSize) &&
2893 (clabel1->numBlocks == clabel2->numBlocks) &&
2894 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2895 (clabel1->root_partition == clabel2->root_partition) &&
2896 (clabel1->last_unit == clabel2->last_unit) &&
2897 (clabel1->config_order == clabel2->config_order)) {
2898 /* if it get's here, it almost *has* to be a match */
2899 } else {
2900 /* it's not consistent with somebody in the set..
2901 punt */
2902 return(0);
2903 }
2904 /* all was fine.. it must fit... */
2905 return(1);
2906 }
2907
2908 int
2909 rf_have_enough_components(RF_ConfigSet_t *cset)
2910 {
2911 RF_AutoConfig_t *ac;
2912 RF_AutoConfig_t *auto_config;
2913 RF_ComponentLabel_t *clabel;
2914 int c;
2915 int num_cols;
2916 int num_missing;
2917 int mod_counter;
2918 int mod_counter_found;
2919 int even_pair_failed;
2920 char parity_type;
2921
2922
2923 /* check to see that we have enough 'live' components
2924 of this set. If so, we can configure it if necessary */
2925
2926 num_cols = cset->ac->clabel->num_columns;
2927 parity_type = cset->ac->clabel->parityConfig;
2928
2929 /* XXX Check for duplicate components!?!?!? */
2930
2931 /* Determine what the mod_counter is supposed to be for this set. */
2932
2933 mod_counter_found = 0;
2934 mod_counter = 0;
2935 ac = cset->ac;
2936 while(ac!=NULL) {
2937 if (mod_counter_found==0) {
2938 mod_counter = ac->clabel->mod_counter;
2939 mod_counter_found = 1;
2940 } else {
2941 if (ac->clabel->mod_counter > mod_counter) {
2942 mod_counter = ac->clabel->mod_counter;
2943 }
2944 }
2945 ac = ac->next;
2946 }
2947
2948 num_missing = 0;
2949 auto_config = cset->ac;
2950
2951 even_pair_failed = 0;
2952 for(c=0; c<num_cols; c++) {
2953 ac = auto_config;
2954 while(ac!=NULL) {
2955 if ((ac->clabel->column == c) &&
2956 (ac->clabel->mod_counter == mod_counter)) {
2957 /* it's this one... */
2958 #if DEBUG
2959 printf("Found: %s at %d\n",
2960 ac->devname,c);
2961 #endif
2962 break;
2963 }
2964 ac=ac->next;
2965 }
2966 if (ac==NULL) {
2967 /* Didn't find one here! */
2968 /* special case for RAID 1, especially
2969 where there are more than 2
2970 components (where RAIDframe treats
2971 things a little differently :( ) */
2972 if (parity_type == '1') {
2973 if (c%2 == 0) { /* even component */
2974 even_pair_failed = 1;
2975 } else { /* odd component. If
2976 we're failed, and
2977 so is the even
2978 component, it's
2979 "Good Night, Charlie" */
2980 if (even_pair_failed == 1) {
2981 return(0);
2982 }
2983 }
2984 } else {
2985 /* normal accounting */
2986 num_missing++;
2987 }
2988 }
2989 if ((parity_type == '1') && (c%2 == 1)) {
2990 /* Just did an even component, and we didn't
2991 bail.. reset the even_pair_failed flag,
2992 and go on to the next component.... */
2993 even_pair_failed = 0;
2994 }
2995 }
2996
2997 clabel = cset->ac->clabel;
2998
2999 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3000 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3001 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3002 /* XXX this needs to be made *much* more general */
3003 /* Too many failures */
3004 return(0);
3005 }
3006 /* otherwise, all is well, and we've got enough to take a kick
3007 at autoconfiguring this set */
3008 return(1);
3009 }
3010
3011 void
3012 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3013 RF_Raid_t *raidPtr)
3014 {
3015 RF_ComponentLabel_t *clabel;
3016 int i;
3017
3018 clabel = ac->clabel;
3019
3020 /* 1. Fill in the common stuff */
3021 config->numRow = clabel->num_rows = 1;
3022 config->numCol = clabel->num_columns;
3023 config->numSpare = 0; /* XXX should this be set here? */
3024 config->sectPerSU = clabel->sectPerSU;
3025 config->SUsPerPU = clabel->SUsPerPU;
3026 config->SUsPerRU = clabel->SUsPerRU;
3027 config->parityConfig = clabel->parityConfig;
3028 /* XXX... */
3029 strcpy(config->diskQueueType,"fifo");
3030 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3031 config->layoutSpecificSize = 0; /* XXX ?? */
3032
3033 while(ac!=NULL) {
3034 /* row/col values will be in range due to the checks
3035 in reasonable_label() */
3036 strcpy(config->devnames[0][ac->clabel->column],
3037 ac->devname);
3038 ac = ac->next;
3039 }
3040
3041 for(i=0;i<RF_MAXDBGV;i++) {
3042 config->debugVars[i][0] = 0;
3043 }
3044 }
3045
3046 int
3047 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3048 {
3049 RF_ComponentLabel_t clabel;
3050 struct vnode *vp;
3051 dev_t dev;
3052 int column;
3053 int sparecol;
3054
3055 raidPtr->autoconfigure = new_value;
3056
3057 for(column=0; column<raidPtr->numCol; column++) {
3058 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3059 dev = raidPtr->Disks[column].dev;
3060 vp = raidPtr->raid_cinfo[column].ci_vp;
3061 raidread_component_label(dev, vp, &clabel);
3062 clabel.autoconfigure = new_value;
3063 raidwrite_component_label(dev, vp, &clabel);
3064 }
3065 }
3066 for(column = 0; column < raidPtr->numSpare ; column++) {
3067 sparecol = raidPtr->numCol + column;
3068 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3069 dev = raidPtr->Disks[sparecol].dev;
3070 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3071 raidread_component_label(dev, vp, &clabel);
3072 clabel.autoconfigure = new_value;
3073 raidwrite_component_label(dev, vp, &clabel);
3074 }
3075 }
3076 return(new_value);
3077 }
3078
3079 int
3080 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3081 {
3082 RF_ComponentLabel_t clabel;
3083 struct vnode *vp;
3084 dev_t dev;
3085 int column;
3086 int sparecol;
3087
3088 raidPtr->root_partition = new_value;
3089 for(column=0; column<raidPtr->numCol; column++) {
3090 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3091 dev = raidPtr->Disks[column].dev;
3092 vp = raidPtr->raid_cinfo[column].ci_vp;
3093 raidread_component_label(dev, vp, &clabel);
3094 clabel.root_partition = new_value;
3095 raidwrite_component_label(dev, vp, &clabel);
3096 }
3097 }
3098 for(column = 0; column < raidPtr->numSpare ; column++) {
3099 sparecol = raidPtr->numCol + column;
3100 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3101 dev = raidPtr->Disks[sparecol].dev;
3102 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3103 raidread_component_label(dev, vp, &clabel);
3104 clabel.root_partition = new_value;
3105 raidwrite_component_label(dev, vp, &clabel);
3106 }
3107 }
3108 return(new_value);
3109 }
3110
3111 void
3112 rf_release_all_vps(RF_ConfigSet_t *cset)
3113 {
3114 RF_AutoConfig_t *ac;
3115
3116 ac = cset->ac;
3117 while(ac!=NULL) {
3118 /* Close the vp, and give it back */
3119 if (ac->vp) {
3120 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3121 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3122 vput(ac->vp);
3123 ac->vp = NULL;
3124 }
3125 ac = ac->next;
3126 }
3127 }
3128
3129
3130 void
3131 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3132 {
3133 RF_AutoConfig_t *ac;
3134 RF_AutoConfig_t *next_ac;
3135
3136 ac = cset->ac;
3137 while(ac!=NULL) {
3138 next_ac = ac->next;
3139 /* nuke the label */
3140 free(ac->clabel, M_RAIDFRAME);
3141 /* cleanup the config structure */
3142 free(ac, M_RAIDFRAME);
3143 /* "next.." */
3144 ac = next_ac;
3145 }
3146 /* and, finally, nuke the config set */
3147 free(cset, M_RAIDFRAME);
3148 }
3149
3150
3151 void
3152 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3153 {
3154 /* current version number */
3155 clabel->version = RF_COMPONENT_LABEL_VERSION;
3156 clabel->serial_number = raidPtr->serial_number;
3157 clabel->mod_counter = raidPtr->mod_counter;
3158 clabel->num_rows = 1;
3159 clabel->num_columns = raidPtr->numCol;
3160 clabel->clean = RF_RAID_DIRTY; /* not clean */
3161 clabel->status = rf_ds_optimal; /* "It's good!" */
3162
3163 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3164 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3165 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3166
3167 clabel->blockSize = raidPtr->bytesPerSector;
3168 clabel->numBlocks = raidPtr->sectorsPerDisk;
3169
3170 /* XXX not portable */
3171 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3172 clabel->maxOutstanding = raidPtr->maxOutstanding;
3173 clabel->autoconfigure = raidPtr->autoconfigure;
3174 clabel->root_partition = raidPtr->root_partition;
3175 clabel->last_unit = raidPtr->raidid;
3176 clabel->config_order = raidPtr->config_order;
3177 }
3178
3179 int
3180 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3181 {
3182 RF_Raid_t *raidPtr;
3183 RF_Config_t *config;
3184 int raidID;
3185 int retcode;
3186
3187 #if DEBUG
3188 printf("RAID autoconfigure\n");
3189 #endif
3190
3191 retcode = 0;
3192 *unit = -1;
3193
3194 /* 1. Create a config structure */
3195
3196 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3197 M_RAIDFRAME,
3198 M_NOWAIT);
3199 if (config==NULL) {
3200 printf("Out of mem!?!?\n");
3201 /* XXX do something more intelligent here. */
3202 return(1);
3203 }
3204
3205 memset(config, 0, sizeof(RF_Config_t));
3206
3207 /*
3208 2. Figure out what RAID ID this one is supposed to live at
3209 See if we can get the same RAID dev that it was configured
3210 on last time..
3211 */
3212
3213 raidID = cset->ac->clabel->last_unit;
3214 if ((raidID < 0) || (raidID >= numraid)) {
3215 /* let's not wander off into lala land. */
3216 raidID = numraid - 1;
3217 }
3218 if (raidPtrs[raidID]->valid != 0) {
3219
3220 /*
3221 Nope... Go looking for an alternative...
3222 Start high so we don't immediately use raid0 if that's
3223 not taken.
3224 */
3225
3226 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3227 if (raidPtrs[raidID]->valid == 0) {
3228 /* can use this one! */
3229 break;
3230 }
3231 }
3232 }
3233
3234 if (raidID < 0) {
3235 /* punt... */
3236 printf("Unable to auto configure this set!\n");
3237 printf("(Out of RAID devs!)\n");
3238 return(1);
3239 }
3240
3241 #if DEBUG
3242 printf("Configuring raid%d:\n",raidID);
3243 #endif
3244
3245 raidPtr = raidPtrs[raidID];
3246
3247 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3248 raidPtr->raidid = raidID;
3249 raidPtr->openings = RAIDOUTSTANDING;
3250
3251 /* 3. Build the configuration structure */
3252 rf_create_configuration(cset->ac, config, raidPtr);
3253
3254 /* 4. Do the configuration */
3255 retcode = rf_Configure(raidPtr, config, cset->ac);
3256
3257 if (retcode == 0) {
3258
3259 raidinit(raidPtrs[raidID]);
3260
3261 rf_markalldirty(raidPtrs[raidID]);
3262 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3263 if (cset->ac->clabel->root_partition==1) {
3264 /* everything configured just fine. Make a note
3265 that this set is eligible to be root. */
3266 cset->rootable = 1;
3267 /* XXX do this here? */
3268 raidPtrs[raidID]->root_partition = 1;
3269 }
3270 }
3271
3272 /* 5. Cleanup */
3273 free(config, M_RAIDFRAME);
3274
3275 *unit = raidID;
3276 return(retcode);
3277 }
3278
3279 void
3280 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3281 {
3282 struct buf *bp;
3283
3284 bp = (struct buf *)desc->bp;
3285 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3286 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3287 }
3288
3289 void
3290 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3291 size_t xmin, size_t xmax)
3292 {
3293 pool_init(p, size, 0, 0, 0, w_chan, NULL);
3294 pool_sethiwat(p, xmax);
3295 pool_prime(p, xmin);
3296 pool_setlowat(p, xmin);
3297 }
3298
3299 /*
3300 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3301 * if there is IO pending and if that IO could possibly be done for a
3302 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3303 * otherwise.
3304 *
3305 */
3306
3307 int
3308 rf_buf_queue_check(int raidid)
3309 {
3310 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
3311 raidPtrs[raidid]->openings > 0) {
3312 /* there is work to do */
3313 return 0;
3314 }
3315 /* default is nothing to do */
3316 return 1;
3317 }
3318