rf_netbsdkintf.c revision 1.202.6.3 1 /* $NetBSD: rf_netbsdkintf.c,v 1.202.6.3 2006/05/24 15:50:29 tron Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.202.6.3 2006/05/24 15:50:29 tron Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/bufq.h>
169 #include <sys/user.h>
170 #include <sys/reboot.h>
171 #include <sys/kauth.h>
172
173 #include <dev/raidframe/raidframevar.h>
174 #include <dev/raidframe/raidframeio.h>
175 #include "raid.h"
176 #include "opt_raid_autoconfig.h"
177 #include "rf_raid.h"
178 #include "rf_copyback.h"
179 #include "rf_dag.h"
180 #include "rf_dagflags.h"
181 #include "rf_desc.h"
182 #include "rf_diskqueue.h"
183 #include "rf_etimer.h"
184 #include "rf_general.h"
185 #include "rf_kintf.h"
186 #include "rf_options.h"
187 #include "rf_driver.h"
188 #include "rf_parityscan.h"
189 #include "rf_threadstuff.h"
190
/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.
 *
 * The argument is a complete, parenthesized printf argument list, e.g.
 *	db1_printf(("value is %d\n", v));
 *
 * With DEBUG defined the message is printed only when rf_kdebug_level
 * is greater than zero; without DEBUG the macro expands to nothing and
 * its argument is not evaluated.
 *
 * Both variants are wrapped in do { } while (0) so that the macro acts
 * as exactly one statement: it needs the trailing semicolon, and it can
 * be used as the body of an if/else without capturing the `else'.
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define	db1_printf(a)	do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define	db1_printf(a)	do { } while (0)
#endif				/* DEBUG */
197
198 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
199
200 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
201
202 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
203 * spare table */
204 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
205 * installation process */
206
207 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
208
209 /* prototypes */
210 static void KernelWakeupFunc(struct buf *);
211 static void InitBP(struct buf *, struct vnode *, unsigned,
212 dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *),
213 void *, int, struct proc *);
214 static void raidinit(RF_Raid_t *);
215
216 void raidattach(int);
217
218 dev_type_open(raidopen);
219 dev_type_close(raidclose);
220 dev_type_read(raidread);
221 dev_type_write(raidwrite);
222 dev_type_ioctl(raidioctl);
223 dev_type_strategy(raidstrategy);
224 dev_type_dump(raiddump);
225 dev_type_size(raidsize);
226
227 const struct bdevsw raid_bdevsw = {
228 raidopen, raidclose, raidstrategy, raidioctl,
229 raiddump, raidsize, D_DISK
230 };
231
232 const struct cdevsw raid_cdevsw = {
233 raidopen, raidclose, raidread, raidwrite, raidioctl,
234 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
235 };
236
237 /* XXX Not sure if the following should be replacing the raidPtrs above,
238 or if it should be used in conjunction with that...
239 */
240
241 struct raid_softc {
242 int sc_flags; /* flags */
243 int sc_cflags; /* configuration flags */
244 size_t sc_size; /* size of the raid device */
245 char sc_xname[20]; /* XXX external name */
246 struct disk sc_dkdev; /* generic disk device info */
247 struct bufq_state *buf_queue; /* used for the device queue */
248 };
249 /* sc_flags */
250 #define RAIDF_INITED 0x01 /* unit has been initialized */
251 #define RAIDF_WLABEL 0x02 /* label area is writable */
252 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
253 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
254 #define RAIDF_LOCKED 0x80 /* unit is locked */
255
256 #define raidunit(x) DISKUNIT(x)
257 int numraid = 0;
258
259 extern struct cfdriver raid_cd;
260
261 /*
262 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
263 * Be aware that large numbers can allow the driver to consume a lot of
264 * kernel memory, especially on writes, and in degraded mode reads.
265 *
266 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
267 * a single 64K write will typically require 64K for the old data,
268 * 64K for the old parity, and 64K for the new parity, for a total
269 * of 192K (if the parity buffer is not re-used immediately).
270 * Even if it is used immediately, that's still 128K, which when multiplied
271 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
272 *
273 * Now in degraded mode, for example, a 64K read on the above setup may
274 * require data reconstruction, which will require *all* of the 4 remaining
275 * disks to participate -- 4 * 32K/disk == 128K again.
276 */
277
/*
 * Number of simultaneous I/Os allowed per RAID device.  Defaults to 6
 * unless RAIDOUTSTANDING has already been defined.
 */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/*
 * Build the dev_t naming the raw partition (RAW_PART) of the unit
 * that `dev' belongs to, keeping the same major number.
 */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
284
285 /* declared here, and made public, for the benefit of KVM stuff.. */
286 struct raid_softc *raid_softc;
287
288 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
289 struct disklabel *);
290 static void raidgetdisklabel(dev_t);
291 static void raidmakedisklabel(struct raid_softc *);
292
293 static int raidlock(struct raid_softc *);
294 static void raidunlock(struct raid_softc *);
295
296 static void rf_markalldirty(RF_Raid_t *);
297
298 struct device *raidrootdev;
299
300 void rf_ReconThread(struct rf_recon_req *);
301 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
302 void rf_CopybackThread(RF_Raid_t *raidPtr);
303 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
304 int rf_autoconfig(struct device *self);
305 void rf_buildroothack(RF_ConfigSet_t *);
306
307 RF_AutoConfig_t *rf_find_raid_components(void);
308 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
309 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
310 static int rf_reasonable_label(RF_ComponentLabel_t *);
311 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
312 int rf_set_autoconfig(RF_Raid_t *, int);
313 int rf_set_rootpartition(RF_Raid_t *, int);
314 void rf_release_all_vps(RF_ConfigSet_t *);
315 void rf_cleanup_config_set(RF_ConfigSet_t *);
316 int rf_have_enough_components(RF_ConfigSet_t *);
317 int rf_auto_config_set(RF_ConfigSet_t *, int *);
318
319 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
320 allow autoconfig to take place.
321 Note that this is overridden by having
322 RAID_AUTOCONFIG as an option in the
323 kernel config file. */
324
325 struct RF_Pools_s rf_pools;
326
327 void
328 raidattach(int num)
329 {
330 int raidID;
331 int i, rc;
332
333 #ifdef DEBUG
334 printf("raidattach: Asked for %d units\n", num);
335 #endif
336
337 if (num <= 0) {
338 #ifdef DIAGNOSTIC
339 panic("raidattach: count <= 0");
340 #endif
341 return;
342 }
343 /* This is where all the initialization stuff gets done. */
344
345 numraid = num;
346
347 /* Make some space for requested number of units... */
348
349 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
350 if (raidPtrs == NULL) {
351 panic("raidPtrs is NULL!!");
352 }
353
354 rf_mutex_init(&rf_sparet_wait_mutex);
355
356 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
357
358 for (i = 0; i < num; i++)
359 raidPtrs[i] = NULL;
360 rc = rf_BootRaidframe();
361 if (rc == 0)
362 printf("Kernelized RAIDframe activated\n");
363 else
364 panic("Serious error booting RAID!!");
365
366 /* put together some datastructures like the CCD device does.. This
367 * lets us lock the device and what-not when it gets opened. */
368
369 raid_softc = (struct raid_softc *)
370 malloc(num * sizeof(struct raid_softc),
371 M_RAIDFRAME, M_NOWAIT);
372 if (raid_softc == NULL) {
373 printf("WARNING: no memory for RAIDframe driver\n");
374 return;
375 }
376
377 memset(raid_softc, 0, num * sizeof(struct raid_softc));
378
379 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
380 M_RAIDFRAME, M_NOWAIT);
381 if (raidrootdev == NULL) {
382 panic("No memory for RAIDframe driver!!?!?!");
383 }
384
385 for (raidID = 0; raidID < num; raidID++) {
386 bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
387 pseudo_disk_init(&raid_softc[raidID].sc_dkdev);
388
389 /* XXXJRT Should use config_attach_pseudo() */
390
391 raidrootdev[raidID].dv_class = DV_DISK;
392 raidrootdev[raidID].dv_cfdata = NULL;
393 raidrootdev[raidID].dv_unit = raidID;
394 raidrootdev[raidID].dv_parent = NULL;
395 raidrootdev[raidID].dv_flags = 0;
396 raidrootdev[raidID].dv_cfdriver = &raid_cd;
397 snprintf(raidrootdev[raidID].dv_xname,
398 sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID);
399
400 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
401 (RF_Raid_t *));
402 if (raidPtrs[raidID] == NULL) {
403 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
404 numraid = raidID;
405 return;
406 }
407 }
408
409 #ifdef RAID_AUTOCONFIG
410 raidautoconfig = 1;
411 #endif
412
413 /*
414 * Register a finalizer which will be used to auto-config RAID
415 * sets once all real hardware devices have been found.
416 */
417 if (config_finalize_register(NULL, rf_autoconfig) != 0)
418 printf("WARNING: unable to register RAIDframe finalizer\n");
419 }
420
421 int
422 rf_autoconfig(struct device *self)
423 {
424 RF_AutoConfig_t *ac_list;
425 RF_ConfigSet_t *config_sets;
426
427 if (raidautoconfig == 0)
428 return (0);
429
430 /* XXX This code can only be run once. */
431 raidautoconfig = 0;
432
433 /* 1. locate all RAID components on the system */
434 #ifdef DEBUG
435 printf("Searching for RAID components...\n");
436 #endif
437 ac_list = rf_find_raid_components();
438
439 /* 2. Sort them into their respective sets. */
440 config_sets = rf_create_auto_sets(ac_list);
441
442 /*
443 * 3. Evaluate each set andconfigure the valid ones.
444 * This gets done in rf_buildroothack().
445 */
446 rf_buildroothack(config_sets);
447
448 return (1);
449 }
450
451 void
452 rf_buildroothack(RF_ConfigSet_t *config_sets)
453 {
454 RF_ConfigSet_t *cset;
455 RF_ConfigSet_t *next_cset;
456 int retcode;
457 int raidID;
458 int rootID;
459 int num_root;
460
461 rootID = 0;
462 num_root = 0;
463 cset = config_sets;
464 while(cset != NULL ) {
465 next_cset = cset->next;
466 if (rf_have_enough_components(cset) &&
467 cset->ac->clabel->autoconfigure==1) {
468 retcode = rf_auto_config_set(cset,&raidID);
469 if (!retcode) {
470 if (cset->rootable) {
471 rootID = raidID;
472 num_root++;
473 }
474 } else {
475 /* The autoconfig didn't work :( */
476 #if DEBUG
477 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
478 #endif
479 rf_release_all_vps(cset);
480 }
481 } else {
482 /* we're not autoconfiguring this set...
483 release the associated resources */
484 rf_release_all_vps(cset);
485 }
486 /* cleanup */
487 rf_cleanup_config_set(cset);
488 cset = next_cset;
489 }
490
491 /* we found something bootable... */
492
493 if (num_root == 1) {
494 booted_device = &raidrootdev[rootID];
495 } else if (num_root > 1) {
496 /* we can't guess.. require the user to answer... */
497 boothowto |= RB_ASKNAME;
498 }
499 }
500
501
502 int
503 raidsize(dev_t dev)
504 {
505 struct raid_softc *rs;
506 struct disklabel *lp;
507 int part, unit, omask, size;
508
509 unit = raidunit(dev);
510 if (unit >= numraid)
511 return (-1);
512 rs = &raid_softc[unit];
513
514 if ((rs->sc_flags & RAIDF_INITED) == 0)
515 return (-1);
516
517 part = DISKPART(dev);
518 omask = rs->sc_dkdev.dk_openmask & (1 << part);
519 lp = rs->sc_dkdev.dk_label;
520
521 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
522 return (-1);
523
524 if (lp->d_partitions[part].p_fstype != FS_SWAP)
525 size = -1;
526 else
527 size = lp->d_partitions[part].p_size *
528 (lp->d_secsize / DEV_BSIZE);
529
530 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
531 return (-1);
532
533 return (size);
534
535 }
536
537 int
538 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
539 {
540 /* Not implemented. */
541 return ENXIO;
542 }
543 /* ARGSUSED */
544 int
545 raidopen(dev_t dev, int flags, int fmt, struct lwp *l)
546 {
547 int unit = raidunit(dev);
548 struct raid_softc *rs;
549 struct disklabel *lp;
550 int part, pmask;
551 int error = 0;
552
553 if (unit >= numraid)
554 return (ENXIO);
555 rs = &raid_softc[unit];
556
557 if ((error = raidlock(rs)) != 0)
558 return (error);
559 lp = rs->sc_dkdev.dk_label;
560
561 part = DISKPART(dev);
562 pmask = (1 << part);
563
564 if ((rs->sc_flags & RAIDF_INITED) &&
565 (rs->sc_dkdev.dk_openmask == 0))
566 raidgetdisklabel(dev);
567
568 /* make sure that this partition exists */
569
570 if (part != RAW_PART) {
571 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
572 ((part >= lp->d_npartitions) ||
573 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
574 error = ENXIO;
575 raidunlock(rs);
576 return (error);
577 }
578 }
579 /* Prevent this unit from being unconfigured while open. */
580 switch (fmt) {
581 case S_IFCHR:
582 rs->sc_dkdev.dk_copenmask |= pmask;
583 break;
584
585 case S_IFBLK:
586 rs->sc_dkdev.dk_bopenmask |= pmask;
587 break;
588 }
589
590 if ((rs->sc_dkdev.dk_openmask == 0) &&
591 ((rs->sc_flags & RAIDF_INITED) != 0)) {
592 /* First one... mark things as dirty... Note that we *MUST*
593 have done a configure before this. I DO NOT WANT TO BE
594 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
595 THAT THEY BELONG TOGETHER!!!!! */
596 /* XXX should check to see if we're only open for reading
597 here... If so, we needn't do this, but then need some
598 other way of keeping track of what's happened.. */
599
600 rf_markalldirty( raidPtrs[unit] );
601 }
602
603
604 rs->sc_dkdev.dk_openmask =
605 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
606
607 raidunlock(rs);
608
609 return (error);
610
611
612 }
613 /* ARGSUSED */
614 int
615 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
616 {
617 int unit = raidunit(dev);
618 struct raid_softc *rs;
619 int error = 0;
620 int part;
621
622 if (unit >= numraid)
623 return (ENXIO);
624 rs = &raid_softc[unit];
625
626 if ((error = raidlock(rs)) != 0)
627 return (error);
628
629 part = DISKPART(dev);
630
631 /* ...that much closer to allowing unconfiguration... */
632 switch (fmt) {
633 case S_IFCHR:
634 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
635 break;
636
637 case S_IFBLK:
638 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
639 break;
640 }
641 rs->sc_dkdev.dk_openmask =
642 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
643
644 if ((rs->sc_dkdev.dk_openmask == 0) &&
645 ((rs->sc_flags & RAIDF_INITED) != 0)) {
646 /* Last one... device is not unconfigured yet.
647 Device shutdown has taken care of setting the
648 clean bits if RAIDF_INITED is not set
649 mark things as clean... */
650
651 rf_update_component_labels(raidPtrs[unit],
652 RF_FINAL_COMPONENT_UPDATE);
653 if (doing_shutdown) {
654 /* last one, and we're going down, so
655 lights out for this RAID set too. */
656 error = rf_Shutdown(raidPtrs[unit]);
657
658 /* It's no longer initialized... */
659 rs->sc_flags &= ~RAIDF_INITED;
660
661 /* Detach the disk. */
662 pseudo_disk_detach(&rs->sc_dkdev);
663 }
664 }
665
666 raidunlock(rs);
667 return (0);
668
669 }
670
671 void
672 raidstrategy(struct buf *bp)
673 {
674 int s;
675
676 unsigned int raidID = raidunit(bp->b_dev);
677 RF_Raid_t *raidPtr;
678 struct raid_softc *rs = &raid_softc[raidID];
679 int wlabel;
680
681 if ((rs->sc_flags & RAIDF_INITED) ==0) {
682 bp->b_error = ENXIO;
683 bp->b_flags |= B_ERROR;
684 goto done;
685 }
686 if (raidID >= numraid || !raidPtrs[raidID]) {
687 bp->b_error = ENODEV;
688 bp->b_flags |= B_ERROR;
689 goto done;
690 }
691 raidPtr = raidPtrs[raidID];
692 if (!raidPtr->valid) {
693 bp->b_error = ENODEV;
694 bp->b_flags |= B_ERROR;
695 goto done;
696 }
697 if (bp->b_bcount == 0) {
698 db1_printf(("b_bcount is zero..\n"));
699 goto done;
700 }
701
702 /*
703 * Do bounds checking and adjust transfer. If there's an
704 * error, the bounds check will flag that for us.
705 */
706
707 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
708 if (DISKPART(bp->b_dev) == RAW_PART) {
709 uint64_t size; /* device size in DEV_BSIZE unit */
710
711 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
712 size = raidPtr->totalSectors <<
713 (raidPtr->logBytesPerSector - DEV_BSHIFT);
714 } else {
715 size = raidPtr->totalSectors >>
716 (DEV_BSHIFT - raidPtr->logBytesPerSector);
717 }
718 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
719 goto done;
720 }
721 } else {
722 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
723 db1_printf(("Bounds check failed!!:%d %d\n",
724 (int) bp->b_blkno, (int) wlabel));
725 goto done;
726 }
727 }
728 s = splbio();
729
730 bp->b_resid = 0;
731
732 /* stuff it onto our queue */
733 BUFQ_PUT(rs->buf_queue, bp);
734
735 /* scheduled the IO to happen at the next convenient time */
736 wakeup(&(raidPtrs[raidID]->iodone));
737
738 splx(s);
739 return;
740
741 done:
742 bp->b_resid = bp->b_bcount;
743 biodone(bp);
744 }
745 /* ARGSUSED */
746 int
747 raidread(dev_t dev, struct uio *uio, int flags)
748 {
749 int unit = raidunit(dev);
750 struct raid_softc *rs;
751
752 if (unit >= numraid)
753 return (ENXIO);
754 rs = &raid_softc[unit];
755
756 if ((rs->sc_flags & RAIDF_INITED) == 0)
757 return (ENXIO);
758
759 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
760
761 }
762 /* ARGSUSED */
763 int
764 raidwrite(dev_t dev, struct uio *uio, int flags)
765 {
766 int unit = raidunit(dev);
767 struct raid_softc *rs;
768
769 if (unit >= numraid)
770 return (ENXIO);
771 rs = &raid_softc[unit];
772
773 if ((rs->sc_flags & RAIDF_INITED) == 0)
774 return (ENXIO);
775
776 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
777
778 }
779
780 int
781 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
782 {
783 int unit = raidunit(dev);
784 int error = 0;
785 int part, pmask;
786 struct raid_softc *rs;
787 RF_Config_t *k_cfg, *u_cfg;
788 RF_Raid_t *raidPtr;
789 RF_RaidDisk_t *diskPtr;
790 RF_AccTotals_t *totals;
791 RF_DeviceConfig_t *d_cfg, **ucfgp;
792 u_char *specific_buf;
793 int retcode = 0;
794 int column;
795 int raidid;
796 struct rf_recon_req *rrcopy, *rr;
797 RF_ComponentLabel_t *clabel;
798 RF_ComponentLabel_t ci_label;
799 RF_ComponentLabel_t **clabel_ptr;
800 RF_SingleComponent_t *sparePtr,*componentPtr;
801 RF_SingleComponent_t hot_spare;
802 RF_SingleComponent_t component;
803 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
804 int i, j, d;
805 #ifdef __HAVE_OLD_DISKLABEL
806 struct disklabel newlabel;
807 #endif
808
809 if (unit >= numraid)
810 return (ENXIO);
811 rs = &raid_softc[unit];
812 raidPtr = raidPtrs[unit];
813
814 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
815 (int) DISKPART(dev), (int) unit, (int) cmd));
816
817 /* Must be open for writes for these commands... */
818 switch (cmd) {
819 case DIOCSDINFO:
820 case DIOCWDINFO:
821 #ifdef __HAVE_OLD_DISKLABEL
822 case ODIOCWDINFO:
823 case ODIOCSDINFO:
824 #endif
825 case DIOCWLABEL:
826 if ((flag & FWRITE) == 0)
827 return (EBADF);
828 }
829
830 /* Must be initialized for these... */
831 switch (cmd) {
832 case DIOCGDINFO:
833 case DIOCSDINFO:
834 case DIOCWDINFO:
835 #ifdef __HAVE_OLD_DISKLABEL
836 case ODIOCGDINFO:
837 case ODIOCWDINFO:
838 case ODIOCSDINFO:
839 case ODIOCGDEFLABEL:
840 #endif
841 case DIOCGPART:
842 case DIOCWLABEL:
843 case DIOCGDEFLABEL:
844 case RAIDFRAME_SHUTDOWN:
845 case RAIDFRAME_REWRITEPARITY:
846 case RAIDFRAME_GET_INFO:
847 case RAIDFRAME_RESET_ACCTOTALS:
848 case RAIDFRAME_GET_ACCTOTALS:
849 case RAIDFRAME_KEEP_ACCTOTALS:
850 case RAIDFRAME_GET_SIZE:
851 case RAIDFRAME_FAIL_DISK:
852 case RAIDFRAME_COPYBACK:
853 case RAIDFRAME_CHECK_RECON_STATUS:
854 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
855 case RAIDFRAME_GET_COMPONENT_LABEL:
856 case RAIDFRAME_SET_COMPONENT_LABEL:
857 case RAIDFRAME_ADD_HOT_SPARE:
858 case RAIDFRAME_REMOVE_HOT_SPARE:
859 case RAIDFRAME_INIT_LABELS:
860 case RAIDFRAME_REBUILD_IN_PLACE:
861 case RAIDFRAME_CHECK_PARITY:
862 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
863 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
864 case RAIDFRAME_CHECK_COPYBACK_STATUS:
865 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
866 case RAIDFRAME_SET_AUTOCONFIG:
867 case RAIDFRAME_SET_ROOT:
868 case RAIDFRAME_DELETE_COMPONENT:
869 case RAIDFRAME_INCORPORATE_HOT_SPARE:
870 if ((rs->sc_flags & RAIDF_INITED) == 0)
871 return (ENXIO);
872 }
873
874 switch (cmd) {
875
876 /* configure the system */
877 case RAIDFRAME_CONFIGURE:
878
879 if (raidPtr->valid) {
880 /* There is a valid RAID set running on this unit! */
881 printf("raid%d: Device already configured!\n",unit);
882 return(EINVAL);
883 }
884
885 /* copy-in the configuration information */
886 /* data points to a pointer to the configuration structure */
887
888 u_cfg = *((RF_Config_t **) data);
889 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
890 if (k_cfg == NULL) {
891 return (ENOMEM);
892 }
893 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
894 if (retcode) {
895 RF_Free(k_cfg, sizeof(RF_Config_t));
896 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
897 retcode));
898 return (retcode);
899 }
900 /* allocate a buffer for the layout-specific data, and copy it
901 * in */
902 if (k_cfg->layoutSpecificSize) {
903 if (k_cfg->layoutSpecificSize > 10000) {
904 /* sanity check */
905 RF_Free(k_cfg, sizeof(RF_Config_t));
906 return (EINVAL);
907 }
908 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
909 (u_char *));
910 if (specific_buf == NULL) {
911 RF_Free(k_cfg, sizeof(RF_Config_t));
912 return (ENOMEM);
913 }
914 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
915 k_cfg->layoutSpecificSize);
916 if (retcode) {
917 RF_Free(k_cfg, sizeof(RF_Config_t));
918 RF_Free(specific_buf,
919 k_cfg->layoutSpecificSize);
920 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
921 retcode));
922 return (retcode);
923 }
924 } else
925 specific_buf = NULL;
926 k_cfg->layoutSpecific = specific_buf;
927
928 /* should do some kind of sanity check on the configuration.
929 * Store the sum of all the bytes in the last byte? */
930
931 /* configure the system */
932
933 /*
934 * Clear the entire RAID descriptor, just to make sure
935 * there is no stale data left in the case of a
936 * reconfiguration
937 */
938 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
939 raidPtr->raidid = unit;
940
941 retcode = rf_Configure(raidPtr, k_cfg, NULL);
942
943 if (retcode == 0) {
944
945 /* allow this many simultaneous IO's to
946 this RAID device */
947 raidPtr->openings = RAIDOUTSTANDING;
948
949 raidinit(raidPtr);
950 rf_markalldirty(raidPtr);
951 }
952 /* free the buffers. No return code here. */
953 if (k_cfg->layoutSpecificSize) {
954 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
955 }
956 RF_Free(k_cfg, sizeof(RF_Config_t));
957
958 return (retcode);
959
960 /* shutdown the system */
961 case RAIDFRAME_SHUTDOWN:
962
963 if ((error = raidlock(rs)) != 0)
964 return (error);
965
966 /*
967 * If somebody has a partition mounted, we shouldn't
968 * shutdown.
969 */
970
971 part = DISKPART(dev);
972 pmask = (1 << part);
973 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
974 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
975 (rs->sc_dkdev.dk_copenmask & pmask))) {
976 raidunlock(rs);
977 return (EBUSY);
978 }
979
980 retcode = rf_Shutdown(raidPtr);
981
982 /* It's no longer initialized... */
983 rs->sc_flags &= ~RAIDF_INITED;
984
985 /* Detach the disk. */
986 pseudo_disk_detach(&rs->sc_dkdev);
987
988 raidunlock(rs);
989
990 return (retcode);
991 case RAIDFRAME_GET_COMPONENT_LABEL:
992 clabel_ptr = (RF_ComponentLabel_t **) data;
993 /* need to read the component label for the disk indicated
994 by row,column in clabel */
995
996 /* For practice, let's get it directly fromdisk, rather
997 than from the in-core copy */
998 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
999 (RF_ComponentLabel_t *));
1000 if (clabel == NULL)
1001 return (ENOMEM);
1002
1003 retcode = copyin( *clabel_ptr, clabel,
1004 sizeof(RF_ComponentLabel_t));
1005
1006 if (retcode) {
1007 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1008 return(retcode);
1009 }
1010
1011 clabel->row = 0; /* Don't allow looking at anything else.*/
1012
1013 column = clabel->column;
1014
1015 if ((column < 0) || (column >= raidPtr->numCol +
1016 raidPtr->numSpare)) {
1017 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1018 return(EINVAL);
1019 }
1020
1021 retcode = raidread_component_label(raidPtr->Disks[column].dev,
1022 raidPtr->raid_cinfo[column].ci_vp,
1023 clabel );
1024
1025 if (retcode == 0) {
1026 retcode = copyout(clabel, *clabel_ptr,
1027 sizeof(RF_ComponentLabel_t));
1028 }
1029 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1030 return (retcode);
1031
1032 case RAIDFRAME_SET_COMPONENT_LABEL:
1033 clabel = (RF_ComponentLabel_t *) data;
1034
1035 /* XXX check the label for valid stuff... */
1036 /* Note that some things *should not* get modified --
1037 the user should be re-initing the labels instead of
1038 trying to patch things.
1039 */
1040
1041 raidid = raidPtr->raidid;
1042 #if DEBUG
1043 printf("raid%d: Got component label:\n", raidid);
1044 printf("raid%d: Version: %d\n", raidid, clabel->version);
1045 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1046 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1047 printf("raid%d: Column: %d\n", raidid, clabel->column);
1048 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1049 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1050 printf("raid%d: Status: %d\n", raidid, clabel->status);
1051 #endif
1052 clabel->row = 0;
1053 column = clabel->column;
1054
1055 if ((column < 0) || (column >= raidPtr->numCol)) {
1056 return(EINVAL);
1057 }
1058
1059 /* XXX this isn't allowed to do anything for now :-) */
1060
1061 /* XXX and before it is, we need to fill in the rest
1062 of the fields!?!?!?! */
1063 #if 0
1064 raidwrite_component_label(
1065 raidPtr->Disks[column].dev,
1066 raidPtr->raid_cinfo[column].ci_vp,
1067 clabel );
1068 #endif
1069 return (0);
1070
1071 case RAIDFRAME_INIT_LABELS:
1072 clabel = (RF_ComponentLabel_t *) data;
1073 /*
1074 we only want the serial number from
1075 the above. We get all the rest of the information
1076 from the config that was used to create this RAID
1077 set.
1078 */
1079
1080 raidPtr->serial_number = clabel->serial_number;
1081
1082 raid_init_component_label(raidPtr, &ci_label);
1083 ci_label.serial_number = clabel->serial_number;
1084 ci_label.row = 0; /* we dont' pretend to support more */
1085
1086 for(column=0;column<raidPtr->numCol;column++) {
1087 diskPtr = &raidPtr->Disks[column];
1088 if (!RF_DEAD_DISK(diskPtr->status)) {
1089 ci_label.partitionSize = diskPtr->partitionSize;
1090 ci_label.column = column;
1091 raidwrite_component_label(
1092 raidPtr->Disks[column].dev,
1093 raidPtr->raid_cinfo[column].ci_vp,
1094 &ci_label );
1095 }
1096 }
1097
1098 return (retcode);
1099 case RAIDFRAME_SET_AUTOCONFIG:
1100 d = rf_set_autoconfig(raidPtr, *(int *) data);
1101 printf("raid%d: New autoconfig value is: %d\n",
1102 raidPtr->raidid, d);
1103 *(int *) data = d;
1104 return (retcode);
1105
1106 case RAIDFRAME_SET_ROOT:
1107 d = rf_set_rootpartition(raidPtr, *(int *) data);
1108 printf("raid%d: New rootpartition value is: %d\n",
1109 raidPtr->raidid, d);
1110 *(int *) data = d;
1111 return (retcode);
1112
1113 /* initialize all parity */
1114 case RAIDFRAME_REWRITEPARITY:
1115
1116 if (raidPtr->Layout.map->faultsTolerated == 0) {
1117 /* Parity for RAID 0 is trivially correct */
1118 raidPtr->parity_good = RF_RAID_CLEAN;
1119 return(0);
1120 }
1121
1122 if (raidPtr->parity_rewrite_in_progress == 1) {
1123 /* Re-write is already in progress! */
1124 return(EINVAL);
1125 }
1126
1127 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1128 rf_RewriteParityThread,
1129 raidPtr,"raid_parity");
1130 return (retcode);
1131
1132
1133 case RAIDFRAME_ADD_HOT_SPARE:
1134 sparePtr = (RF_SingleComponent_t *) data;
1135 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1136 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1137 return(retcode);
1138
1139 case RAIDFRAME_REMOVE_HOT_SPARE:
1140 return(retcode);
1141
1142 case RAIDFRAME_DELETE_COMPONENT:
1143 componentPtr = (RF_SingleComponent_t *)data;
1144 memcpy( &component, componentPtr,
1145 sizeof(RF_SingleComponent_t));
1146 retcode = rf_delete_component(raidPtr, &component);
1147 return(retcode);
1148
1149 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1150 componentPtr = (RF_SingleComponent_t *)data;
1151 memcpy( &component, componentPtr,
1152 sizeof(RF_SingleComponent_t));
1153 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1154 return(retcode);
1155
1156 case RAIDFRAME_REBUILD_IN_PLACE:
1157
1158 if (raidPtr->Layout.map->faultsTolerated == 0) {
1159 /* Can't do this on a RAID 0!! */
1160 return(EINVAL);
1161 }
1162
1163 if (raidPtr->recon_in_progress == 1) {
1164 /* a reconstruct is already in progress! */
1165 return(EINVAL);
1166 }
1167
1168 componentPtr = (RF_SingleComponent_t *) data;
1169 memcpy( &component, componentPtr,
1170 sizeof(RF_SingleComponent_t));
1171 component.row = 0; /* we don't support any more */
1172 column = component.column;
1173
1174 if ((column < 0) || (column >= raidPtr->numCol)) {
1175 return(EINVAL);
1176 }
1177
1178 RF_LOCK_MUTEX(raidPtr->mutex);
1179 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1180 (raidPtr->numFailures > 0)) {
1181 /* XXX 0 above shouldn't be constant!!! */
1182 /* some component other than this has failed.
1183 Let's not make things worse than they already
1184 are... */
1185 printf("raid%d: Unable to reconstruct to disk at:\n",
1186 raidPtr->raidid);
1187 printf("raid%d: Col: %d Too many failures.\n",
1188 raidPtr->raidid, column);
1189 RF_UNLOCK_MUTEX(raidPtr->mutex);
1190 return (EINVAL);
1191 }
1192 if (raidPtr->Disks[column].status ==
1193 rf_ds_reconstructing) {
1194 printf("raid%d: Unable to reconstruct to disk at:\n",
1195 raidPtr->raidid);
1196 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1197
1198 RF_UNLOCK_MUTEX(raidPtr->mutex);
1199 return (EINVAL);
1200 }
1201 if (raidPtr->Disks[column].status == rf_ds_spared) {
1202 RF_UNLOCK_MUTEX(raidPtr->mutex);
1203 return (EINVAL);
1204 }
1205 RF_UNLOCK_MUTEX(raidPtr->mutex);
1206
1207 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1208 if (rrcopy == NULL)
1209 return(ENOMEM);
1210
1211 rrcopy->raidPtr = (void *) raidPtr;
1212 rrcopy->col = column;
1213
1214 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1215 rf_ReconstructInPlaceThread,
1216 rrcopy,"raid_reconip");
1217 return(retcode);
1218
1219 case RAIDFRAME_GET_INFO:
1220 if (!raidPtr->valid)
1221 return (ENODEV);
1222 ucfgp = (RF_DeviceConfig_t **) data;
1223 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1224 (RF_DeviceConfig_t *));
1225 if (d_cfg == NULL)
1226 return (ENOMEM);
1227 d_cfg->rows = 1; /* there is only 1 row now */
1228 d_cfg->cols = raidPtr->numCol;
1229 d_cfg->ndevs = raidPtr->numCol;
1230 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1231 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1232 return (ENOMEM);
1233 }
1234 d_cfg->nspares = raidPtr->numSpare;
1235 if (d_cfg->nspares >= RF_MAX_DISKS) {
1236 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1237 return (ENOMEM);
1238 }
1239 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1240 d = 0;
1241 for (j = 0; j < d_cfg->cols; j++) {
1242 d_cfg->devs[d] = raidPtr->Disks[j];
1243 d++;
1244 }
1245 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1246 d_cfg->spares[i] = raidPtr->Disks[j];
1247 }
1248 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1249 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1250
1251 return (retcode);
1252
1253 case RAIDFRAME_CHECK_PARITY:
1254 *(int *) data = raidPtr->parity_good;
1255 return (0);
1256
1257 case RAIDFRAME_RESET_ACCTOTALS:
1258 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1259 return (0);
1260
1261 case RAIDFRAME_GET_ACCTOTALS:
1262 totals = (RF_AccTotals_t *) data;
1263 *totals = raidPtr->acc_totals;
1264 return (0);
1265
1266 case RAIDFRAME_KEEP_ACCTOTALS:
1267 raidPtr->keep_acc_totals = *(int *)data;
1268 return (0);
1269
1270 case RAIDFRAME_GET_SIZE:
1271 *(int *) data = raidPtr->totalSectors;
1272 return (0);
1273
1274 /* fail a disk & optionally start reconstruction */
1275 case RAIDFRAME_FAIL_DISK:
1276
1277 if (raidPtr->Layout.map->faultsTolerated == 0) {
1278 /* Can't do this on a RAID 0!! */
1279 return(EINVAL);
1280 }
1281
1282 rr = (struct rf_recon_req *) data;
1283 rr->row = 0;
1284 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1285 return (EINVAL);
1286
1287
1288 RF_LOCK_MUTEX(raidPtr->mutex);
1289 if (raidPtr->status == rf_rs_reconstructing) {
1290 /* you can't fail a disk while we're reconstructing! */
1291 /* XXX wrong for RAID6 */
1292 RF_UNLOCK_MUTEX(raidPtr->mutex);
1293 return (EINVAL);
1294 }
1295 if ((raidPtr->Disks[rr->col].status ==
1296 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1297 /* some other component has failed. Let's not make
1298 things worse. XXX wrong for RAID6 */
1299 RF_UNLOCK_MUTEX(raidPtr->mutex);
1300 return (EINVAL);
1301 }
1302 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1303 /* Can't fail a spared disk! */
1304 RF_UNLOCK_MUTEX(raidPtr->mutex);
1305 return (EINVAL);
1306 }
1307 RF_UNLOCK_MUTEX(raidPtr->mutex);
1308
1309 /* make a copy of the recon request so that we don't rely on
1310 * the user's buffer */
1311 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1312 if (rrcopy == NULL)
1313 return(ENOMEM);
1314 memcpy(rrcopy, rr, sizeof(*rr));
1315 rrcopy->raidPtr = (void *) raidPtr;
1316
1317 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1318 rf_ReconThread,
1319 rrcopy,"raid_recon");
1320 return (0);
1321
1322 /* invoke a copyback operation after recon on whatever disk
1323 * needs it, if any */
1324 case RAIDFRAME_COPYBACK:
1325
1326 if (raidPtr->Layout.map->faultsTolerated == 0) {
1327 /* This makes no sense on a RAID 0!! */
1328 return(EINVAL);
1329 }
1330
1331 if (raidPtr->copyback_in_progress == 1) {
1332 /* Copyback is already in progress! */
1333 return(EINVAL);
1334 }
1335
1336 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1337 rf_CopybackThread,
1338 raidPtr,"raid_copyback");
1339 return (retcode);
1340
1341 /* return the percentage completion of reconstruction */
1342 case RAIDFRAME_CHECK_RECON_STATUS:
1343 if (raidPtr->Layout.map->faultsTolerated == 0) {
1344 /* This makes no sense on a RAID 0, so tell the
1345 user it's done. */
1346 *(int *) data = 100;
1347 return(0);
1348 }
1349 if (raidPtr->status != rf_rs_reconstructing)
1350 *(int *) data = 100;
1351 else {
1352 if (raidPtr->reconControl->numRUsTotal > 0) {
1353 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1354 } else {
1355 *(int *) data = 0;
1356 }
1357 }
1358 return (0);
1359 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1360 progressInfoPtr = (RF_ProgressInfo_t **) data;
1361 if (raidPtr->status != rf_rs_reconstructing) {
1362 progressInfo.remaining = 0;
1363 progressInfo.completed = 100;
1364 progressInfo.total = 100;
1365 } else {
1366 progressInfo.total =
1367 raidPtr->reconControl->numRUsTotal;
1368 progressInfo.completed =
1369 raidPtr->reconControl->numRUsComplete;
1370 progressInfo.remaining = progressInfo.total -
1371 progressInfo.completed;
1372 }
1373 retcode = copyout(&progressInfo, *progressInfoPtr,
1374 sizeof(RF_ProgressInfo_t));
1375 return (retcode);
1376
1377 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1378 if (raidPtr->Layout.map->faultsTolerated == 0) {
1379 /* This makes no sense on a RAID 0, so tell the
1380 user it's done. */
1381 *(int *) data = 100;
1382 return(0);
1383 }
1384 if (raidPtr->parity_rewrite_in_progress == 1) {
1385 *(int *) data = 100 *
1386 raidPtr->parity_rewrite_stripes_done /
1387 raidPtr->Layout.numStripe;
1388 } else {
1389 *(int *) data = 100;
1390 }
1391 return (0);
1392
1393 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1394 progressInfoPtr = (RF_ProgressInfo_t **) data;
1395 if (raidPtr->parity_rewrite_in_progress == 1) {
1396 progressInfo.total = raidPtr->Layout.numStripe;
1397 progressInfo.completed =
1398 raidPtr->parity_rewrite_stripes_done;
1399 progressInfo.remaining = progressInfo.total -
1400 progressInfo.completed;
1401 } else {
1402 progressInfo.remaining = 0;
1403 progressInfo.completed = 100;
1404 progressInfo.total = 100;
1405 }
1406 retcode = copyout(&progressInfo, *progressInfoPtr,
1407 sizeof(RF_ProgressInfo_t));
1408 return (retcode);
1409
1410 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1411 if (raidPtr->Layout.map->faultsTolerated == 0) {
1412 /* This makes no sense on a RAID 0 */
1413 *(int *) data = 100;
1414 return(0);
1415 }
1416 if (raidPtr->copyback_in_progress == 1) {
1417 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1418 raidPtr->Layout.numStripe;
1419 } else {
1420 *(int *) data = 100;
1421 }
1422 return (0);
1423
1424 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1425 progressInfoPtr = (RF_ProgressInfo_t **) data;
1426 if (raidPtr->copyback_in_progress == 1) {
1427 progressInfo.total = raidPtr->Layout.numStripe;
1428 progressInfo.completed =
1429 raidPtr->copyback_stripes_done;
1430 progressInfo.remaining = progressInfo.total -
1431 progressInfo.completed;
1432 } else {
1433 progressInfo.remaining = 0;
1434 progressInfo.completed = 100;
1435 progressInfo.total = 100;
1436 }
1437 retcode = copyout(&progressInfo, *progressInfoPtr,
1438 sizeof(RF_ProgressInfo_t));
1439 return (retcode);
1440
1441 /* the sparetable daemon calls this to wait for the kernel to
1442 * need a spare table. this ioctl does not return until a
1443 * spare table is needed. XXX -- calling mpsleep here in the
1444 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1445 * -- I should either compute the spare table in the kernel,
1446 * or have a different -- XXX XXX -- interface (a different
1447 * character device) for delivering the table -- XXX */
1448 #if 0
1449 case RAIDFRAME_SPARET_WAIT:
1450 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1451 while (!rf_sparet_wait_queue)
1452 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1453 waitreq = rf_sparet_wait_queue;
1454 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1455 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1456
1457 /* structure assignment */
1458 *((RF_SparetWait_t *) data) = *waitreq;
1459
1460 RF_Free(waitreq, sizeof(*waitreq));
1461 return (0);
1462
1463 /* wakes up a process waiting on SPARET_WAIT and puts an error
1464 * code in it that will cause the dameon to exit */
1465 case RAIDFRAME_ABORT_SPARET_WAIT:
1466 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1467 waitreq->fcol = -1;
1468 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1469 waitreq->next = rf_sparet_wait_queue;
1470 rf_sparet_wait_queue = waitreq;
1471 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1472 wakeup(&rf_sparet_wait_queue);
1473 return (0);
1474
1475 /* used by the spare table daemon to deliver a spare table
1476 * into the kernel */
1477 case RAIDFRAME_SEND_SPARET:
1478
1479 /* install the spare table */
1480 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1481
1482 /* respond to the requestor. the return status of the spare
1483 * table installation is passed in the "fcol" field */
1484 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1485 waitreq->fcol = retcode;
1486 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1487 waitreq->next = rf_sparet_resp_queue;
1488 rf_sparet_resp_queue = waitreq;
1489 wakeup(&rf_sparet_resp_queue);
1490 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1491
1492 return (retcode);
1493 #endif
1494
1495 default:
1496 break; /* fall through to the os-specific code below */
1497
1498 }
1499
1500 if (!raidPtr->valid)
1501 return (EINVAL);
1502
1503 /*
1504 * Add support for "regular" device ioctls here.
1505 */
1506
1507 switch (cmd) {
1508 case DIOCGDINFO:
1509 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1510 break;
1511 #ifdef __HAVE_OLD_DISKLABEL
1512 case ODIOCGDINFO:
1513 newlabel = *(rs->sc_dkdev.dk_label);
1514 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1515 return ENOTTY;
1516 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1517 break;
1518 #endif
1519
1520 case DIOCGPART:
1521 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1522 ((struct partinfo *) data)->part =
1523 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1524 break;
1525
1526 case DIOCWDINFO:
1527 case DIOCSDINFO:
1528 #ifdef __HAVE_OLD_DISKLABEL
1529 case ODIOCWDINFO:
1530 case ODIOCSDINFO:
1531 #endif
1532 {
1533 struct disklabel *lp;
1534 #ifdef __HAVE_OLD_DISKLABEL
1535 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1536 memset(&newlabel, 0, sizeof newlabel);
1537 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1538 lp = &newlabel;
1539 } else
1540 #endif
1541 lp = (struct disklabel *)data;
1542
1543 if ((error = raidlock(rs)) != 0)
1544 return (error);
1545
1546 rs->sc_flags |= RAIDF_LABELLING;
1547
1548 error = setdisklabel(rs->sc_dkdev.dk_label,
1549 lp, 0, rs->sc_dkdev.dk_cpulabel);
1550 if (error == 0) {
1551 if (cmd == DIOCWDINFO
1552 #ifdef __HAVE_OLD_DISKLABEL
1553 || cmd == ODIOCWDINFO
1554 #endif
1555 )
1556 error = writedisklabel(RAIDLABELDEV(dev),
1557 raidstrategy, rs->sc_dkdev.dk_label,
1558 rs->sc_dkdev.dk_cpulabel);
1559 }
1560 rs->sc_flags &= ~RAIDF_LABELLING;
1561
1562 raidunlock(rs);
1563
1564 if (error)
1565 return (error);
1566 break;
1567 }
1568
1569 case DIOCWLABEL:
1570 if (*(int *) data != 0)
1571 rs->sc_flags |= RAIDF_WLABEL;
1572 else
1573 rs->sc_flags &= ~RAIDF_WLABEL;
1574 break;
1575
1576 case DIOCGDEFLABEL:
1577 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1578 break;
1579
1580 #ifdef __HAVE_OLD_DISKLABEL
1581 case ODIOCGDEFLABEL:
1582 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1583 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1584 return ENOTTY;
1585 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1586 break;
1587 #endif
1588
1589 default:
1590 retcode = ENOTTY;
1591 }
1592 return (retcode);
1593
1594 }
1595
1596
1597 /* raidinit -- complete the rest of the initialization for the
1598 RAIDframe device. */
1599
1600
1601 static void
1602 raidinit(RF_Raid_t *raidPtr)
1603 {
1604 struct raid_softc *rs;
1605 int unit;
1606
1607 unit = raidPtr->raidid;
1608
1609 rs = &raid_softc[unit];
1610
1611 /* XXX should check return code first... */
1612 rs->sc_flags |= RAIDF_INITED;
1613
1614 /* XXX doesn't check bounds. */
1615 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1616
1617 rs->sc_dkdev.dk_name = rs->sc_xname;
1618
1619 /* disk_attach actually creates space for the CPU disklabel, among
1620 * other things, so it's critical to call this *BEFORE* we try putzing
1621 * with disklabels. */
1622
1623 pseudo_disk_attach(&rs->sc_dkdev);
1624
1625 /* XXX There may be a weird interaction here between this, and
1626 * protectedSectors, as used in RAIDframe. */
1627
1628 rs->sc_size = raidPtr->totalSectors;
1629 }
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/*
 * rf_GetSpareTableFromDaemon -- wake up the daemon and tell it to get
 * us a spare table.
 *
 * The request is pushed onto the head of rf_sparet_wait_queue and any
 * sleeper on that queue is woken.  We then wait until an entry shows up
 * on rf_sparet_resp_queue, unlink the first one, and hand back its
 * "fcol" field as the result.  The response entry is freed here; it is
 * not the same object as the request passed in.
 *
 * XXX
 * The entries in the queues should be tagged with the raidPtr so that,
 * in the extremely rare case that two recons happen at once, we know
 * for which device we are requesting a spare table.
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	RF_SparetWait_t *resp;
	int status;

	/* Queue the request and poke whoever is waiting for one. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * NOTE(review): an older comment here said "mpsleep unlocks the
	 * mutex", but the call below is a plain tsleep() that is not
	 * handed the mutex -- confirm the locking before enabling this.
	 */
	for (;;) {
		if (rf_sparet_resp_queue)
			break;
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}

	/* Take the first response off the queue. */
	resp = rf_sparet_resp_queue;
	rf_sparet_resp_queue = resp->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	status = resp->fcol;
	RF_Free(resp, sizeof(*resp));	/* not the req we were called with */
	return (status);
}
#endif
1665
1666 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1667 * bp & passes it down.
1668 * any calls originating in the kernel must use non-blocking I/O
1669 * do some extra sanity checking to return "appropriate" error values for
1670 * certain conditions (to make some standard utilities work)
1671 *
1672 * Formerly known as: rf_DoAccessKernel
1673 */
1674 void
1675 raidstart(RF_Raid_t *raidPtr)
1676 {
1677 RF_SectorCount_t num_blocks, pb, sum;
1678 RF_RaidAddr_t raid_addr;
1679 struct partition *pp;
1680 daddr_t blocknum;
1681 int unit;
1682 struct raid_softc *rs;
1683 int do_async;
1684 struct buf *bp;
1685 int rc;
1686
1687 unit = raidPtr->raidid;
1688 rs = &raid_softc[unit];
1689
1690 /* quick check to see if anything has died recently */
1691 RF_LOCK_MUTEX(raidPtr->mutex);
1692 if (raidPtr->numNewFailures > 0) {
1693 RF_UNLOCK_MUTEX(raidPtr->mutex);
1694 rf_update_component_labels(raidPtr,
1695 RF_NORMAL_COMPONENT_UPDATE);
1696 RF_LOCK_MUTEX(raidPtr->mutex);
1697 raidPtr->numNewFailures--;
1698 }
1699
1700 /* Check to see if we're at the limit... */
1701 while (raidPtr->openings > 0) {
1702 RF_UNLOCK_MUTEX(raidPtr->mutex);
1703
1704 /* get the next item, if any, from the queue */
1705 if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
1706 /* nothing more to do */
1707 return;
1708 }
1709
1710 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1711 * partition.. Need to make it absolute to the underlying
1712 * device.. */
1713
1714 blocknum = bp->b_blkno;
1715 if (DISKPART(bp->b_dev) != RAW_PART) {
1716 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1717 blocknum += pp->p_offset;
1718 }
1719
1720 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1721 (int) blocknum));
1722
1723 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1724 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1725
1726 /* *THIS* is where we adjust what block we're going to...
1727 * but DO NOT TOUCH bp->b_blkno!!! */
1728 raid_addr = blocknum;
1729
1730 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1731 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1732 sum = raid_addr + num_blocks + pb;
1733 if (1 || rf_debugKernelAccess) {
1734 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1735 (int) raid_addr, (int) sum, (int) num_blocks,
1736 (int) pb, (int) bp->b_resid));
1737 }
1738 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1739 || (sum < num_blocks) || (sum < pb)) {
1740 bp->b_error = ENOSPC;
1741 bp->b_flags |= B_ERROR;
1742 bp->b_resid = bp->b_bcount;
1743 biodone(bp);
1744 RF_LOCK_MUTEX(raidPtr->mutex);
1745 continue;
1746 }
1747 /*
1748 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1749 */
1750
1751 if (bp->b_bcount & raidPtr->sectorMask) {
1752 bp->b_error = EINVAL;
1753 bp->b_flags |= B_ERROR;
1754 bp->b_resid = bp->b_bcount;
1755 biodone(bp);
1756 RF_LOCK_MUTEX(raidPtr->mutex);
1757 continue;
1758
1759 }
1760 db1_printf(("Calling DoAccess..\n"));
1761
1762
1763 RF_LOCK_MUTEX(raidPtr->mutex);
1764 raidPtr->openings--;
1765 RF_UNLOCK_MUTEX(raidPtr->mutex);
1766
1767 /*
1768 * Everything is async.
1769 */
1770 do_async = 1;
1771
1772 disk_busy(&rs->sc_dkdev);
1773
1774 /* XXX we're still at splbio() here... do we *really*
1775 need to be? */
1776
1777 /* don't ever condition on bp->b_flags & B_WRITE.
1778 * always condition on B_READ instead */
1779
1780 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1781 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1782 do_async, raid_addr, num_blocks,
1783 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1784
1785 if (rc) {
1786 bp->b_error = rc;
1787 bp->b_flags |= B_ERROR;
1788 bp->b_resid = bp->b_bcount;
1789 biodone(bp);
1790 /* continue loop */
1791 }
1792
1793 RF_LOCK_MUTEX(raidPtr->mutex);
1794 }
1795 RF_UNLOCK_MUTEX(raidPtr->mutex);
1796 }
1797
1798
1799
1800
1801 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1802
1803 int
1804 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1805 {
1806 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1807 struct buf *bp;
1808
1809 req->queue = queue;
1810
1811 #if DIAGNOSTIC
1812 if (queue->raidPtr->raidid >= numraid) {
1813 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
1814 numraid);
1815 panic("Invalid Unit number in rf_DispatchKernelIO");
1816 }
1817 #endif
1818
1819 bp = req->bp;
1820
1821 switch (req->type) {
1822 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1823 /* XXX need to do something extra here.. */
1824 /* I'm leaving this in, as I've never actually seen it used,
1825 * and I'd like folks to report it... GO */
1826 printf(("WAKEUP CALLED\n"));
1827 queue->numOutstanding++;
1828
1829 bp->b_flags = 0;
1830 bp->b_private = req;
1831
1832 KernelWakeupFunc(bp);
1833 break;
1834
1835 case RF_IO_TYPE_READ:
1836 case RF_IO_TYPE_WRITE:
1837 #if RF_ACC_TRACE > 0
1838 if (req->tracerec) {
1839 RF_ETIMER_START(req->tracerec->timer);
1840 }
1841 #endif
1842 InitBP(bp, queue->rf_cinfo->ci_vp,
1843 op, queue->rf_cinfo->ci_dev,
1844 req->sectorOffset, req->numSector,
1845 req->buf, KernelWakeupFunc, (void *) req,
1846 queue->raidPtr->logBytesPerSector, req->b_proc);
1847
1848 if (rf_debugKernelAccess) {
1849 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1850 (long) bp->b_blkno));
1851 }
1852 queue->numOutstanding++;
1853 queue->last_deq_sector = req->sectorOffset;
1854 /* acc wouldn't have been let in if there were any pending
1855 * reqs at any other priority */
1856 queue->curPriority = req->priority;
1857
1858 db1_printf(("Going for %c to unit %d col %d\n",
1859 req->type, queue->raidPtr->raidid,
1860 queue->col));
1861 db1_printf(("sector %d count %d (%d bytes) %d\n",
1862 (int) req->sectorOffset, (int) req->numSector,
1863 (int) (req->numSector <<
1864 queue->raidPtr->logBytesPerSector),
1865 (int) queue->raidPtr->logBytesPerSector));
1866 VOP_STRATEGY(bp->b_vp, bp);
1867
1868 break;
1869
1870 default:
1871 panic("bad req->type in rf_DispatchKernelIO");
1872 }
1873 db1_printf(("Exiting from DispatchKernelIO\n"));
1874
1875 return (0);
1876 }
1877 /* this is the callback function associated with a I/O invoked from
1878 kernel code.
1879 */
1880 static void
1881 KernelWakeupFunc(struct buf *bp)
1882 {
1883 RF_DiskQueueData_t *req = NULL;
1884 RF_DiskQueue_t *queue;
1885 int s;
1886
1887 s = splbio();
1888 db1_printf(("recovering the request queue:\n"));
1889 req = bp->b_private;
1890
1891 queue = (RF_DiskQueue_t *) req->queue;
1892
1893 #if RF_ACC_TRACE > 0
1894 if (req->tracerec) {
1895 RF_ETIMER_STOP(req->tracerec->timer);
1896 RF_ETIMER_EVAL(req->tracerec->timer);
1897 RF_LOCK_MUTEX(rf_tracing_mutex);
1898 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1899 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1900 req->tracerec->num_phys_ios++;
1901 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1902 }
1903 #endif
1904
1905 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1906 * ballistic, and mark the component as hosed... */
1907
1908 if (bp->b_flags & B_ERROR) {
1909 /* Mark the disk as dead */
1910 /* but only mark it once... */
1911 /* and only if it wouldn't leave this RAID set
1912 completely broken */
1913 if (((queue->raidPtr->Disks[queue->col].status ==
1914 rf_ds_optimal) ||
1915 (queue->raidPtr->Disks[queue->col].status ==
1916 rf_ds_used_spare)) &&
1917 (queue->raidPtr->numFailures <
1918 queue->raidPtr->Layout.map->faultsTolerated)) {
1919 printf("raid%d: IO Error. Marking %s as failed.\n",
1920 queue->raidPtr->raidid,
1921 queue->raidPtr->Disks[queue->col].devname);
1922 queue->raidPtr->Disks[queue->col].status =
1923 rf_ds_failed;
1924 queue->raidPtr->status = rf_rs_degraded;
1925 queue->raidPtr->numFailures++;
1926 queue->raidPtr->numNewFailures++;
1927 } else { /* Disk is already dead... */
1928 /* printf("Disk already marked as dead!\n"); */
1929 }
1930
1931 }
1932
1933 /* Fill in the error value */
1934
1935 req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
1936
1937 simple_lock(&queue->raidPtr->iodone_lock);
1938
1939 /* Drop this one on the "finished" queue... */
1940 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
1941
1942 /* Let the raidio thread know there is work to be done. */
1943 wakeup(&(queue->raidPtr->iodone));
1944
1945 simple_unlock(&queue->raidPtr->iodone_lock);
1946
1947 splx(s);
1948 }
1949
1950
1951
1952 /*
1953 * initialize a buf structure for doing an I/O in the kernel.
1954 */
1955 static void
1956 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1957 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf,
1958 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
1959 struct proc *b_proc)
1960 {
1961 /* bp->b_flags = B_PHYS | rw_flag; */
1962 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1963 bp->b_bcount = numSect << logBytesPerSector;
1964 bp->b_bufsize = bp->b_bcount;
1965 bp->b_error = 0;
1966 bp->b_dev = dev;
1967 bp->b_data = bf;
1968 bp->b_blkno = startSect;
1969 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1970 if (bp->b_bcount == 0) {
1971 panic("bp->b_bcount is zero in InitBP!!");
1972 }
1973 bp->b_proc = b_proc;
1974 bp->b_iodone = cbFunc;
1975 bp->b_private = cbArg;
1976 bp->b_vp = b_vp;
1977 if ((bp->b_flags & B_READ) == 0) {
1978 bp->b_vp->v_numoutput++;
1979 }
1980
1981 }
1982
1983 static void
1984 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
1985 struct disklabel *lp)
1986 {
1987 memset(lp, 0, sizeof(*lp));
1988
1989 /* fabricate a label... */
1990 lp->d_secperunit = raidPtr->totalSectors;
1991 lp->d_secsize = raidPtr->bytesPerSector;
1992 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1993 lp->d_ntracks = 4 * raidPtr->numCol;
1994 lp->d_ncylinders = raidPtr->totalSectors /
1995 (lp->d_nsectors * lp->d_ntracks);
1996 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1997
1998 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1999 lp->d_type = DTYPE_RAID;
2000 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2001 lp->d_rpm = 3600;
2002 lp->d_interleave = 1;
2003 lp->d_flags = 0;
2004
2005 lp->d_partitions[RAW_PART].p_offset = 0;
2006 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2007 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2008 lp->d_npartitions = RAW_PART + 1;
2009
2010 lp->d_magic = DISKMAGIC;
2011 lp->d_magic2 = DISKMAGIC;
2012 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2013
2014 }
2015 /*
2016 * Read the disklabel from the raid device. If one is not present, fake one
2017 * up.
2018 */
2019 static void
2020 raidgetdisklabel(dev_t dev)
2021 {
2022 int unit = raidunit(dev);
2023 struct raid_softc *rs = &raid_softc[unit];
2024 const char *errstring;
2025 struct disklabel *lp = rs->sc_dkdev.dk_label;
2026 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2027 RF_Raid_t *raidPtr;
2028
2029 db1_printf(("Getting the disklabel...\n"));
2030
2031 memset(clp, 0, sizeof(*clp));
2032
2033 raidPtr = raidPtrs[unit];
2034
2035 raidgetdefaultlabel(raidPtr, rs, lp);
2036
2037 /*
2038 * Call the generic disklabel extraction routine.
2039 */
2040 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2041 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2042 if (errstring)
2043 raidmakedisklabel(rs);
2044 else {
2045 int i;
2046 struct partition *pp;
2047
2048 /*
2049 * Sanity check whether the found disklabel is valid.
2050 *
2051 * This is necessary since total size of the raid device
2052 * may vary when an interleave is changed even though exactly
2053 * same componets are used, and old disklabel may used
2054 * if that is found.
2055 */
2056 if (lp->d_secperunit != rs->sc_size)
2057 printf("raid%d: WARNING: %s: "
2058 "total sector size in disklabel (%d) != "
2059 "the size of raid (%ld)\n", unit, rs->sc_xname,
2060 lp->d_secperunit, (long) rs->sc_size);
2061 for (i = 0; i < lp->d_npartitions; i++) {
2062 pp = &lp->d_partitions[i];
2063 if (pp->p_offset + pp->p_size > rs->sc_size)
2064 printf("raid%d: WARNING: %s: end of partition `%c' "
2065 "exceeds the size of raid (%ld)\n",
2066 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2067 }
2068 }
2069
2070 }
2071 /*
2072 * Take care of things one might want to take care of in the event
2073 * that a disklabel isn't present.
2074 */
2075 static void
2076 raidmakedisklabel(struct raid_softc *rs)
2077 {
2078 struct disklabel *lp = rs->sc_dkdev.dk_label;
2079 db1_printf(("Making a label..\n"));
2080
2081 /*
2082 * For historical reasons, if there's no disklabel present
2083 * the raw partition must be marked FS_BSDFFS.
2084 */
2085
2086 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2087
2088 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2089
2090 lp->d_checksum = dkcksum(lp);
2091 }
2092 /*
2093 * Lookup the provided name in the filesystem. If the file exists,
2094 * is a valid block device, and isn't being used by anyone else,
2095 * set *vpp to the file's vnode.
2096 * You'll find the original of this in ccd.c
2097 */
2098 int
2099 raidlookup(char *path, struct lwp *l, struct vnode **vpp)
2100 {
2101 struct nameidata nd;
2102 struct vnode *vp;
2103 struct proc *p;
2104 struct vattr va;
2105 int error;
2106
2107 if (l == NULL)
2108 return(ESRCH); /* Is ESRCH the best choice? */
2109 p = l->l_proc;
2110
2111 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, l);
2112 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2113 return (error);
2114 }
2115 vp = nd.ni_vp;
2116 if (vp->v_usecount > 1) {
2117 VOP_UNLOCK(vp, 0);
2118 (void) vn_close(vp, FREAD | FWRITE, p->p_cred, l);
2119 return (EBUSY);
2120 }
2121 if ((error = VOP_GETATTR(vp, &va, p->p_cred, l)) != 0) {
2122 VOP_UNLOCK(vp, 0);
2123 (void) vn_close(vp, FREAD | FWRITE, p->p_cred, l);
2124 return (error);
2125 }
2126 /* XXX: eventually we should handle VREG, too. */
2127 if (va.va_type != VBLK) {
2128 VOP_UNLOCK(vp, 0);
2129 (void) vn_close(vp, FREAD | FWRITE, p->p_cred, l);
2130 return (ENOTBLK);
2131 }
2132 VOP_UNLOCK(vp, 0);
2133 *vpp = vp;
2134 return (0);
2135 }
2136 /*
2137 * Wait interruptibly for an exclusive lock.
2138 *
2139 * XXX
2140 * Several drivers do this; it should be abstracted and made MP-safe.
2141 * (Hmm... where have we seen this warning before :-> GO )
2142 */
2143 static int
2144 raidlock(struct raid_softc *rs)
2145 {
2146 int error;
2147
2148 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2149 rs->sc_flags |= RAIDF_WANTED;
2150 if ((error =
2151 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2152 return (error);
2153 }
2154 rs->sc_flags |= RAIDF_LOCKED;
2155 return (0);
2156 }
2157 /*
2158 * Unlock and wake up any waiters.
2159 */
2160 static void
2161 raidunlock(struct raid_softc *rs)
2162 {
2163
2164 rs->sc_flags &= ~RAIDF_LOCKED;
2165 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2166 rs->sc_flags &= ~RAIDF_WANTED;
2167 wakeup(rs);
2168 }
2169 }
2170
2171
/*
 * Location of the component label on each component, in bytes.  The label
 * read/write routines below transfer RF_COMPONENT_INFO_SIZE bytes starting
 * at block RF_COMPONENT_INFO_OFFSET / DEV_BSIZE.
 */
#define RF_COMPONENT_INFO_OFFSET	(16 * 1024)	/* bytes */
#define RF_COMPONENT_INFO_SIZE		(1 * 1024)	/* bytes */
2174
2175 int
2176 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2177 {
2178 RF_ComponentLabel_t clabel;
2179 raidread_component_label(dev, b_vp, &clabel);
2180 clabel.mod_counter = mod_counter;
2181 clabel.clean = RF_RAID_CLEAN;
2182 raidwrite_component_label(dev, b_vp, &clabel);
2183 return(0);
2184 }
2185
2186
2187 int
2188 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2189 {
2190 RF_ComponentLabel_t clabel;
2191 raidread_component_label(dev, b_vp, &clabel);
2192 clabel.mod_counter = mod_counter;
2193 clabel.clean = RF_RAID_DIRTY;
2194 raidwrite_component_label(dev, b_vp, &clabel);
2195 return(0);
2196 }
2197
2198 /* ARGSUSED */
2199 int
2200 raidread_component_label(dev_t dev, struct vnode *b_vp,
2201 RF_ComponentLabel_t *clabel)
2202 {
2203 struct buf *bp;
2204 const struct bdevsw *bdev;
2205 int error;
2206
2207 /* XXX should probably ensure that we don't try to do this if
2208 someone has changed rf_protected_sectors. */
2209
2210 if (b_vp == NULL) {
2211 /* For whatever reason, this component is not valid.
2212 Don't try to read a component label from it. */
2213 return(EINVAL);
2214 }
2215
2216 /* get a block of the appropriate size... */
2217 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2218 bp->b_dev = dev;
2219
2220 /* get our ducks in a row for the read */
2221 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2222 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2223 bp->b_flags |= B_READ;
2224 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2225
2226 bdev = bdevsw_lookup(bp->b_dev);
2227 if (bdev == NULL)
2228 return (ENXIO);
2229 (*bdev->d_strategy)(bp);
2230
2231 error = biowait(bp);
2232
2233 if (!error) {
2234 memcpy(clabel, bp->b_data,
2235 sizeof(RF_ComponentLabel_t));
2236 }
2237
2238 brelse(bp);
2239 return(error);
2240 }
2241 /* ARGSUSED */
2242 int
2243 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2244 RF_ComponentLabel_t *clabel)
2245 {
2246 struct buf *bp;
2247 const struct bdevsw *bdev;
2248 int error;
2249
2250 /* get a block of the appropriate size... */
2251 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2252 bp->b_dev = dev;
2253
2254 /* get our ducks in a row for the write */
2255 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2256 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2257 bp->b_flags |= B_WRITE;
2258 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2259
2260 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2261
2262 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2263
2264 bdev = bdevsw_lookup(bp->b_dev);
2265 if (bdev == NULL)
2266 return (ENXIO);
2267 (*bdev->d_strategy)(bp);
2268 error = biowait(bp);
2269 brelse(bp);
2270 if (error) {
2271 #if 1
2272 printf("Failed to write RAID component info!\n");
2273 #endif
2274 }
2275
2276 return(error);
2277 }
2278
2279 void
2280 rf_markalldirty(RF_Raid_t *raidPtr)
2281 {
2282 RF_ComponentLabel_t clabel;
2283 int sparecol;
2284 int c;
2285 int j;
2286 int scol = -1;
2287
2288 raidPtr->mod_counter++;
2289 for (c = 0; c < raidPtr->numCol; c++) {
2290 /* we don't want to touch (at all) a disk that has
2291 failed */
2292 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
2293 raidread_component_label(
2294 raidPtr->Disks[c].dev,
2295 raidPtr->raid_cinfo[c].ci_vp,
2296 &clabel);
2297 if (clabel.status == rf_ds_spared) {
2298 /* XXX do something special...
2299 but whatever you do, don't
2300 try to access it!! */
2301 } else {
2302 raidmarkdirty(
2303 raidPtr->Disks[c].dev,
2304 raidPtr->raid_cinfo[c].ci_vp,
2305 raidPtr->mod_counter);
2306 }
2307 }
2308 }
2309
2310 for( c = 0; c < raidPtr->numSpare ; c++) {
2311 sparecol = raidPtr->numCol + c;
2312 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2313 /*
2314
2315 we claim this disk is "optimal" if it's
2316 rf_ds_used_spare, as that means it should be
2317 directly substitutable for the disk it replaced.
2318 We note that too...
2319
2320 */
2321
2322 for(j=0;j<raidPtr->numCol;j++) {
2323 if (raidPtr->Disks[j].spareCol == sparecol) {
2324 scol = j;
2325 break;
2326 }
2327 }
2328
2329 raidread_component_label(
2330 raidPtr->Disks[sparecol].dev,
2331 raidPtr->raid_cinfo[sparecol].ci_vp,
2332 &clabel);
2333 /* make sure status is noted */
2334
2335 raid_init_component_label(raidPtr, &clabel);
2336
2337 clabel.row = 0;
2338 clabel.column = scol;
2339 /* Note: we *don't* change status from rf_ds_used_spare
2340 to rf_ds_optimal */
2341 /* clabel.status = rf_ds_optimal; */
2342
2343 raidmarkdirty(raidPtr->Disks[sparecol].dev,
2344 raidPtr->raid_cinfo[sparecol].ci_vp,
2345 raidPtr->mod_counter);
2346 }
2347 }
2348 }
2349
2350
2351 void
2352 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2353 {
2354 RF_ComponentLabel_t clabel;
2355 int sparecol;
2356 int c;
2357 int j;
2358 int scol;
2359
2360 scol = -1;
2361
2362 /* XXX should do extra checks to make sure things really are clean,
2363 rather than blindly setting the clean bit... */
2364
2365 raidPtr->mod_counter++;
2366
2367 for (c = 0; c < raidPtr->numCol; c++) {
2368 if (raidPtr->Disks[c].status == rf_ds_optimal) {
2369 raidread_component_label(
2370 raidPtr->Disks[c].dev,
2371 raidPtr->raid_cinfo[c].ci_vp,
2372 &clabel);
2373 /* make sure status is noted */
2374 clabel.status = rf_ds_optimal;
2375
2376 /* bump the counter */
2377 clabel.mod_counter = raidPtr->mod_counter;
2378
2379 raidwrite_component_label(
2380 raidPtr->Disks[c].dev,
2381 raidPtr->raid_cinfo[c].ci_vp,
2382 &clabel);
2383 if (final == RF_FINAL_COMPONENT_UPDATE) {
2384 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2385 raidmarkclean(
2386 raidPtr->Disks[c].dev,
2387 raidPtr->raid_cinfo[c].ci_vp,
2388 raidPtr->mod_counter);
2389 }
2390 }
2391 }
2392 /* else we don't touch it.. */
2393 }
2394
2395 for( c = 0; c < raidPtr->numSpare ; c++) {
2396 sparecol = raidPtr->numCol + c;
2397 /* Need to ensure that the reconstruct actually completed! */
2398 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2399 /*
2400
2401 we claim this disk is "optimal" if it's
2402 rf_ds_used_spare, as that means it should be
2403 directly substitutable for the disk it replaced.
2404 We note that too...
2405
2406 */
2407
2408 for(j=0;j<raidPtr->numCol;j++) {
2409 if (raidPtr->Disks[j].spareCol == sparecol) {
2410 scol = j;
2411 break;
2412 }
2413 }
2414
2415 /* XXX shouldn't *really* need this... */
2416 raidread_component_label(
2417 raidPtr->Disks[sparecol].dev,
2418 raidPtr->raid_cinfo[sparecol].ci_vp,
2419 &clabel);
2420 /* make sure status is noted */
2421
2422 raid_init_component_label(raidPtr, &clabel);
2423
2424 clabel.mod_counter = raidPtr->mod_counter;
2425 clabel.column = scol;
2426 clabel.status = rf_ds_optimal;
2427
2428 raidwrite_component_label(
2429 raidPtr->Disks[sparecol].dev,
2430 raidPtr->raid_cinfo[sparecol].ci_vp,
2431 &clabel);
2432 if (final == RF_FINAL_COMPONENT_UPDATE) {
2433 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2434 raidmarkclean( raidPtr->Disks[sparecol].dev,
2435 raidPtr->raid_cinfo[sparecol].ci_vp,
2436 raidPtr->mod_counter);
2437 }
2438 }
2439 }
2440 }
2441 }
2442
2443 void
2444 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2445 {
2446 struct proc *p;
2447 struct lwp *l;
2448
2449 p = raidPtr->engine_thread;
2450 l = LIST_FIRST(&p->p_lwps);
2451
2452 if (vp != NULL) {
2453 if (auto_configured == 1) {
2454 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2455 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2456 vput(vp);
2457
2458 } else {
2459 (void) vn_close(vp, FREAD | FWRITE, p->p_cred, l);
2460 }
2461 }
2462 }
2463
2464
2465 void
2466 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2467 {
2468 int r,c;
2469 struct vnode *vp;
2470 int acd;
2471
2472
2473 /* We take this opportunity to close the vnodes like we should.. */
2474
2475 for (c = 0; c < raidPtr->numCol; c++) {
2476 vp = raidPtr->raid_cinfo[c].ci_vp;
2477 acd = raidPtr->Disks[c].auto_configured;
2478 rf_close_component(raidPtr, vp, acd);
2479 raidPtr->raid_cinfo[c].ci_vp = NULL;
2480 raidPtr->Disks[c].auto_configured = 0;
2481 }
2482
2483 for (r = 0; r < raidPtr->numSpare; r++) {
2484 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2485 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2486 rf_close_component(raidPtr, vp, acd);
2487 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2488 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2489 }
2490 }
2491
2492
2493 void
2494 rf_ReconThread(struct rf_recon_req *req)
2495 {
2496 int s;
2497 RF_Raid_t *raidPtr;
2498
2499 s = splbio();
2500 raidPtr = (RF_Raid_t *) req->raidPtr;
2501 raidPtr->recon_in_progress = 1;
2502
2503 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2504 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2505
2506 RF_Free(req, sizeof(*req));
2507
2508 raidPtr->recon_in_progress = 0;
2509 splx(s);
2510
2511 /* That's all... */
2512 kthread_exit(0); /* does not return */
2513 }
2514
2515 void
2516 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2517 {
2518 int retcode;
2519 int s;
2520
2521 raidPtr->parity_rewrite_stripes_done = 0;
2522 raidPtr->parity_rewrite_in_progress = 1;
2523 s = splbio();
2524 retcode = rf_RewriteParity(raidPtr);
2525 splx(s);
2526 if (retcode) {
2527 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2528 } else {
2529 /* set the clean bit! If we shutdown correctly,
2530 the clean bit on each component label will get
2531 set */
2532 raidPtr->parity_good = RF_RAID_CLEAN;
2533 }
2534 raidPtr->parity_rewrite_in_progress = 0;
2535
2536 /* Anyone waiting for us to stop? If so, inform them... */
2537 if (raidPtr->waitShutdown) {
2538 wakeup(&raidPtr->parity_rewrite_in_progress);
2539 }
2540
2541 /* That's all... */
2542 kthread_exit(0); /* does not return */
2543 }
2544
2545
2546 void
2547 rf_CopybackThread(RF_Raid_t *raidPtr)
2548 {
2549 int s;
2550
2551 raidPtr->copyback_in_progress = 1;
2552 s = splbio();
2553 rf_CopybackReconstructedData(raidPtr);
2554 splx(s);
2555 raidPtr->copyback_in_progress = 0;
2556
2557 /* That's all... */
2558 kthread_exit(0); /* does not return */
2559 }
2560
2561
2562 void
2563 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2564 {
2565 int s;
2566 RF_Raid_t *raidPtr;
2567
2568 s = splbio();
2569 raidPtr = req->raidPtr;
2570 raidPtr->recon_in_progress = 1;
2571 rf_ReconstructInPlace(raidPtr, req->col);
2572 RF_Free(req, sizeof(*req));
2573 raidPtr->recon_in_progress = 0;
2574 splx(s);
2575
2576 /* That's all... */
2577 kthread_exit(0); /* does not return */
2578 }
2579
2580 RF_AutoConfig_t *
2581 rf_find_raid_components()
2582 {
2583 struct vnode *vp;
2584 struct disklabel label;
2585 struct device *dv;
2586 dev_t dev;
2587 int bmajor;
2588 int error;
2589 int i;
2590 int good_one;
2591 RF_ComponentLabel_t *clabel;
2592 RF_AutoConfig_t *ac_list;
2593 RF_AutoConfig_t *ac;
2594
2595
2596 /* initialize the AutoConfig list */
2597 ac_list = NULL;
2598
2599 /* we begin by trolling through *all* the devices on the system */
2600
2601 for (dv = alldevs.tqh_first; dv != NULL;
2602 dv = dv->dv_list.tqe_next) {
2603
2604 /* we are only interested in disks... */
2605 if (device_class(dv) != DV_DISK)
2606 continue;
2607
2608 /* we don't care about floppies... */
2609 if (device_is_a(dv, "fd")) {
2610 continue;
2611 }
2612
2613 /* we don't care about CD's... */
2614 if (device_is_a(dv, "cd")) {
2615 continue;
2616 }
2617
2618 /* hdfd is the Atari/Hades floppy driver */
2619 if (device_is_a(dv, "hdfd")) {
2620 continue;
2621 }
2622
2623 /* fdisa is the Atari/Milan floppy driver */
2624 if (device_is_a(dv, "fdisa")) {
2625 continue;
2626 }
2627
2628 /* need to find the device_name_to_block_device_major stuff */
2629 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2630
2631 /* get a vnode for the raw partition of this disk */
2632
2633 dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
2634 if (bdevvp(dev, &vp))
2635 panic("RAID can't alloc vnode");
2636
2637 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2638
2639 if (error) {
2640 /* "Who cares." Continue looking
2641 for something that exists*/
2642 vput(vp);
2643 continue;
2644 }
2645
2646 /* Ok, the disk exists. Go get the disklabel. */
2647 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
2648 if (error) {
2649 /*
2650 * XXX can't happen - open() would
2651 * have errored out (or faked up one)
2652 */
2653 if (error != ENOTTY)
2654 printf("RAIDframe: can't get label for dev "
2655 "%s (%d)\n", dv->dv_xname, error);
2656 }
2657
2658 /* don't need this any more. We'll allocate it again
2659 a little later if we really do... */
2660 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2661 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2662 vput(vp);
2663
2664 if (error)
2665 continue;
2666
2667 for (i=0; i < label.d_npartitions; i++) {
2668 /* We only support partitions marked as RAID */
2669 if (label.d_partitions[i].p_fstype != FS_RAID)
2670 continue;
2671
2672 dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
2673 if (bdevvp(dev, &vp))
2674 panic("RAID can't alloc vnode");
2675
2676 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2677 if (error) {
2678 /* Whatever... */
2679 vput(vp);
2680 continue;
2681 }
2682
2683 good_one = 0;
2684
2685 clabel = (RF_ComponentLabel_t *)
2686 malloc(sizeof(RF_ComponentLabel_t),
2687 M_RAIDFRAME, M_NOWAIT);
2688 if (clabel == NULL) {
2689 while(ac_list) {
2690 ac = ac_list;
2691 if (ac->clabel)
2692 free(ac->clabel, M_RAIDFRAME);
2693 ac_list = ac_list->next;
2694 free(ac, M_RAIDFRAME);
2695 };
2696 printf("RAID auto config: out of memory!\n");
2697 return(NULL); /* XXX probably should panic? */
2698 }
2699
2700 if (!raidread_component_label(dev, vp, clabel)) {
2701 /* Got the label. Does it look reasonable? */
2702 if (rf_reasonable_label(clabel) &&
2703 (clabel->partitionSize <=
2704 label.d_partitions[i].p_size)) {
2705 #if DEBUG
2706 printf("Component on: %s%c: %d\n",
2707 dv->dv_xname, 'a'+i,
2708 label.d_partitions[i].p_size);
2709 rf_print_component_label(clabel);
2710 #endif
2711 /* if it's reasonable, add it,
2712 else ignore it. */
2713 ac = (RF_AutoConfig_t *)
2714 malloc(sizeof(RF_AutoConfig_t),
2715 M_RAIDFRAME,
2716 M_NOWAIT);
2717 if (ac == NULL) {
2718 /* XXX should panic?? */
2719 while(ac_list) {
2720 ac = ac_list;
2721 if (ac->clabel)
2722 free(ac->clabel,
2723 M_RAIDFRAME);
2724 ac_list = ac_list->next;
2725 free(ac, M_RAIDFRAME);
2726 }
2727 free(clabel, M_RAIDFRAME);
2728 return(NULL);
2729 }
2730
2731 snprintf(ac->devname,
2732 sizeof(ac->devname), "%s%c",
2733 dv->dv_xname, 'a'+i);
2734 ac->dev = dev;
2735 ac->vp = vp;
2736 ac->clabel = clabel;
2737 ac->next = ac_list;
2738 ac_list = ac;
2739 good_one = 1;
2740 }
2741 }
2742 if (!good_one) {
2743 /* cleanup */
2744 free(clabel, M_RAIDFRAME);
2745 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2746 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2747 vput(vp);
2748 }
2749 }
2750 }
2751 return(ac_list);
2752 }
2753
2754 static int
2755 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2756 {
2757
2758 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2759 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2760 ((clabel->clean == RF_RAID_CLEAN) ||
2761 (clabel->clean == RF_RAID_DIRTY)) &&
2762 clabel->row >=0 &&
2763 clabel->column >= 0 &&
2764 clabel->num_rows > 0 &&
2765 clabel->num_columns > 0 &&
2766 clabel->row < clabel->num_rows &&
2767 clabel->column < clabel->num_columns &&
2768 clabel->blockSize > 0 &&
2769 clabel->numBlocks > 0) {
2770 /* label looks reasonable enough... */
2771 return(1);
2772 }
2773 return(0);
2774 }
2775
2776
#if DEBUG
/*
 * Debug helper: dump the interesting fields of a component label to the
 * console.  Only compiled when DEBUG is set.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	const char *clean_str = clabel->clean ? "Yes" : "No";
	const char *autoconf_str = clabel->autoconfigure ? "Yes" : "No";
	const char *root_str = clabel->root_partition ? "Yes" : "No";

	/* position and geometry */
	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);

	/* identity */
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);

	/* state */
	printf(" Clean: %s Status: %d\n", clean_str, clabel->status);

	/* layout */
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);

	/* autoconfiguration */
	printf(" Autoconfig: %s\n", autoconf_str);
	printf(" Contains root partition: %s\n", root_str);
	printf(" Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif
}
#endif
2804
2805 RF_ConfigSet_t *
2806 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2807 {
2808 RF_AutoConfig_t *ac;
2809 RF_ConfigSet_t *config_sets;
2810 RF_ConfigSet_t *cset;
2811 RF_AutoConfig_t *ac_next;
2812
2813
2814 config_sets = NULL;
2815
2816 /* Go through the AutoConfig list, and figure out which components
2817 belong to what sets. */
2818 ac = ac_list;
2819 while(ac!=NULL) {
2820 /* we're going to putz with ac->next, so save it here
2821 for use at the end of the loop */
2822 ac_next = ac->next;
2823
2824 if (config_sets == NULL) {
2825 /* will need at least this one... */
2826 config_sets = (RF_ConfigSet_t *)
2827 malloc(sizeof(RF_ConfigSet_t),
2828 M_RAIDFRAME, M_NOWAIT);
2829 if (config_sets == NULL) {
2830 panic("rf_create_auto_sets: No memory!");
2831 }
2832 /* this one is easy :) */
2833 config_sets->ac = ac;
2834 config_sets->next = NULL;
2835 config_sets->rootable = 0;
2836 ac->next = NULL;
2837 } else {
2838 /* which set does this component fit into? */
2839 cset = config_sets;
2840 while(cset!=NULL) {
2841 if (rf_does_it_fit(cset, ac)) {
2842 /* looks like it matches... */
2843 ac->next = cset->ac;
2844 cset->ac = ac;
2845 break;
2846 }
2847 cset = cset->next;
2848 }
2849 if (cset==NULL) {
2850 /* didn't find a match above... new set..*/
2851 cset = (RF_ConfigSet_t *)
2852 malloc(sizeof(RF_ConfigSet_t),
2853 M_RAIDFRAME, M_NOWAIT);
2854 if (cset == NULL) {
2855 panic("rf_create_auto_sets: No memory!");
2856 }
2857 cset->ac = ac;
2858 ac->next = NULL;
2859 cset->next = config_sets;
2860 cset->rootable = 0;
2861 config_sets = cset;
2862 }
2863 }
2864 ac = ac_next;
2865 }
2866
2867
2868 return(config_sets);
2869 }
2870
2871 static int
2872 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2873 {
2874 RF_ComponentLabel_t *clabel1, *clabel2;
2875
2876 /* If this one matches the *first* one in the set, that's good
2877 enough, since the other members of the set would have been
2878 through here too... */
2879 /* note that we are not checking partitionSize here..
2880
2881 Note that we are also not checking the mod_counters here.
2882 If everything else matches execpt the mod_counter, that's
2883 good enough for this test. We will deal with the mod_counters
2884 a little later in the autoconfiguration process.
2885
2886 (clabel1->mod_counter == clabel2->mod_counter) &&
2887
2888 The reason we don't check for this is that failed disks
2889 will have lower modification counts. If those disks are
2890 not added to the set they used to belong to, then they will
2891 form their own set, which may result in 2 different sets,
2892 for example, competing to be configured at raid0, and
2893 perhaps competing to be the root filesystem set. If the
2894 wrong ones get configured, or both attempt to become /,
2895 weird behaviour and or serious lossage will occur. Thus we
2896 need to bring them into the fold here, and kick them out at
2897 a later point.
2898
2899 */
2900
2901 clabel1 = cset->ac->clabel;
2902 clabel2 = ac->clabel;
2903 if ((clabel1->version == clabel2->version) &&
2904 (clabel1->serial_number == clabel2->serial_number) &&
2905 (clabel1->num_rows == clabel2->num_rows) &&
2906 (clabel1->num_columns == clabel2->num_columns) &&
2907 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2908 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2909 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2910 (clabel1->parityConfig == clabel2->parityConfig) &&
2911 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2912 (clabel1->blockSize == clabel2->blockSize) &&
2913 (clabel1->numBlocks == clabel2->numBlocks) &&
2914 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2915 (clabel1->root_partition == clabel2->root_partition) &&
2916 (clabel1->last_unit == clabel2->last_unit) &&
2917 (clabel1->config_order == clabel2->config_order)) {
2918 /* if it get's here, it almost *has* to be a match */
2919 } else {
2920 /* it's not consistent with somebody in the set..
2921 punt */
2922 return(0);
2923 }
2924 /* all was fine.. it must fit... */
2925 return(1);
2926 }
2927
2928 int
2929 rf_have_enough_components(RF_ConfigSet_t *cset)
2930 {
2931 RF_AutoConfig_t *ac;
2932 RF_AutoConfig_t *auto_config;
2933 RF_ComponentLabel_t *clabel;
2934 int c;
2935 int num_cols;
2936 int num_missing;
2937 int mod_counter;
2938 int mod_counter_found;
2939 int even_pair_failed;
2940 char parity_type;
2941
2942
2943 /* check to see that we have enough 'live' components
2944 of this set. If so, we can configure it if necessary */
2945
2946 num_cols = cset->ac->clabel->num_columns;
2947 parity_type = cset->ac->clabel->parityConfig;
2948
2949 /* XXX Check for duplicate components!?!?!? */
2950
2951 /* Determine what the mod_counter is supposed to be for this set. */
2952
2953 mod_counter_found = 0;
2954 mod_counter = 0;
2955 ac = cset->ac;
2956 while(ac!=NULL) {
2957 if (mod_counter_found==0) {
2958 mod_counter = ac->clabel->mod_counter;
2959 mod_counter_found = 1;
2960 } else {
2961 if (ac->clabel->mod_counter > mod_counter) {
2962 mod_counter = ac->clabel->mod_counter;
2963 }
2964 }
2965 ac = ac->next;
2966 }
2967
2968 num_missing = 0;
2969 auto_config = cset->ac;
2970
2971 even_pair_failed = 0;
2972 for(c=0; c<num_cols; c++) {
2973 ac = auto_config;
2974 while(ac!=NULL) {
2975 if ((ac->clabel->column == c) &&
2976 (ac->clabel->mod_counter == mod_counter)) {
2977 /* it's this one... */
2978 #if DEBUG
2979 printf("Found: %s at %d\n",
2980 ac->devname,c);
2981 #endif
2982 break;
2983 }
2984 ac=ac->next;
2985 }
2986 if (ac==NULL) {
2987 /* Didn't find one here! */
2988 /* special case for RAID 1, especially
2989 where there are more than 2
2990 components (where RAIDframe treats
2991 things a little differently :( ) */
2992 if (parity_type == '1') {
2993 if (c%2 == 0) { /* even component */
2994 even_pair_failed = 1;
2995 } else { /* odd component. If
2996 we're failed, and
2997 so is the even
2998 component, it's
2999 "Good Night, Charlie" */
3000 if (even_pair_failed == 1) {
3001 return(0);
3002 }
3003 }
3004 } else {
3005 /* normal accounting */
3006 num_missing++;
3007 }
3008 }
3009 if ((parity_type == '1') && (c%2 == 1)) {
3010 /* Just did an even component, and we didn't
3011 bail.. reset the even_pair_failed flag,
3012 and go on to the next component.... */
3013 even_pair_failed = 0;
3014 }
3015 }
3016
3017 clabel = cset->ac->clabel;
3018
3019 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3020 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3021 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3022 /* XXX this needs to be made *much* more general */
3023 /* Too many failures */
3024 return(0);
3025 }
3026 /* otherwise, all is well, and we've got enough to take a kick
3027 at autoconfiguring this set */
3028 return(1);
3029 }
3030
3031 void
3032 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3033 RF_Raid_t *raidPtr)
3034 {
3035 RF_ComponentLabel_t *clabel;
3036 int i;
3037
3038 clabel = ac->clabel;
3039
3040 /* 1. Fill in the common stuff */
3041 config->numRow = clabel->num_rows = 1;
3042 config->numCol = clabel->num_columns;
3043 config->numSpare = 0; /* XXX should this be set here? */
3044 config->sectPerSU = clabel->sectPerSU;
3045 config->SUsPerPU = clabel->SUsPerPU;
3046 config->SUsPerRU = clabel->SUsPerRU;
3047 config->parityConfig = clabel->parityConfig;
3048 /* XXX... */
3049 strcpy(config->diskQueueType,"fifo");
3050 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3051 config->layoutSpecificSize = 0; /* XXX ?? */
3052
3053 while(ac!=NULL) {
3054 /* row/col values will be in range due to the checks
3055 in reasonable_label() */
3056 strcpy(config->devnames[0][ac->clabel->column],
3057 ac->devname);
3058 ac = ac->next;
3059 }
3060
3061 for(i=0;i<RF_MAXDBGV;i++) {
3062 config->debugVars[i][0] = 0;
3063 }
3064 }
3065
3066 int
3067 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3068 {
3069 RF_ComponentLabel_t clabel;
3070 struct vnode *vp;
3071 dev_t dev;
3072 int column;
3073 int sparecol;
3074
3075 raidPtr->autoconfigure = new_value;
3076
3077 for(column=0; column<raidPtr->numCol; column++) {
3078 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3079 dev = raidPtr->Disks[column].dev;
3080 vp = raidPtr->raid_cinfo[column].ci_vp;
3081 raidread_component_label(dev, vp, &clabel);
3082 clabel.autoconfigure = new_value;
3083 raidwrite_component_label(dev, vp, &clabel);
3084 }
3085 }
3086 for(column = 0; column < raidPtr->numSpare ; column++) {
3087 sparecol = raidPtr->numCol + column;
3088 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3089 dev = raidPtr->Disks[sparecol].dev;
3090 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3091 raidread_component_label(dev, vp, &clabel);
3092 clabel.autoconfigure = new_value;
3093 raidwrite_component_label(dev, vp, &clabel);
3094 }
3095 }
3096 return(new_value);
3097 }
3098
3099 int
3100 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3101 {
3102 RF_ComponentLabel_t clabel;
3103 struct vnode *vp;
3104 dev_t dev;
3105 int column;
3106 int sparecol;
3107
3108 raidPtr->root_partition = new_value;
3109 for(column=0; column<raidPtr->numCol; column++) {
3110 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3111 dev = raidPtr->Disks[column].dev;
3112 vp = raidPtr->raid_cinfo[column].ci_vp;
3113 raidread_component_label(dev, vp, &clabel);
3114 clabel.root_partition = new_value;
3115 raidwrite_component_label(dev, vp, &clabel);
3116 }
3117 }
3118 for(column = 0; column < raidPtr->numSpare ; column++) {
3119 sparecol = raidPtr->numCol + column;
3120 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3121 dev = raidPtr->Disks[sparecol].dev;
3122 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3123 raidread_component_label(dev, vp, &clabel);
3124 clabel.root_partition = new_value;
3125 raidwrite_component_label(dev, vp, &clabel);
3126 }
3127 }
3128 return(new_value);
3129 }
3130
3131 void
3132 rf_release_all_vps(RF_ConfigSet_t *cset)
3133 {
3134 RF_AutoConfig_t *ac;
3135
3136 ac = cset->ac;
3137 while(ac!=NULL) {
3138 /* Close the vp, and give it back */
3139 if (ac->vp) {
3140 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3141 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3142 vput(ac->vp);
3143 ac->vp = NULL;
3144 }
3145 ac = ac->next;
3146 }
3147 }
3148
3149
3150 void
3151 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3152 {
3153 RF_AutoConfig_t *ac;
3154 RF_AutoConfig_t *next_ac;
3155
3156 ac = cset->ac;
3157 while(ac!=NULL) {
3158 next_ac = ac->next;
3159 /* nuke the label */
3160 free(ac->clabel, M_RAIDFRAME);
3161 /* cleanup the config structure */
3162 free(ac, M_RAIDFRAME);
3163 /* "next.." */
3164 ac = next_ac;
3165 }
3166 /* and, finally, nuke the config set */
3167 free(cset, M_RAIDFRAME);
3168 }
3169
3170
3171 void
3172 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3173 {
3174 /* current version number */
3175 clabel->version = RF_COMPONENT_LABEL_VERSION;
3176 clabel->serial_number = raidPtr->serial_number;
3177 clabel->mod_counter = raidPtr->mod_counter;
3178 clabel->num_rows = 1;
3179 clabel->num_columns = raidPtr->numCol;
3180 clabel->clean = RF_RAID_DIRTY; /* not clean */
3181 clabel->status = rf_ds_optimal; /* "It's good!" */
3182
3183 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3184 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3185 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3186
3187 clabel->blockSize = raidPtr->bytesPerSector;
3188 clabel->numBlocks = raidPtr->sectorsPerDisk;
3189
3190 /* XXX not portable */
3191 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3192 clabel->maxOutstanding = raidPtr->maxOutstanding;
3193 clabel->autoconfigure = raidPtr->autoconfigure;
3194 clabel->root_partition = raidPtr->root_partition;
3195 clabel->last_unit = raidPtr->raidid;
3196 clabel->config_order = raidPtr->config_order;
3197 }
3198
3199 int
3200 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3201 {
3202 RF_Raid_t *raidPtr;
3203 RF_Config_t *config;
3204 int raidID;
3205 int retcode;
3206
3207 #if DEBUG
3208 printf("RAID autoconfigure\n");
3209 #endif
3210
3211 retcode = 0;
3212 *unit = -1;
3213
3214 /* 1. Create a config structure */
3215
3216 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3217 M_RAIDFRAME,
3218 M_NOWAIT);
3219 if (config==NULL) {
3220 printf("Out of mem!?!?\n");
3221 /* XXX do something more intelligent here. */
3222 return(1);
3223 }
3224
3225 memset(config, 0, sizeof(RF_Config_t));
3226
3227 /*
3228 2. Figure out what RAID ID this one is supposed to live at
3229 See if we can get the same RAID dev that it was configured
3230 on last time..
3231 */
3232
3233 raidID = cset->ac->clabel->last_unit;
3234 if ((raidID < 0) || (raidID >= numraid)) {
3235 /* let's not wander off into lala land. */
3236 raidID = numraid - 1;
3237 }
3238 if (raidPtrs[raidID]->valid != 0) {
3239
3240 /*
3241 Nope... Go looking for an alternative...
3242 Start high so we don't immediately use raid0 if that's
3243 not taken.
3244 */
3245
3246 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3247 if (raidPtrs[raidID]->valid == 0) {
3248 /* can use this one! */
3249 break;
3250 }
3251 }
3252 }
3253
3254 if (raidID < 0) {
3255 /* punt... */
3256 printf("Unable to auto configure this set!\n");
3257 printf("(Out of RAID devs!)\n");
3258 free(config, M_RAIDFRAME);
3259 return(1);
3260 }
3261
3262 #if DEBUG
3263 printf("Configuring raid%d:\n",raidID);
3264 #endif
3265
3266 raidPtr = raidPtrs[raidID];
3267
3268 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3269 raidPtr->raidid = raidID;
3270 raidPtr->openings = RAIDOUTSTANDING;
3271
3272 /* 3. Build the configuration structure */
3273 rf_create_configuration(cset->ac, config, raidPtr);
3274
3275 /* 4. Do the configuration */
3276 retcode = rf_Configure(raidPtr, config, cset->ac);
3277
3278 if (retcode == 0) {
3279
3280 raidinit(raidPtrs[raidID]);
3281
3282 rf_markalldirty(raidPtrs[raidID]);
3283 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3284 if (cset->ac->clabel->root_partition==1) {
3285 /* everything configured just fine. Make a note
3286 that this set is eligible to be root. */
3287 cset->rootable = 1;
3288 /* XXX do this here? */
3289 raidPtrs[raidID]->root_partition = 1;
3290 }
3291 }
3292
3293 /* 5. Cleanup */
3294 free(config, M_RAIDFRAME);
3295
3296 *unit = raidID;
3297 return(retcode);
3298 }
3299
3300 void
3301 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3302 {
3303 struct buf *bp;
3304
3305 bp = (struct buf *)desc->bp;
3306 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3307 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3308 }
3309
/*
 * Initialise pool `p' for items of `size' bytes (wait channel name
 * `w_chan'), then set its high water mark to `xmax', prime it with
 * `xmin' items and set its low water mark to `xmin'.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
    size_t xmin, size_t xmax)
{

	pool_init(p, size, 0, 0, 0, w_chan, NULL);

	/* upper bound first, then pre-allocate and pin the lower bound */
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3319
3320 /*
3321 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3322 * if there is IO pending and if that IO could possibly be done for a
3323 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3324 * otherwise.
3325 *
3326 */
3327
3328 int
3329 rf_buf_queue_check(int raidid)
3330 {
3331 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
3332 raidPtrs[raidid]->openings > 0) {
3333 /* there is work to do */
3334 return 0;
3335 }
3336 /* default is nothing to do */
3337 return 1;
3338 }
3339