rf_netbsdkintf.c revision 1.165 1 /* $NetBSD: rf_netbsdkintf.c,v 1.165 2003/10/30 01:58:17 simonb Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.165 2003/10/30 01:58:17 simonb Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/user.h>
169 #include <sys/reboot.h>
170
171 #include <dev/raidframe/raidframevar.h>
172 #include <dev/raidframe/raidframeio.h>
173 #include "raid.h"
174 #include "opt_raid_autoconfig.h"
175 #include "rf_raid.h"
176 #include "rf_copyback.h"
177 #include "rf_dag.h"
178 #include "rf_dagflags.h"
179 #include "rf_desc.h"
180 #include "rf_diskqueue.h"
181 #include "rf_etimer.h"
182 #include "rf_general.h"
183 #include "rf_kintf.h"
184 #include "rf_options.h"
185 #include "rf_driver.h"
186 #include "rf_parityscan.h"
187 #include "rf_threadstuff.h"
188
/*
 * db1_printf((fmt, ...)) -- level-1 debugging printf.
 *
 * The argument is a complete, parenthesized printf argument list.
 * When DEBUG is defined the message is printed only if rf_kdebug_level
 * is greater than zero; otherwise the macro expands to nothing.
 *
 * Both forms are wrapped in do { } while (0) so that the macro behaves
 * as exactly one statement: it can be used as the unbraced body of an
 * if/else without capturing the else branch (DEBUG case) or leaving a
 * stray empty statement that detaches the else (non-DEBUG case).
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
195
196 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
197
198 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
199
200 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
201 * spare table */
202 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
203 * installation process */
204
205 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
206
207 /* prototypes */
208 static void KernelWakeupFunc(struct buf * bp);
209 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
210 dev_t dev, RF_SectorNum_t startSect,
211 RF_SectorCount_t numSect, caddr_t buf,
212 void (*cbFunc) (struct buf *), void *cbArg,
213 int logBytesPerSector, struct proc * b_proc);
214 static void raidinit(RF_Raid_t *);
215
216 void raidattach(int);
217
218 dev_type_open(raidopen);
219 dev_type_close(raidclose);
220 dev_type_read(raidread);
221 dev_type_write(raidwrite);
222 dev_type_ioctl(raidioctl);
223 dev_type_strategy(raidstrategy);
224 dev_type_dump(raiddump);
225 dev_type_size(raidsize);
226
227 const struct bdevsw raid_bdevsw = {
228 raidopen, raidclose, raidstrategy, raidioctl,
229 raiddump, raidsize, D_DISK
230 };
231
232 const struct cdevsw raid_cdevsw = {
233 raidopen, raidclose, raidread, raidwrite, raidioctl,
234 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
235 };
236
237 /*
238 * Pilfered from ccd.c
239 */
240
241 struct raidbuf {
242 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
243 struct buf *rf_obp; /* ptr. to original I/O buf */
244 RF_DiskQueueData_t *req;/* the request that this was part of.. */
245 };
246
247 /* component buffer pool */
248 struct pool raidframe_cbufpool;
249
250 /* XXX Not sure if the following should be replacing the raidPtrs above,
251 or if it should be used in conjunction with that...
252 */
253
254 struct raid_softc {
255 int sc_flags; /* flags */
256 int sc_cflags; /* configuration flags */
257 size_t sc_size; /* size of the raid device */
258 char sc_xname[20]; /* XXX external name */
259 struct disk sc_dkdev; /* generic disk device info */
260 struct bufq_state buf_queue; /* used for the device queue */
261 };
262 /* sc_flags */
263 #define RAIDF_INITED 0x01 /* unit has been initialized */
264 #define RAIDF_WLABEL 0x02 /* label area is writable */
265 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
266 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
267 #define RAIDF_LOCKED 0x80 /* unit is locked */
268
269 #define raidunit(x) DISKUNIT(x)
270 int numraid = 0;
271
/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */
288
/* Default number of simultaneous I/Os per RAID device; may be overridden. */
#if !defined(RAIDOUTSTANDING)
#define	RAIDOUTSTANDING	6
#endif

/* dev_t of the raw partition on the same unit as `dev'. */
#define	RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
295
296 /* declared here, and made public, for the benefit of KVM stuff.. */
297 struct raid_softc *raid_softc;
298
299 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
300 struct disklabel *);
301 static void raidgetdisklabel(dev_t);
302 static void raidmakedisklabel(struct raid_softc *);
303
304 static int raidlock(struct raid_softc *);
305 static void raidunlock(struct raid_softc *);
306
307 static void rf_markalldirty(RF_Raid_t *);
308
309 struct device *raidrootdev;
310
311 void rf_ReconThread(struct rf_recon_req *);
312 /* XXX what I want is: */
313 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
314 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
315 void rf_CopybackThread(RF_Raid_t *raidPtr);
316 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
317 int rf_autoconfig(struct device *self);
318 void rf_buildroothack(RF_ConfigSet_t *);
319
320 RF_AutoConfig_t *rf_find_raid_components(void);
321 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
322 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
323 static int rf_reasonable_label(RF_ComponentLabel_t *);
324 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
325 int rf_set_autoconfig(RF_Raid_t *, int);
326 int rf_set_rootpartition(RF_Raid_t *, int);
327 void rf_release_all_vps(RF_ConfigSet_t *);
328 void rf_cleanup_config_set(RF_ConfigSet_t *);
329 int rf_have_enough_components(RF_ConfigSet_t *);
330 int rf_auto_config_set(RF_ConfigSet_t *, int *);
331
332 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
333 allow autoconfig to take place.
334 Note that this is overridden by having
335 RAID_AUTOCONFIG as an option in the
336 kernel config file. */
337
338 void
339 raidattach(num)
340 int num;
341 {
342 int raidID;
343 int i, rc;
344
345 #ifdef DEBUG
346 printf("raidattach: Asked for %d units\n", num);
347 #endif
348
349 if (num <= 0) {
350 #ifdef DIAGNOSTIC
351 panic("raidattach: count <= 0");
352 #endif
353 return;
354 }
355 /* This is where all the initialization stuff gets done. */
356
357 numraid = num;
358
359 /* Make some space for requested number of units... */
360
361 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
362 if (raidPtrs == NULL) {
363 panic("raidPtrs is NULL!!");
364 }
365
366 /* Initialize the component buffer pool. */
367 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
368 0, 0, "raidpl", NULL);
369
370 rc = rf_mutex_init(&rf_sparet_wait_mutex);
371 if (rc) {
372 RF_PANIC();
373 }
374
375 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
376
377 for (i = 0; i < num; i++)
378 raidPtrs[i] = NULL;
379 rc = rf_BootRaidframe();
380 if (rc == 0)
381 printf("Kernelized RAIDframe activated\n");
382 else
383 panic("Serious error booting RAID!!");
384
385 /* put together some datastructures like the CCD device does.. This
386 * lets us lock the device and what-not when it gets opened. */
387
388 raid_softc = (struct raid_softc *)
389 malloc(num * sizeof(struct raid_softc),
390 M_RAIDFRAME, M_NOWAIT);
391 if (raid_softc == NULL) {
392 printf("WARNING: no memory for RAIDframe driver\n");
393 return;
394 }
395
396 memset(raid_softc, 0, num * sizeof(struct raid_softc));
397
398 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
399 M_RAIDFRAME, M_NOWAIT);
400 if (raidrootdev == NULL) {
401 panic("No memory for RAIDframe driver!!?!?!");
402 }
403
404 for (raidID = 0; raidID < num; raidID++) {
405 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
406
407 raidrootdev[raidID].dv_class = DV_DISK;
408 raidrootdev[raidID].dv_cfdata = NULL;
409 raidrootdev[raidID].dv_unit = raidID;
410 raidrootdev[raidID].dv_parent = NULL;
411 raidrootdev[raidID].dv_flags = 0;
412 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
413
414 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
415 (RF_Raid_t *));
416 if (raidPtrs[raidID] == NULL) {
417 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
418 numraid = raidID;
419 return;
420 }
421 }
422
423 #ifdef RAID_AUTOCONFIG
424 raidautoconfig = 1;
425 #endif
426
427 /*
428 * Register a finalizer which will be used to auto-config RAID
429 * sets once all real hardware devices have been found.
430 */
431 if (config_finalize_register(NULL, rf_autoconfig) != 0)
432 printf("WARNING: unable to register RAIDframe finalizer\n");
433 }
434
435 int
436 rf_autoconfig(struct device *self)
437 {
438 RF_AutoConfig_t *ac_list;
439 RF_ConfigSet_t *config_sets;
440
441 if (raidautoconfig == 0)
442 return (0);
443
444 /* XXX This code can only be run once. */
445 raidautoconfig = 0;
446
447 /* 1. locate all RAID components on the system */
448 #ifdef DEBUG
449 printf("Searching for RAID components...\n");
450 #endif
451 ac_list = rf_find_raid_components();
452
453 /* 2. Sort them into their respective sets. */
454 config_sets = rf_create_auto_sets(ac_list);
455
456 /*
457 * 3. Evaluate each set andconfigure the valid ones.
458 * This gets done in rf_buildroothack().
459 */
460 rf_buildroothack(config_sets);
461
462 return (1);
463 }
464
465 void
466 rf_buildroothack(RF_ConfigSet_t *config_sets)
467 {
468 RF_ConfigSet_t *cset;
469 RF_ConfigSet_t *next_cset;
470 int retcode;
471 int raidID;
472 int rootID;
473 int num_root;
474
475 rootID = 0;
476 num_root = 0;
477 cset = config_sets;
478 while(cset != NULL ) {
479 next_cset = cset->next;
480 if (rf_have_enough_components(cset) &&
481 cset->ac->clabel->autoconfigure==1) {
482 retcode = rf_auto_config_set(cset,&raidID);
483 if (!retcode) {
484 if (cset->rootable) {
485 rootID = raidID;
486 num_root++;
487 }
488 } else {
489 /* The autoconfig didn't work :( */
490 #if DEBUG
491 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
492 #endif
493 rf_release_all_vps(cset);
494 }
495 } else {
496 /* we're not autoconfiguring this set...
497 release the associated resources */
498 rf_release_all_vps(cset);
499 }
500 /* cleanup */
501 rf_cleanup_config_set(cset);
502 cset = next_cset;
503 }
504
505 /* we found something bootable... */
506
507 if (num_root == 1) {
508 booted_device = &raidrootdev[rootID];
509 } else if (num_root > 1) {
510 /* we can't guess.. require the user to answer... */
511 boothowto |= RB_ASKNAME;
512 }
513 }
514
515
516 int
517 raidsize(dev)
518 dev_t dev;
519 {
520 struct raid_softc *rs;
521 struct disklabel *lp;
522 int part, unit, omask, size;
523
524 unit = raidunit(dev);
525 if (unit >= numraid)
526 return (-1);
527 rs = &raid_softc[unit];
528
529 if ((rs->sc_flags & RAIDF_INITED) == 0)
530 return (-1);
531
532 part = DISKPART(dev);
533 omask = rs->sc_dkdev.dk_openmask & (1 << part);
534 lp = rs->sc_dkdev.dk_label;
535
536 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
537 return (-1);
538
539 if (lp->d_partitions[part].p_fstype != FS_SWAP)
540 size = -1;
541 else
542 size = lp->d_partitions[part].p_size *
543 (lp->d_secsize / DEV_BSIZE);
544
545 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
546 return (-1);
547
548 return (size);
549
550 }
551
552 int
553 raiddump(dev, blkno, va, size)
554 dev_t dev;
555 daddr_t blkno;
556 caddr_t va;
557 size_t size;
558 {
559 /* Not implemented. */
560 return ENXIO;
561 }
562 /* ARGSUSED */
563 int
564 raidopen(dev, flags, fmt, p)
565 dev_t dev;
566 int flags, fmt;
567 struct proc *p;
568 {
569 int unit = raidunit(dev);
570 struct raid_softc *rs;
571 struct disklabel *lp;
572 int part, pmask;
573 int error = 0;
574
575 if (unit >= numraid)
576 return (ENXIO);
577 rs = &raid_softc[unit];
578
579 if ((error = raidlock(rs)) != 0)
580 return (error);
581 lp = rs->sc_dkdev.dk_label;
582
583 part = DISKPART(dev);
584 pmask = (1 << part);
585
586 if ((rs->sc_flags & RAIDF_INITED) &&
587 (rs->sc_dkdev.dk_openmask == 0))
588 raidgetdisklabel(dev);
589
590 /* make sure that this partition exists */
591
592 if (part != RAW_PART) {
593 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
594 ((part >= lp->d_npartitions) ||
595 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
596 error = ENXIO;
597 raidunlock(rs);
598 return (error);
599 }
600 }
601 /* Prevent this unit from being unconfigured while open. */
602 switch (fmt) {
603 case S_IFCHR:
604 rs->sc_dkdev.dk_copenmask |= pmask;
605 break;
606
607 case S_IFBLK:
608 rs->sc_dkdev.dk_bopenmask |= pmask;
609 break;
610 }
611
612 if ((rs->sc_dkdev.dk_openmask == 0) &&
613 ((rs->sc_flags & RAIDF_INITED) != 0)) {
614 /* First one... mark things as dirty... Note that we *MUST*
615 have done a configure before this. I DO NOT WANT TO BE
616 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
617 THAT THEY BELONG TOGETHER!!!!! */
618 /* XXX should check to see if we're only open for reading
619 here... If so, we needn't do this, but then need some
620 other way of keeping track of what's happened.. */
621
622 rf_markalldirty( raidPtrs[unit] );
623 }
624
625
626 rs->sc_dkdev.dk_openmask =
627 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
628
629 raidunlock(rs);
630
631 return (error);
632
633
634 }
635 /* ARGSUSED */
636 int
637 raidclose(dev, flags, fmt, p)
638 dev_t dev;
639 int flags, fmt;
640 struct proc *p;
641 {
642 int unit = raidunit(dev);
643 struct raid_softc *rs;
644 int error = 0;
645 int part;
646
647 if (unit >= numraid)
648 return (ENXIO);
649 rs = &raid_softc[unit];
650
651 if ((error = raidlock(rs)) != 0)
652 return (error);
653
654 part = DISKPART(dev);
655
656 /* ...that much closer to allowing unconfiguration... */
657 switch (fmt) {
658 case S_IFCHR:
659 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
660 break;
661
662 case S_IFBLK:
663 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
664 break;
665 }
666 rs->sc_dkdev.dk_openmask =
667 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
668
669 if ((rs->sc_dkdev.dk_openmask == 0) &&
670 ((rs->sc_flags & RAIDF_INITED) != 0)) {
671 /* Last one... device is not unconfigured yet.
672 Device shutdown has taken care of setting the
673 clean bits if RAIDF_INITED is not set
674 mark things as clean... */
675
676 rf_update_component_labels(raidPtrs[unit],
677 RF_FINAL_COMPONENT_UPDATE);
678 if (doing_shutdown) {
679 /* last one, and we're going down, so
680 lights out for this RAID set too. */
681 error = rf_Shutdown(raidPtrs[unit]);
682
683 /* It's no longer initialized... */
684 rs->sc_flags &= ~RAIDF_INITED;
685
686 /* Detach the disk. */
687 disk_detach(&rs->sc_dkdev);
688 }
689 }
690
691 raidunlock(rs);
692 return (0);
693
694 }
695
696 void
697 raidstrategy(bp)
698 struct buf *bp;
699 {
700 int s;
701
702 unsigned int raidID = raidunit(bp->b_dev);
703 RF_Raid_t *raidPtr;
704 struct raid_softc *rs = &raid_softc[raidID];
705 int wlabel;
706
707 if ((rs->sc_flags & RAIDF_INITED) ==0) {
708 bp->b_error = ENXIO;
709 bp->b_flags |= B_ERROR;
710 bp->b_resid = bp->b_bcount;
711 biodone(bp);
712 return;
713 }
714 if (raidID >= numraid || !raidPtrs[raidID]) {
715 bp->b_error = ENODEV;
716 bp->b_flags |= B_ERROR;
717 bp->b_resid = bp->b_bcount;
718 biodone(bp);
719 return;
720 }
721 raidPtr = raidPtrs[raidID];
722 if (!raidPtr->valid) {
723 bp->b_error = ENODEV;
724 bp->b_flags |= B_ERROR;
725 bp->b_resid = bp->b_bcount;
726 biodone(bp);
727 return;
728 }
729 if (bp->b_bcount == 0) {
730 db1_printf(("b_bcount is zero..\n"));
731 biodone(bp);
732 return;
733 }
734
735 /*
736 * Do bounds checking and adjust transfer. If there's an
737 * error, the bounds check will flag that for us.
738 */
739
740 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
741 if (DISKPART(bp->b_dev) != RAW_PART)
742 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
743 db1_printf(("Bounds check failed!!:%d %d\n",
744 (int) bp->b_blkno, (int) wlabel));
745 biodone(bp);
746 return;
747 }
748 s = splbio();
749
750 bp->b_resid = 0;
751
752 /* stuff it onto our queue */
753 BUFQ_PUT(&rs->buf_queue, bp);
754
755 raidstart(raidPtrs[raidID]);
756
757 splx(s);
758 }
759 /* ARGSUSED */
760 int
761 raidread(dev, uio, flags)
762 dev_t dev;
763 struct uio *uio;
764 int flags;
765 {
766 int unit = raidunit(dev);
767 struct raid_softc *rs;
768
769 if (unit >= numraid)
770 return (ENXIO);
771 rs = &raid_softc[unit];
772
773 if ((rs->sc_flags & RAIDF_INITED) == 0)
774 return (ENXIO);
775
776 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
777
778 }
779 /* ARGSUSED */
780 int
781 raidwrite(dev, uio, flags)
782 dev_t dev;
783 struct uio *uio;
784 int flags;
785 {
786 int unit = raidunit(dev);
787 struct raid_softc *rs;
788
789 if (unit >= numraid)
790 return (ENXIO);
791 rs = &raid_softc[unit];
792
793 if ((rs->sc_flags & RAIDF_INITED) == 0)
794 return (ENXIO);
795
796 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
797
798 }
799
800 int
801 raidioctl(dev, cmd, data, flag, p)
802 dev_t dev;
803 u_long cmd;
804 caddr_t data;
805 int flag;
806 struct proc *p;
807 {
808 int unit = raidunit(dev);
809 int error = 0;
810 int part, pmask;
811 struct raid_softc *rs;
812 RF_Config_t *k_cfg, *u_cfg;
813 RF_Raid_t *raidPtr;
814 RF_RaidDisk_t *diskPtr;
815 RF_AccTotals_t *totals;
816 RF_DeviceConfig_t *d_cfg, **ucfgp;
817 u_char *specific_buf;
818 int retcode = 0;
819 int row;
820 int column;
821 int raidid;
822 struct rf_recon_req *rrcopy, *rr;
823 RF_ComponentLabel_t *clabel;
824 RF_ComponentLabel_t ci_label;
825 RF_ComponentLabel_t **clabel_ptr;
826 RF_SingleComponent_t *sparePtr,*componentPtr;
827 RF_SingleComponent_t hot_spare;
828 RF_SingleComponent_t component;
829 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
830 int i, j, d;
831 #ifdef __HAVE_OLD_DISKLABEL
832 struct disklabel newlabel;
833 #endif
834
835 if (unit >= numraid)
836 return (ENXIO);
837 rs = &raid_softc[unit];
838 raidPtr = raidPtrs[unit];
839
840 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
841 (int) DISKPART(dev), (int) unit, (int) cmd));
842
843 /* Must be open for writes for these commands... */
844 switch (cmd) {
845 case DIOCSDINFO:
846 case DIOCWDINFO:
847 #ifdef __HAVE_OLD_DISKLABEL
848 case ODIOCWDINFO:
849 case ODIOCSDINFO:
850 #endif
851 case DIOCWLABEL:
852 if ((flag & FWRITE) == 0)
853 return (EBADF);
854 }
855
856 /* Must be initialized for these... */
857 switch (cmd) {
858 case DIOCGDINFO:
859 case DIOCSDINFO:
860 case DIOCWDINFO:
861 #ifdef __HAVE_OLD_DISKLABEL
862 case ODIOCGDINFO:
863 case ODIOCWDINFO:
864 case ODIOCSDINFO:
865 case ODIOCGDEFLABEL:
866 #endif
867 case DIOCGPART:
868 case DIOCWLABEL:
869 case DIOCGDEFLABEL:
870 case RAIDFRAME_SHUTDOWN:
871 case RAIDFRAME_REWRITEPARITY:
872 case RAIDFRAME_GET_INFO:
873 case RAIDFRAME_RESET_ACCTOTALS:
874 case RAIDFRAME_GET_ACCTOTALS:
875 case RAIDFRAME_KEEP_ACCTOTALS:
876 case RAIDFRAME_GET_SIZE:
877 case RAIDFRAME_FAIL_DISK:
878 case RAIDFRAME_COPYBACK:
879 case RAIDFRAME_CHECK_RECON_STATUS:
880 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
881 case RAIDFRAME_GET_COMPONENT_LABEL:
882 case RAIDFRAME_SET_COMPONENT_LABEL:
883 case RAIDFRAME_ADD_HOT_SPARE:
884 case RAIDFRAME_REMOVE_HOT_SPARE:
885 case RAIDFRAME_INIT_LABELS:
886 case RAIDFRAME_REBUILD_IN_PLACE:
887 case RAIDFRAME_CHECK_PARITY:
888 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
889 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
890 case RAIDFRAME_CHECK_COPYBACK_STATUS:
891 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
892 case RAIDFRAME_SET_AUTOCONFIG:
893 case RAIDFRAME_SET_ROOT:
894 case RAIDFRAME_DELETE_COMPONENT:
895 case RAIDFRAME_INCORPORATE_HOT_SPARE:
896 if ((rs->sc_flags & RAIDF_INITED) == 0)
897 return (ENXIO);
898 }
899
900 switch (cmd) {
901
902 /* configure the system */
903 case RAIDFRAME_CONFIGURE:
904
905 if (raidPtr->valid) {
906 /* There is a valid RAID set running on this unit! */
907 printf("raid%d: Device already configured!\n",unit);
908 return(EINVAL);
909 }
910
911 /* copy-in the configuration information */
912 /* data points to a pointer to the configuration structure */
913
914 u_cfg = *((RF_Config_t **) data);
915 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
916 if (k_cfg == NULL) {
917 return (ENOMEM);
918 }
919 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
920 if (retcode) {
921 RF_Free(k_cfg, sizeof(RF_Config_t));
922 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
923 retcode));
924 return (retcode);
925 }
926 /* allocate a buffer for the layout-specific data, and copy it
927 * in */
928 if (k_cfg->layoutSpecificSize) {
929 if (k_cfg->layoutSpecificSize > 10000) {
930 /* sanity check */
931 RF_Free(k_cfg, sizeof(RF_Config_t));
932 return (EINVAL);
933 }
934 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
935 (u_char *));
936 if (specific_buf == NULL) {
937 RF_Free(k_cfg, sizeof(RF_Config_t));
938 return (ENOMEM);
939 }
940 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
941 k_cfg->layoutSpecificSize);
942 if (retcode) {
943 RF_Free(k_cfg, sizeof(RF_Config_t));
944 RF_Free(specific_buf,
945 k_cfg->layoutSpecificSize);
946 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
947 retcode));
948 return (retcode);
949 }
950 } else
951 specific_buf = NULL;
952 k_cfg->layoutSpecific = specific_buf;
953
954 /* should do some kind of sanity check on the configuration.
955 * Store the sum of all the bytes in the last byte? */
956
957 /* configure the system */
958
959 /*
960 * Clear the entire RAID descriptor, just to make sure
961 * there is no stale data left in the case of a
962 * reconfiguration
963 */
964 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
965 raidPtr->raidid = unit;
966
967 retcode = rf_Configure(raidPtr, k_cfg, NULL);
968
969 if (retcode == 0) {
970
971 /* allow this many simultaneous IO's to
972 this RAID device */
973 raidPtr->openings = RAIDOUTSTANDING;
974
975 raidinit(raidPtr);
976 rf_markalldirty(raidPtr);
977 }
978 /* free the buffers. No return code here. */
979 if (k_cfg->layoutSpecificSize) {
980 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
981 }
982 RF_Free(k_cfg, sizeof(RF_Config_t));
983
984 return (retcode);
985
986 /* shutdown the system */
987 case RAIDFRAME_SHUTDOWN:
988
989 if ((error = raidlock(rs)) != 0)
990 return (error);
991
992 /*
993 * If somebody has a partition mounted, we shouldn't
994 * shutdown.
995 */
996
997 part = DISKPART(dev);
998 pmask = (1 << part);
999 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1000 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1001 (rs->sc_dkdev.dk_copenmask & pmask))) {
1002 raidunlock(rs);
1003 return (EBUSY);
1004 }
1005
1006 retcode = rf_Shutdown(raidPtr);
1007
1008 /* It's no longer initialized... */
1009 rs->sc_flags &= ~RAIDF_INITED;
1010
1011 /* Detach the disk. */
1012 disk_detach(&rs->sc_dkdev);
1013
1014 raidunlock(rs);
1015
1016 return (retcode);
1017 case RAIDFRAME_GET_COMPONENT_LABEL:
1018 clabel_ptr = (RF_ComponentLabel_t **) data;
1019 /* need to read the component label for the disk indicated
1020 by row,column in clabel */
1021
1022 /* For practice, let's get it directly fromdisk, rather
1023 than from the in-core copy */
1024 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1025 (RF_ComponentLabel_t *));
1026 if (clabel == NULL)
1027 return (ENOMEM);
1028
1029 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1030
1031 retcode = copyin( *clabel_ptr, clabel,
1032 sizeof(RF_ComponentLabel_t));
1033
1034 if (retcode) {
1035 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1036 return(retcode);
1037 }
1038
1039 row = clabel->row;
1040 column = clabel->column;
1041
1042 if ((row < 0) || (row >= raidPtr->numRow) ||
1043 (column < 0) || (column >= raidPtr->numCol +
1044 raidPtr->numSpare)) {
1045 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1046 return(EINVAL);
1047 }
1048
1049 raidread_component_label(raidPtr->Disks[row][column].dev,
1050 raidPtr->raid_cinfo[row][column].ci_vp,
1051 clabel );
1052
1053 retcode = copyout(clabel, *clabel_ptr,
1054 sizeof(RF_ComponentLabel_t));
1055 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1056 return (retcode);
1057
1058 case RAIDFRAME_SET_COMPONENT_LABEL:
1059 clabel = (RF_ComponentLabel_t *) data;
1060
1061 /* XXX check the label for valid stuff... */
1062 /* Note that some things *should not* get modified --
1063 the user should be re-initing the labels instead of
1064 trying to patch things.
1065 */
1066
1067 raidid = raidPtr->raidid;
1068 printf("raid%d: Got component label:\n", raidid);
1069 printf("raid%d: Version: %d\n", raidid, clabel->version);
1070 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1071 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1072 printf("raid%d: Row: %d\n", raidid, clabel->row);
1073 printf("raid%d: Column: %d\n", raidid, clabel->column);
1074 printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
1075 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1076 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1077 printf("raid%d: Status: %d\n", raidid, clabel->status);
1078
1079 row = clabel->row;
1080 column = clabel->column;
1081
1082 if ((row < 0) || (row >= raidPtr->numRow) ||
1083 (column < 0) || (column >= raidPtr->numCol)) {
1084 return(EINVAL);
1085 }
1086
1087 /* XXX this isn't allowed to do anything for now :-) */
1088
1089 /* XXX and before it is, we need to fill in the rest
1090 of the fields!?!?!?! */
1091 #if 0
1092 raidwrite_component_label(
1093 raidPtr->Disks[row][column].dev,
1094 raidPtr->raid_cinfo[row][column].ci_vp,
1095 clabel );
1096 #endif
1097 return (0);
1098
1099 case RAIDFRAME_INIT_LABELS:
1100 clabel = (RF_ComponentLabel_t *) data;
1101 /*
1102 we only want the serial number from
1103 the above. We get all the rest of the information
1104 from the config that was used to create this RAID
1105 set.
1106 */
1107
1108 raidPtr->serial_number = clabel->serial_number;
1109
1110 raid_init_component_label(raidPtr, &ci_label);
1111 ci_label.serial_number = clabel->serial_number;
1112
1113 for(row=0;row<raidPtr->numRow;row++) {
1114 ci_label.row = row;
1115 for(column=0;column<raidPtr->numCol;column++) {
1116 diskPtr = &raidPtr->Disks[row][column];
1117 if (!RF_DEAD_DISK(diskPtr->status)) {
1118 ci_label.partitionSize = diskPtr->partitionSize;
1119 ci_label.column = column;
1120 raidwrite_component_label(
1121 raidPtr->Disks[row][column].dev,
1122 raidPtr->raid_cinfo[row][column].ci_vp,
1123 &ci_label );
1124 }
1125 }
1126 }
1127
1128 return (retcode);
1129 case RAIDFRAME_SET_AUTOCONFIG:
1130 d = rf_set_autoconfig(raidPtr, *(int *) data);
1131 printf("raid%d: New autoconfig value is: %d\n",
1132 raidPtr->raidid, d);
1133 *(int *) data = d;
1134 return (retcode);
1135
1136 case RAIDFRAME_SET_ROOT:
1137 d = rf_set_rootpartition(raidPtr, *(int *) data);
1138 printf("raid%d: New rootpartition value is: %d\n",
1139 raidPtr->raidid, d);
1140 *(int *) data = d;
1141 return (retcode);
1142
1143 /* initialize all parity */
1144 case RAIDFRAME_REWRITEPARITY:
1145
1146 if (raidPtr->Layout.map->faultsTolerated == 0) {
1147 /* Parity for RAID 0 is trivially correct */
1148 raidPtr->parity_good = RF_RAID_CLEAN;
1149 return(0);
1150 }
1151
1152 if (raidPtr->parity_rewrite_in_progress == 1) {
1153 /* Re-write is already in progress! */
1154 return(EINVAL);
1155 }
1156
1157 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1158 rf_RewriteParityThread,
1159 raidPtr,"raid_parity");
1160 return (retcode);
1161
1162
1163 case RAIDFRAME_ADD_HOT_SPARE:
1164 sparePtr = (RF_SingleComponent_t *) data;
1165 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1166 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1167 return(retcode);
1168
1169 case RAIDFRAME_REMOVE_HOT_SPARE:
1170 return(retcode);
1171
1172 case RAIDFRAME_DELETE_COMPONENT:
1173 componentPtr = (RF_SingleComponent_t *)data;
1174 memcpy( &component, componentPtr,
1175 sizeof(RF_SingleComponent_t));
1176 retcode = rf_delete_component(raidPtr, &component);
1177 return(retcode);
1178
1179 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1180 componentPtr = (RF_SingleComponent_t *)data;
1181 memcpy( &component, componentPtr,
1182 sizeof(RF_SingleComponent_t));
1183 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1184 return(retcode);
1185
1186 case RAIDFRAME_REBUILD_IN_PLACE:
1187
1188 if (raidPtr->Layout.map->faultsTolerated == 0) {
1189 /* Can't do this on a RAID 0!! */
1190 return(EINVAL);
1191 }
1192
1193 if (raidPtr->recon_in_progress == 1) {
1194 /* a reconstruct is already in progress! */
1195 return(EINVAL);
1196 }
1197
1198 componentPtr = (RF_SingleComponent_t *) data;
1199 memcpy( &component, componentPtr,
1200 sizeof(RF_SingleComponent_t));
1201 row = component.row;
1202 column = component.column;
1203
1204 if ((row < 0) || (row >= raidPtr->numRow) ||
1205 (column < 0) || (column >= raidPtr->numCol)) {
1206 return(EINVAL);
1207 }
1208
1209 RF_LOCK_MUTEX(raidPtr->mutex);
1210 if ((raidPtr->Disks[row][column].status == rf_ds_optimal) &&
1211 (raidPtr->numFailures > 0)) {
1212 /* XXX 0 above shouldn't be constant!!! */
1213 /* some component other than this has failed.
1214 Let's not make things worse than they already
1215 are... */
1216 printf("raid%d: Unable to reconstruct to disk at:\n",
1217 raidPtr->raidid);
1218 printf("raid%d: Row: %d Col: %d Too many failures.\n",
1219 raidPtr->raidid, row, column);
1220 RF_UNLOCK_MUTEX(raidPtr->mutex);
1221 return (EINVAL);
1222 }
1223 if (raidPtr->Disks[row][column].status ==
1224 rf_ds_reconstructing) {
1225 printf("raid%d: Unable to reconstruct to disk at:\n",
1226 raidPtr->raidid);
1227 printf("raid%d: Row: %d Col: %d Reconstruction already occuring!\n", raidPtr->raidid, row, column);
1228
1229 RF_UNLOCK_MUTEX(raidPtr->mutex);
1230 return (EINVAL);
1231 }
1232 if (raidPtr->Disks[row][column].status == rf_ds_spared) {
1233 RF_UNLOCK_MUTEX(raidPtr->mutex);
1234 return (EINVAL);
1235 }
1236 RF_UNLOCK_MUTEX(raidPtr->mutex);
1237
1238 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1239 if (rrcopy == NULL)
1240 return(ENOMEM);
1241
1242 rrcopy->raidPtr = (void *) raidPtr;
1243 rrcopy->row = row;
1244 rrcopy->col = column;
1245
1246 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1247 rf_ReconstructInPlaceThread,
1248 rrcopy,"raid_reconip");
1249 return(retcode);
1250
1251 case RAIDFRAME_GET_INFO:
1252 if (!raidPtr->valid)
1253 return (ENODEV);
1254 ucfgp = (RF_DeviceConfig_t **) data;
1255 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1256 (RF_DeviceConfig_t *));
1257 if (d_cfg == NULL)
1258 return (ENOMEM);
1259 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1260 d_cfg->rows = raidPtr->numRow;
1261 d_cfg->cols = raidPtr->numCol;
1262 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1263 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1264 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1265 return (ENOMEM);
1266 }
1267 d_cfg->nspares = raidPtr->numSpare;
1268 if (d_cfg->nspares >= RF_MAX_DISKS) {
1269 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1270 return (ENOMEM);
1271 }
1272 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1273 d = 0;
1274 for (i = 0; i < d_cfg->rows; i++) {
1275 for (j = 0; j < d_cfg->cols; j++) {
1276 d_cfg->devs[d] = raidPtr->Disks[i][j];
1277 d++;
1278 }
1279 }
1280 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1281 d_cfg->spares[i] = raidPtr->Disks[0][j];
1282 }
1283 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1284 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1285
1286 return (retcode);
1287
1288 case RAIDFRAME_CHECK_PARITY:
1289 *(int *) data = raidPtr->parity_good;
1290 return (0);
1291
1292 case RAIDFRAME_RESET_ACCTOTALS:
1293 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1294 return (0);
1295
1296 case RAIDFRAME_GET_ACCTOTALS:
1297 totals = (RF_AccTotals_t *) data;
1298 *totals = raidPtr->acc_totals;
1299 return (0);
1300
1301 case RAIDFRAME_KEEP_ACCTOTALS:
1302 raidPtr->keep_acc_totals = *(int *)data;
1303 return (0);
1304
1305 case RAIDFRAME_GET_SIZE:
1306 *(int *) data = raidPtr->totalSectors;
1307 return (0);
1308
1309 /* fail a disk & optionally start reconstruction */
1310 case RAIDFRAME_FAIL_DISK:
1311
1312 if (raidPtr->Layout.map->faultsTolerated == 0) {
1313 /* Can't do this on a RAID 0!! */
1314 return(EINVAL);
1315 }
1316
1317 rr = (struct rf_recon_req *) data;
1318
1319 if (rr->row < 0 || rr->row >= raidPtr->numRow
1320 || rr->col < 0 || rr->col >= raidPtr->numCol)
1321 return (EINVAL);
1322
1323
1324 RF_LOCK_MUTEX(raidPtr->mutex);
1325 if ((raidPtr->Disks[rr->row][rr->col].status ==
1326 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1327 /* some other component has failed. Let's not make
1328 things worse. XXX wrong for RAID6 */
1329 RF_UNLOCK_MUTEX(raidPtr->mutex);
1330 return (EINVAL);
1331 }
1332 if (raidPtr->Disks[rr->row][rr->col].status == rf_ds_spared) {
1333 /* Can't fail a spared disk! */
1334 RF_UNLOCK_MUTEX(raidPtr->mutex);
1335 return (EINVAL);
1336 }
1337 RF_UNLOCK_MUTEX(raidPtr->mutex);
1338
1339 /* make a copy of the recon request so that we don't rely on
1340 * the user's buffer */
1341 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1342 if (rrcopy == NULL)
1343 return(ENOMEM);
1344 memcpy(rrcopy, rr, sizeof(*rr));
1345 rrcopy->raidPtr = (void *) raidPtr;
1346
1347 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1348 rf_ReconThread,
1349 rrcopy,"raid_recon");
1350 return (0);
1351
1352 /* invoke a copyback operation after recon on whatever disk
1353 * needs it, if any */
1354 case RAIDFRAME_COPYBACK:
1355
1356 if (raidPtr->Layout.map->faultsTolerated == 0) {
1357 /* This makes no sense on a RAID 0!! */
1358 return(EINVAL);
1359 }
1360
1361 if (raidPtr->copyback_in_progress == 1) {
1362 /* Copyback is already in progress! */
1363 return(EINVAL);
1364 }
1365
1366 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1367 rf_CopybackThread,
1368 raidPtr,"raid_copyback");
1369 return (retcode);
1370
1371 /* return the percentage completion of reconstruction */
1372 case RAIDFRAME_CHECK_RECON_STATUS:
1373 if (raidPtr->Layout.map->faultsTolerated == 0) {
1374 /* This makes no sense on a RAID 0, so tell the
1375 user it's done. */
1376 *(int *) data = 100;
1377 return(0);
1378 }
1379 row = 0; /* XXX we only consider a single row... */
1380 if (raidPtr->status[row] != rf_rs_reconstructing)
1381 *(int *) data = 100;
1382 else
1383 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1384 return (0);
1385 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1386 progressInfoPtr = (RF_ProgressInfo_t **) data;
1387 row = 0; /* XXX we only consider a single row... */
1388 if (raidPtr->status[row] != rf_rs_reconstructing) {
1389 progressInfo.remaining = 0;
1390 progressInfo.completed = 100;
1391 progressInfo.total = 100;
1392 } else {
1393 progressInfo.total =
1394 raidPtr->reconControl[row]->numRUsTotal;
1395 progressInfo.completed =
1396 raidPtr->reconControl[row]->numRUsComplete;
1397 progressInfo.remaining = progressInfo.total -
1398 progressInfo.completed;
1399 }
1400 retcode = copyout(&progressInfo, *progressInfoPtr,
1401 sizeof(RF_ProgressInfo_t));
1402 return (retcode);
1403
1404 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1405 if (raidPtr->Layout.map->faultsTolerated == 0) {
1406 /* This makes no sense on a RAID 0, so tell the
1407 user it's done. */
1408 *(int *) data = 100;
1409 return(0);
1410 }
1411 if (raidPtr->parity_rewrite_in_progress == 1) {
1412 *(int *) data = 100 *
1413 raidPtr->parity_rewrite_stripes_done /
1414 raidPtr->Layout.numStripe;
1415 } else {
1416 *(int *) data = 100;
1417 }
1418 return (0);
1419
1420 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1421 progressInfoPtr = (RF_ProgressInfo_t **) data;
1422 if (raidPtr->parity_rewrite_in_progress == 1) {
1423 progressInfo.total = raidPtr->Layout.numStripe;
1424 progressInfo.completed =
1425 raidPtr->parity_rewrite_stripes_done;
1426 progressInfo.remaining = progressInfo.total -
1427 progressInfo.completed;
1428 } else {
1429 progressInfo.remaining = 0;
1430 progressInfo.completed = 100;
1431 progressInfo.total = 100;
1432 }
1433 retcode = copyout(&progressInfo, *progressInfoPtr,
1434 sizeof(RF_ProgressInfo_t));
1435 return (retcode);
1436
1437 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1438 if (raidPtr->Layout.map->faultsTolerated == 0) {
1439 /* This makes no sense on a RAID 0 */
1440 *(int *) data = 100;
1441 return(0);
1442 }
1443 if (raidPtr->copyback_in_progress == 1) {
1444 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1445 raidPtr->Layout.numStripe;
1446 } else {
1447 *(int *) data = 100;
1448 }
1449 return (0);
1450
1451 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1452 progressInfoPtr = (RF_ProgressInfo_t **) data;
1453 if (raidPtr->copyback_in_progress == 1) {
1454 progressInfo.total = raidPtr->Layout.numStripe;
1455 progressInfo.completed =
1456 raidPtr->copyback_stripes_done;
1457 progressInfo.remaining = progressInfo.total -
1458 progressInfo.completed;
1459 } else {
1460 progressInfo.remaining = 0;
1461 progressInfo.completed = 100;
1462 progressInfo.total = 100;
1463 }
1464 retcode = copyout(&progressInfo, *progressInfoPtr,
1465 sizeof(RF_ProgressInfo_t));
1466 return (retcode);
1467
1468 /* the sparetable daemon calls this to wait for the kernel to
1469 * need a spare table. this ioctl does not return until a
1470 * spare table is needed. XXX -- calling mpsleep here in the
1471 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1472 * -- I should either compute the spare table in the kernel,
1473 * or have a different -- XXX XXX -- interface (a different
1474 * character device) for delivering the table -- XXX */
1475 #if 0
1476 case RAIDFRAME_SPARET_WAIT:
1477 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1478 while (!rf_sparet_wait_queue)
1479 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1480 waitreq = rf_sparet_wait_queue;
1481 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1482 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1483
1484 /* structure assignment */
1485 *((RF_SparetWait_t *) data) = *waitreq;
1486
1487 RF_Free(waitreq, sizeof(*waitreq));
1488 return (0);
1489
1490 /* wakes up a process waiting on SPARET_WAIT and puts an error
1491 * code in it that will cause the daemon to exit */
1492 case RAIDFRAME_ABORT_SPARET_WAIT:
1493 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1494 waitreq->fcol = -1;
1495 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1496 waitreq->next = rf_sparet_wait_queue;
1497 rf_sparet_wait_queue = waitreq;
1498 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1499 wakeup(&rf_sparet_wait_queue);
1500 return (0);
1501
1502 /* used by the spare table daemon to deliver a spare table
1503 * into the kernel */
1504 case RAIDFRAME_SEND_SPARET:
1505
1506 /* install the spare table */
1507 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1508
1509 /* respond to the requestor. the return status of the spare
1510 * table installation is passed in the "fcol" field */
1511 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1512 waitreq->fcol = retcode;
1513 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1514 waitreq->next = rf_sparet_resp_queue;
1515 rf_sparet_resp_queue = waitreq;
1516 wakeup(&rf_sparet_resp_queue);
1517 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1518
1519 return (retcode);
1520 #endif
1521
1522 default:
1523 break; /* fall through to the os-specific code below */
1524
1525 }
1526
1527 if (!raidPtr->valid)
1528 return (EINVAL);
1529
1530 /*
1531 * Add support for "regular" device ioctls here.
1532 */
1533
1534 switch (cmd) {
1535 case DIOCGDINFO:
1536 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1537 break;
1538 #ifdef __HAVE_OLD_DISKLABEL
1539 case ODIOCGDINFO:
1540 newlabel = *(rs->sc_dkdev.dk_label);
1541 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1542 return ENOTTY;
1543 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1544 break;
1545 #endif
1546
1547 case DIOCGPART:
1548 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1549 ((struct partinfo *) data)->part =
1550 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1551 break;
1552
1553 case DIOCWDINFO:
1554 case DIOCSDINFO:
1555 #ifdef __HAVE_OLD_DISKLABEL
1556 case ODIOCWDINFO:
1557 case ODIOCSDINFO:
1558 #endif
1559 {
1560 struct disklabel *lp;
1561 #ifdef __HAVE_OLD_DISKLABEL
1562 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1563 memset(&newlabel, 0, sizeof newlabel);
1564 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1565 lp = &newlabel;
1566 } else
1567 #endif
1568 lp = (struct disklabel *)data;
1569
1570 if ((error = raidlock(rs)) != 0)
1571 return (error);
1572
1573 rs->sc_flags |= RAIDF_LABELLING;
1574
1575 error = setdisklabel(rs->sc_dkdev.dk_label,
1576 lp, 0, rs->sc_dkdev.dk_cpulabel);
1577 if (error == 0) {
1578 if (cmd == DIOCWDINFO
1579 #ifdef __HAVE_OLD_DISKLABEL
1580 || cmd == ODIOCWDINFO
1581 #endif
1582 )
1583 error = writedisklabel(RAIDLABELDEV(dev),
1584 raidstrategy, rs->sc_dkdev.dk_label,
1585 rs->sc_dkdev.dk_cpulabel);
1586 }
1587 rs->sc_flags &= ~RAIDF_LABELLING;
1588
1589 raidunlock(rs);
1590
1591 if (error)
1592 return (error);
1593 break;
1594 }
1595
1596 case DIOCWLABEL:
1597 if (*(int *) data != 0)
1598 rs->sc_flags |= RAIDF_WLABEL;
1599 else
1600 rs->sc_flags &= ~RAIDF_WLABEL;
1601 break;
1602
1603 case DIOCGDEFLABEL:
1604 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1605 break;
1606
1607 #ifdef __HAVE_OLD_DISKLABEL
1608 case ODIOCGDEFLABEL:
1609 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1610 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1611 return ENOTTY;
1612 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1613 break;
1614 #endif
1615
1616 default:
1617 retcode = ENOTTY;
1618 }
1619 return (retcode);
1620
1621 }
1622
1623
1624 /* raidinit -- complete the rest of the initialization for the
1625 RAIDframe device. */
1626
1627
1628 static void
1629 raidinit(raidPtr)
1630 RF_Raid_t *raidPtr;
1631 {
1632 struct raid_softc *rs;
1633 int unit;
1634
1635 unit = raidPtr->raidid;
1636
1637 rs = &raid_softc[unit];
1638
1639 /* XXX should check return code first... */
1640 rs->sc_flags |= RAIDF_INITED;
1641
1642 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1643
1644 rs->sc_dkdev.dk_name = rs->sc_xname;
1645
1646 /* disk_attach actually creates space for the CPU disklabel, among
1647 * other things, so it's critical to call this *BEFORE* we try putzing
1648 * with disklabels. */
1649
1650 disk_attach(&rs->sc_dkdev);
1651
1652 /* XXX There may be a weird interaction here between this, and
1653 * protectedSectors, as used in RAIDframe. */
1654
1655 rs->sc_size = raidPtr->totalSectors;
1656
1657 }
1658 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1659 /* wake up the daemon & tell it to get us a spare table
1660 * XXX
1661 * the entries in the queues should be tagged with the raidPtr
1662 * so that in the extremely rare case that two recons happen at once,
1663 * we know for which device were requesting a spare table
1664 * XXX
1665 *
1666 * XXX This code is not currently used. GO
1667 */
1668 int
1669 rf_GetSpareTableFromDaemon(req)
1670 RF_SparetWait_t *req;
1671 {
1672 int retcode;
1673
1674 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1675 req->next = rf_sparet_wait_queue;
1676 rf_sparet_wait_queue = req;
1677 wakeup(&rf_sparet_wait_queue);
1678
1679 /* mpsleep unlocks the mutex */
1680 while (!rf_sparet_resp_queue) {
1681 tsleep(&rf_sparet_resp_queue, PRIBIO,
1682 "raidframe getsparetable", 0);
1683 }
1684 req = rf_sparet_resp_queue;
1685 rf_sparet_resp_queue = req->next;
1686 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1687
1688 retcode = req->fcol;
1689 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1690 * alloc'd */
1691 return (retcode);
1692 }
1693 #endif
1694
1695 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1696 * bp & passes it down.
1697 * any calls originating in the kernel must use non-blocking I/O
1698 * do some extra sanity checking to return "appropriate" error values for
1699 * certain conditions (to make some standard utilities work)
1700 *
1701 * Formerly known as: rf_DoAccessKernel
1702 */
1703 void
1704 raidstart(raidPtr)
1705 RF_Raid_t *raidPtr;
1706 {
1707 RF_SectorCount_t num_blocks, pb, sum;
1708 RF_RaidAddr_t raid_addr;
1709 struct partition *pp;
1710 daddr_t blocknum;
1711 int unit;
1712 struct raid_softc *rs;
1713 int do_async;
1714 struct buf *bp;
1715
1716 unit = raidPtr->raidid;
1717 rs = &raid_softc[unit];
1718
1719 /* quick check to see if anything has died recently */
1720 RF_LOCK_MUTEX(raidPtr->mutex);
1721 if (raidPtr->numNewFailures > 0) {
1722 RF_UNLOCK_MUTEX(raidPtr->mutex);
1723 rf_update_component_labels(raidPtr,
1724 RF_NORMAL_COMPONENT_UPDATE);
1725 RF_LOCK_MUTEX(raidPtr->mutex);
1726 raidPtr->numNewFailures--;
1727 }
1728
1729 /* Check to see if we're at the limit... */
1730 while (raidPtr->openings > 0) {
1731 RF_UNLOCK_MUTEX(raidPtr->mutex);
1732
1733 /* get the next item, if any, from the queue */
1734 if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
1735 /* nothing more to do */
1736 return;
1737 }
1738
1739 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1740 * partition.. Need to make it absolute to the underlying
1741 * device.. */
1742
1743 blocknum = bp->b_blkno;
1744 if (DISKPART(bp->b_dev) != RAW_PART) {
1745 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1746 blocknum += pp->p_offset;
1747 }
1748
1749 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1750 (int) blocknum));
1751
1752 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1753 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1754
1755 /* *THIS* is where we adjust what block we're going to...
1756 * but DO NOT TOUCH bp->b_blkno!!! */
1757 raid_addr = blocknum;
1758
1759 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1760 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1761 sum = raid_addr + num_blocks + pb;
1762 if (1 || rf_debugKernelAccess) {
1763 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1764 (int) raid_addr, (int) sum, (int) num_blocks,
1765 (int) pb, (int) bp->b_resid));
1766 }
1767 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1768 || (sum < num_blocks) || (sum < pb)) {
1769 bp->b_error = ENOSPC;
1770 bp->b_flags |= B_ERROR;
1771 bp->b_resid = bp->b_bcount;
1772 biodone(bp);
1773 RF_LOCK_MUTEX(raidPtr->mutex);
1774 continue;
1775 }
1776 /*
1777 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1778 */
1779
1780 if (bp->b_bcount & raidPtr->sectorMask) {
1781 bp->b_error = EINVAL;
1782 bp->b_flags |= B_ERROR;
1783 bp->b_resid = bp->b_bcount;
1784 biodone(bp);
1785 RF_LOCK_MUTEX(raidPtr->mutex);
1786 continue;
1787
1788 }
1789 db1_printf(("Calling DoAccess..\n"));
1790
1791
1792 RF_LOCK_MUTEX(raidPtr->mutex);
1793 raidPtr->openings--;
1794 RF_UNLOCK_MUTEX(raidPtr->mutex);
1795
1796 /*
1797 * Everything is async.
1798 */
1799 do_async = 1;
1800
1801 disk_busy(&rs->sc_dkdev);
1802
1803 /* XXX we're still at splbio() here... do we *really*
1804 need to be? */
1805
1806 /* don't ever condition on bp->b_flags & B_WRITE.
1807 * always condition on B_READ instead */
1808
1809 bp->b_error = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1810 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1811 do_async, raid_addr, num_blocks,
1812 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1813
1814 if (bp->b_error) {
1815 bp->b_flags |= B_ERROR;
1816 }
1817
1818 RF_LOCK_MUTEX(raidPtr->mutex);
1819 }
1820 RF_UNLOCK_MUTEX(raidPtr->mutex);
1821 }
1822
1823
1824
1825
1826 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1827
1828 int
1829 rf_DispatchKernelIO(queue, req)
1830 RF_DiskQueue_t *queue;
1831 RF_DiskQueueData_t *req;
1832 {
1833 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1834 struct buf *bp;
1835 struct raidbuf *raidbp = NULL;
1836
1837 req->queue = queue;
1838
1839 #if DIAGNOSTIC
1840 if (queue->raidPtr->raidid >= numraid) {
1841 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
1842 numraid);
1843 panic("Invalid Unit number in rf_DispatchKernelIO");
1844 }
1845 #endif
1846
1847 bp = req->bp;
1848 #if 1
1849 /* XXX when there is a physical disk failure, someone is passing us a
1850 * buffer that contains old stuff!! Attempt to deal with this problem
1851 * without taking a performance hit... (not sure where the real bug
1852 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1853
1854 if (bp->b_flags & B_ERROR) {
1855 bp->b_flags &= ~B_ERROR;
1856 }
1857 if (bp->b_error != 0) {
1858 bp->b_error = 0;
1859 }
1860 #endif
1861 raidbp = pool_get(&raidframe_cbufpool, PR_NOWAIT);
1862 if (raidbp == NULL) {
1863 bp->b_flags |= B_ERROR;
1864 bp->b_error = ENOMEM;
1865 return (ENOMEM);
1866 }
1867 BUF_INIT(&raidbp->rf_buf);
1868
1869 /*
1870 * context for raidiodone
1871 */
1872 raidbp->rf_obp = bp;
1873 raidbp->req = req;
1874
1875 switch (req->type) {
1876 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1877 /* XXX need to do something extra here.. */
1878 /* I'm leaving this in, as I've never actually seen it used,
1879 * and I'd like folks to report it... GO */
1880 printf(("WAKEUP CALLED\n"));
1881 queue->numOutstanding++;
1882
1883 /* XXX need to glue the original buffer into this?? */
1884
1885 KernelWakeupFunc(&raidbp->rf_buf);
1886 break;
1887
1888 case RF_IO_TYPE_READ:
1889 case RF_IO_TYPE_WRITE:
1890
1891 if (req->tracerec) {
1892 RF_ETIMER_START(req->tracerec->timer);
1893 }
1894 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1895 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1896 req->sectorOffset, req->numSector,
1897 req->buf, KernelWakeupFunc, (void *) req,
1898 queue->raidPtr->logBytesPerSector, req->b_proc);
1899
1900 if (rf_debugKernelAccess) {
1901 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1902 (long) bp->b_blkno));
1903 }
1904 queue->numOutstanding++;
1905 queue->last_deq_sector = req->sectorOffset;
1906 /* acc wouldn't have been let in if there were any pending
1907 * reqs at any other priority */
1908 queue->curPriority = req->priority;
1909
1910 db1_printf(("Going for %c to unit %d row %d col %d\n",
1911 req->type, queue->raidPtr->raidid,
1912 queue->row, queue->col));
1913 db1_printf(("sector %d count %d (%d bytes) %d\n",
1914 (int) req->sectorOffset, (int) req->numSector,
1915 (int) (req->numSector <<
1916 queue->raidPtr->logBytesPerSector),
1917 (int) queue->raidPtr->logBytesPerSector));
1918 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1919 raidbp->rf_buf.b_vp->v_numoutput++;
1920 }
1921 VOP_STRATEGY(&raidbp->rf_buf);
1922
1923 break;
1924
1925 default:
1926 panic("bad req->type in rf_DispatchKernelIO");
1927 }
1928 db1_printf(("Exiting from DispatchKernelIO\n"));
1929
1930 return (0);
1931 }
1932 /* this is the callback function associated with a I/O invoked from
1933 kernel code.
1934 */
1935 static void
1936 KernelWakeupFunc(vbp)
1937 struct buf *vbp;
1938 {
1939 RF_DiskQueueData_t *req = NULL;
1940 RF_DiskQueue_t *queue;
1941 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1942 struct buf *bp;
1943 int s;
1944
1945 s = splbio();
1946 db1_printf(("recovering the request queue:\n"));
1947 req = raidbp->req;
1948
1949 bp = raidbp->rf_obp;
1950
1951 queue = (RF_DiskQueue_t *) req->queue;
1952
1953 if (raidbp->rf_buf.b_flags & B_ERROR) {
1954 bp->b_flags |= B_ERROR;
1955 bp->b_error = raidbp->rf_buf.b_error ?
1956 raidbp->rf_buf.b_error : EIO;
1957 }
1958
1959 /* XXX methinks this could be wrong... */
1960 #if 1
1961 bp->b_resid = raidbp->rf_buf.b_resid;
1962 #endif
1963
1964 if (req->tracerec) {
1965 RF_ETIMER_STOP(req->tracerec->timer);
1966 RF_ETIMER_EVAL(req->tracerec->timer);
1967 RF_LOCK_MUTEX(rf_tracing_mutex);
1968 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1969 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1970 req->tracerec->num_phys_ios++;
1971 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1972 }
1973 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1974
1975 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1976 * ballistic, and mark the component as hosed... */
1977
1978 if (bp->b_flags & B_ERROR) {
1979 /* Mark the disk as dead */
1980 /* but only mark it once... */
1981 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1982 rf_ds_optimal) {
1983 printf("raid%d: IO Error. Marking %s as failed.\n",
1984 queue->raidPtr->raidid,
1985 queue->raidPtr->Disks[queue->row][queue->col].devname);
1986 queue->raidPtr->Disks[queue->row][queue->col].status =
1987 rf_ds_failed;
1988 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1989 queue->raidPtr->numFailures++;
1990 queue->raidPtr->numNewFailures++;
1991 } else { /* Disk is already dead... */
1992 /* printf("Disk already marked as dead!\n"); */
1993 }
1994
1995 }
1996
1997 pool_put(&raidframe_cbufpool, raidbp);
1998
1999 /* Fill in the error value */
2000
2001 req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
2002
2003 simple_lock(&queue->raidPtr->iodone_lock);
2004
2005 /* Drop this one on the "finished" queue... */
2006 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
2007
2008 /* Let the raidio thread know there is work to be done. */
2009 wakeup(&(queue->raidPtr->iodone));
2010
2011 simple_unlock(&queue->raidPtr->iodone_lock);
2012
2013 splx(s);
2014 }
2015
2016
2017
2018 /*
2019 * initialize a buf structure for doing an I/O in the kernel.
2020 */
2021 static void
2022 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
2023 logBytesPerSector, b_proc)
2024 struct buf *bp;
2025 struct vnode *b_vp;
2026 unsigned rw_flag;
2027 dev_t dev;
2028 RF_SectorNum_t startSect;
2029 RF_SectorCount_t numSect;
2030 caddr_t buf;
2031 void (*cbFunc) (struct buf *);
2032 void *cbArg;
2033 int logBytesPerSector;
2034 struct proc *b_proc;
2035 {
2036 /* bp->b_flags = B_PHYS | rw_flag; */
2037 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
2038 bp->b_bcount = numSect << logBytesPerSector;
2039 bp->b_bufsize = bp->b_bcount;
2040 bp->b_error = 0;
2041 bp->b_dev = dev;
2042 bp->b_data = buf;
2043 bp->b_blkno = startSect;
2044 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2045 if (bp->b_bcount == 0) {
2046 panic("bp->b_bcount is zero in InitBP!!");
2047 }
2048 bp->b_proc = b_proc;
2049 bp->b_iodone = cbFunc;
2050 bp->b_vp = b_vp;
2051
2052 }
2053
2054 static void
2055 raidgetdefaultlabel(raidPtr, rs, lp)
2056 RF_Raid_t *raidPtr;
2057 struct raid_softc *rs;
2058 struct disklabel *lp;
2059 {
2060 memset(lp, 0, sizeof(*lp));
2061
2062 /* fabricate a label... */
2063 lp->d_secperunit = raidPtr->totalSectors;
2064 lp->d_secsize = raidPtr->bytesPerSector;
2065 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2066 lp->d_ntracks = 4 * raidPtr->numCol;
2067 lp->d_ncylinders = raidPtr->totalSectors /
2068 (lp->d_nsectors * lp->d_ntracks);
2069 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2070
2071 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2072 lp->d_type = DTYPE_RAID;
2073 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2074 lp->d_rpm = 3600;
2075 lp->d_interleave = 1;
2076 lp->d_flags = 0;
2077
2078 lp->d_partitions[RAW_PART].p_offset = 0;
2079 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2080 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2081 lp->d_npartitions = RAW_PART + 1;
2082
2083 lp->d_magic = DISKMAGIC;
2084 lp->d_magic2 = DISKMAGIC;
2085 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2086
2087 }
2088 /*
2089 * Read the disklabel from the raid device. If one is not present, fake one
2090 * up.
2091 */
2092 static void
2093 raidgetdisklabel(dev)
2094 dev_t dev;
2095 {
2096 int unit = raidunit(dev);
2097 struct raid_softc *rs = &raid_softc[unit];
2098 const char *errstring;
2099 struct disklabel *lp = rs->sc_dkdev.dk_label;
2100 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2101 RF_Raid_t *raidPtr;
2102
2103 db1_printf(("Getting the disklabel...\n"));
2104
2105 memset(clp, 0, sizeof(*clp));
2106
2107 raidPtr = raidPtrs[unit];
2108
2109 raidgetdefaultlabel(raidPtr, rs, lp);
2110
2111 /*
2112 * Call the generic disklabel extraction routine.
2113 */
2114 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2115 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2116 if (errstring)
2117 raidmakedisklabel(rs);
2118 else {
2119 int i;
2120 struct partition *pp;
2121
2122 /*
2123 * Sanity check whether the found disklabel is valid.
2124 *
2125 * This is necessary since total size of the raid device
2126 * may vary when an interleave is changed even though exactly
2127 * same componets are used, and old disklabel may used
2128 * if that is found.
2129 */
2130 if (lp->d_secperunit != rs->sc_size)
2131 printf("raid%d: WARNING: %s: "
2132 "total sector size in disklabel (%d) != "
2133 "the size of raid (%ld)\n", unit, rs->sc_xname,
2134 lp->d_secperunit, (long) rs->sc_size);
2135 for (i = 0; i < lp->d_npartitions; i++) {
2136 pp = &lp->d_partitions[i];
2137 if (pp->p_offset + pp->p_size > rs->sc_size)
2138 printf("raid%d: WARNING: %s: end of partition `%c' "
2139 "exceeds the size of raid (%ld)\n",
2140 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2141 }
2142 }
2143
2144 }
2145 /*
2146 * Take care of things one might want to take care of in the event
2147 * that a disklabel isn't present.
2148 */
2149 static void
2150 raidmakedisklabel(rs)
2151 struct raid_softc *rs;
2152 {
2153 struct disklabel *lp = rs->sc_dkdev.dk_label;
2154 db1_printf(("Making a label..\n"));
2155
2156 /*
2157 * For historical reasons, if there's no disklabel present
2158 * the raw partition must be marked FS_BSDFFS.
2159 */
2160
2161 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2162
2163 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2164
2165 lp->d_checksum = dkcksum(lp);
2166 }
2167 /*
2168 * Lookup the provided name in the filesystem. If the file exists,
2169 * is a valid block device, and isn't being used by anyone else,
2170 * set *vpp to the file's vnode.
2171 * You'll find the original of this in ccd.c
2172 */
2173 int
2174 raidlookup(path, p, vpp)
2175 char *path;
2176 struct proc *p;
2177 struct vnode **vpp; /* result */
2178 {
2179 struct nameidata nd;
2180 struct vnode *vp;
2181 struct vattr va;
2182 int error;
2183
2184 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2185 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2186 return (error);
2187 }
2188 vp = nd.ni_vp;
2189 if (vp->v_usecount > 1) {
2190 VOP_UNLOCK(vp, 0);
2191 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2192 return (EBUSY);
2193 }
2194 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2195 VOP_UNLOCK(vp, 0);
2196 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2197 return (error);
2198 }
2199 /* XXX: eventually we should handle VREG, too. */
2200 if (va.va_type != VBLK) {
2201 VOP_UNLOCK(vp, 0);
2202 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2203 return (ENOTBLK);
2204 }
2205 VOP_UNLOCK(vp, 0);
2206 *vpp = vp;
2207 return (0);
2208 }
2209 /*
2210 * Wait interruptibly for an exclusive lock.
2211 *
2212 * XXX
2213 * Several drivers do this; it should be abstracted and made MP-safe.
2214 * (Hmm... where have we seen this warning before :-> GO )
2215 */
2216 static int
2217 raidlock(rs)
2218 struct raid_softc *rs;
2219 {
2220 int error;
2221
2222 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2223 rs->sc_flags |= RAIDF_WANTED;
2224 if ((error =
2225 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2226 return (error);
2227 }
2228 rs->sc_flags |= RAIDF_LOCKED;
2229 return (0);
2230 }
2231 /*
2232 * Unlock and wake up any waiters.
2233 */
2234 static void
2235 raidunlock(rs)
2236 struct raid_softc *rs;
2237 {
2238
2239 rs->sc_flags &= ~RAIDF_LOCKED;
2240 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2241 rs->sc_flags &= ~RAIDF_WANTED;
2242 wakeup(rs);
2243 }
2244 }
2245
2246
/*
 * Position and length, both in bytes, of the component label area
 * that the component label read/write routines transfer.
 */
#define RF_COMPONENT_INFO_OFFSET	(16 * 1024)	/* bytes */
#define RF_COMPONENT_INFO_SIZE		1024		/* bytes */
2249
2250 int
2251 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2252 {
2253 RF_ComponentLabel_t clabel;
2254 raidread_component_label(dev, b_vp, &clabel);
2255 clabel.mod_counter = mod_counter;
2256 clabel.clean = RF_RAID_CLEAN;
2257 raidwrite_component_label(dev, b_vp, &clabel);
2258 return(0);
2259 }
2260
2261
2262 int
2263 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2264 {
2265 RF_ComponentLabel_t clabel;
2266 raidread_component_label(dev, b_vp, &clabel);
2267 clabel.mod_counter = mod_counter;
2268 clabel.clean = RF_RAID_DIRTY;
2269 raidwrite_component_label(dev, b_vp, &clabel);
2270 return(0);
2271 }
2272
2273 /* ARGSUSED */
2274 int
2275 raidread_component_label(dev, b_vp, clabel)
2276 dev_t dev;
2277 struct vnode *b_vp;
2278 RF_ComponentLabel_t *clabel;
2279 {
2280 struct buf *bp;
2281 const struct bdevsw *bdev;
2282 int error;
2283
2284 /* XXX should probably ensure that we don't try to do this if
2285 someone has changed rf_protected_sectors. */
2286
2287 if (b_vp == NULL) {
2288 /* For whatever reason, this component is not valid.
2289 Don't try to read a component label from it. */
2290 return(EINVAL);
2291 }
2292
2293 /* get a block of the appropriate size... */
2294 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2295 bp->b_dev = dev;
2296
2297 /* get our ducks in a row for the read */
2298 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2299 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2300 bp->b_flags |= B_READ;
2301 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2302
2303 bdev = bdevsw_lookup(bp->b_dev);
2304 if (bdev == NULL)
2305 return (ENXIO);
2306 (*bdev->d_strategy)(bp);
2307
2308 error = biowait(bp);
2309
2310 if (!error) {
2311 memcpy(clabel, bp->b_data,
2312 sizeof(RF_ComponentLabel_t));
2313 }
2314
2315 brelse(bp);
2316 return(error);
2317 }
2318 /* ARGSUSED */
2319 int
2320 raidwrite_component_label(dev, b_vp, clabel)
2321 dev_t dev;
2322 struct vnode *b_vp;
2323 RF_ComponentLabel_t *clabel;
2324 {
2325 struct buf *bp;
2326 const struct bdevsw *bdev;
2327 int error;
2328
2329 /* get a block of the appropriate size... */
2330 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2331 bp->b_dev = dev;
2332
2333 /* get our ducks in a row for the write */
2334 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2335 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2336 bp->b_flags |= B_WRITE;
2337 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2338
2339 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2340
2341 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2342
2343 bdev = bdevsw_lookup(bp->b_dev);
2344 if (bdev == NULL)
2345 return (ENXIO);
2346 (*bdev->d_strategy)(bp);
2347 error = biowait(bp);
2348 brelse(bp);
2349 if (error) {
2350 #if 1
2351 printf("Failed to write RAID component info!\n");
2352 #endif
2353 }
2354
2355 return(error);
2356 }
2357
2358 void
2359 rf_markalldirty(raidPtr)
2360 RF_Raid_t *raidPtr;
2361 {
2362 RF_ComponentLabel_t clabel;
2363 int sparecol;
2364 int r,c;
2365 int i,j;
2366 int srow = -1, scol = -1;
2367
2368 raidPtr->mod_counter++;
2369 for (r = 0; r < raidPtr->numRow; r++) {
2370 for (c = 0; c < raidPtr->numCol; c++) {
2371 /* we don't want to touch (at all) a disk that has
2372 failed */
2373 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2374 raidread_component_label(
2375 raidPtr->Disks[r][c].dev,
2376 raidPtr->raid_cinfo[r][c].ci_vp,
2377 &clabel);
2378 if (clabel.status == rf_ds_spared) {
2379 /* XXX do something special...
2380 but whatever you do, don't
2381 try to access it!! */
2382 } else {
2383 raidmarkdirty(
2384 raidPtr->Disks[r][c].dev,
2385 raidPtr->raid_cinfo[r][c].ci_vp,
2386 raidPtr->mod_counter);
2387 }
2388 }
2389 }
2390 }
2391
2392 for( c = 0; c < raidPtr->numSpare ; c++) {
2393 sparecol = raidPtr->numCol + c;
2394 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2395 /*
2396
2397 we claim this disk is "optimal" if it's
2398 rf_ds_used_spare, as that means it should be
2399 directly substitutable for the disk it replaced.
2400 We note that too...
2401
2402 */
2403
2404 for(i=0;i<raidPtr->numRow;i++) {
2405 for(j=0;j<raidPtr->numCol;j++) {
2406 if ((raidPtr->Disks[i][j].spareRow ==
2407 0) &&
2408 (raidPtr->Disks[i][j].spareCol ==
2409 sparecol)) {
2410 srow = i;
2411 scol = j;
2412 break;
2413 }
2414 }
2415 }
2416
2417 raidread_component_label(
2418 raidPtr->Disks[0][sparecol].dev,
2419 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2420 &clabel);
2421 /* make sure status is noted */
2422
2423 raid_init_component_label(raidPtr, &clabel);
2424
2425 clabel.row = srow;
2426 clabel.column = scol;
2427 /* Note: we *don't* change status from rf_ds_used_spare
2428 to rf_ds_optimal */
2429 /* clabel.status = rf_ds_optimal; */
2430
2431 raidmarkdirty(raidPtr->Disks[0][sparecol].dev,
2432 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2433 raidPtr->mod_counter);
2434 }
2435 }
2436 }
2437
2438
2439 void
2440 rf_update_component_labels(raidPtr, final)
2441 RF_Raid_t *raidPtr;
2442 int final;
2443 {
2444 RF_ComponentLabel_t clabel;
2445 int sparecol;
2446 int r,c;
2447 int i,j;
2448 int srow, scol;
2449
2450 srow = -1;
2451 scol = -1;
2452
2453 /* XXX should do extra checks to make sure things really are clean,
2454 rather than blindly setting the clean bit... */
2455
2456 raidPtr->mod_counter++;
2457
2458 for (r = 0; r < raidPtr->numRow; r++) {
2459 for (c = 0; c < raidPtr->numCol; c++) {
2460 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2461 raidread_component_label(
2462 raidPtr->Disks[r][c].dev,
2463 raidPtr->raid_cinfo[r][c].ci_vp,
2464 &clabel);
2465 /* make sure status is noted */
2466 clabel.status = rf_ds_optimal;
2467 /* bump the counter */
2468 clabel.mod_counter = raidPtr->mod_counter;
2469
2470 raidwrite_component_label(
2471 raidPtr->Disks[r][c].dev,
2472 raidPtr->raid_cinfo[r][c].ci_vp,
2473 &clabel);
2474 if (final == RF_FINAL_COMPONENT_UPDATE) {
2475 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2476 raidmarkclean(
2477 raidPtr->Disks[r][c].dev,
2478 raidPtr->raid_cinfo[r][c].ci_vp,
2479 raidPtr->mod_counter);
2480 }
2481 }
2482 }
2483 /* else we don't touch it.. */
2484 }
2485 }
2486
2487 for( c = 0; c < raidPtr->numSpare ; c++) {
2488 sparecol = raidPtr->numCol + c;
2489 /* Need to ensure that the reconstruct actually completed! */
2490 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2491 /*
2492
2493 we claim this disk is "optimal" if it's
2494 rf_ds_used_spare, as that means it should be
2495 directly substitutable for the disk it replaced.
2496 We note that too...
2497
2498 */
2499
2500 for(i=0;i<raidPtr->numRow;i++) {
2501 for(j=0;j<raidPtr->numCol;j++) {
2502 if ((raidPtr->Disks[i][j].spareRow ==
2503 0) &&
2504 (raidPtr->Disks[i][j].spareCol ==
2505 sparecol)) {
2506 srow = i;
2507 scol = j;
2508 break;
2509 }
2510 }
2511 }
2512
2513 /* XXX shouldn't *really* need this... */
2514 raidread_component_label(
2515 raidPtr->Disks[0][sparecol].dev,
2516 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2517 &clabel);
2518 /* make sure status is noted */
2519
2520 raid_init_component_label(raidPtr, &clabel);
2521
2522 clabel.mod_counter = raidPtr->mod_counter;
2523 clabel.row = srow;
2524 clabel.column = scol;
2525 clabel.status = rf_ds_optimal;
2526
2527 raidwrite_component_label(
2528 raidPtr->Disks[0][sparecol].dev,
2529 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2530 &clabel);
2531 if (final == RF_FINAL_COMPONENT_UPDATE) {
2532 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2533 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2534 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2535 raidPtr->mod_counter);
2536 }
2537 }
2538 }
2539 }
2540 }
2541
2542 void
2543 rf_close_component(raidPtr, vp, auto_configured)
2544 RF_Raid_t *raidPtr;
2545 struct vnode *vp;
2546 int auto_configured;
2547 {
2548 struct proc *p;
2549
2550 p = raidPtr->engine_thread;
2551
2552 if (vp != NULL) {
2553 if (auto_configured == 1) {
2554 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2555 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2556 vput(vp);
2557
2558 } else {
2559 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2560 }
2561 }
2562 }
2563
2564
2565 void
2566 rf_UnconfigureVnodes(raidPtr)
2567 RF_Raid_t *raidPtr;
2568 {
2569 int r,c;
2570 struct vnode *vp;
2571 int acd;
2572
2573
2574 /* We take this opportunity to close the vnodes like we should.. */
2575
2576 for (r = 0; r < raidPtr->numRow; r++) {
2577 for (c = 0; c < raidPtr->numCol; c++) {
2578 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2579 acd = raidPtr->Disks[r][c].auto_configured;
2580 rf_close_component(raidPtr, vp, acd);
2581 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2582 raidPtr->Disks[r][c].auto_configured = 0;
2583 }
2584 }
2585 for (r = 0; r < raidPtr->numSpare; r++) {
2586 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2587 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2588 rf_close_component(raidPtr, vp, acd);
2589 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2590 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2591 }
2592 }
2593
2594
2595 void
2596 rf_ReconThread(req)
2597 struct rf_recon_req *req;
2598 {
2599 int s;
2600 RF_Raid_t *raidPtr;
2601
2602 s = splbio();
2603 raidPtr = (RF_Raid_t *) req->raidPtr;
2604 raidPtr->recon_in_progress = 1;
2605
2606 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2607 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2608
2609 /* XXX get rid of this! we don't need it at all.. */
2610 RF_Free(req, sizeof(*req));
2611
2612 raidPtr->recon_in_progress = 0;
2613 splx(s);
2614
2615 /* That's all... */
2616 kthread_exit(0); /* does not return */
2617 }
2618
2619 void
2620 rf_RewriteParityThread(raidPtr)
2621 RF_Raid_t *raidPtr;
2622 {
2623 int retcode;
2624 int s;
2625
2626 raidPtr->parity_rewrite_in_progress = 1;
2627 s = splbio();
2628 retcode = rf_RewriteParity(raidPtr);
2629 splx(s);
2630 if (retcode) {
2631 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2632 } else {
2633 /* set the clean bit! If we shutdown correctly,
2634 the clean bit on each component label will get
2635 set */
2636 raidPtr->parity_good = RF_RAID_CLEAN;
2637 }
2638 raidPtr->parity_rewrite_in_progress = 0;
2639
2640 /* Anyone waiting for us to stop? If so, inform them... */
2641 if (raidPtr->waitShutdown) {
2642 wakeup(&raidPtr->parity_rewrite_in_progress);
2643 }
2644
2645 /* That's all... */
2646 kthread_exit(0); /* does not return */
2647 }
2648
2649
2650 void
2651 rf_CopybackThread(raidPtr)
2652 RF_Raid_t *raidPtr;
2653 {
2654 int s;
2655
2656 raidPtr->copyback_in_progress = 1;
2657 s = splbio();
2658 rf_CopybackReconstructedData(raidPtr);
2659 splx(s);
2660 raidPtr->copyback_in_progress = 0;
2661
2662 /* That's all... */
2663 kthread_exit(0); /* does not return */
2664 }
2665
2666
2667 void
2668 rf_ReconstructInPlaceThread(req)
2669 struct rf_recon_req *req;
2670 {
2671 int s;
2672 RF_Raid_t *raidPtr;
2673
2674 s = splbio();
2675 raidPtr = req->raidPtr;
2676 raidPtr->recon_in_progress = 1;
2677 rf_ReconstructInPlace(raidPtr, req->row, req->col);
2678 RF_Free(req, sizeof(*req));
2679 raidPtr->recon_in_progress = 0;
2680 splx(s);
2681
2682 /* That's all... */
2683 kthread_exit(0); /* does not return */
2684 }
2685
2686 RF_AutoConfig_t *
2687 rf_find_raid_components()
2688 {
2689 struct vnode *vp;
2690 struct disklabel label;
2691 struct device *dv;
2692 dev_t dev;
2693 int bmajor;
2694 int error;
2695 int i;
2696 int good_one;
2697 RF_ComponentLabel_t *clabel;
2698 RF_AutoConfig_t *ac_list;
2699 RF_AutoConfig_t *ac;
2700
2701
2702 /* initialize the AutoConfig list */
2703 ac_list = NULL;
2704
2705 /* we begin by trolling through *all* the devices on the system */
2706
2707 for (dv = alldevs.tqh_first; dv != NULL;
2708 dv = dv->dv_list.tqe_next) {
2709
2710 /* we are only interested in disks... */
2711 if (dv->dv_class != DV_DISK)
2712 continue;
2713
2714 /* we don't care about floppies... */
2715 if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
2716 continue;
2717 }
2718
2719 /* we don't care about CD's... */
2720 if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
2721 continue;
2722 }
2723
2724 /* hdfd is the Atari/Hades floppy driver */
2725 if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
2726 continue;
2727 }
2728 /* fdisa is the Atari/Milan floppy driver */
2729 if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
2730 continue;
2731 }
2732
2733 /* need to find the device_name_to_block_device_major stuff */
2734 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2735
2736 /* get a vnode for the raw partition of this disk */
2737
2738 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2739 if (bdevvp(dev, &vp))
2740 panic("RAID can't alloc vnode");
2741
2742 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2743
2744 if (error) {
2745 /* "Who cares." Continue looking
2746 for something that exists*/
2747 vput(vp);
2748 continue;
2749 }
2750
2751 /* Ok, the disk exists. Go get the disklabel. */
2752 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
2753 if (error) {
2754 /*
2755 * XXX can't happen - open() would
2756 * have errored out (or faked up one)
2757 */
2758 printf("can't get label for dev %s%c (%d)!?!?\n",
2759 dv->dv_xname, 'a' + RAW_PART, error);
2760 }
2761
2762 /* don't need this any more. We'll allocate it again
2763 a little later if we really do... */
2764 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2765 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2766 vput(vp);
2767
2768 for (i=0; i < label.d_npartitions; i++) {
2769 /* We only support partitions marked as RAID */
2770 if (label.d_partitions[i].p_fstype != FS_RAID)
2771 continue;
2772
2773 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2774 if (bdevvp(dev, &vp))
2775 panic("RAID can't alloc vnode");
2776
2777 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2778 if (error) {
2779 /* Whatever... */
2780 vput(vp);
2781 continue;
2782 }
2783
2784 good_one = 0;
2785
2786 clabel = (RF_ComponentLabel_t *)
2787 malloc(sizeof(RF_ComponentLabel_t),
2788 M_RAIDFRAME, M_NOWAIT);
2789 if (clabel == NULL) {
2790 /* XXX CLEANUP HERE */
2791 printf("RAID auto config: out of memory!\n");
2792 return(NULL); /* XXX probably should panic? */
2793 }
2794
2795 if (!raidread_component_label(dev, vp, clabel)) {
2796 /* Got the label. Does it look reasonable? */
2797 if (rf_reasonable_label(clabel) &&
2798 (clabel->partitionSize <=
2799 label.d_partitions[i].p_size)) {
2800 #if DEBUG
2801 printf("Component on: %s%c: %d\n",
2802 dv->dv_xname, 'a'+i,
2803 label.d_partitions[i].p_size);
2804 rf_print_component_label(clabel);
2805 #endif
2806 /* if it's reasonable, add it,
2807 else ignore it. */
2808 ac = (RF_AutoConfig_t *)
2809 malloc(sizeof(RF_AutoConfig_t),
2810 M_RAIDFRAME,
2811 M_NOWAIT);
2812 if (ac == NULL) {
2813 /* XXX should panic?? */
2814 return(NULL);
2815 }
2816
2817 sprintf(ac->devname, "%s%c",
2818 dv->dv_xname, 'a'+i);
2819 ac->dev = dev;
2820 ac->vp = vp;
2821 ac->clabel = clabel;
2822 ac->next = ac_list;
2823 ac_list = ac;
2824 good_one = 1;
2825 }
2826 }
2827 if (!good_one) {
2828 /* cleanup */
2829 free(clabel, M_RAIDFRAME);
2830 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2831 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2832 vput(vp);
2833 }
2834 }
2835 }
2836 return(ac_list);
2837 }
2838
2839 static int
2840 rf_reasonable_label(clabel)
2841 RF_ComponentLabel_t *clabel;
2842 {
2843
2844 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2845 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2846 ((clabel->clean == RF_RAID_CLEAN) ||
2847 (clabel->clean == RF_RAID_DIRTY)) &&
2848 clabel->row >=0 &&
2849 clabel->column >= 0 &&
2850 clabel->num_rows > 0 &&
2851 clabel->num_columns > 0 &&
2852 clabel->row < clabel->num_rows &&
2853 clabel->column < clabel->num_columns &&
2854 clabel->blockSize > 0 &&
2855 clabel->numBlocks > 0) {
2856 /* label looks reasonable enough... */
2857 return(1);
2858 }
2859 return(0);
2860 }
2861
2862
#if DEBUG
/*
 * Debugging aid: print the fields of a component label on the console.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	const char *is_clean = clabel->clean ? "Yes" : "No";
	const char *is_auto = clabel->autoconfigure ? "Yes" : "No";
	const char *is_root = clabel->root_partition ? "Yes" : "No";

	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf(" Clean: %s Status: %d\n",
	    is_clean, clabel->status );
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);
	printf(" Autoconfig: %s\n", is_auto );
	printf(" Contains root partition: %s\n",
	    is_root );
	printf(" Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif
2891
2892 RF_ConfigSet_t *
2893 rf_create_auto_sets(ac_list)
2894 RF_AutoConfig_t *ac_list;
2895 {
2896 RF_AutoConfig_t *ac;
2897 RF_ConfigSet_t *config_sets;
2898 RF_ConfigSet_t *cset;
2899 RF_AutoConfig_t *ac_next;
2900
2901
2902 config_sets = NULL;
2903
2904 /* Go through the AutoConfig list, and figure out which components
2905 belong to what sets. */
2906 ac = ac_list;
2907 while(ac!=NULL) {
2908 /* we're going to putz with ac->next, so save it here
2909 for use at the end of the loop */
2910 ac_next = ac->next;
2911
2912 if (config_sets == NULL) {
2913 /* will need at least this one... */
2914 config_sets = (RF_ConfigSet_t *)
2915 malloc(sizeof(RF_ConfigSet_t),
2916 M_RAIDFRAME, M_NOWAIT);
2917 if (config_sets == NULL) {
2918 panic("rf_create_auto_sets: No memory!");
2919 }
2920 /* this one is easy :) */
2921 config_sets->ac = ac;
2922 config_sets->next = NULL;
2923 config_sets->rootable = 0;
2924 ac->next = NULL;
2925 } else {
2926 /* which set does this component fit into? */
2927 cset = config_sets;
2928 while(cset!=NULL) {
2929 if (rf_does_it_fit(cset, ac)) {
2930 /* looks like it matches... */
2931 ac->next = cset->ac;
2932 cset->ac = ac;
2933 break;
2934 }
2935 cset = cset->next;
2936 }
2937 if (cset==NULL) {
2938 /* didn't find a match above... new set..*/
2939 cset = (RF_ConfigSet_t *)
2940 malloc(sizeof(RF_ConfigSet_t),
2941 M_RAIDFRAME, M_NOWAIT);
2942 if (cset == NULL) {
2943 panic("rf_create_auto_sets: No memory!");
2944 }
2945 cset->ac = ac;
2946 ac->next = NULL;
2947 cset->next = config_sets;
2948 cset->rootable = 0;
2949 config_sets = cset;
2950 }
2951 }
2952 ac = ac_next;
2953 }
2954
2955
2956 return(config_sets);
2957 }
2958
2959 static int
2960 rf_does_it_fit(cset, ac)
2961 RF_ConfigSet_t *cset;
2962 RF_AutoConfig_t *ac;
2963 {
2964 RF_ComponentLabel_t *clabel1, *clabel2;
2965
2966 /* If this one matches the *first* one in the set, that's good
2967 enough, since the other members of the set would have been
2968 through here too... */
2969 /* note that we are not checking partitionSize here..
2970
2971 Note that we are also not checking the mod_counters here.
2972 If everything else matches execpt the mod_counter, that's
2973 good enough for this test. We will deal with the mod_counters
2974 a little later in the autoconfiguration process.
2975
2976 (clabel1->mod_counter == clabel2->mod_counter) &&
2977
2978 The reason we don't check for this is that failed disks
2979 will have lower modification counts. If those disks are
2980 not added to the set they used to belong to, then they will
2981 form their own set, which may result in 2 different sets,
2982 for example, competing to be configured at raid0, and
2983 perhaps competing to be the root filesystem set. If the
2984 wrong ones get configured, or both attempt to become /,
2985 weird behaviour and or serious lossage will occur. Thus we
2986 need to bring them into the fold here, and kick them out at
2987 a later point.
2988
2989 */
2990
2991 clabel1 = cset->ac->clabel;
2992 clabel2 = ac->clabel;
2993 if ((clabel1->version == clabel2->version) &&
2994 (clabel1->serial_number == clabel2->serial_number) &&
2995 (clabel1->num_rows == clabel2->num_rows) &&
2996 (clabel1->num_columns == clabel2->num_columns) &&
2997 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2998 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2999 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3000 (clabel1->parityConfig == clabel2->parityConfig) &&
3001 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3002 (clabel1->blockSize == clabel2->blockSize) &&
3003 (clabel1->numBlocks == clabel2->numBlocks) &&
3004 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3005 (clabel1->root_partition == clabel2->root_partition) &&
3006 (clabel1->last_unit == clabel2->last_unit) &&
3007 (clabel1->config_order == clabel2->config_order)) {
3008 /* if it get's here, it almost *has* to be a match */
3009 } else {
3010 /* it's not consistent with somebody in the set..
3011 punt */
3012 return(0);
3013 }
3014 /* all was fine.. it must fit... */
3015 return(1);
3016 }
3017
3018 int
3019 rf_have_enough_components(cset)
3020 RF_ConfigSet_t *cset;
3021 {
3022 RF_AutoConfig_t *ac;
3023 RF_AutoConfig_t *auto_config;
3024 RF_ComponentLabel_t *clabel;
3025 int r,c;
3026 int num_rows;
3027 int num_cols;
3028 int num_missing;
3029 int mod_counter;
3030 int mod_counter_found;
3031 int even_pair_failed;
3032 char parity_type;
3033
3034
3035 /* check to see that we have enough 'live' components
3036 of this set. If so, we can configure it if necessary */
3037
3038 num_rows = cset->ac->clabel->num_rows;
3039 num_cols = cset->ac->clabel->num_columns;
3040 parity_type = cset->ac->clabel->parityConfig;
3041
3042 /* XXX Check for duplicate components!?!?!? */
3043
3044 /* Determine what the mod_counter is supposed to be for this set. */
3045
3046 mod_counter_found = 0;
3047 mod_counter = 0;
3048 ac = cset->ac;
3049 while(ac!=NULL) {
3050 if (mod_counter_found==0) {
3051 mod_counter = ac->clabel->mod_counter;
3052 mod_counter_found = 1;
3053 } else {
3054 if (ac->clabel->mod_counter > mod_counter) {
3055 mod_counter = ac->clabel->mod_counter;
3056 }
3057 }
3058 ac = ac->next;
3059 }
3060
3061 num_missing = 0;
3062 auto_config = cset->ac;
3063
3064 for(r=0; r<num_rows; r++) {
3065 even_pair_failed = 0;
3066 for(c=0; c<num_cols; c++) {
3067 ac = auto_config;
3068 while(ac!=NULL) {
3069 if ((ac->clabel->row == r) &&
3070 (ac->clabel->column == c) &&
3071 (ac->clabel->mod_counter == mod_counter)) {
3072 /* it's this one... */
3073 #if DEBUG
3074 printf("Found: %s at %d,%d\n",
3075 ac->devname,r,c);
3076 #endif
3077 break;
3078 }
3079 ac=ac->next;
3080 }
3081 if (ac==NULL) {
3082 /* Didn't find one here! */
3083 /* special case for RAID 1, especially
3084 where there are more than 2
3085 components (where RAIDframe treats
3086 things a little differently :( ) */
3087 if (parity_type == '1') {
3088 if (c%2 == 0) { /* even component */
3089 even_pair_failed = 1;
3090 } else { /* odd component. If
3091 we're failed, and
3092 so is the even
3093 component, it's
3094 "Good Night, Charlie" */
3095 if (even_pair_failed == 1) {
3096 return(0);
3097 }
3098 }
3099 } else {
3100 /* normal accounting */
3101 num_missing++;
3102 }
3103 }
3104 if ((parity_type == '1') && (c%2 == 1)) {
3105 /* Just did an even component, and we didn't
3106 bail.. reset the even_pair_failed flag,
3107 and go on to the next component.... */
3108 even_pair_failed = 0;
3109 }
3110 }
3111 }
3112
3113 clabel = cset->ac->clabel;
3114
3115 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3116 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3117 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3118 /* XXX this needs to be made *much* more general */
3119 /* Too many failures */
3120 return(0);
3121 }
3122 /* otherwise, all is well, and we've got enough to take a kick
3123 at autoconfiguring this set */
3124 return(1);
3125 }
3126
3127 void
3128 rf_create_configuration(ac,config,raidPtr)
3129 RF_AutoConfig_t *ac;
3130 RF_Config_t *config;
3131 RF_Raid_t *raidPtr;
3132 {
3133 RF_ComponentLabel_t *clabel;
3134 int i;
3135
3136 clabel = ac->clabel;
3137
3138 /* 1. Fill in the common stuff */
3139 config->numRow = clabel->num_rows;
3140 config->numCol = clabel->num_columns;
3141 config->numSpare = 0; /* XXX should this be set here? */
3142 config->sectPerSU = clabel->sectPerSU;
3143 config->SUsPerPU = clabel->SUsPerPU;
3144 config->SUsPerRU = clabel->SUsPerRU;
3145 config->parityConfig = clabel->parityConfig;
3146 /* XXX... */
3147 strcpy(config->diskQueueType,"fifo");
3148 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3149 config->layoutSpecificSize = 0; /* XXX ?? */
3150
3151 while(ac!=NULL) {
3152 /* row/col values will be in range due to the checks
3153 in reasonable_label() */
3154 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3155 ac->devname);
3156 ac = ac->next;
3157 }
3158
3159 for(i=0;i<RF_MAXDBGV;i++) {
3160 config->debugVars[i][0] = 0;
3161 }
3162 }
3163
3164 int
3165 rf_set_autoconfig(raidPtr, new_value)
3166 RF_Raid_t *raidPtr;
3167 int new_value;
3168 {
3169 RF_ComponentLabel_t clabel;
3170 struct vnode *vp;
3171 dev_t dev;
3172 int row, column;
3173 int sparecol;
3174
3175 raidPtr->autoconfigure = new_value;
3176 for(row=0; row<raidPtr->numRow; row++) {
3177 for(column=0; column<raidPtr->numCol; column++) {
3178 if (raidPtr->Disks[row][column].status ==
3179 rf_ds_optimal) {
3180 dev = raidPtr->Disks[row][column].dev;
3181 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3182 raidread_component_label(dev, vp, &clabel);
3183 clabel.autoconfigure = new_value;
3184 raidwrite_component_label(dev, vp, &clabel);
3185 }
3186 }
3187 }
3188 for(column = 0; column < raidPtr->numSpare ; column++) {
3189 sparecol = raidPtr->numCol + column;
3190 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
3191 dev = raidPtr->Disks[0][sparecol].dev;
3192 vp = raidPtr->raid_cinfo[0][sparecol].ci_vp;
3193 raidread_component_label(dev, vp, &clabel);
3194 clabel.autoconfigure = new_value;
3195 raidwrite_component_label(dev, vp, &clabel);
3196 }
3197 }
3198 return(new_value);
3199 }
3200
3201 int
3202 rf_set_rootpartition(raidPtr, new_value)
3203 RF_Raid_t *raidPtr;
3204 int new_value;
3205 {
3206 RF_ComponentLabel_t clabel;
3207 struct vnode *vp;
3208 dev_t dev;
3209 int row, column;
3210 int sparecol;
3211
3212 raidPtr->root_partition = new_value;
3213 for(row=0; row<raidPtr->numRow; row++) {
3214 for(column=0; column<raidPtr->numCol; column++) {
3215 if (raidPtr->Disks[row][column].status ==
3216 rf_ds_optimal) {
3217 dev = raidPtr->Disks[row][column].dev;
3218 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3219 raidread_component_label(dev, vp, &clabel);
3220 clabel.root_partition = new_value;
3221 raidwrite_component_label(dev, vp, &clabel);
3222 }
3223 }
3224 }
3225 for(column = 0; column < raidPtr->numSpare ; column++) {
3226 sparecol = raidPtr->numCol + column;
3227 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
3228 dev = raidPtr->Disks[0][sparecol].dev;
3229 vp = raidPtr->raid_cinfo[0][sparecol].ci_vp;
3230 raidread_component_label(dev, vp, &clabel);
3231 clabel.root_partition = new_value;
3232 raidwrite_component_label(dev, vp, &clabel);
3233 }
3234 }
3235 return(new_value);
3236 }
3237
3238 void
3239 rf_release_all_vps(cset)
3240 RF_ConfigSet_t *cset;
3241 {
3242 RF_AutoConfig_t *ac;
3243
3244 ac = cset->ac;
3245 while(ac!=NULL) {
3246 /* Close the vp, and give it back */
3247 if (ac->vp) {
3248 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3249 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3250 vput(ac->vp);
3251 ac->vp = NULL;
3252 }
3253 ac = ac->next;
3254 }
3255 }
3256
3257
3258 void
3259 rf_cleanup_config_set(cset)
3260 RF_ConfigSet_t *cset;
3261 {
3262 RF_AutoConfig_t *ac;
3263 RF_AutoConfig_t *next_ac;
3264
3265 ac = cset->ac;
3266 while(ac!=NULL) {
3267 next_ac = ac->next;
3268 /* nuke the label */
3269 free(ac->clabel, M_RAIDFRAME);
3270 /* cleanup the config structure */
3271 free(ac, M_RAIDFRAME);
3272 /* "next.." */
3273 ac = next_ac;
3274 }
3275 /* and, finally, nuke the config set */
3276 free(cset, M_RAIDFRAME);
3277 }
3278
3279
3280 void
3281 raid_init_component_label(raidPtr, clabel)
3282 RF_Raid_t *raidPtr;
3283 RF_ComponentLabel_t *clabel;
3284 {
3285 /* current version number */
3286 clabel->version = RF_COMPONENT_LABEL_VERSION;
3287 clabel->serial_number = raidPtr->serial_number;
3288 clabel->mod_counter = raidPtr->mod_counter;
3289 clabel->num_rows = raidPtr->numRow;
3290 clabel->num_columns = raidPtr->numCol;
3291 clabel->clean = RF_RAID_DIRTY; /* not clean */
3292 clabel->status = rf_ds_optimal; /* "It's good!" */
3293
3294 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3295 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3296 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3297
3298 clabel->blockSize = raidPtr->bytesPerSector;
3299 clabel->numBlocks = raidPtr->sectorsPerDisk;
3300
3301 /* XXX not portable */
3302 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3303 clabel->maxOutstanding = raidPtr->maxOutstanding;
3304 clabel->autoconfigure = raidPtr->autoconfigure;
3305 clabel->root_partition = raidPtr->root_partition;
3306 clabel->last_unit = raidPtr->raidid;
3307 clabel->config_order = raidPtr->config_order;
3308 }
3309
3310 int
3311 rf_auto_config_set(cset,unit)
3312 RF_ConfigSet_t *cset;
3313 int *unit;
3314 {
3315 RF_Raid_t *raidPtr;
3316 RF_Config_t *config;
3317 int raidID;
3318 int retcode;
3319
3320 #if DEBUG
3321 printf("RAID autoconfigure\n");
3322 #endif
3323
3324 retcode = 0;
3325 *unit = -1;
3326
3327 /* 1. Create a config structure */
3328
3329 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3330 M_RAIDFRAME,
3331 M_NOWAIT);
3332 if (config==NULL) {
3333 printf("Out of mem!?!?\n");
3334 /* XXX do something more intelligent here. */
3335 return(1);
3336 }
3337
3338 memset(config, 0, sizeof(RF_Config_t));
3339
3340 /*
3341 2. Figure out what RAID ID this one is supposed to live at
3342 See if we can get the same RAID dev that it was configured
3343 on last time..
3344 */
3345
3346 raidID = cset->ac->clabel->last_unit;
3347 if ((raidID < 0) || (raidID >= numraid)) {
3348 /* let's not wander off into lala land. */
3349 raidID = numraid - 1;
3350 }
3351 if (raidPtrs[raidID]->valid != 0) {
3352
3353 /*
3354 Nope... Go looking for an alternative...
3355 Start high so we don't immediately use raid0 if that's
3356 not taken.
3357 */
3358
3359 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3360 if (raidPtrs[raidID]->valid == 0) {
3361 /* can use this one! */
3362 break;
3363 }
3364 }
3365 }
3366
3367 if (raidID < 0) {
3368 /* punt... */
3369 printf("Unable to auto configure this set!\n");
3370 printf("(Out of RAID devs!)\n");
3371 return(1);
3372 }
3373
3374 #if DEBUG
3375 printf("Configuring raid%d:\n",raidID);
3376 #endif
3377
3378 raidPtr = raidPtrs[raidID];
3379
3380 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3381 raidPtr->raidid = raidID;
3382 raidPtr->openings = RAIDOUTSTANDING;
3383
3384 /* 3. Build the configuration structure */
3385 rf_create_configuration(cset->ac, config, raidPtr);
3386
3387 /* 4. Do the configuration */
3388 retcode = rf_Configure(raidPtr, config, cset->ac);
3389
3390 if (retcode == 0) {
3391
3392 raidinit(raidPtrs[raidID]);
3393
3394 rf_markalldirty(raidPtrs[raidID]);
3395 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3396 if (cset->ac->clabel->root_partition==1) {
3397 /* everything configured just fine. Make a note
3398 that this set is eligible to be root. */
3399 cset->rootable = 1;
3400 /* XXX do this here? */
3401 raidPtrs[raidID]->root_partition = 1;
3402 }
3403 }
3404
3405 /* 5. Cleanup */
3406 free(config, M_RAIDFRAME);
3407
3408 *unit = raidID;
3409 return(retcode);
3410 }
3411
3412 void
3413 rf_disk_unbusy(desc)
3414 RF_RaidAccessDesc_t *desc;
3415 {
3416 struct buf *bp;
3417
3418 bp = (struct buf *)desc->bp;
3419 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3420 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3421 }
3422