rf_netbsdkintf.c revision 1.67 1 /* $NetBSD: rf_netbsdkintf.c,v 1.67 2000/03/07 02:12:13 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include "raid.h"
139 #include "opt_raid_autoconfig.h"
140 #include "rf_raid.h"
141 #include "rf_raidframe.h"
142 #include "rf_copyback.h"
143 #include "rf_dag.h"
144 #include "rf_dagflags.h"
145 #include "rf_diskqueue.h"
146 #include "rf_acctrace.h"
147 #include "rf_etimer.h"
148 #include "rf_general.h"
149 #include "rf_debugMem.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_debugprint.h"
155 #include "rf_threadstuff.h"
156 #include "rf_configure.h"
157
/*
 * Verbosity of the kernel-interface debugging output.  db1_printf()
 * prints only when this is greater than zero, and only in kernels
 * built with DEBUG.
 */
int rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- level 1 debugging printf.
 *
 * The single macro argument is a complete, parenthesized printf()
 * argument list.  Both variants expand to exactly one statement that
 * needs a trailing semicolon, so the macro can be used as the body of
 * an unbraced if/else without capturing the else (DEBUG case) or
 * leaving a stray empty statement in front of it (non-DEBUG case).
 * Without DEBUG the arguments are discarded unevaluated.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else /* DEBUG */
#define db1_printf(a) do { } while (0)
#endif /* DEBUG */
165
166 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
167
168 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
169
170 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
171 * spare table */
172 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
173 * installation process */
174
175 /* prototypes */
176 static void KernelWakeupFunc(struct buf * bp);
177 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
178 dev_t dev, RF_SectorNum_t startSect,
179 RF_SectorCount_t numSect, caddr_t buf,
180 void (*cbFunc) (struct buf *), void *cbArg,
181 int logBytesPerSector, struct proc * b_proc);
182 static void raidinit __P((RF_Raid_t *));
183
184 void raidattach __P((int));
185 int raidsize __P((dev_t));
186 int raidopen __P((dev_t, int, int, struct proc *));
187 int raidclose __P((dev_t, int, int, struct proc *));
188 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
189 int raidwrite __P((dev_t, struct uio *, int));
190 int raidread __P((dev_t, struct uio *, int));
191 void raidstrategy __P((struct buf *));
192 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
193
194 /*
195 * Pilfered from ccd.c
196 */
197
198 struct raidbuf {
199 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
200 struct buf *rf_obp; /* ptr. to original I/O buf */
201 int rf_flags; /* misc. flags */
202 RF_DiskQueueData_t *req;/* the request that this was part of.. */
203 };
204
205
206 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
207 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
208
209 /* XXX Not sure if the following should be replacing the raidPtrs above,
210 or if it should be used in conjunction with that...
211 */
212
213 struct raid_softc {
214 int sc_flags; /* flags */
215 int sc_cflags; /* configuration flags */
216 size_t sc_size; /* size of the raid device */
217 char sc_xname[20]; /* XXX external name */
218 struct disk sc_dkdev; /* generic disk device info */
219 struct pool sc_cbufpool; /* component buffer pool */
220 struct buf_queue buf_queue; /* used for the device queue */
221 };
222 /* sc_flags */
223 #define RAIDF_INITED 0x01 /* unit has been initialized */
224 #define RAIDF_WLABEL 0x02 /* label area is writable */
225 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
226 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
227 #define RAIDF_LOCKED 0x80 /* unit is locked */
228
229 #define raidunit(x) DISKUNIT(x)
230 int numraid = 0;
231
232 /*
233 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
234 * Be aware that large numbers can allow the driver to consume a lot of
235 * kernel memory, especially on writes, and in degraded mode reads.
236 *
237 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
238 * a single 64K write will typically require 64K for the old data,
239 * 64K for the old parity, and 64K for the new parity, for a total
240 * of 192K (if the parity buffer is not re-used immediately).
241 * Even if it is used immediately, that's still 128K, which when multiplied
242 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
243 *
244 * Now in degraded mode, for example, a 64K read on the above setup may
245 * require data reconstruction, which will require *all* of the 4 remaining
246 * disks to participate -- 4 * 32K/disk == 128K again.
247 */
248
249 #ifndef RAIDOUTSTANDING
250 #define RAIDOUTSTANDING 6
251 #endif
252
253 #define RAIDLABELDEV(dev) \
254 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
255
256 /* declared here, and made public, for the benefit of KVM stuff.. */
257 struct raid_softc *raid_softc;
258
259 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
260 struct disklabel *));
261 static void raidgetdisklabel __P((dev_t));
262 static void raidmakedisklabel __P((struct raid_softc *));
263
264 static int raidlock __P((struct raid_softc *));
265 static void raidunlock __P((struct raid_softc *));
266
267 static void rf_markalldirty __P((RF_Raid_t *));
268 void rf_mountroot_hook __P((struct device *));
269
270 struct device *raidrootdev;
271
272 void rf_ReconThread __P((struct rf_recon_req *));
273 /* XXX what I want is: */
274 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
275 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
276 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
277 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
278 void rf_buildroothack __P((void *));
279
280 RF_AutoConfig_t *rf_find_raid_components __P((void));
281 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
282 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
283 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
284 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
285 RF_Raid_t *));
286 int rf_set_autoconfig __P((RF_Raid_t *, int));
287 int rf_set_rootpartition __P((RF_Raid_t *, int));
288 void rf_release_all_vps __P((RF_ConfigSet_t *));
289 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
290 int rf_have_enough_components __P((RF_ConfigSet_t *));
291 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
292
293 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
294 allow autoconfig to take place.
295 Note that this is overridden by having
296 RAID_AUTOCONFIG as an option in the
297 kernel config file. */
298 extern struct device *booted_device;
299
300 void
301 raidattach(num)
302 int num;
303 {
304 int raidID;
305 int i, rc;
306 RF_AutoConfig_t *ac_list; /* autoconfig list */
307 RF_ConfigSet_t *config_sets;
308
309 #ifdef DEBUG
310 printf("raidattach: Asked for %d units\n", num);
311 #endif
312
313 if (num <= 0) {
314 #ifdef DIAGNOSTIC
315 panic("raidattach: count <= 0");
316 #endif
317 return;
318 }
319 /* This is where all the initialization stuff gets done. */
320
321 numraid = num;
322
323 /* Make some space for requested number of units... */
324
325 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
326 if (raidPtrs == NULL) {
327 panic("raidPtrs is NULL!!\n");
328 }
329
330 rc = rf_mutex_init(&rf_sparet_wait_mutex);
331 if (rc) {
332 RF_PANIC();
333 }
334
335 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
336
337 for (i = 0; i < num; i++)
338 raidPtrs[i] = NULL;
339 rc = rf_BootRaidframe();
340 if (rc == 0)
341 printf("Kernelized RAIDframe activated\n");
342 else
343 panic("Serious error booting RAID!!\n");
344
345 /* put together some datastructures like the CCD device does.. This
346 * lets us lock the device and what-not when it gets opened. */
347
348 raid_softc = (struct raid_softc *)
349 malloc(num * sizeof(struct raid_softc),
350 M_RAIDFRAME, M_NOWAIT);
351 if (raid_softc == NULL) {
352 printf("WARNING: no memory for RAIDframe driver\n");
353 return;
354 }
355
356 bzero(raid_softc, num * sizeof(struct raid_softc));
357
358 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
359 M_RAIDFRAME, M_NOWAIT);
360 if (raidrootdev == NULL) {
361 panic("No memory for RAIDframe driver!!?!?!\n");
362 }
363
364 for (raidID = 0; raidID < num; raidID++) {
365 BUFQ_INIT(&raid_softc[raidID].buf_queue);
366
367 raidrootdev[raidID].dv_class = DV_DISK;
368 raidrootdev[raidID].dv_cfdata = NULL;
369 raidrootdev[raidID].dv_unit = raidID;
370 raidrootdev[raidID].dv_parent = NULL;
371 raidrootdev[raidID].dv_flags = 0;
372 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
373
374 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
375 (RF_Raid_t *));
376 if (raidPtrs[raidID] == NULL) {
377 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
378 numraid = raidID;
379 return;
380 }
381 }
382
383 #if RAID_AUTOCONFIG
384 raidautoconfig = 1;
385 #endif
386
387 if (raidautoconfig) {
388 /* 1. locate all RAID components on the system */
389
390 #if DEBUG
391 printf("Searching for raid components...\n");
392 #endif
393 ac_list = rf_find_raid_components();
394
395 /* 2. sort them into their respective sets */
396
397 config_sets = rf_create_auto_sets(ac_list);
398
399 /* 3. evaluate each set and configure the valid ones
400 This gets done in rf_buildroothack() */
401
402 /* schedule the creation of the thread to do the
403 "/ on RAID" stuff */
404
405 kthread_create(rf_buildroothack,config_sets);
406
407 #if 0
408 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
409 #endif
410 }
411
412 }
413
414 void
415 rf_buildroothack(arg)
416 void *arg;
417 {
418 RF_ConfigSet_t *config_sets = arg;
419 RF_ConfigSet_t *cset;
420 RF_ConfigSet_t *next_cset;
421 int retcode;
422 int raidID;
423 int rootID;
424 int num_root;
425
426 num_root = 0;
427 cset = config_sets;
428 while(cset != NULL ) {
429 next_cset = cset->next;
430 if (rf_have_enough_components(cset) &&
431 cset->ac->clabel->autoconfigure==1) {
432 retcode = rf_auto_config_set(cset,&raidID);
433 if (!retcode) {
434 if (cset->rootable) {
435 rootID = raidID;
436 num_root++;
437 }
438 } else {
439 /* The autoconfig didn't work :( */
440 #if DEBUG
441 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
442 #endif
443 rf_release_all_vps(cset);
444 }
445 } else {
446 /* we're not autoconfiguring this set...
447 release the associated resources */
448 rf_release_all_vps(cset);
449 }
450 /* cleanup */
451 rf_cleanup_config_set(cset);
452 cset = next_cset;
453 }
454 if (boothowto & RB_ASKNAME) {
455 /* We don't auto-config... */
456 } else {
457 /* They didn't ask, and we found something bootable... */
458
459 if (num_root == 1) {
460 booted_device = &raidrootdev[rootID];
461 } else if (num_root > 1) {
462 /* we can't guess.. require the user to answer... */
463 boothowto |= RB_ASKNAME;
464 }
465 }
466 }
467
468
469 int
470 raidsize(dev)
471 dev_t dev;
472 {
473 struct raid_softc *rs;
474 struct disklabel *lp;
475 int part, unit, omask, size;
476
477 unit = raidunit(dev);
478 if (unit >= numraid)
479 return (-1);
480 rs = &raid_softc[unit];
481
482 if ((rs->sc_flags & RAIDF_INITED) == 0)
483 return (-1);
484
485 part = DISKPART(dev);
486 omask = rs->sc_dkdev.dk_openmask & (1 << part);
487 lp = rs->sc_dkdev.dk_label;
488
489 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
490 return (-1);
491
492 if (lp->d_partitions[part].p_fstype != FS_SWAP)
493 size = -1;
494 else
495 size = lp->d_partitions[part].p_size *
496 (lp->d_secsize / DEV_BSIZE);
497
498 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
499 return (-1);
500
501 return (size);
502
503 }
504
505 int
506 raiddump(dev, blkno, va, size)
507 dev_t dev;
508 daddr_t blkno;
509 caddr_t va;
510 size_t size;
511 {
512 /* Not implemented. */
513 return ENXIO;
514 }
515 /* ARGSUSED */
516 int
517 raidopen(dev, flags, fmt, p)
518 dev_t dev;
519 int flags, fmt;
520 struct proc *p;
521 {
522 int unit = raidunit(dev);
523 struct raid_softc *rs;
524 struct disklabel *lp;
525 int part, pmask;
526 int error = 0;
527
528 if (unit >= numraid)
529 return (ENXIO);
530 rs = &raid_softc[unit];
531
532 if ((error = raidlock(rs)) != 0)
533 return (error);
534 lp = rs->sc_dkdev.dk_label;
535
536 part = DISKPART(dev);
537 pmask = (1 << part);
538
539 db1_printf(("Opening raid device number: %d partition: %d\n",
540 unit, part));
541
542
543 if ((rs->sc_flags & RAIDF_INITED) &&
544 (rs->sc_dkdev.dk_openmask == 0))
545 raidgetdisklabel(dev);
546
547 /* make sure that this partition exists */
548
549 if (part != RAW_PART) {
550 db1_printf(("Not a raw partition..\n"));
551 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
552 ((part >= lp->d_npartitions) ||
553 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
554 error = ENXIO;
555 raidunlock(rs);
556 db1_printf(("Bailing out...\n"));
557 return (error);
558 }
559 }
560 /* Prevent this unit from being unconfigured while open. */
561 switch (fmt) {
562 case S_IFCHR:
563 rs->sc_dkdev.dk_copenmask |= pmask;
564 break;
565
566 case S_IFBLK:
567 rs->sc_dkdev.dk_bopenmask |= pmask;
568 break;
569 }
570
571 if ((rs->sc_dkdev.dk_openmask == 0) &&
572 ((rs->sc_flags & RAIDF_INITED) != 0)) {
573 /* First one... mark things as dirty... Note that we *MUST*
574 have done a configure before this. I DO NOT WANT TO BE
575 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
576 THAT THEY BELONG TOGETHER!!!!! */
577 /* XXX should check to see if we're only open for reading
578 here... If so, we needn't do this, but then need some
579 other way of keeping track of what's happened.. */
580
581 rf_markalldirty( raidPtrs[unit] );
582 }
583
584
585 rs->sc_dkdev.dk_openmask =
586 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
587
588 raidunlock(rs);
589
590 return (error);
591
592
593 }
594 /* ARGSUSED */
595 int
596 raidclose(dev, flags, fmt, p)
597 dev_t dev;
598 int flags, fmt;
599 struct proc *p;
600 {
601 int unit = raidunit(dev);
602 struct raid_softc *rs;
603 int error = 0;
604 int part;
605
606 if (unit >= numraid)
607 return (ENXIO);
608 rs = &raid_softc[unit];
609
610 if ((error = raidlock(rs)) != 0)
611 return (error);
612
613 part = DISKPART(dev);
614
615 /* ...that much closer to allowing unconfiguration... */
616 switch (fmt) {
617 case S_IFCHR:
618 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
619 break;
620
621 case S_IFBLK:
622 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
623 break;
624 }
625 rs->sc_dkdev.dk_openmask =
626 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
627
628 if ((rs->sc_dkdev.dk_openmask == 0) &&
629 ((rs->sc_flags & RAIDF_INITED) != 0)) {
630 /* Last one... device is not unconfigured yet.
631 Device shutdown has taken care of setting the
632 clean bits if RAIDF_INITED is not set
633 mark things as clean... */
634 #if 0
635 printf("Last one on raid%d. Updating status.\n",unit);
636 #endif
637 rf_final_update_component_labels( raidPtrs[unit] );
638 }
639
640 raidunlock(rs);
641 return (0);
642
643 }
644
645 void
646 raidstrategy(bp)
647 register struct buf *bp;
648 {
649 register int s;
650
651 unsigned int raidID = raidunit(bp->b_dev);
652 RF_Raid_t *raidPtr;
653 struct raid_softc *rs = &raid_softc[raidID];
654 struct disklabel *lp;
655 int wlabel;
656
657 if ((rs->sc_flags & RAIDF_INITED) ==0) {
658 bp->b_error = ENXIO;
659 bp->b_flags = B_ERROR;
660 bp->b_resid = bp->b_bcount;
661 biodone(bp);
662 return;
663 }
664 if (raidID >= numraid || !raidPtrs[raidID]) {
665 bp->b_error = ENODEV;
666 bp->b_flags |= B_ERROR;
667 bp->b_resid = bp->b_bcount;
668 biodone(bp);
669 return;
670 }
671 raidPtr = raidPtrs[raidID];
672 if (!raidPtr->valid) {
673 bp->b_error = ENODEV;
674 bp->b_flags |= B_ERROR;
675 bp->b_resid = bp->b_bcount;
676 biodone(bp);
677 return;
678 }
679 if (bp->b_bcount == 0) {
680 db1_printf(("b_bcount is zero..\n"));
681 biodone(bp);
682 return;
683 }
684 lp = rs->sc_dkdev.dk_label;
685
686 /*
687 * Do bounds checking and adjust transfer. If there's an
688 * error, the bounds check will flag that for us.
689 */
690
691 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
692 if (DISKPART(bp->b_dev) != RAW_PART)
693 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
694 db1_printf(("Bounds check failed!!:%d %d\n",
695 (int) bp->b_blkno, (int) wlabel));
696 biodone(bp);
697 return;
698 }
699 s = splbio();
700
701 bp->b_resid = 0;
702
703 /* stuff it onto our queue */
704 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
705
706 raidstart(raidPtrs[raidID]);
707
708 splx(s);
709 }
710 /* ARGSUSED */
711 int
712 raidread(dev, uio, flags)
713 dev_t dev;
714 struct uio *uio;
715 int flags;
716 {
717 int unit = raidunit(dev);
718 struct raid_softc *rs;
719 int part;
720
721 if (unit >= numraid)
722 return (ENXIO);
723 rs = &raid_softc[unit];
724
725 if ((rs->sc_flags & RAIDF_INITED) == 0)
726 return (ENXIO);
727 part = DISKPART(dev);
728
729 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
730
731 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
732
733 }
734 /* ARGSUSED */
735 int
736 raidwrite(dev, uio, flags)
737 dev_t dev;
738 struct uio *uio;
739 int flags;
740 {
741 int unit = raidunit(dev);
742 struct raid_softc *rs;
743
744 if (unit >= numraid)
745 return (ENXIO);
746 rs = &raid_softc[unit];
747
748 if ((rs->sc_flags & RAIDF_INITED) == 0)
749 return (ENXIO);
750 db1_printf(("raidwrite\n"));
751 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
752
753 }
754
755 int
756 raidioctl(dev, cmd, data, flag, p)
757 dev_t dev;
758 u_long cmd;
759 caddr_t data;
760 int flag;
761 struct proc *p;
762 {
763 int unit = raidunit(dev);
764 int error = 0;
765 int part, pmask;
766 struct raid_softc *rs;
767 RF_Config_t *k_cfg, *u_cfg;
768 RF_Raid_t *raidPtr;
769 RF_RaidDisk_t *diskPtr;
770 RF_AccTotals_t *totals;
771 RF_DeviceConfig_t *d_cfg, **ucfgp;
772 u_char *specific_buf;
773 int retcode = 0;
774 int row;
775 int column;
776 struct rf_recon_req *rrcopy, *rr;
777 RF_ComponentLabel_t *clabel;
778 RF_ComponentLabel_t ci_label;
779 RF_ComponentLabel_t **clabel_ptr;
780 RF_SingleComponent_t *sparePtr,*componentPtr;
781 RF_SingleComponent_t hot_spare;
782 RF_SingleComponent_t component;
783 int i, j, d;
784
785 if (unit >= numraid)
786 return (ENXIO);
787 rs = &raid_softc[unit];
788 raidPtr = raidPtrs[unit];
789
790 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
791 (int) DISKPART(dev), (int) unit, (int) cmd));
792
793 /* Must be open for writes for these commands... */
794 switch (cmd) {
795 case DIOCSDINFO:
796 case DIOCWDINFO:
797 case DIOCWLABEL:
798 if ((flag & FWRITE) == 0)
799 return (EBADF);
800 }
801
802 /* Must be initialized for these... */
803 switch (cmd) {
804 case DIOCGDINFO:
805 case DIOCSDINFO:
806 case DIOCWDINFO:
807 case DIOCGPART:
808 case DIOCWLABEL:
809 case DIOCGDEFLABEL:
810 case RAIDFRAME_SHUTDOWN:
811 case RAIDFRAME_REWRITEPARITY:
812 case RAIDFRAME_GET_INFO:
813 case RAIDFRAME_RESET_ACCTOTALS:
814 case RAIDFRAME_GET_ACCTOTALS:
815 case RAIDFRAME_KEEP_ACCTOTALS:
816 case RAIDFRAME_GET_SIZE:
817 case RAIDFRAME_FAIL_DISK:
818 case RAIDFRAME_COPYBACK:
819 case RAIDFRAME_CHECK_RECON_STATUS:
820 case RAIDFRAME_GET_COMPONENT_LABEL:
821 case RAIDFRAME_SET_COMPONENT_LABEL:
822 case RAIDFRAME_ADD_HOT_SPARE:
823 case RAIDFRAME_REMOVE_HOT_SPARE:
824 case RAIDFRAME_INIT_LABELS:
825 case RAIDFRAME_REBUILD_IN_PLACE:
826 case RAIDFRAME_CHECK_PARITY:
827 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
828 case RAIDFRAME_CHECK_COPYBACK_STATUS:
829 case RAIDFRAME_SET_AUTOCONFIG:
830 case RAIDFRAME_SET_ROOT:
831 if ((rs->sc_flags & RAIDF_INITED) == 0)
832 return (ENXIO);
833 }
834
835 switch (cmd) {
836
837 /* configure the system */
838 case RAIDFRAME_CONFIGURE:
839
840 if (raidPtr->valid) {
841 /* There is a valid RAID set running on this unit! */
842 printf("raid%d: Device already configured!\n",unit);
843 return(EINVAL);
844 }
845
846 /* copy-in the configuration information */
847 /* data points to a pointer to the configuration structure */
848
849 u_cfg = *((RF_Config_t **) data);
850 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
851 if (k_cfg == NULL) {
852 return (ENOMEM);
853 }
854 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
855 sizeof(RF_Config_t));
856 if (retcode) {
857 RF_Free(k_cfg, sizeof(RF_Config_t));
858 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
859 retcode));
860 return (retcode);
861 }
862 /* allocate a buffer for the layout-specific data, and copy it
863 * in */
864 if (k_cfg->layoutSpecificSize) {
865 if (k_cfg->layoutSpecificSize > 10000) {
866 /* sanity check */
867 RF_Free(k_cfg, sizeof(RF_Config_t));
868 return (EINVAL);
869 }
870 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
871 (u_char *));
872 if (specific_buf == NULL) {
873 RF_Free(k_cfg, sizeof(RF_Config_t));
874 return (ENOMEM);
875 }
876 retcode = copyin(k_cfg->layoutSpecific,
877 (caddr_t) specific_buf,
878 k_cfg->layoutSpecificSize);
879 if (retcode) {
880 RF_Free(k_cfg, sizeof(RF_Config_t));
881 RF_Free(specific_buf,
882 k_cfg->layoutSpecificSize);
883 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
884 retcode));
885 return (retcode);
886 }
887 } else
888 specific_buf = NULL;
889 k_cfg->layoutSpecific = specific_buf;
890
891 /* should do some kind of sanity check on the configuration.
892 * Store the sum of all the bytes in the last byte? */
893
894 /* configure the system */
895
896 /*
897 * Clear the entire RAID descriptor, just to make sure
898 * there is no stale data left in the case of a
899 * reconfiguration
900 */
901 bzero((char *) raidPtr, sizeof(RF_Raid_t));
902 raidPtr->raidid = unit;
903
904 retcode = rf_Configure(raidPtr, k_cfg, NULL);
905
906 if (retcode == 0) {
907
908 /* allow this many simultaneous IO's to
909 this RAID device */
910 raidPtr->openings = RAIDOUTSTANDING;
911
912 raidinit(raidPtr);
913 rf_markalldirty(raidPtr);
914 }
915 /* free the buffers. No return code here. */
916 if (k_cfg->layoutSpecificSize) {
917 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
918 }
919 RF_Free(k_cfg, sizeof(RF_Config_t));
920
921 return (retcode);
922
923 /* shutdown the system */
924 case RAIDFRAME_SHUTDOWN:
925
926 if ((error = raidlock(rs)) != 0)
927 return (error);
928
929 /*
930 * If somebody has a partition mounted, we shouldn't
931 * shutdown.
932 */
933
934 part = DISKPART(dev);
935 pmask = (1 << part);
936 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
937 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
938 (rs->sc_dkdev.dk_copenmask & pmask))) {
939 raidunlock(rs);
940 return (EBUSY);
941 }
942
943 retcode = rf_Shutdown(raidPtr);
944
945 pool_destroy(&rs->sc_cbufpool);
946
947 /* It's no longer initialized... */
948 rs->sc_flags &= ~RAIDF_INITED;
949
950 /* Detach the disk. */
951 disk_detach(&rs->sc_dkdev);
952
953 raidunlock(rs);
954
955 return (retcode);
956 case RAIDFRAME_GET_COMPONENT_LABEL:
957 clabel_ptr = (RF_ComponentLabel_t **) data;
958 /* need to read the component label for the disk indicated
959 by row,column in clabel */
960
961 /* For practice, let's get it directly fromdisk, rather
962 than from the in-core copy */
963 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
964 (RF_ComponentLabel_t *));
965 if (clabel == NULL)
966 return (ENOMEM);
967
968 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
969
970 retcode = copyin( *clabel_ptr, clabel,
971 sizeof(RF_ComponentLabel_t));
972
973 if (retcode) {
974 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
975 return(retcode);
976 }
977
978 row = clabel->row;
979 column = clabel->column;
980
981 if ((row < 0) || (row >= raidPtr->numRow) ||
982 (column < 0) || (column >= raidPtr->numCol)) {
983 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
984 return(EINVAL);
985 }
986
987 raidread_component_label(raidPtr->Disks[row][column].dev,
988 raidPtr->raid_cinfo[row][column].ci_vp,
989 clabel );
990
991 retcode = copyout((caddr_t) clabel,
992 (caddr_t) *clabel_ptr,
993 sizeof(RF_ComponentLabel_t));
994 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
995 return (retcode);
996
997 case RAIDFRAME_SET_COMPONENT_LABEL:
998 clabel = (RF_ComponentLabel_t *) data;
999
1000 /* XXX check the label for valid stuff... */
1001 /* Note that some things *should not* get modified --
1002 the user should be re-initing the labels instead of
1003 trying to patch things.
1004 */
1005
1006 printf("Got component label:\n");
1007 printf("Version: %d\n",clabel->version);
1008 printf("Serial Number: %d\n",clabel->serial_number);
1009 printf("Mod counter: %d\n",clabel->mod_counter);
1010 printf("Row: %d\n", clabel->row);
1011 printf("Column: %d\n", clabel->column);
1012 printf("Num Rows: %d\n", clabel->num_rows);
1013 printf("Num Columns: %d\n", clabel->num_columns);
1014 printf("Clean: %d\n", clabel->clean);
1015 printf("Status: %d\n", clabel->status);
1016
1017 row = clabel->row;
1018 column = clabel->column;
1019
1020 if ((row < 0) || (row >= raidPtr->numRow) ||
1021 (column < 0) || (column >= raidPtr->numCol)) {
1022 return(EINVAL);
1023 }
1024
1025 /* XXX this isn't allowed to do anything for now :-) */
1026
1027 /* XXX and before it is, we need to fill in the rest
1028 of the fields!?!?!?! */
1029 #if 0
1030 raidwrite_component_label(
1031 raidPtr->Disks[row][column].dev,
1032 raidPtr->raid_cinfo[row][column].ci_vp,
1033 clabel );
1034 #endif
1035 return (0);
1036
1037 case RAIDFRAME_INIT_LABELS:
1038 clabel = (RF_ComponentLabel_t *) data;
1039 /*
1040 we only want the serial number from
1041 the above. We get all the rest of the information
1042 from the config that was used to create this RAID
1043 set.
1044 */
1045
1046 raidPtr->serial_number = clabel->serial_number;
1047
1048 raid_init_component_label(raidPtr, &ci_label);
1049 ci_label.serial_number = clabel->serial_number;
1050
1051 for(row=0;row<raidPtr->numRow;row++) {
1052 ci_label.row = row;
1053 for(column=0;column<raidPtr->numCol;column++) {
1054 diskPtr = &raidPtr->Disks[row][column];
1055 ci_label.partitionSize = diskPtr->partitionSize;
1056 ci_label.column = column;
1057 raidwrite_component_label(
1058 raidPtr->Disks[row][column].dev,
1059 raidPtr->raid_cinfo[row][column].ci_vp,
1060 &ci_label );
1061 }
1062 }
1063
1064 return (retcode);
1065 case RAIDFRAME_SET_AUTOCONFIG:
1066 d = rf_set_autoconfig(raidPtr, *data);
1067 printf("New autoconfig value is: %d\n", d);
1068 *data = d;
1069 return (retcode);
1070
1071 case RAIDFRAME_SET_ROOT:
1072 d = rf_set_rootpartition(raidPtr, *data);
1073 printf("New rootpartition value is: %d\n", d);
1074 *data = d;
1075 return (retcode);
1076
1077 /* initialize all parity */
1078 case RAIDFRAME_REWRITEPARITY:
1079
1080 if (raidPtr->Layout.map->faultsTolerated == 0) {
1081 /* Parity for RAID 0 is trivially correct */
1082 raidPtr->parity_good = RF_RAID_CLEAN;
1083 return(0);
1084 }
1085
1086 if (raidPtr->parity_rewrite_in_progress == 1) {
1087 /* Re-write is already in progress! */
1088 return(EINVAL);
1089 }
1090
1091 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1092 rf_RewriteParityThread,
1093 raidPtr,"raid_parity");
1094 return (retcode);
1095
1096
1097 case RAIDFRAME_ADD_HOT_SPARE:
1098 sparePtr = (RF_SingleComponent_t *) data;
1099 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1100 printf("Adding spare\n");
1101 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1102 return(retcode);
1103
1104 case RAIDFRAME_REMOVE_HOT_SPARE:
1105 return(retcode);
1106
1107 case RAIDFRAME_REBUILD_IN_PLACE:
1108
1109 if (raidPtr->Layout.map->faultsTolerated == 0) {
1110 /* Can't do this on a RAID 0!! */
1111 return(EINVAL);
1112 }
1113
1114 if (raidPtr->recon_in_progress == 1) {
1115 /* a reconstruct is already in progress! */
1116 return(EINVAL);
1117 }
1118
1119 componentPtr = (RF_SingleComponent_t *) data;
1120 memcpy( &component, componentPtr,
1121 sizeof(RF_SingleComponent_t));
1122 row = component.row;
1123 column = component.column;
1124 printf("Rebuild: %d %d\n",row, column);
1125 if ((row < 0) || (row >= raidPtr->numRow) ||
1126 (column < 0) || (column >= raidPtr->numCol)) {
1127 return(EINVAL);
1128 }
1129
1130 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1131 if (rrcopy == NULL)
1132 return(ENOMEM);
1133
1134 rrcopy->raidPtr = (void *) raidPtr;
1135 rrcopy->row = row;
1136 rrcopy->col = column;
1137
1138 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1139 rf_ReconstructInPlaceThread,
1140 rrcopy,"raid_reconip");
1141 return(retcode);
1142
1143 case RAIDFRAME_GET_INFO:
1144 if (!raidPtr->valid)
1145 return (ENODEV);
1146 ucfgp = (RF_DeviceConfig_t **) data;
1147 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1148 (RF_DeviceConfig_t *));
1149 if (d_cfg == NULL)
1150 return (ENOMEM);
1151 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1152 d_cfg->rows = raidPtr->numRow;
1153 d_cfg->cols = raidPtr->numCol;
1154 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1155 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1156 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1157 return (ENOMEM);
1158 }
1159 d_cfg->nspares = raidPtr->numSpare;
1160 if (d_cfg->nspares >= RF_MAX_DISKS) {
1161 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1162 return (ENOMEM);
1163 }
1164 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1165 d = 0;
1166 for (i = 0; i < d_cfg->rows; i++) {
1167 for (j = 0; j < d_cfg->cols; j++) {
1168 d_cfg->devs[d] = raidPtr->Disks[i][j];
1169 d++;
1170 }
1171 }
1172 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1173 d_cfg->spares[i] = raidPtr->Disks[0][j];
1174 }
1175 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1176 sizeof(RF_DeviceConfig_t));
1177 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1178
1179 return (retcode);
1180
1181 case RAIDFRAME_CHECK_PARITY:
1182 *(int *) data = raidPtr->parity_good;
1183 return (0);
1184
1185 case RAIDFRAME_RESET_ACCTOTALS:
1186 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1187 return (0);
1188
1189 case RAIDFRAME_GET_ACCTOTALS:
1190 totals = (RF_AccTotals_t *) data;
1191 *totals = raidPtr->acc_totals;
1192 return (0);
1193
1194 case RAIDFRAME_KEEP_ACCTOTALS:
1195 raidPtr->keep_acc_totals = *(int *)data;
1196 return (0);
1197
1198 case RAIDFRAME_GET_SIZE:
1199 *(int *) data = raidPtr->totalSectors;
1200 return (0);
1201
1202 /* fail a disk & optionally start reconstruction */
1203 case RAIDFRAME_FAIL_DISK:
1204
1205 if (raidPtr->Layout.map->faultsTolerated == 0) {
1206 /* Can't do this on a RAID 0!! */
1207 return(EINVAL);
1208 }
1209
1210 rr = (struct rf_recon_req *) data;
1211
1212 if (rr->row < 0 || rr->row >= raidPtr->numRow
1213 || rr->col < 0 || rr->col >= raidPtr->numCol)
1214 return (EINVAL);
1215
1216 printf("raid%d: Failing the disk: row: %d col: %d\n",
1217 unit, rr->row, rr->col);
1218
1219 /* make a copy of the recon request so that we don't rely on
1220 * the user's buffer */
1221 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1222 if (rrcopy == NULL)
1223 return(ENOMEM);
1224 bcopy(rr, rrcopy, sizeof(*rr));
1225 rrcopy->raidPtr = (void *) raidPtr;
1226
1227 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1228 rf_ReconThread,
1229 rrcopy,"raid_recon");
1230 return (0);
1231
1232 /* invoke a copyback operation after recon on whatever disk
1233 * needs it, if any */
1234 case RAIDFRAME_COPYBACK:
1235
1236 if (raidPtr->Layout.map->faultsTolerated == 0) {
1237 /* This makes no sense on a RAID 0!! */
1238 return(EINVAL);
1239 }
1240
1241 if (raidPtr->copyback_in_progress == 1) {
1242 /* Copyback is already in progress! */
1243 return(EINVAL);
1244 }
1245
1246 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1247 rf_CopybackThread,
1248 raidPtr,"raid_copyback");
1249 return (retcode);
1250
1251 /* return the percentage completion of reconstruction */
1252 case RAIDFRAME_CHECK_RECON_STATUS:
1253 if (raidPtr->Layout.map->faultsTolerated == 0) {
1254 /* This makes no sense on a RAID 0 */
1255 return(EINVAL);
1256 }
1257 row = 0; /* XXX we only consider a single row... */
1258 if (raidPtr->status[row] != rf_rs_reconstructing)
1259 *(int *) data = 100;
1260 else
1261 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1262 return (0);
1263
1264 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1265 if (raidPtr->Layout.map->faultsTolerated == 0) {
1266 /* This makes no sense on a RAID 0 */
1267 return(EINVAL);
1268 }
1269 if (raidPtr->parity_rewrite_in_progress == 1) {
1270 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1271 } else {
1272 *(int *) data = 100;
1273 }
1274 return (0);
1275
1276 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1277 if (raidPtr->Layout.map->faultsTolerated == 0) {
1278 /* This makes no sense on a RAID 0 */
1279 return(EINVAL);
1280 }
1281 if (raidPtr->copyback_in_progress == 1) {
1282 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1283 raidPtr->Layout.numStripe;
1284 } else {
1285 *(int *) data = 100;
1286 }
1287 return (0);
1288
1289
1290 /* the sparetable daemon calls this to wait for the kernel to
1291 * need a spare table. this ioctl does not return until a
1292 * spare table is needed. XXX -- calling mpsleep here in the
1293 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1294 * -- I should either compute the spare table in the kernel,
1295 * or have a different -- XXX XXX -- interface (a different
1296 * character device) for delivering the table -- XXX */
1297 #if 0
1298 case RAIDFRAME_SPARET_WAIT:
1299 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1300 while (!rf_sparet_wait_queue)
1301 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1302 waitreq = rf_sparet_wait_queue;
1303 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1304 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1305
1306 /* structure assignment */
1307 *((RF_SparetWait_t *) data) = *waitreq;
1308
1309 RF_Free(waitreq, sizeof(*waitreq));
1310 return (0);
1311
		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
1314 case RAIDFRAME_ABORT_SPARET_WAIT:
1315 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1316 waitreq->fcol = -1;
1317 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1318 waitreq->next = rf_sparet_wait_queue;
1319 rf_sparet_wait_queue = waitreq;
1320 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1321 wakeup(&rf_sparet_wait_queue);
1322 return (0);
1323
1324 /* used by the spare table daemon to deliver a spare table
1325 * into the kernel */
1326 case RAIDFRAME_SEND_SPARET:
1327
1328 /* install the spare table */
1329 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1330
1331 /* respond to the requestor. the return status of the spare
1332 * table installation is passed in the "fcol" field */
1333 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1334 waitreq->fcol = retcode;
1335 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1336 waitreq->next = rf_sparet_resp_queue;
1337 rf_sparet_resp_queue = waitreq;
1338 wakeup(&rf_sparet_resp_queue);
1339 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1340
1341 return (retcode);
1342 #endif
1343
1344 default:
1345 break; /* fall through to the os-specific code below */
1346
1347 }
1348
1349 if (!raidPtr->valid)
1350 return (EINVAL);
1351
1352 /*
1353 * Add support for "regular" device ioctls here.
1354 */
1355
1356 switch (cmd) {
1357 case DIOCGDINFO:
1358 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1359 break;
1360
1361 case DIOCGPART:
1362 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1363 ((struct partinfo *) data)->part =
1364 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1365 break;
1366
1367 case DIOCWDINFO:
1368 case DIOCSDINFO:
1369 if ((error = raidlock(rs)) != 0)
1370 return (error);
1371
1372 rs->sc_flags |= RAIDF_LABELLING;
1373
1374 error = setdisklabel(rs->sc_dkdev.dk_label,
1375 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1376 if (error == 0) {
1377 if (cmd == DIOCWDINFO)
1378 error = writedisklabel(RAIDLABELDEV(dev),
1379 raidstrategy, rs->sc_dkdev.dk_label,
1380 rs->sc_dkdev.dk_cpulabel);
1381 }
1382 rs->sc_flags &= ~RAIDF_LABELLING;
1383
1384 raidunlock(rs);
1385
1386 if (error)
1387 return (error);
1388 break;
1389
1390 case DIOCWLABEL:
1391 if (*(int *) data != 0)
1392 rs->sc_flags |= RAIDF_WLABEL;
1393 else
1394 rs->sc_flags &= ~RAIDF_WLABEL;
1395 break;
1396
1397 case DIOCGDEFLABEL:
1398 raidgetdefaultlabel(raidPtr, rs,
1399 (struct disklabel *) data);
1400 break;
1401
1402 default:
1403 retcode = ENOTTY;
1404 }
1405 return (retcode);
1406
1407 }
1408
1409
1410 /* raidinit -- complete the rest of the initialization for the
1411 RAIDframe device. */
1412
1413
1414 static void
1415 raidinit(raidPtr)
1416 RF_Raid_t *raidPtr;
1417 {
1418 struct raid_softc *rs;
1419 int unit;
1420
1421 unit = raidPtr->raidid;
1422
1423 rs = &raid_softc[unit];
1424 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1425 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1426
1427
1428 /* XXX should check return code first... */
1429 rs->sc_flags |= RAIDF_INITED;
1430
1431 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1432
1433 rs->sc_dkdev.dk_name = rs->sc_xname;
1434
1435 /* disk_attach actually creates space for the CPU disklabel, among
1436 * other things, so it's critical to call this *BEFORE* we try putzing
1437 * with disklabels. */
1438
1439 disk_attach(&rs->sc_dkdev);
1440
1441 /* XXX There may be a weird interaction here between this, and
1442 * protectedSectors, as used in RAIDframe. */
1443
1444 rs->sc_size = raidPtr->totalSectors;
1445
1446 }
1447
1448 /* wake up the daemon & tell it to get us a spare table
1449 * XXX
1450 * the entries in the queues should be tagged with the raidPtr
1451 * so that in the extremely rare case that two recons happen at once,
1452 * we know for which device were requesting a spare table
1453 * XXX
1454 *
1455 * XXX This code is not currently used. GO
1456 */
1457 int
1458 rf_GetSpareTableFromDaemon(req)
1459 RF_SparetWait_t *req;
1460 {
1461 int retcode;
1462
1463 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1464 req->next = rf_sparet_wait_queue;
1465 rf_sparet_wait_queue = req;
1466 wakeup(&rf_sparet_wait_queue);
1467
1468 /* mpsleep unlocks the mutex */
1469 while (!rf_sparet_resp_queue) {
1470 tsleep(&rf_sparet_resp_queue, PRIBIO,
1471 "raidframe getsparetable", 0);
1472 }
1473 req = rf_sparet_resp_queue;
1474 rf_sparet_resp_queue = req->next;
1475 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1476
1477 retcode = req->fcol;
1478 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1479 * alloc'd */
1480 return (retcode);
1481 }
1482
/*
 * raidstart -- drain the unit's buffer queue into RAIDframe.
 *
 * A wrapper around rf_DoAccess that extracts the appropriate info from
 * each queued bp and passes it down.  Any calls originating in the
 * kernel must use non-blocking I/O.  Some extra sanity checking is done
 * to return "appropriate" error values for certain conditions (to make
 * some standard utilities work).
 *
 * For as long as raidPtr->openings is positive, the next buf is removed
 * from rs->buf_queue, its partition-relative block number is converted
 * to an array-relative one, the request is range- and alignment-checked,
 * and it is submitted with rf_DoAccess().  Requests that fail a check are
 * completed immediately via biodone() with B_ERROR set.
 *
 * Locking: raidPtr->mutex is held whenever the loop condition is
 * evaluated and is dropped while a buf is being processed; every
 * "continue" re-acquires it first.  The early return on an empty queue
 * happens with the mutex released.
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int     retcode;
	struct partition *pp;
	daddr_t blocknum;
	int     unit;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	/* Only one pending failure is consumed per call. */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		rf_update_component_labels(raidPtr);
		raidPtr->numNewFailures--;
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);

	/* Check to see if we're at the limit... */
	RF_LOCK_MUTEX(raidPtr->mutex);
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}
		BUFQ_REMOVE(&rs->buf_queue, bp);

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/*
		 * num_blocks is the byte count shifted down by
		 * logBytesPerSector; pb adds one more sector when any of the
		 * bits selected by sectorMask are set in the byte count.
		 */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* The "1 ||" makes this debug print unconditional. */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/*
		 * Reject requests that run past the end of the array; the
		 * "sum < ..." comparisons catch wrap-around in the addition.
		 */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* re-take the mutex for the loop condition */
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject byte counts with any sectorMask bits set. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* re-take the mutex for the loop condition */
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one opening for the request about to be issued. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */


		/* retcode is stored but not examined in this function. */
		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				      do_async, raid_addr, num_blocks,
				      bp->b_un.b_addr, bp, NULL, NULL,
				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);


		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1608
1609
1610
1611
1612 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1613
1614 int
1615 rf_DispatchKernelIO(queue, req)
1616 RF_DiskQueue_t *queue;
1617 RF_DiskQueueData_t *req;
1618 {
1619 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1620 struct buf *bp;
1621 struct raidbuf *raidbp = NULL;
1622 struct raid_softc *rs;
1623 int unit;
1624 int s;
1625
1626 s=0;
1627 /* s = splbio();*/ /* want to test this */
1628 /* XXX along with the vnode, we also need the softc associated with
1629 * this device.. */
1630
1631 req->queue = queue;
1632
1633 unit = queue->raidPtr->raidid;
1634
1635 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1636
1637 if (unit >= numraid) {
1638 printf("Invalid unit number: %d %d\n", unit, numraid);
1639 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1640 }
1641 rs = &raid_softc[unit];
1642
1643 /* XXX is this the right place? */
1644 disk_busy(&rs->sc_dkdev);
1645
1646 bp = req->bp;
1647 #if 1
1648 /* XXX when there is a physical disk failure, someone is passing us a
1649 * buffer that contains old stuff!! Attempt to deal with this problem
1650 * without taking a performance hit... (not sure where the real bug
1651 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1652
1653 if (bp->b_flags & B_ERROR) {
1654 bp->b_flags &= ~B_ERROR;
1655 }
1656 if (bp->b_error != 0) {
1657 bp->b_error = 0;
1658 }
1659 #endif
1660 raidbp = RAIDGETBUF(rs);
1661
1662 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1663
1664 /*
1665 * context for raidiodone
1666 */
1667 raidbp->rf_obp = bp;
1668 raidbp->req = req;
1669
1670 LIST_INIT(&raidbp->rf_buf.b_dep);
1671
1672 switch (req->type) {
1673 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1674 /* XXX need to do something extra here.. */
1675 /* I'm leaving this in, as I've never actually seen it used,
1676 * and I'd like folks to report it... GO */
1677 printf(("WAKEUP CALLED\n"));
1678 queue->numOutstanding++;
1679
1680 /* XXX need to glue the original buffer into this?? */
1681
1682 KernelWakeupFunc(&raidbp->rf_buf);
1683 break;
1684
1685 case RF_IO_TYPE_READ:
1686 case RF_IO_TYPE_WRITE:
1687
1688 if (req->tracerec) {
1689 RF_ETIMER_START(req->tracerec->timer);
1690 }
1691 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1692 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1693 req->sectorOffset, req->numSector,
1694 req->buf, KernelWakeupFunc, (void *) req,
1695 queue->raidPtr->logBytesPerSector, req->b_proc);
1696
1697 if (rf_debugKernelAccess) {
1698 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1699 (long) bp->b_blkno));
1700 }
1701 queue->numOutstanding++;
1702 queue->last_deq_sector = req->sectorOffset;
1703 /* acc wouldn't have been let in if there were any pending
1704 * reqs at any other priority */
1705 queue->curPriority = req->priority;
1706
1707 db1_printf(("Going for %c to unit %d row %d col %d\n",
1708 req->type, unit, queue->row, queue->col));
1709 db1_printf(("sector %d count %d (%d bytes) %d\n",
1710 (int) req->sectorOffset, (int) req->numSector,
1711 (int) (req->numSector <<
1712 queue->raidPtr->logBytesPerSector),
1713 (int) queue->raidPtr->logBytesPerSector));
1714 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1715 raidbp->rf_buf.b_vp->v_numoutput++;
1716 }
1717 VOP_STRATEGY(&raidbp->rf_buf);
1718
1719 break;
1720
1721 default:
1722 panic("bad req->type in rf_DispatchKernelIO");
1723 }
1724 db1_printf(("Exiting from DispatchKernelIO\n"));
1725 /* splx(s); */ /* want to test this */
1726 return (0);
1727 }
/*
 * KernelWakeupFunc -- completion callback for I/O issued by
 * rf_DispatchKernelIO().
 *
 * vbp is the buf embedded in a struct raidbuf; the raidbuf is recovered
 * by casting vbp (presumably rf_buf is the first member of struct
 * raidbuf -- TODO confirm).  Error status, residual and byte count are
 * copied from the component buf to the originating buf, trace timing is
 * accumulated if the request carries a trace record, and on error the
 * component is marked failed (once).  The raidbuf is then returned to
 * the pool and the request is completed through rf_DiskIOComplete() and
 * the request's CompleteFunc.  The whole body runs between splbio() and
 * splx().
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int     unit;
	register int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	/* Context stored by rf_DispatchKernelIO(). */
	req = raidbp->req;

	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	/* Propagate a component error; default to EIO if none was given. */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* The same elapsed time is added to both diskwait_us and phys_io_us. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* raidstart() checks numNewFailures to decide whether
			 * to update the component labels */
			queue->raidPtr->numNewFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* raidbp must not be touched after this point. */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	/*
	 * NOTE(review): rf_DispatchKernelIO() calls disk_busy() for every
	 * request, but disk_unbusy() is only called here when b_resid is
	 * zero -- confirm whether short/failed transfers are meant to leave
	 * the busy count raised.
	 */
	if (bp->b_resid == 0) {
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	}

	/* Second argument is 1 if the originating buf has B_ERROR set. */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);

	splx(s);
}
1812
1813
1814
1815 /*
1816 * initialize a buf structure for doing an I/O in the kernel.
1817 */
1818 static void
1819 InitBP(
1820 struct buf * bp,
1821 struct vnode * b_vp,
1822 unsigned rw_flag,
1823 dev_t dev,
1824 RF_SectorNum_t startSect,
1825 RF_SectorCount_t numSect,
1826 caddr_t buf,
1827 void (*cbFunc) (struct buf *),
1828 void *cbArg,
1829 int logBytesPerSector,
1830 struct proc * b_proc)
1831 {
1832 /* bp->b_flags = B_PHYS | rw_flag; */
1833 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1834 bp->b_bcount = numSect << logBytesPerSector;
1835 bp->b_bufsize = bp->b_bcount;
1836 bp->b_error = 0;
1837 bp->b_dev = dev;
1838 bp->b_un.b_addr = buf;
1839 bp->b_blkno = startSect;
1840 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1841 if (bp->b_bcount == 0) {
1842 panic("bp->b_bcount is zero in InitBP!!\n");
1843 }
1844 bp->b_proc = b_proc;
1845 bp->b_iodone = cbFunc;
1846 bp->b_vp = b_vp;
1847
1848 }
1849
1850 static void
1851 raidgetdefaultlabel(raidPtr, rs, lp)
1852 RF_Raid_t *raidPtr;
1853 struct raid_softc *rs;
1854 struct disklabel *lp;
1855 {
1856 db1_printf(("Building a default label...\n"));
1857 bzero(lp, sizeof(*lp));
1858
1859 /* fabricate a label... */
1860 lp->d_secperunit = raidPtr->totalSectors;
1861 lp->d_secsize = raidPtr->bytesPerSector;
1862 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1863 lp->d_ntracks = 1;
1864 lp->d_ncylinders = raidPtr->totalSectors /
1865 (lp->d_nsectors * lp->d_ntracks);
1866 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1867
1868 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1869 lp->d_type = DTYPE_RAID;
1870 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1871 lp->d_rpm = 3600;
1872 lp->d_interleave = 1;
1873 lp->d_flags = 0;
1874
1875 lp->d_partitions[RAW_PART].p_offset = 0;
1876 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1877 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1878 lp->d_npartitions = RAW_PART + 1;
1879
1880 lp->d_magic = DISKMAGIC;
1881 lp->d_magic2 = DISKMAGIC;
1882 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1883
1884 }
1885 /*
1886 * Read the disklabel from the raid device. If one is not present, fake one
1887 * up.
1888 */
1889 static void
1890 raidgetdisklabel(dev)
1891 dev_t dev;
1892 {
1893 int unit = raidunit(dev);
1894 struct raid_softc *rs = &raid_softc[unit];
1895 char *errstring;
1896 struct disklabel *lp = rs->sc_dkdev.dk_label;
1897 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1898 RF_Raid_t *raidPtr;
1899
1900 db1_printf(("Getting the disklabel...\n"));
1901
1902 bzero(clp, sizeof(*clp));
1903
1904 raidPtr = raidPtrs[unit];
1905
1906 raidgetdefaultlabel(raidPtr, rs, lp);
1907
1908 /*
1909 * Call the generic disklabel extraction routine.
1910 */
1911 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1912 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1913 if (errstring)
1914 raidmakedisklabel(rs);
1915 else {
1916 int i;
1917 struct partition *pp;
1918
1919 /*
1920 * Sanity check whether the found disklabel is valid.
1921 *
1922 * This is necessary since total size of the raid device
1923 * may vary when an interleave is changed even though exactly
1924 * same componets are used, and old disklabel may used
1925 * if that is found.
1926 */
1927 if (lp->d_secperunit != rs->sc_size)
1928 printf("WARNING: %s: "
1929 "total sector size in disklabel (%d) != "
1930 "the size of raid (%ld)\n", rs->sc_xname,
1931 lp->d_secperunit, (long) rs->sc_size);
1932 for (i = 0; i < lp->d_npartitions; i++) {
1933 pp = &lp->d_partitions[i];
1934 if (pp->p_offset + pp->p_size > rs->sc_size)
1935 printf("WARNING: %s: end of partition `%c' "
1936 "exceeds the size of raid (%ld)\n",
1937 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1938 }
1939 }
1940
1941 }
1942 /*
1943 * Take care of things one might want to take care of in the event
1944 * that a disklabel isn't present.
1945 */
1946 static void
1947 raidmakedisklabel(rs)
1948 struct raid_softc *rs;
1949 {
1950 struct disklabel *lp = rs->sc_dkdev.dk_label;
1951 db1_printf(("Making a label..\n"));
1952
1953 /*
1954 * For historical reasons, if there's no disklabel present
1955 * the raw partition must be marked FS_BSDFFS.
1956 */
1957
1958 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1959
1960 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1961
1962 lp->d_checksum = dkcksum(lp);
1963 }
1964 /*
1965 * Lookup the provided name in the filesystem. If the file exists,
1966 * is a valid block device, and isn't being used by anyone else,
1967 * set *vpp to the file's vnode.
1968 * You'll find the original of this in ccd.c
1969 */
1970 int
1971 raidlookup(path, p, vpp)
1972 char *path;
1973 struct proc *p;
1974 struct vnode **vpp; /* result */
1975 {
1976 struct nameidata nd;
1977 struct vnode *vp;
1978 struct vattr va;
1979 int error;
1980
1981 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1982 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1983 #ifdef DEBUG
1984 printf("RAIDframe: vn_open returned %d\n", error);
1985 #endif
1986 return (error);
1987 }
1988 vp = nd.ni_vp;
1989 if (vp->v_usecount > 1) {
1990 VOP_UNLOCK(vp, 0);
1991 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1992 return (EBUSY);
1993 }
1994 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1995 VOP_UNLOCK(vp, 0);
1996 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1997 return (error);
1998 }
1999 /* XXX: eventually we should handle VREG, too. */
2000 if (va.va_type != VBLK) {
2001 VOP_UNLOCK(vp, 0);
2002 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2003 return (ENOTBLK);
2004 }
2005 VOP_UNLOCK(vp, 0);
2006 *vpp = vp;
2007 return (0);
2008 }
2009 /*
2010 * Wait interruptibly for an exclusive lock.
2011 *
2012 * XXX
2013 * Several drivers do this; it should be abstracted and made MP-safe.
2014 * (Hmm... where have we seen this warning before :-> GO )
2015 */
2016 static int
2017 raidlock(rs)
2018 struct raid_softc *rs;
2019 {
2020 int error;
2021
2022 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2023 rs->sc_flags |= RAIDF_WANTED;
2024 if ((error =
2025 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2026 return (error);
2027 }
2028 rs->sc_flags |= RAIDF_LOCKED;
2029 return (0);
2030 }
2031 /*
2032 * Unlock and wake up any waiters.
2033 */
2034 static void
2035 raidunlock(rs)
2036 struct raid_softc *rs;
2037 {
2038
2039 rs->sc_flags &= ~RAIDF_LOCKED;
2040 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2041 rs->sc_flags &= ~RAIDF_WANTED;
2042 wakeup(rs);
2043 }
2044 }
2045
2046
/*
 * Location and size of the on-disk component label, as used by
 * raidread_component_label() and raidwrite_component_label():
 * the offset is divided by DEV_BSIZE to form the block number, and the
 * size is both the buffer size and the transfer byte count.
 */
#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
2049
2050 int
2051 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2052 {
2053 RF_ComponentLabel_t clabel;
2054 raidread_component_label(dev, b_vp, &clabel);
2055 clabel.mod_counter = mod_counter;
2056 clabel.clean = RF_RAID_CLEAN;
2057 raidwrite_component_label(dev, b_vp, &clabel);
2058 return(0);
2059 }
2060
2061
2062 int
2063 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2064 {
2065 RF_ComponentLabel_t clabel;
2066 raidread_component_label(dev, b_vp, &clabel);
2067 clabel.mod_counter = mod_counter;
2068 clabel.clean = RF_RAID_DIRTY;
2069 raidwrite_component_label(dev, b_vp, &clabel);
2070 return(0);
2071 }
2072
2073 /* ARGSUSED */
2074 int
2075 raidread_component_label(dev, b_vp, clabel)
2076 dev_t dev;
2077 struct vnode *b_vp;
2078 RF_ComponentLabel_t *clabel;
2079 {
2080 struct buf *bp;
2081 int error;
2082
2083 /* XXX should probably ensure that we don't try to do this if
2084 someone has changed rf_protected_sectors. */
2085
2086 /* get a block of the appropriate size... */
2087 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2088 bp->b_dev = dev;
2089
2090 /* get our ducks in a row for the read */
2091 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2092 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2093 bp->b_flags = B_BUSY | B_READ;
2094 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2095
2096 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2097
2098 error = biowait(bp);
2099
2100 if (!error) {
2101 memcpy(clabel, bp->b_un.b_addr,
2102 sizeof(RF_ComponentLabel_t));
2103 #if 0
2104 rf_print_component_label( clabel );
2105 #endif
2106 } else {
2107 #if 0
2108 printf("Failed to read RAID component label!\n");
2109 #endif
2110 }
2111
2112 bp->b_flags = B_INVAL | B_AGE;
2113 brelse(bp);
2114 return(error);
2115 }
2116 /* ARGSUSED */
2117 int
2118 raidwrite_component_label(dev, b_vp, clabel)
2119 dev_t dev;
2120 struct vnode *b_vp;
2121 RF_ComponentLabel_t *clabel;
2122 {
2123 struct buf *bp;
2124 int error;
2125
2126 /* get a block of the appropriate size... */
2127 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2128 bp->b_dev = dev;
2129
2130 /* get our ducks in a row for the write */
2131 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2132 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2133 bp->b_flags = B_BUSY | B_WRITE;
2134 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2135
2136 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2137
2138 memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
2139
2140 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2141 error = biowait(bp);
2142 bp->b_flags = B_INVAL | B_AGE;
2143 brelse(bp);
2144 if (error) {
2145 #if 1
2146 printf("Failed to write RAID component info!\n");
2147 #endif
2148 }
2149
2150 return(error);
2151 }
2152
2153 void
2154 rf_markalldirty( raidPtr )
2155 RF_Raid_t *raidPtr;
2156 {
2157 RF_ComponentLabel_t clabel;
2158 int r,c;
2159
2160 raidPtr->mod_counter++;
2161 for (r = 0; r < raidPtr->numRow; r++) {
2162 for (c = 0; c < raidPtr->numCol; c++) {
2163 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2164 raidread_component_label(
2165 raidPtr->Disks[r][c].dev,
2166 raidPtr->raid_cinfo[r][c].ci_vp,
2167 &clabel);
2168 if (clabel.status == rf_ds_spared) {
2169 /* XXX do something special...
2170 but whatever you do, don't
2171 try to access it!! */
2172 } else {
2173 #if 0
2174 clabel.status =
2175 raidPtr->Disks[r][c].status;
2176 raidwrite_component_label(
2177 raidPtr->Disks[r][c].dev,
2178 raidPtr->raid_cinfo[r][c].ci_vp,
2179 &clabel);
2180 #endif
2181 raidmarkdirty(
2182 raidPtr->Disks[r][c].dev,
2183 raidPtr->raid_cinfo[r][c].ci_vp,
2184 raidPtr->mod_counter);
2185 }
2186 }
2187 }
2188 }
2189 /* printf("Component labels marked dirty.\n"); */
2190 #if 0
2191 for( c = 0; c < raidPtr->numSpare ; c++) {
2192 sparecol = raidPtr->numCol + c;
2193 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2194 /*
2195
2196 XXX this is where we get fancy and map this spare
2197 into it's correct spot in the array.
2198
2199 */
2200 /*
2201
2202 we claim this disk is "optimal" if it's
2203 rf_ds_used_spare, as that means it should be
2204 directly substitutable for the disk it replaced.
2205 We note that too...
2206
2207 */
2208
2209 for(i=0;i<raidPtr->numRow;i++) {
2210 for(j=0;j<raidPtr->numCol;j++) {
2211 if ((raidPtr->Disks[i][j].spareRow ==
2212 r) &&
2213 (raidPtr->Disks[i][j].spareCol ==
2214 sparecol)) {
2215 srow = r;
2216 scol = sparecol;
2217 break;
2218 }
2219 }
2220 }
2221
2222 raidread_component_label(
2223 raidPtr->Disks[r][sparecol].dev,
2224 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2225 &clabel);
2226 /* make sure status is noted */
2227 clabel.version = RF_COMPONENT_LABEL_VERSION;
2228 clabel.mod_counter = raidPtr->mod_counter;
2229 clabel.serial_number = raidPtr->serial_number;
2230 clabel.row = srow;
2231 clabel.column = scol;
2232 clabel.num_rows = raidPtr->numRow;
2233 clabel.num_columns = raidPtr->numCol;
2234 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2235 clabel.status = rf_ds_optimal;
2236 raidwrite_component_label(
2237 raidPtr->Disks[r][sparecol].dev,
2238 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2239 &clabel);
2240 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2241 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2242 }
2243 }
2244
2245 #endif
2246 }
2247
2248
2249 void
2250 rf_update_component_labels( raidPtr )
2251 RF_Raid_t *raidPtr;
2252 {
2253 RF_ComponentLabel_t clabel;
2254 int sparecol;
2255 int r,c;
2256 int i,j;
2257 int srow, scol;
2258
2259 srow = -1;
2260 scol = -1;
2261
2262 /* XXX should do extra checks to make sure things really are clean,
2263 rather than blindly setting the clean bit... */
2264
2265 raidPtr->mod_counter++;
2266
2267 for (r = 0; r < raidPtr->numRow; r++) {
2268 for (c = 0; c < raidPtr->numCol; c++) {
2269 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2270 raidread_component_label(
2271 raidPtr->Disks[r][c].dev,
2272 raidPtr->raid_cinfo[r][c].ci_vp,
2273 &clabel);
2274 /* make sure status is noted */
2275 clabel.status = rf_ds_optimal;
2276 /* bump the counter */
2277 clabel.mod_counter = raidPtr->mod_counter;
2278
2279 raidwrite_component_label(
2280 raidPtr->Disks[r][c].dev,
2281 raidPtr->raid_cinfo[r][c].ci_vp,
2282 &clabel);
2283 }
2284 /* else we don't touch it.. */
2285 }
2286 }
2287
2288 for( c = 0; c < raidPtr->numSpare ; c++) {
2289 sparecol = raidPtr->numCol + c;
2290 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2291 /*
2292
2293 we claim this disk is "optimal" if it's
2294 rf_ds_used_spare, as that means it should be
2295 directly substitutable for the disk it replaced.
2296 We note that too...
2297
2298 */
2299
2300 for(i=0;i<raidPtr->numRow;i++) {
2301 for(j=0;j<raidPtr->numCol;j++) {
2302 if ((raidPtr->Disks[i][j].spareRow ==
2303 0) &&
2304 (raidPtr->Disks[i][j].spareCol ==
2305 sparecol)) {
2306 srow = i;
2307 scol = j;
2308 break;
2309 }
2310 }
2311 }
2312
2313 /* XXX shouldn't *really* need this... */
2314 raidread_component_label(
2315 raidPtr->Disks[0][sparecol].dev,
2316 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2317 &clabel);
2318 /* make sure status is noted */
2319
2320 raid_init_component_label(raidPtr, &clabel);
2321
2322 clabel.mod_counter = raidPtr->mod_counter;
2323 clabel.row = srow;
2324 clabel.column = scol;
2325 clabel.status = rf_ds_optimal;
2326
2327 raidwrite_component_label(
2328 raidPtr->Disks[0][sparecol].dev,
2329 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2330 &clabel);
2331 }
2332 }
2333 /* printf("Component labels updated\n"); */
2334 }
2335
2336
2337 void
2338 rf_final_update_component_labels( raidPtr )
2339 RF_Raid_t *raidPtr;
2340 {
2341 RF_ComponentLabel_t clabel;
2342 int sparecol;
2343 int r,c;
2344 int i,j;
2345 int srow, scol;
2346
2347 srow = -1;
2348 scol = -1;
2349
2350 /* XXX should do extra checks to make sure things really are clean,
2351 rather than blindly setting the clean bit... */
2352
2353 raidPtr->mod_counter++;
2354
2355 for (r = 0; r < raidPtr->numRow; r++) {
2356 for (c = 0; c < raidPtr->numCol; c++) {
2357 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2358 raidread_component_label(
2359 raidPtr->Disks[r][c].dev,
2360 raidPtr->raid_cinfo[r][c].ci_vp,
2361 &clabel);
2362 /* make sure status is noted */
2363 clabel.status = rf_ds_optimal;
2364 /* bump the counter */
2365 clabel.mod_counter = raidPtr->mod_counter;
2366
2367 raidwrite_component_label(
2368 raidPtr->Disks[r][c].dev,
2369 raidPtr->raid_cinfo[r][c].ci_vp,
2370 &clabel);
2371 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2372 raidmarkclean(
2373 raidPtr->Disks[r][c].dev,
2374 raidPtr->raid_cinfo[r][c].ci_vp,
2375 raidPtr->mod_counter);
2376 }
2377 }
2378 /* else we don't touch it.. */
2379 }
2380 }
2381
2382 for( c = 0; c < raidPtr->numSpare ; c++) {
2383 sparecol = raidPtr->numCol + c;
2384 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2385 /*
2386
2387 we claim this disk is "optimal" if it's
2388 rf_ds_used_spare, as that means it should be
2389 directly substitutable for the disk it replaced.
2390 We note that too...
2391
2392 */
2393
2394 for(i=0;i<raidPtr->numRow;i++) {
2395 for(j=0;j<raidPtr->numCol;j++) {
2396 if ((raidPtr->Disks[i][j].spareRow ==
2397 0) &&
2398 (raidPtr->Disks[i][j].spareCol ==
2399 sparecol)) {
2400 srow = i;
2401 scol = j;
2402 break;
2403 }
2404 }
2405 }
2406
2407 /* XXX shouldn't *really* need this... */
2408 raidread_component_label(
2409 raidPtr->Disks[0][sparecol].dev,
2410 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2411 &clabel);
2412 /* make sure status is noted */
2413
2414 raid_init_component_label(raidPtr, &clabel);
2415
2416 clabel.mod_counter = raidPtr->mod_counter;
2417 clabel.row = srow;
2418 clabel.column = scol;
2419 clabel.status = rf_ds_optimal;
2420
2421 raidwrite_component_label(
2422 raidPtr->Disks[0][sparecol].dev,
2423 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2424 &clabel);
2425 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2426 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2427 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2428 raidPtr->mod_counter);
2429 }
2430 }
2431 }
2432 /* printf("Component labels updated\n"); */
2433 }
2434
2435
2436 void
2437 rf_ReconThread(req)
2438 struct rf_recon_req *req;
2439 {
2440 int s;
2441 RF_Raid_t *raidPtr;
2442
2443 s = splbio();
2444 raidPtr = (RF_Raid_t *) req->raidPtr;
2445 raidPtr->recon_in_progress = 1;
2446
2447 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2448 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2449
2450 /* XXX get rid of this! we don't need it at all.. */
2451 RF_Free(req, sizeof(*req));
2452
2453 raidPtr->recon_in_progress = 0;
2454 splx(s);
2455
2456 /* That's all... */
2457 kthread_exit(0); /* does not return */
2458 }
2459
2460 void
2461 rf_RewriteParityThread(raidPtr)
2462 RF_Raid_t *raidPtr;
2463 {
2464 int retcode;
2465 int s;
2466
2467 raidPtr->parity_rewrite_in_progress = 1;
2468 s = splbio();
2469 retcode = rf_RewriteParity(raidPtr);
2470 splx(s);
2471 if (retcode) {
2472 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2473 } else {
2474 /* set the clean bit! If we shutdown correctly,
2475 the clean bit on each component label will get
2476 set */
2477 raidPtr->parity_good = RF_RAID_CLEAN;
2478 }
2479 raidPtr->parity_rewrite_in_progress = 0;
2480
2481 /* That's all... */
2482 kthread_exit(0); /* does not return */
2483 }
2484
2485
2486 void
2487 rf_CopybackThread(raidPtr)
2488 RF_Raid_t *raidPtr;
2489 {
2490 int s;
2491
2492 raidPtr->copyback_in_progress = 1;
2493 s = splbio();
2494 rf_CopybackReconstructedData(raidPtr);
2495 splx(s);
2496 raidPtr->copyback_in_progress = 0;
2497
2498 /* That's all... */
2499 kthread_exit(0); /* does not return */
2500 }
2501
2502
2503 void
2504 rf_ReconstructInPlaceThread(req)
2505 struct rf_recon_req *req;
2506 {
2507 int retcode;
2508 int s;
2509 RF_Raid_t *raidPtr;
2510
2511 s = splbio();
2512 raidPtr = req->raidPtr;
2513 raidPtr->recon_in_progress = 1;
2514 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2515 RF_Free(req, sizeof(*req));
2516 raidPtr->recon_in_progress = 0;
2517 splx(s);
2518
2519 /* That's all... */
2520 kthread_exit(0); /* does not return */
2521 }
2522
/*
 * rf_mountroot_hook: placeholder hook taking a device pointer.
 * The body is empty; dev is not examined.
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* nothing to do */
}
2529
2530
2531 RF_AutoConfig_t *
2532 rf_find_raid_components()
2533 {
2534 struct devnametobdevmaj *dtobdm;
2535 struct vnode *vp;
2536 struct disklabel label;
2537 struct device *dv;
2538 char *cd_name;
2539 dev_t dev;
2540 int error;
2541 int i;
2542 int good_one;
2543 RF_ComponentLabel_t *clabel;
2544 RF_AutoConfig_t *ac_list;
2545 RF_AutoConfig_t *ac;
2546
2547
2548 /* initialize the AutoConfig list */
2549 ac_list = NULL;
2550
2551 if (raidautoconfig) {
2552
2553 /* we begin by trolling through *all* the devices on the system */
2554
2555 for (dv = alldevs.tqh_first; dv != NULL;
2556 dv = dv->dv_list.tqe_next) {
2557
2558 /* we are only interested in disks... */
2559 if (dv->dv_class != DV_DISK)
2560 continue;
2561
2562 /* we don't care about floppies... */
2563 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2564 continue;
2565 }
2566
2567 /* need to find the device_name_to_block_device_major stuff */
2568 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2569 dtobdm = dev_name2blk;
2570 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2571 dtobdm++;
2572 }
2573
2574 /* get a vnode for the raw partition of this disk */
2575
2576 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2577 if (bdevvp(dev, &vp))
2578 panic("RAID can't alloc vnode");
2579
2580 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2581
2582 if (error) {
2583 /* "Who cares." Continue looking
2584 for something that exists*/
2585 vput(vp);
2586 continue;
2587 }
2588
2589 /* Ok, the disk exists. Go get the disklabel. */
2590 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2591 FREAD, NOCRED, 0);
2592 if (error) {
2593 /*
2594 * XXX can't happen - open() would
2595 * have errored out (or faked up one)
2596 */
2597 printf("can't get label for dev %s%c (%d)!?!?\n",
2598 dv->dv_xname, 'a' + RAW_PART, error);
2599 }
2600
2601 /* don't need this any more. We'll allocate it again
2602 a little later if we really do... */
2603 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2604 vput(vp);
2605
2606 for (i=0; i < label.d_npartitions; i++) {
2607 /* We only support partitions marked as RAID */
2608 if (label.d_partitions[i].p_fstype != FS_RAID)
2609 continue;
2610
2611 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2612 if (bdevvp(dev, &vp))
2613 panic("RAID can't alloc vnode");
2614
2615 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2616 if (error) {
2617 /* Whatever... */
2618 vput(vp);
2619 continue;
2620 }
2621
2622 good_one = 0;
2623
2624 clabel = (RF_ComponentLabel_t *)
2625 malloc(sizeof(RF_ComponentLabel_t),
2626 M_RAIDFRAME, M_NOWAIT);
2627 if (clabel == NULL) {
2628 /* XXX CLEANUP HERE */
2629 printf("RAID auto config: out of memory!\n");
2630 return(NULL); /* XXX probably should panic? */
2631 }
2632
2633 if (!raidread_component_label(dev, vp, clabel)) {
2634 /* Got the label. Does it look reasonable? */
2635 if (rf_reasonable_label(clabel) &&
2636 (clabel->partitionSize <=
2637 label.d_partitions[i].p_size)) {
2638 #if DEBUG
2639 printf("Component on: %s%c: %d\n",
2640 dv->dv_xname, 'a'+i,
2641 label.d_partitions[i].p_size);
2642 rf_print_component_label(clabel);
2643 #endif
2644 /* if it's reasonable, add it,
2645 else ignore it. */
2646 ac = (RF_AutoConfig_t *)
2647 malloc(sizeof(RF_AutoConfig_t),
2648 M_RAIDFRAME,
2649 M_NOWAIT);
2650 if (ac == NULL) {
2651 /* XXX should panic?? */
2652 return(NULL);
2653 }
2654
2655 sprintf(ac->devname, "%s%c",
2656 dv->dv_xname, 'a'+i);
2657 ac->dev = dev;
2658 ac->vp = vp;
2659 ac->clabel = clabel;
2660 ac->next = ac_list;
2661 ac_list = ac;
2662 good_one = 1;
2663 }
2664 }
2665 if (!good_one) {
2666 /* cleanup */
2667 free(clabel, M_RAIDFRAME);
2668 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2669 vput(vp);
2670 }
2671 }
2672 }
2673 }
2674 return(ac_list);
2675 }
2676
2677 static int
2678 rf_reasonable_label(clabel)
2679 RF_ComponentLabel_t *clabel;
2680 {
2681
2682 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2683 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2684 ((clabel->clean == RF_RAID_CLEAN) ||
2685 (clabel->clean == RF_RAID_DIRTY)) &&
2686 clabel->row >=0 &&
2687 clabel->column >= 0 &&
2688 clabel->num_rows > 0 &&
2689 clabel->num_columns > 0 &&
2690 clabel->row < clabel->num_rows &&
2691 clabel->column < clabel->num_columns &&
2692 clabel->blockSize > 0 &&
2693 clabel->numBlocks > 0) {
2694 /* label looks reasonable enough... */
2695 return(1);
2696 }
2697 return(0);
2698 }
2699
2700
2701 void
2702 rf_print_component_label(clabel)
2703 RF_ComponentLabel_t *clabel;
2704 {
2705 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2706 clabel->row, clabel->column,
2707 clabel->num_rows, clabel->num_columns);
2708 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2709 clabel->version, clabel->serial_number,
2710 clabel->mod_counter);
2711 printf(" Clean: %s Status: %d\n",
2712 clabel->clean ? "Yes" : "No", clabel->status );
2713 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2714 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2715 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2716 (char) clabel->parityConfig, clabel->blockSize,
2717 clabel->numBlocks);
2718 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2719 printf(" Last configured as: raid%d\n", clabel->last_unit );
2720 #if 0
2721 printf(" Config order: %d\n", clabel->config_order);
2722 #endif
2723
2724 }
2725
2726 RF_ConfigSet_t *
2727 rf_create_auto_sets(ac_list)
2728 RF_AutoConfig_t *ac_list;
2729 {
2730 RF_AutoConfig_t *ac;
2731 RF_ConfigSet_t *config_sets;
2732 RF_ConfigSet_t *cset;
2733 RF_AutoConfig_t *ac_next;
2734
2735
2736 config_sets = NULL;
2737
2738 /* Go through the AutoConfig list, and figure out which components
2739 belong to what sets. */
2740 ac = ac_list;
2741 while(ac!=NULL) {
2742 /* we're going to putz with ac->next, so save it here
2743 for use at the end of the loop */
2744 ac_next = ac->next;
2745
2746 if (config_sets == NULL) {
2747 /* will need at least this one... */
2748 config_sets = (RF_ConfigSet_t *)
2749 malloc(sizeof(RF_ConfigSet_t),
2750 M_RAIDFRAME, M_NOWAIT);
2751 if (config_sets == NULL) {
2752 panic("rf_create_auto_sets: No memory!\n");
2753 }
2754 /* this one is easy :) */
2755 config_sets->ac = ac;
2756 config_sets->next = NULL;
2757 config_sets->rootable = 0;
2758 ac->next = NULL;
2759 } else {
2760 /* which set does this component fit into? */
2761 cset = config_sets;
2762 while(cset!=NULL) {
2763 if (rf_does_it_fit(cset, ac)) {
2764 /* looks like it matches */
2765 ac->next = cset->ac;
2766 cset->ac = ac;
2767 break;
2768 }
2769 cset = cset->next;
2770 }
2771 if (cset==NULL) {
2772 /* didn't find a match above... new set..*/
2773 cset = (RF_ConfigSet_t *)
2774 malloc(sizeof(RF_ConfigSet_t),
2775 M_RAIDFRAME, M_NOWAIT);
2776 if (cset == NULL) {
2777 panic("rf_create_auto_sets: No memory!\n");
2778 }
2779 cset->ac = ac;
2780 ac->next = NULL;
2781 cset->next = config_sets;
2782 cset->rootable = 0;
2783 config_sets = cset;
2784 }
2785 }
2786 ac = ac_next;
2787 }
2788
2789
2790 return(config_sets);
2791 }
2792
2793 static int
2794 rf_does_it_fit(cset, ac)
2795 RF_ConfigSet_t *cset;
2796 RF_AutoConfig_t *ac;
2797 {
2798 RF_ComponentLabel_t *clabel1, *clabel2;
2799
2800 /* If this one matches the *first* one in the set, that's good
2801 enough, since the other members of the set would have been
2802 through here too... */
2803 /* note that we are not checking partitionSize here..
2804
2805 Note that we are also not checking the mod_counters here.
2806 If everything else matches execpt the mod_counter, that's
2807 good enough for this test. We will deal with the mod_counters
2808 a little later in the autoconfiguration process.
2809
2810 (clabel1->mod_counter == clabel2->mod_counter) &&
2811
2812 */
2813
2814 clabel1 = cset->ac->clabel;
2815 clabel2 = ac->clabel;
2816 if ((clabel1->version == clabel2->version) &&
2817 (clabel1->serial_number == clabel2->serial_number) &&
2818 (clabel1->num_rows == clabel2->num_rows) &&
2819 (clabel1->num_columns == clabel2->num_columns) &&
2820 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2821 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2822 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2823 (clabel1->parityConfig == clabel2->parityConfig) &&
2824 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2825 (clabel1->blockSize == clabel2->blockSize) &&
2826 (clabel1->numBlocks == clabel2->numBlocks) &&
2827 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2828 (clabel1->root_partition == clabel2->root_partition) &&
2829 (clabel1->last_unit == clabel2->last_unit) &&
2830 (clabel1->config_order == clabel2->config_order)) {
2831 /* if it get's here, it almost *has* to be a match */
2832 } else {
2833 /* it's not consistent with somebody in the set..
2834 punt */
2835 return(0);
2836 }
2837 /* all was fine.. it must fit... */
2838 return(1);
2839 }
2840
2841 int
2842 rf_have_enough_components(cset)
2843 RF_ConfigSet_t *cset;
2844 {
2845 RF_AutoConfig_t *ac;
2846 RF_AutoConfig_t *auto_config;
2847 RF_ComponentLabel_t *clabel;
2848 int r,c;
2849 int num_rows;
2850 int num_cols;
2851 int num_missing;
2852
2853 /* check to see that we have enough 'live' components
2854 of this set. If so, we can configure it if necessary */
2855
2856 num_rows = cset->ac->clabel->num_rows;
2857 num_cols = cset->ac->clabel->num_columns;
2858
2859 /* XXX Check for duplicate components!?!?!? */
2860
2861 num_missing = 0;
2862 auto_config = cset->ac;
2863
2864 for(r=0; r<num_rows; r++) {
2865 for(c=0; c<num_cols; c++) {
2866 ac = auto_config;
2867 while(ac!=NULL) {
2868 if (ac->clabel==NULL) {
2869 /* big-time bad news. */
2870 goto fail;
2871 }
2872 if ((ac->clabel->row == r) &&
2873 (ac->clabel->column == c)) {
2874 /* it's this one... */
2875 #if DEBUG
2876 printf("Found: %s at %d,%d\n",
2877 ac->devname,r,c);
2878 #endif
2879 break;
2880 }
2881 ac=ac->next;
2882 }
2883 if (ac==NULL) {
2884 /* Didn't find one here! */
2885 num_missing++;
2886 }
2887 }
2888 }
2889
2890 clabel = cset->ac->clabel;
2891
2892 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
2893 ((clabel->parityConfig == '1') && (num_missing > 1)) ||
2894 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
2895 ((clabel->parityConfig == '5') && (num_missing > 1))) {
2896 /* XXX this needs to be made *much* more general */
2897 /* Too many failures */
2898 return(0);
2899 }
2900 /* otherwise, all is well, and we've got enough to take a kick
2901 at autoconfiguring this set */
2902 return(1);
2903 fail:
2904 return(0);
2905
2906 }
2907
2908 void
2909 rf_create_configuration(ac,config,raidPtr)
2910 RF_AutoConfig_t *ac;
2911 RF_Config_t *config;
2912 RF_Raid_t *raidPtr;
2913 {
2914 RF_ComponentLabel_t *clabel;
2915
2916 clabel = ac->clabel;
2917
2918 /* 1. Fill in the common stuff */
2919 config->numRow = clabel->num_rows;
2920 config->numCol = clabel->num_columns;
2921 config->numSpare = 0; /* XXX should this be set here? */
2922 config->sectPerSU = clabel->sectPerSU;
2923 config->SUsPerPU = clabel->SUsPerPU;
2924 config->SUsPerRU = clabel->SUsPerRU;
2925 config->parityConfig = clabel->parityConfig;
2926 /* XXX... */
2927 strcpy(config->diskQueueType,"fifo");
2928 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
2929 config->layoutSpecificSize = 0; /* XXX ?? */
2930
2931 while(ac!=NULL) {
2932 /* row/col values will be in range due to the checks
2933 in reasonable_label() */
2934 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
2935 ac->devname);
2936 ac = ac->next;
2937 }
2938
2939 }
2940
2941 int
2942 rf_set_autoconfig(raidPtr, new_value)
2943 RF_Raid_t *raidPtr;
2944 int new_value;
2945 {
2946 RF_ComponentLabel_t clabel;
2947 struct vnode *vp;
2948 dev_t dev;
2949 int row, column;
2950
2951 raidPtr->autoconfigure = new_value;
2952 for(row=0; row<raidPtr->numRow; row++) {
2953 for(column=0; column<raidPtr->numCol; column++) {
2954 dev = raidPtr->Disks[row][column].dev;
2955 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2956 raidread_component_label(dev, vp, &clabel);
2957 clabel.autoconfigure = new_value;
2958 raidwrite_component_label(dev, vp, &clabel);
2959 }
2960 }
2961 return(new_value);
2962 }
2963
2964 int
2965 rf_set_rootpartition(raidPtr, new_value)
2966 RF_Raid_t *raidPtr;
2967 int new_value;
2968 {
2969 RF_ComponentLabel_t clabel;
2970 struct vnode *vp;
2971 dev_t dev;
2972 int row, column;
2973
2974 raidPtr->root_partition = new_value;
2975 for(row=0; row<raidPtr->numRow; row++) {
2976 for(column=0; column<raidPtr->numCol; column++) {
2977 dev = raidPtr->Disks[row][column].dev;
2978 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2979 raidread_component_label(dev, vp, &clabel);
2980 clabel.root_partition = new_value;
2981 raidwrite_component_label(dev, vp, &clabel);
2982 }
2983 }
2984 return(new_value);
2985 }
2986
2987 void
2988 rf_release_all_vps(cset)
2989 RF_ConfigSet_t *cset;
2990 {
2991 RF_AutoConfig_t *ac;
2992
2993 ac = cset->ac;
2994 while(ac!=NULL) {
2995 /* Close the vp, and give it back */
2996 if (ac->vp) {
2997 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
2998 vput(ac->vp);
2999 }
3000 ac = ac->next;
3001 }
3002 }
3003
3004
3005 void
3006 rf_cleanup_config_set(cset)
3007 RF_ConfigSet_t *cset;
3008 {
3009 RF_AutoConfig_t *ac;
3010 RF_AutoConfig_t *next_ac;
3011
3012 ac = cset->ac;
3013 while(ac!=NULL) {
3014 next_ac = ac->next;
3015 /* nuke the label */
3016 free(ac->clabel, M_RAIDFRAME);
3017 /* cleanup the config structure */
3018 free(ac, M_RAIDFRAME);
3019 /* "next.." */
3020 ac = next_ac;
3021 }
3022 /* and, finally, nuke the config set */
3023 free(cset, M_RAIDFRAME);
3024 }
3025
3026
3027 void
3028 raid_init_component_label(raidPtr, clabel)
3029 RF_Raid_t *raidPtr;
3030 RF_ComponentLabel_t *clabel;
3031 {
3032 /* current version number */
3033 clabel->version = RF_COMPONENT_LABEL_VERSION;
3034 clabel->serial_number = raidPtr->serial_number;
3035 clabel->mod_counter = raidPtr->mod_counter;
3036 clabel->num_rows = raidPtr->numRow;
3037 clabel->num_columns = raidPtr->numCol;
3038 clabel->clean = RF_RAID_DIRTY; /* not clean */
3039 clabel->status = rf_ds_optimal; /* "It's good!" */
3040
3041 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3042 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3043 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3044
3045 clabel->blockSize = raidPtr->bytesPerSector;
3046 clabel->numBlocks = raidPtr->sectorsPerDisk;
3047
3048 /* XXX not portable */
3049 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3050 clabel->maxOutstanding = raidPtr->maxOutstanding;
3051 clabel->autoconfigure = raidPtr->autoconfigure;
3052 clabel->root_partition = raidPtr->root_partition;
3053 clabel->last_unit = raidPtr->raidid;
3054 clabel->config_order = raidPtr->config_order;
3055 }
3056
3057 int
3058 rf_auto_config_set(cset,unit)
3059 RF_ConfigSet_t *cset;
3060 int *unit;
3061 {
3062 RF_Raid_t *raidPtr;
3063 RF_Config_t *config;
3064 int raidID;
3065 int retcode;
3066
3067 printf("Starting autoconfigure on raid%d\n",raidID);
3068
3069 retcode = 0;
3070 *unit = -1;
3071
3072 /* 1. Create a config structure */
3073
3074 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3075 M_RAIDFRAME,
3076 M_NOWAIT);
3077 if (config==NULL) {
3078 printf("Out of mem!?!?\n");
3079 /* XXX do something more intelligent here. */
3080 return(1);
3081 }
3082 /* XXX raidID needs to be set correctly.. */
3083
3084 /*
3085 2. Figure out what RAID ID this one is supposed to live at
3086 See if we can get the same RAID dev that it was configured
3087 on last time..
3088 */
3089
3090 raidID = cset->ac->clabel->last_unit;
3091 if ((raidID < 0) || (raidID >= numraid)) {
3092 /* let's not wander off into lala land. */
3093 raidID = numraid - 1;
3094 }
3095 if (raidPtrs[raidID]->valid != 0) {
3096
3097 /*
3098 Nope... Go looking for an alternative...
3099 Start high so we don't immediately use raid0 if that's
3100 not taken.
3101 */
3102
3103 for(raidID = numraid; raidID >= 0; raidID--) {
3104 if (raidPtrs[raidID]->valid == 0) {
3105 /* can use this one! */
3106 break;
3107 }
3108 }
3109 }
3110
3111 if (raidID < 0) {
3112 /* punt... */
3113 printf("Unable to auto configure this set!\n");
3114 printf("(Out of RAID devs!)\n");
3115 return(1);
3116 }
3117
3118 raidPtr = raidPtrs[raidID];
3119
3120 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3121 raidPtr->raidid = raidID;
3122 raidPtr->openings = RAIDOUTSTANDING;
3123
3124 /* 3. Build the configuration structure */
3125 rf_create_configuration(cset->ac, config, raidPtr);
3126
3127 /* 4. Do the configuration */
3128 retcode = rf_Configure(raidPtr, config, cset->ac);
3129
3130 if (retcode == 0) {
3131
3132 raidinit(raidPtrs[raidID]);
3133
3134 rf_markalldirty(raidPtrs[raidID]);
3135 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3136 if (cset->ac->clabel->root_partition==1) {
3137 /* everything configured just fine. Make a note
3138 that this set is eligible to be root. */
3139 cset->rootable = 1;
3140 /* XXX do this here? */
3141 raidPtrs[raidID]->root_partition = 1;
3142 }
3143 }
3144
3145 /* 5. Cleanup */
3146 free(config, M_RAIDFRAME);
3147
3148 *unit = raidID;
3149 return(retcode);
3150 }
3151