rf_netbsdkintf.c revision 1.69 1 /* $NetBSD: rf_netbsdkintf.c,v 1.69 2000/03/07 02:59:50 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include "raid.h"
139 #include "opt_raid_autoconfig.h"
140 #include "rf_raid.h"
141 #include "rf_raidframe.h"
142 #include "rf_copyback.h"
143 #include "rf_dag.h"
144 #include "rf_dagflags.h"
145 #include "rf_diskqueue.h"
146 #include "rf_acctrace.h"
147 #include "rf_etimer.h"
148 #include "rf_general.h"
149 #include "rf_debugMem.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_debugprint.h"
155 #include "rf_threadstuff.h"
156 #include "rf_configure.h"
157
/*
 * Verbosity threshold for the debug messages emitted through db1_printf().
 * It is only consulted when this file is compiled with DEBUG defined.
 */
int rf_kdebug_level = 0;

/*
 * db1_printf((fmt, ...)) -- conditional debug printf.  Note the doubled
 * parentheses: the single macro argument is the complete, parenthesized
 * printf argument list.
 *
 * Both variants are wrapped in do { } while (0) so that an invocation
 * followed by a semicolon is exactly one statement.  That keeps
 *
 *	if (cond)
 *		db1_printf(("..."));
 *	else
 *		...
 *
 * well-formed, and stops the hidden "if" of the DEBUG variant from
 * capturing an "else" that belongs to the caller.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
165
166 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
167
168 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
169
170 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
171 * spare table */
172 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
173 * installation process */
174
175 /* prototypes */
176 static void KernelWakeupFunc(struct buf * bp);
177 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
178 dev_t dev, RF_SectorNum_t startSect,
179 RF_SectorCount_t numSect, caddr_t buf,
180 void (*cbFunc) (struct buf *), void *cbArg,
181 int logBytesPerSector, struct proc * b_proc);
182 static void raidinit __P((RF_Raid_t *));
183
184 void raidattach __P((int));
185 int raidsize __P((dev_t));
186 int raidopen __P((dev_t, int, int, struct proc *));
187 int raidclose __P((dev_t, int, int, struct proc *));
188 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
189 int raidwrite __P((dev_t, struct uio *, int));
190 int raidread __P((dev_t, struct uio *, int));
191 void raidstrategy __P((struct buf *));
192 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
193
194 /*
195 * Pilfered from ccd.c
196 */
197
198 struct raidbuf {
199 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
200 struct buf *rf_obp; /* ptr. to original I/O buf */
201 int rf_flags; /* misc. flags */
202 RF_DiskQueueData_t *req;/* the request that this was part of.. */
203 };
204
205
/*
 * Take a component buffer from, or hand one back to, the per-unit pool
 * sc_cbufpool of the softc `rs'.  RAIDGETBUF passes PR_NOWAIT to
 * pool_get().
 */
#define	RAIDGETBUF(rs)		pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp)	pool_put(&(rs)->sc_cbufpool, (cbp))
208
209 /* XXX Not sure if the following should be replacing the raidPtrs above,
210 or if it should be used in conjunction with that...
211 */
212
213 struct raid_softc {
214 int sc_flags; /* flags */
215 int sc_cflags; /* configuration flags */
216 size_t sc_size; /* size of the raid device */
217 char sc_xname[20]; /* XXX external name */
218 struct disk sc_dkdev; /* generic disk device info */
219 struct pool sc_cbufpool; /* component buffer pool */
220 struct buf_queue buf_queue; /* used for the device queue */
221 };
/*
 * Bits kept in raid_softc.sc_flags.
 */
#define	RAIDF_INITED	0x01	/* unit has been initialized */
#define	RAIDF_WLABEL	0x02	/* label area is writable */
#define	RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define	RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define	RAIDF_LOCKED	0x80	/* unit is locked */

/* Map a dev_t onto the raid unit number it refers to. */
#define	raidunit(x)	DISKUNIT(x)

/*
 * Number of usable units.  Set by raidattach(); the device entry points
 * reject any unit number >= numraid.
 */
int     numraid = 0;
231
232 /*
233 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
234 * Be aware that large numbers can allow the driver to consume a lot of
235 * kernel memory, especially on writes, and in degraded mode reads.
236 *
237 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
238 * a single 64K write will typically require 64K for the old data,
239 * 64K for the old parity, and 64K for the new parity, for a total
240 * of 192K (if the parity buffer is not re-used immediately).
241 * Even if it is used immediately, that's still 128K, which when multiplied
242 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
243 *
244 * Now in degraded mode, for example, a 64K read on the above setup may
245 * require data reconstruction, which will require *all* of the 4 remaining
246 * disks to participate -- 4 * 32K/disk == 128K again.
247 */
248
/*
 * Default number of simultaneous I/Os allowed per RAID device; the value
 * can be overridden by defining RAIDOUTSTANDING before this point.
 */
#if !defined(RAIDOUTSTANDING)
#define	RAIDOUTSTANDING	6
#endif

/* dev_t naming the RAW_PART partition of the unit that `dev' belongs to. */
#define	RAIDLABELDEV(dev) \
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
255
256 /* declared here, and made public, for the benefit of KVM stuff.. */
257 struct raid_softc *raid_softc;
258
259 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
260 struct disklabel *));
261 static void raidgetdisklabel __P((dev_t));
262 static void raidmakedisklabel __P((struct raid_softc *));
263
264 static int raidlock __P((struct raid_softc *));
265 static void raidunlock __P((struct raid_softc *));
266
267 static void rf_markalldirty __P((RF_Raid_t *));
268 void rf_mountroot_hook __P((struct device *));
269
270 struct device *raidrootdev;
271
272 void rf_ReconThread __P((struct rf_recon_req *));
273 /* XXX what I want is: */
274 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
275 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
276 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
277 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
278 void rf_buildroothack __P((void *));
279
280 RF_AutoConfig_t *rf_find_raid_components __P((void));
281 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
282 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
283 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
284 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
285 RF_Raid_t *));
286 int rf_set_autoconfig __P((RF_Raid_t *, int));
287 int rf_set_rootpartition __P((RF_Raid_t *, int));
288 void rf_release_all_vps __P((RF_ConfigSet_t *));
289 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
290 int rf_have_enough_components __P((RF_ConfigSet_t *));
291 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
292
293 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
294 allow autoconfig to take place.
295 Note that this is overridden by having
296 RAID_AUTOCONFIG as an option in the
297 kernel config file. */
298 extern struct device *booted_device;
299
300 void
301 raidattach(num)
302 int num;
303 {
304 int raidID;
305 int i, rc;
306 RF_AutoConfig_t *ac_list; /* autoconfig list */
307 RF_ConfigSet_t *config_sets;
308
309 #ifdef DEBUG
310 printf("raidattach: Asked for %d units\n", num);
311 #endif
312
313 if (num <= 0) {
314 #ifdef DIAGNOSTIC
315 panic("raidattach: count <= 0");
316 #endif
317 return;
318 }
319 /* This is where all the initialization stuff gets done. */
320
321 numraid = num;
322
323 /* Make some space for requested number of units... */
324
325 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
326 if (raidPtrs == NULL) {
327 panic("raidPtrs is NULL!!\n");
328 }
329
330 rc = rf_mutex_init(&rf_sparet_wait_mutex);
331 if (rc) {
332 RF_PANIC();
333 }
334
335 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
336
337 for (i = 0; i < num; i++)
338 raidPtrs[i] = NULL;
339 rc = rf_BootRaidframe();
340 if (rc == 0)
341 printf("Kernelized RAIDframe activated\n");
342 else
343 panic("Serious error booting RAID!!\n");
344
345 /* put together some datastructures like the CCD device does.. This
346 * lets us lock the device and what-not when it gets opened. */
347
348 raid_softc = (struct raid_softc *)
349 malloc(num * sizeof(struct raid_softc),
350 M_RAIDFRAME, M_NOWAIT);
351 if (raid_softc == NULL) {
352 printf("WARNING: no memory for RAIDframe driver\n");
353 return;
354 }
355
356 bzero(raid_softc, num * sizeof(struct raid_softc));
357
358 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
359 M_RAIDFRAME, M_NOWAIT);
360 if (raidrootdev == NULL) {
361 panic("No memory for RAIDframe driver!!?!?!\n");
362 }
363
364 for (raidID = 0; raidID < num; raidID++) {
365 BUFQ_INIT(&raid_softc[raidID].buf_queue);
366
367 raidrootdev[raidID].dv_class = DV_DISK;
368 raidrootdev[raidID].dv_cfdata = NULL;
369 raidrootdev[raidID].dv_unit = raidID;
370 raidrootdev[raidID].dv_parent = NULL;
371 raidrootdev[raidID].dv_flags = 0;
372 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
373
374 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
375 (RF_Raid_t *));
376 if (raidPtrs[raidID] == NULL) {
377 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
378 numraid = raidID;
379 return;
380 }
381 }
382
383 #if RAID_AUTOCONFIG
384 raidautoconfig = 1;
385 #endif
386
387 if (raidautoconfig) {
388 /* 1. locate all RAID components on the system */
389
390 #if DEBUG
391 printf("Searching for raid components...\n");
392 #endif
393 ac_list = rf_find_raid_components();
394
395 /* 2. sort them into their respective sets */
396
397 config_sets = rf_create_auto_sets(ac_list);
398
399 /* 3. evaluate each set and configure the valid ones
400 This gets done in rf_buildroothack() */
401
402 /* schedule the creation of the thread to do the
403 "/ on RAID" stuff */
404
405 kthread_create(rf_buildroothack,config_sets);
406
407 #if 0
408 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
409 #endif
410 }
411
412 }
413
414 void
415 rf_buildroothack(arg)
416 void *arg;
417 {
418 RF_ConfigSet_t *config_sets = arg;
419 RF_ConfigSet_t *cset;
420 RF_ConfigSet_t *next_cset;
421 int retcode;
422 int raidID;
423 int rootID;
424 int num_root;
425
426 num_root = 0;
427 cset = config_sets;
428 while(cset != NULL ) {
429 next_cset = cset->next;
430 if (rf_have_enough_components(cset) &&
431 cset->ac->clabel->autoconfigure==1) {
432 retcode = rf_auto_config_set(cset,&raidID);
433 if (!retcode) {
434 if (cset->rootable) {
435 rootID = raidID;
436 num_root++;
437 }
438 } else {
439 /* The autoconfig didn't work :( */
440 #if DEBUG
441 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
442 #endif
443 rf_release_all_vps(cset);
444 }
445 } else {
446 /* we're not autoconfiguring this set...
447 release the associated resources */
448 rf_release_all_vps(cset);
449 }
450 /* cleanup */
451 rf_cleanup_config_set(cset);
452 cset = next_cset;
453 }
454 if (boothowto & RB_ASKNAME) {
455 /* We don't auto-config... */
456 } else {
457 /* They didn't ask, and we found something bootable... */
458
459 if (num_root == 1) {
460 booted_device = &raidrootdev[rootID];
461 } else if (num_root > 1) {
462 /* we can't guess.. require the user to answer... */
463 boothowto |= RB_ASKNAME;
464 }
465 }
466 }
467
468
469 int
470 raidsize(dev)
471 dev_t dev;
472 {
473 struct raid_softc *rs;
474 struct disklabel *lp;
475 int part, unit, omask, size;
476
477 unit = raidunit(dev);
478 if (unit >= numraid)
479 return (-1);
480 rs = &raid_softc[unit];
481
482 if ((rs->sc_flags & RAIDF_INITED) == 0)
483 return (-1);
484
485 part = DISKPART(dev);
486 omask = rs->sc_dkdev.dk_openmask & (1 << part);
487 lp = rs->sc_dkdev.dk_label;
488
489 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
490 return (-1);
491
492 if (lp->d_partitions[part].p_fstype != FS_SWAP)
493 size = -1;
494 else
495 size = lp->d_partitions[part].p_size *
496 (lp->d_secsize / DEV_BSIZE);
497
498 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
499 return (-1);
500
501 return (size);
502
503 }
504
505 int
506 raiddump(dev, blkno, va, size)
507 dev_t dev;
508 daddr_t blkno;
509 caddr_t va;
510 size_t size;
511 {
512 /* Not implemented. */
513 return ENXIO;
514 }
515 /* ARGSUSED */
516 int
517 raidopen(dev, flags, fmt, p)
518 dev_t dev;
519 int flags, fmt;
520 struct proc *p;
521 {
522 int unit = raidunit(dev);
523 struct raid_softc *rs;
524 struct disklabel *lp;
525 int part, pmask;
526 int error = 0;
527
528 if (unit >= numraid)
529 return (ENXIO);
530 rs = &raid_softc[unit];
531
532 if ((error = raidlock(rs)) != 0)
533 return (error);
534 lp = rs->sc_dkdev.dk_label;
535
536 part = DISKPART(dev);
537 pmask = (1 << part);
538
539 db1_printf(("Opening raid device number: %d partition: %d\n",
540 unit, part));
541
542
543 if ((rs->sc_flags & RAIDF_INITED) &&
544 (rs->sc_dkdev.dk_openmask == 0))
545 raidgetdisklabel(dev);
546
547 /* make sure that this partition exists */
548
549 if (part != RAW_PART) {
550 db1_printf(("Not a raw partition..\n"));
551 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
552 ((part >= lp->d_npartitions) ||
553 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
554 error = ENXIO;
555 raidunlock(rs);
556 db1_printf(("Bailing out...\n"));
557 return (error);
558 }
559 }
560 /* Prevent this unit from being unconfigured while open. */
561 switch (fmt) {
562 case S_IFCHR:
563 rs->sc_dkdev.dk_copenmask |= pmask;
564 break;
565
566 case S_IFBLK:
567 rs->sc_dkdev.dk_bopenmask |= pmask;
568 break;
569 }
570
571 if ((rs->sc_dkdev.dk_openmask == 0) &&
572 ((rs->sc_flags & RAIDF_INITED) != 0)) {
573 /* First one... mark things as dirty... Note that we *MUST*
574 have done a configure before this. I DO NOT WANT TO BE
575 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
576 THAT THEY BELONG TOGETHER!!!!! */
577 /* XXX should check to see if we're only open for reading
578 here... If so, we needn't do this, but then need some
579 other way of keeping track of what's happened.. */
580
581 rf_markalldirty( raidPtrs[unit] );
582 }
583
584
585 rs->sc_dkdev.dk_openmask =
586 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
587
588 raidunlock(rs);
589
590 return (error);
591
592
593 }
594 /* ARGSUSED */
595 int
596 raidclose(dev, flags, fmt, p)
597 dev_t dev;
598 int flags, fmt;
599 struct proc *p;
600 {
601 int unit = raidunit(dev);
602 struct raid_softc *rs;
603 int error = 0;
604 int part;
605
606 if (unit >= numraid)
607 return (ENXIO);
608 rs = &raid_softc[unit];
609
610 if ((error = raidlock(rs)) != 0)
611 return (error);
612
613 part = DISKPART(dev);
614
615 /* ...that much closer to allowing unconfiguration... */
616 switch (fmt) {
617 case S_IFCHR:
618 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
619 break;
620
621 case S_IFBLK:
622 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
623 break;
624 }
625 rs->sc_dkdev.dk_openmask =
626 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
627
628 if ((rs->sc_dkdev.dk_openmask == 0) &&
629 ((rs->sc_flags & RAIDF_INITED) != 0)) {
630 /* Last one... device is not unconfigured yet.
631 Device shutdown has taken care of setting the
632 clean bits if RAIDF_INITED is not set
633 mark things as clean... */
634 #if 0
635 printf("Last one on raid%d. Updating status.\n",unit);
636 #endif
637 rf_final_update_component_labels( raidPtrs[unit] );
638 }
639
640 raidunlock(rs);
641 return (0);
642
643 }
644
645 void
646 raidstrategy(bp)
647 register struct buf *bp;
648 {
649 register int s;
650
651 unsigned int raidID = raidunit(bp->b_dev);
652 RF_Raid_t *raidPtr;
653 struct raid_softc *rs = &raid_softc[raidID];
654 struct disklabel *lp;
655 int wlabel;
656
657 if ((rs->sc_flags & RAIDF_INITED) ==0) {
658 bp->b_error = ENXIO;
659 bp->b_flags = B_ERROR;
660 bp->b_resid = bp->b_bcount;
661 biodone(bp);
662 return;
663 }
664 if (raidID >= numraid || !raidPtrs[raidID]) {
665 bp->b_error = ENODEV;
666 bp->b_flags |= B_ERROR;
667 bp->b_resid = bp->b_bcount;
668 biodone(bp);
669 return;
670 }
671 raidPtr = raidPtrs[raidID];
672 if (!raidPtr->valid) {
673 bp->b_error = ENODEV;
674 bp->b_flags |= B_ERROR;
675 bp->b_resid = bp->b_bcount;
676 biodone(bp);
677 return;
678 }
679 if (bp->b_bcount == 0) {
680 db1_printf(("b_bcount is zero..\n"));
681 biodone(bp);
682 return;
683 }
684 lp = rs->sc_dkdev.dk_label;
685
686 /*
687 * Do bounds checking and adjust transfer. If there's an
688 * error, the bounds check will flag that for us.
689 */
690
691 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
692 if (DISKPART(bp->b_dev) != RAW_PART)
693 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
694 db1_printf(("Bounds check failed!!:%d %d\n",
695 (int) bp->b_blkno, (int) wlabel));
696 biodone(bp);
697 return;
698 }
699 s = splbio();
700
701 bp->b_resid = 0;
702
703 /* stuff it onto our queue */
704 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
705
706 raidstart(raidPtrs[raidID]);
707
708 splx(s);
709 }
710 /* ARGSUSED */
711 int
712 raidread(dev, uio, flags)
713 dev_t dev;
714 struct uio *uio;
715 int flags;
716 {
717 int unit = raidunit(dev);
718 struct raid_softc *rs;
719 int part;
720
721 if (unit >= numraid)
722 return (ENXIO);
723 rs = &raid_softc[unit];
724
725 if ((rs->sc_flags & RAIDF_INITED) == 0)
726 return (ENXIO);
727 part = DISKPART(dev);
728
729 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
730
731 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
732
733 }
734 /* ARGSUSED */
735 int
736 raidwrite(dev, uio, flags)
737 dev_t dev;
738 struct uio *uio;
739 int flags;
740 {
741 int unit = raidunit(dev);
742 struct raid_softc *rs;
743
744 if (unit >= numraid)
745 return (ENXIO);
746 rs = &raid_softc[unit];
747
748 if ((rs->sc_flags & RAIDF_INITED) == 0)
749 return (ENXIO);
750 db1_printf(("raidwrite\n"));
751 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
752
753 }
754
755 int
756 raidioctl(dev, cmd, data, flag, p)
757 dev_t dev;
758 u_long cmd;
759 caddr_t data;
760 int flag;
761 struct proc *p;
762 {
763 int unit = raidunit(dev);
764 int error = 0;
765 int part, pmask;
766 struct raid_softc *rs;
767 RF_Config_t *k_cfg, *u_cfg;
768 RF_Raid_t *raidPtr;
769 RF_RaidDisk_t *diskPtr;
770 RF_AccTotals_t *totals;
771 RF_DeviceConfig_t *d_cfg, **ucfgp;
772 u_char *specific_buf;
773 int retcode = 0;
774 int row;
775 int column;
776 struct rf_recon_req *rrcopy, *rr;
777 RF_ComponentLabel_t *clabel;
778 RF_ComponentLabel_t ci_label;
779 RF_ComponentLabel_t **clabel_ptr;
780 RF_SingleComponent_t *sparePtr,*componentPtr;
781 RF_SingleComponent_t hot_spare;
782 RF_SingleComponent_t component;
783 int i, j, d;
784
785 if (unit >= numraid)
786 return (ENXIO);
787 rs = &raid_softc[unit];
788 raidPtr = raidPtrs[unit];
789
790 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
791 (int) DISKPART(dev), (int) unit, (int) cmd));
792
793 /* Must be open for writes for these commands... */
794 switch (cmd) {
795 case DIOCSDINFO:
796 case DIOCWDINFO:
797 case DIOCWLABEL:
798 if ((flag & FWRITE) == 0)
799 return (EBADF);
800 }
801
802 /* Must be initialized for these... */
803 switch (cmd) {
804 case DIOCGDINFO:
805 case DIOCSDINFO:
806 case DIOCWDINFO:
807 case DIOCGPART:
808 case DIOCWLABEL:
809 case DIOCGDEFLABEL:
810 case RAIDFRAME_SHUTDOWN:
811 case RAIDFRAME_REWRITEPARITY:
812 case RAIDFRAME_GET_INFO:
813 case RAIDFRAME_RESET_ACCTOTALS:
814 case RAIDFRAME_GET_ACCTOTALS:
815 case RAIDFRAME_KEEP_ACCTOTALS:
816 case RAIDFRAME_GET_SIZE:
817 case RAIDFRAME_FAIL_DISK:
818 case RAIDFRAME_COPYBACK:
819 case RAIDFRAME_CHECK_RECON_STATUS:
820 case RAIDFRAME_GET_COMPONENT_LABEL:
821 case RAIDFRAME_SET_COMPONENT_LABEL:
822 case RAIDFRAME_ADD_HOT_SPARE:
823 case RAIDFRAME_REMOVE_HOT_SPARE:
824 case RAIDFRAME_INIT_LABELS:
825 case RAIDFRAME_REBUILD_IN_PLACE:
826 case RAIDFRAME_CHECK_PARITY:
827 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
828 case RAIDFRAME_CHECK_COPYBACK_STATUS:
829 case RAIDFRAME_SET_AUTOCONFIG:
830 case RAIDFRAME_SET_ROOT:
831 if ((rs->sc_flags & RAIDF_INITED) == 0)
832 return (ENXIO);
833 }
834
835 switch (cmd) {
836
837 /* configure the system */
838 case RAIDFRAME_CONFIGURE:
839
840 if (raidPtr->valid) {
841 /* There is a valid RAID set running on this unit! */
842 printf("raid%d: Device already configured!\n",unit);
843 return(EINVAL);
844 }
845
846 /* copy-in the configuration information */
847 /* data points to a pointer to the configuration structure */
848
849 u_cfg = *((RF_Config_t **) data);
850 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
851 if (k_cfg == NULL) {
852 return (ENOMEM);
853 }
854 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
855 sizeof(RF_Config_t));
856 if (retcode) {
857 RF_Free(k_cfg, sizeof(RF_Config_t));
858 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
859 retcode));
860 return (retcode);
861 }
862 /* allocate a buffer for the layout-specific data, and copy it
863 * in */
864 if (k_cfg->layoutSpecificSize) {
865 if (k_cfg->layoutSpecificSize > 10000) {
866 /* sanity check */
867 RF_Free(k_cfg, sizeof(RF_Config_t));
868 return (EINVAL);
869 }
870 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
871 (u_char *));
872 if (specific_buf == NULL) {
873 RF_Free(k_cfg, sizeof(RF_Config_t));
874 return (ENOMEM);
875 }
876 retcode = copyin(k_cfg->layoutSpecific,
877 (caddr_t) specific_buf,
878 k_cfg->layoutSpecificSize);
879 if (retcode) {
880 RF_Free(k_cfg, sizeof(RF_Config_t));
881 RF_Free(specific_buf,
882 k_cfg->layoutSpecificSize);
883 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
884 retcode));
885 return (retcode);
886 }
887 } else
888 specific_buf = NULL;
889 k_cfg->layoutSpecific = specific_buf;
890
891 /* should do some kind of sanity check on the configuration.
892 * Store the sum of all the bytes in the last byte? */
893
894 /* configure the system */
895
896 /*
897 * Clear the entire RAID descriptor, just to make sure
898 * there is no stale data left in the case of a
899 * reconfiguration
900 */
901 bzero((char *) raidPtr, sizeof(RF_Raid_t));
902 raidPtr->raidid = unit;
903
904 retcode = rf_Configure(raidPtr, k_cfg, NULL);
905
906 if (retcode == 0) {
907
908 /* allow this many simultaneous IO's to
909 this RAID device */
910 raidPtr->openings = RAIDOUTSTANDING;
911
912 raidinit(raidPtr);
913 rf_markalldirty(raidPtr);
914 }
915 /* free the buffers. No return code here. */
916 if (k_cfg->layoutSpecificSize) {
917 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
918 }
919 RF_Free(k_cfg, sizeof(RF_Config_t));
920
921 return (retcode);
922
923 /* shutdown the system */
924 case RAIDFRAME_SHUTDOWN:
925
926 if ((error = raidlock(rs)) != 0)
927 return (error);
928
929 /*
930 * If somebody has a partition mounted, we shouldn't
931 * shutdown.
932 */
933
934 part = DISKPART(dev);
935 pmask = (1 << part);
936 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
937 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
938 (rs->sc_dkdev.dk_copenmask & pmask))) {
939 raidunlock(rs);
940 return (EBUSY);
941 }
942
943 retcode = rf_Shutdown(raidPtr);
944
945 pool_destroy(&rs->sc_cbufpool);
946
947 /* It's no longer initialized... */
948 rs->sc_flags &= ~RAIDF_INITED;
949
950 /* Detach the disk. */
951 disk_detach(&rs->sc_dkdev);
952
953 raidunlock(rs);
954
955 return (retcode);
956 case RAIDFRAME_GET_COMPONENT_LABEL:
957 clabel_ptr = (RF_ComponentLabel_t **) data;
958 /* need to read the component label for the disk indicated
959 by row,column in clabel */
960
961 /* For practice, let's get it directly fromdisk, rather
962 than from the in-core copy */
963 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
964 (RF_ComponentLabel_t *));
965 if (clabel == NULL)
966 return (ENOMEM);
967
968 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
969
970 retcode = copyin( *clabel_ptr, clabel,
971 sizeof(RF_ComponentLabel_t));
972
973 if (retcode) {
974 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
975 return(retcode);
976 }
977
978 row = clabel->row;
979 column = clabel->column;
980
981 if ((row < 0) || (row >= raidPtr->numRow) ||
982 (column < 0) || (column >= raidPtr->numCol)) {
983 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
984 return(EINVAL);
985 }
986
987 raidread_component_label(raidPtr->Disks[row][column].dev,
988 raidPtr->raid_cinfo[row][column].ci_vp,
989 clabel );
990
991 retcode = copyout((caddr_t) clabel,
992 (caddr_t) *clabel_ptr,
993 sizeof(RF_ComponentLabel_t));
994 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
995 return (retcode);
996
997 case RAIDFRAME_SET_COMPONENT_LABEL:
998 clabel = (RF_ComponentLabel_t *) data;
999
1000 /* XXX check the label for valid stuff... */
1001 /* Note that some things *should not* get modified --
1002 the user should be re-initing the labels instead of
1003 trying to patch things.
1004 */
1005
1006 printf("Got component label:\n");
1007 printf("Version: %d\n",clabel->version);
1008 printf("Serial Number: %d\n",clabel->serial_number);
1009 printf("Mod counter: %d\n",clabel->mod_counter);
1010 printf("Row: %d\n", clabel->row);
1011 printf("Column: %d\n", clabel->column);
1012 printf("Num Rows: %d\n", clabel->num_rows);
1013 printf("Num Columns: %d\n", clabel->num_columns);
1014 printf("Clean: %d\n", clabel->clean);
1015 printf("Status: %d\n", clabel->status);
1016
1017 row = clabel->row;
1018 column = clabel->column;
1019
1020 if ((row < 0) || (row >= raidPtr->numRow) ||
1021 (column < 0) || (column >= raidPtr->numCol)) {
1022 return(EINVAL);
1023 }
1024
1025 /* XXX this isn't allowed to do anything for now :-) */
1026
1027 /* XXX and before it is, we need to fill in the rest
1028 of the fields!?!?!?! */
1029 #if 0
1030 raidwrite_component_label(
1031 raidPtr->Disks[row][column].dev,
1032 raidPtr->raid_cinfo[row][column].ci_vp,
1033 clabel );
1034 #endif
1035 return (0);
1036
1037 case RAIDFRAME_INIT_LABELS:
1038 clabel = (RF_ComponentLabel_t *) data;
1039 /*
1040 we only want the serial number from
1041 the above. We get all the rest of the information
1042 from the config that was used to create this RAID
1043 set.
1044 */
1045
1046 raidPtr->serial_number = clabel->serial_number;
1047
1048 raid_init_component_label(raidPtr, &ci_label);
1049 ci_label.serial_number = clabel->serial_number;
1050
1051 for(row=0;row<raidPtr->numRow;row++) {
1052 ci_label.row = row;
1053 for(column=0;column<raidPtr->numCol;column++) {
1054 diskPtr = &raidPtr->Disks[row][column];
1055 ci_label.partitionSize = diskPtr->partitionSize;
1056 ci_label.column = column;
1057 raidwrite_component_label(
1058 raidPtr->Disks[row][column].dev,
1059 raidPtr->raid_cinfo[row][column].ci_vp,
1060 &ci_label );
1061 }
1062 }
1063
1064 return (retcode);
1065 case RAIDFRAME_SET_AUTOCONFIG:
1066 d = rf_set_autoconfig(raidPtr, *data);
1067 printf("New autoconfig value is: %d\n", d);
1068 *data = d;
1069 return (retcode);
1070
1071 case RAIDFRAME_SET_ROOT:
1072 d = rf_set_rootpartition(raidPtr, *data);
1073 printf("New rootpartition value is: %d\n", d);
1074 *data = d;
1075 return (retcode);
1076
1077 /* initialize all parity */
1078 case RAIDFRAME_REWRITEPARITY:
1079
1080 if (raidPtr->Layout.map->faultsTolerated == 0) {
1081 /* Parity for RAID 0 is trivially correct */
1082 raidPtr->parity_good = RF_RAID_CLEAN;
1083 return(0);
1084 }
1085
1086 if (raidPtr->parity_rewrite_in_progress == 1) {
1087 /* Re-write is already in progress! */
1088 return(EINVAL);
1089 }
1090
1091 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1092 rf_RewriteParityThread,
1093 raidPtr,"raid_parity");
1094 return (retcode);
1095
1096
1097 case RAIDFRAME_ADD_HOT_SPARE:
1098 sparePtr = (RF_SingleComponent_t *) data;
1099 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1100 printf("Adding spare\n");
1101 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1102 return(retcode);
1103
1104 case RAIDFRAME_REMOVE_HOT_SPARE:
1105 return(retcode);
1106
1107 case RAIDFRAME_REBUILD_IN_PLACE:
1108
1109 if (raidPtr->Layout.map->faultsTolerated == 0) {
1110 /* Can't do this on a RAID 0!! */
1111 return(EINVAL);
1112 }
1113
1114 if (raidPtr->recon_in_progress == 1) {
1115 /* a reconstruct is already in progress! */
1116 return(EINVAL);
1117 }
1118
1119 componentPtr = (RF_SingleComponent_t *) data;
1120 memcpy( &component, componentPtr,
1121 sizeof(RF_SingleComponent_t));
1122 row = component.row;
1123 column = component.column;
1124 printf("Rebuild: %d %d\n",row, column);
1125 if ((row < 0) || (row >= raidPtr->numRow) ||
1126 (column < 0) || (column >= raidPtr->numCol)) {
1127 return(EINVAL);
1128 }
1129
1130 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1131 if (rrcopy == NULL)
1132 return(ENOMEM);
1133
1134 rrcopy->raidPtr = (void *) raidPtr;
1135 rrcopy->row = row;
1136 rrcopy->col = column;
1137
1138 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1139 rf_ReconstructInPlaceThread,
1140 rrcopy,"raid_reconip");
1141 return(retcode);
1142
1143 case RAIDFRAME_GET_INFO:
1144 if (!raidPtr->valid)
1145 return (ENODEV);
1146 ucfgp = (RF_DeviceConfig_t **) data;
1147 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1148 (RF_DeviceConfig_t *));
1149 if (d_cfg == NULL)
1150 return (ENOMEM);
1151 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1152 d_cfg->rows = raidPtr->numRow;
1153 d_cfg->cols = raidPtr->numCol;
1154 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1155 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1156 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1157 return (ENOMEM);
1158 }
1159 d_cfg->nspares = raidPtr->numSpare;
1160 if (d_cfg->nspares >= RF_MAX_DISKS) {
1161 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1162 return (ENOMEM);
1163 }
1164 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1165 d = 0;
1166 for (i = 0; i < d_cfg->rows; i++) {
1167 for (j = 0; j < d_cfg->cols; j++) {
1168 d_cfg->devs[d] = raidPtr->Disks[i][j];
1169 d++;
1170 }
1171 }
1172 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1173 d_cfg->spares[i] = raidPtr->Disks[0][j];
1174 }
1175 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1176 sizeof(RF_DeviceConfig_t));
1177 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1178
1179 return (retcode);
1180
1181 case RAIDFRAME_CHECK_PARITY:
1182 *(int *) data = raidPtr->parity_good;
1183 return (0);
1184
1185 case RAIDFRAME_RESET_ACCTOTALS:
1186 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1187 return (0);
1188
1189 case RAIDFRAME_GET_ACCTOTALS:
1190 totals = (RF_AccTotals_t *) data;
1191 *totals = raidPtr->acc_totals;
1192 return (0);
1193
1194 case RAIDFRAME_KEEP_ACCTOTALS:
1195 raidPtr->keep_acc_totals = *(int *)data;
1196 return (0);
1197
1198 case RAIDFRAME_GET_SIZE:
1199 *(int *) data = raidPtr->totalSectors;
1200 return (0);
1201
1202 /* fail a disk & optionally start reconstruction */
1203 case RAIDFRAME_FAIL_DISK:
1204
1205 if (raidPtr->Layout.map->faultsTolerated == 0) {
1206 /* Can't do this on a RAID 0!! */
1207 return(EINVAL);
1208 }
1209
1210 rr = (struct rf_recon_req *) data;
1211
1212 if (rr->row < 0 || rr->row >= raidPtr->numRow
1213 || rr->col < 0 || rr->col >= raidPtr->numCol)
1214 return (EINVAL);
1215
1216 printf("raid%d: Failing the disk: row: %d col: %d\n",
1217 unit, rr->row, rr->col);
1218
1219 /* make a copy of the recon request so that we don't rely on
1220 * the user's buffer */
1221 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1222 if (rrcopy == NULL)
1223 return(ENOMEM);
1224 bcopy(rr, rrcopy, sizeof(*rr));
1225 rrcopy->raidPtr = (void *) raidPtr;
1226
1227 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1228 rf_ReconThread,
1229 rrcopy,"raid_recon");
1230 return (0);
1231
1232 /* invoke a copyback operation after recon on whatever disk
1233 * needs it, if any */
1234 case RAIDFRAME_COPYBACK:
1235
1236 if (raidPtr->Layout.map->faultsTolerated == 0) {
1237 /* This makes no sense on a RAID 0!! */
1238 return(EINVAL);
1239 }
1240
1241 if (raidPtr->copyback_in_progress == 1) {
1242 /* Copyback is already in progress! */
1243 return(EINVAL);
1244 }
1245
1246 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1247 rf_CopybackThread,
1248 raidPtr,"raid_copyback");
1249 return (retcode);
1250
1251 /* return the percentage completion of reconstruction */
1252 case RAIDFRAME_CHECK_RECON_STATUS:
1253 if (raidPtr->Layout.map->faultsTolerated == 0) {
1254 /* This makes no sense on a RAID 0 */
1255 return(EINVAL);
1256 }
1257 row = 0; /* XXX we only consider a single row... */
1258 if (raidPtr->status[row] != rf_rs_reconstructing)
1259 *(int *) data = 100;
1260 else
1261 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1262 return (0);
1263
1264 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1265 if (raidPtr->Layout.map->faultsTolerated == 0) {
1266 /* This makes no sense on a RAID 0 */
1267 return(EINVAL);
1268 }
1269 if (raidPtr->parity_rewrite_in_progress == 1) {
1270 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1271 } else {
1272 *(int *) data = 100;
1273 }
1274 return (0);
1275
1276 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1277 if (raidPtr->Layout.map->faultsTolerated == 0) {
1278 /* This makes no sense on a RAID 0 */
1279 return(EINVAL);
1280 }
1281 if (raidPtr->copyback_in_progress == 1) {
1282 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1283 raidPtr->Layout.numStripe;
1284 } else {
1285 *(int *) data = 100;
1286 }
1287 return (0);
1288
1289
1290 /* the sparetable daemon calls this to wait for the kernel to
1291 * need a spare table. this ioctl does not return until a
1292 * spare table is needed. XXX -- calling mpsleep here in the
1293 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1294 * -- I should either compute the spare table in the kernel,
1295 * or have a different -- XXX XXX -- interface (a different
1296 * character device) for delivering the table -- XXX */
1297 #if 0
1298 case RAIDFRAME_SPARET_WAIT:
1299 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1300 while (!rf_sparet_wait_queue)
1301 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1302 waitreq = rf_sparet_wait_queue;
1303 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1304 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1305
1306 /* structure assignment */
1307 *((RF_SparetWait_t *) data) = *waitreq;
1308
1309 RF_Free(waitreq, sizeof(*waitreq));
1310 return (0);
1311
1312 /* wakes up a process waiting on SPARET_WAIT and puts an error
1313 * code in it that will cause the daemon to exit */
1314 case RAIDFRAME_ABORT_SPARET_WAIT:
1315 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1316 waitreq->fcol = -1;
1317 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1318 waitreq->next = rf_sparet_wait_queue;
1319 rf_sparet_wait_queue = waitreq;
1320 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1321 wakeup(&rf_sparet_wait_queue);
1322 return (0);
1323
1324 /* used by the spare table daemon to deliver a spare table
1325 * into the kernel */
1326 case RAIDFRAME_SEND_SPARET:
1327
1328 /* install the spare table */
1329 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1330
1331 /* respond to the requestor. the return status of the spare
1332 * table installation is passed in the "fcol" field */
1333 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1334 waitreq->fcol = retcode;
1335 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1336 waitreq->next = rf_sparet_resp_queue;
1337 rf_sparet_resp_queue = waitreq;
1338 wakeup(&rf_sparet_resp_queue);
1339 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1340
1341 return (retcode);
1342 #endif
1343
1344 default:
1345 break; /* fall through to the os-specific code below */
1346
1347 }
1348
1349 if (!raidPtr->valid)
1350 return (EINVAL);
1351
1352 /*
1353 * Add support for "regular" device ioctls here.
1354 */
1355
1356 switch (cmd) {
1357 case DIOCGDINFO:
1358 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1359 break;
1360
1361 case DIOCGPART:
1362 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1363 ((struct partinfo *) data)->part =
1364 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1365 break;
1366
1367 case DIOCWDINFO:
1368 case DIOCSDINFO:
1369 if ((error = raidlock(rs)) != 0)
1370 return (error);
1371
1372 rs->sc_flags |= RAIDF_LABELLING;
1373
1374 error = setdisklabel(rs->sc_dkdev.dk_label,
1375 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1376 if (error == 0) {
1377 if (cmd == DIOCWDINFO)
1378 error = writedisklabel(RAIDLABELDEV(dev),
1379 raidstrategy, rs->sc_dkdev.dk_label,
1380 rs->sc_dkdev.dk_cpulabel);
1381 }
1382 rs->sc_flags &= ~RAIDF_LABELLING;
1383
1384 raidunlock(rs);
1385
1386 if (error)
1387 return (error);
1388 break;
1389
1390 case DIOCWLABEL:
1391 if (*(int *) data != 0)
1392 rs->sc_flags |= RAIDF_WLABEL;
1393 else
1394 rs->sc_flags &= ~RAIDF_WLABEL;
1395 break;
1396
1397 case DIOCGDEFLABEL:
1398 raidgetdefaultlabel(raidPtr, rs,
1399 (struct disklabel *) data);
1400 break;
1401
1402 default:
1403 retcode = ENOTTY;
1404 }
1405 return (retcode);
1406
1407 }
1408
1409
1410 /* raidinit -- complete the rest of the initialization for the
1411 RAIDframe device. */
1412
1413
1414 static void
1415 raidinit(raidPtr)
1416 RF_Raid_t *raidPtr;
1417 {
1418 struct raid_softc *rs;
1419 int unit;
1420
1421 unit = raidPtr->raidid;
1422
1423 rs = &raid_softc[unit];
1424 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1425 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1426
1427
1428 /* XXX should check return code first... */
1429 rs->sc_flags |= RAIDF_INITED;
1430
1431 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1432
1433 rs->sc_dkdev.dk_name = rs->sc_xname;
1434
1435 /* disk_attach actually creates space for the CPU disklabel, among
1436 * other things, so it's critical to call this *BEFORE* we try putzing
1437 * with disklabels. */
1438
1439 disk_attach(&rs->sc_dkdev);
1440
1441 /* XXX There may be a weird interaction here between this, and
1442 * protectedSectors, as used in RAIDframe. */
1443
1444 rs->sc_size = raidPtr->totalSectors;
1445
1446 }
1447
1448 /* wake up the daemon & tell it to get us a spare table
1449 * XXX
1450 * the entries in the queues should be tagged with the raidPtr
1451 * so that in the extremely rare case that two recons happen at once,
1452 * we know for which device were requesting a spare table
1453 * XXX
1454 *
1455 * XXX This code is not currently used. GO
1456 */
1457 int
1458 rf_GetSpareTableFromDaemon(req)
1459 RF_SparetWait_t *req;
1460 {
1461 int retcode;
1462
1463 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1464 req->next = rf_sparet_wait_queue;
1465 rf_sparet_wait_queue = req;
1466 wakeup(&rf_sparet_wait_queue);
1467
1468 /* mpsleep unlocks the mutex */
1469 while (!rf_sparet_resp_queue) {
1470 tsleep(&rf_sparet_resp_queue, PRIBIO,
1471 "raidframe getsparetable", 0);
1472 }
1473 req = rf_sparet_resp_queue;
1474 rf_sparet_resp_queue = req->next;
1475 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1476
1477 retcode = req->fcol;
1478 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1479 * alloc'd */
1480 return (retcode);
1481 }
1482
1483 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1484 * bp & passes it down.
1485 * any calls originating in the kernel must use non-blocking I/O
1486 * do some extra sanity checking to return "appropriate" error values for
1487 * certain conditions (to make some standard utilities work)
1488 *
1489 * Formerly known as: rf_DoAccessKernel
1490 */
1491 void
1492 raidstart(raidPtr)
1493 RF_Raid_t *raidPtr;
1494 {
1495 RF_SectorCount_t num_blocks, pb, sum;
1496 RF_RaidAddr_t raid_addr;
1497 int retcode;
1498 struct partition *pp;
1499 daddr_t blocknum;
1500 int unit;
1501 struct raid_softc *rs;
1502 int do_async;
1503 struct buf *bp;
1504
1505 unit = raidPtr->raidid;
1506 rs = &raid_softc[unit];
1507
1508 /* quick check to see if anything has died recently */
1509 RF_LOCK_MUTEX(raidPtr->mutex);
1510 if (raidPtr->numNewFailures > 0) {
1511 rf_update_component_labels(raidPtr);
1512 raidPtr->numNewFailures--;
1513 }
1514 RF_UNLOCK_MUTEX(raidPtr->mutex);
1515
1516 /* Check to see if we're at the limit... */
1517 RF_LOCK_MUTEX(raidPtr->mutex);
1518 while (raidPtr->openings > 0) {
1519 RF_UNLOCK_MUTEX(raidPtr->mutex);
1520
1521 /* get the next item, if any, from the queue */
1522 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1523 /* nothing more to do */
1524 return;
1525 }
1526 BUFQ_REMOVE(&rs->buf_queue, bp);
1527
1528 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1529 * partition.. Need to make it absolute to the underlying
1530 * device.. */
1531
1532 blocknum = bp->b_blkno;
1533 if (DISKPART(bp->b_dev) != RAW_PART) {
1534 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1535 blocknum += pp->p_offset;
1536 }
1537
1538 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1539 (int) blocknum));
1540
1541 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1542 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1543
1544 /* *THIS* is where we adjust what block we're going to...
1545 * but DO NOT TOUCH bp->b_blkno!!! */
1546 raid_addr = blocknum;
1547
1548 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1549 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1550 sum = raid_addr + num_blocks + pb;
1551 if (1 || rf_debugKernelAccess) {
1552 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1553 (int) raid_addr, (int) sum, (int) num_blocks,
1554 (int) pb, (int) bp->b_resid));
1555 }
1556 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1557 || (sum < num_blocks) || (sum < pb)) {
1558 bp->b_error = ENOSPC;
1559 bp->b_flags |= B_ERROR;
1560 bp->b_resid = bp->b_bcount;
1561 biodone(bp);
1562 RF_LOCK_MUTEX(raidPtr->mutex);
1563 continue;
1564 }
1565 /*
1566 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1567 */
1568
1569 if (bp->b_bcount & raidPtr->sectorMask) {
1570 bp->b_error = EINVAL;
1571 bp->b_flags |= B_ERROR;
1572 bp->b_resid = bp->b_bcount;
1573 biodone(bp);
1574 RF_LOCK_MUTEX(raidPtr->mutex);
1575 continue;
1576
1577 }
1578 db1_printf(("Calling DoAccess..\n"));
1579
1580
1581 RF_LOCK_MUTEX(raidPtr->mutex);
1582 raidPtr->openings--;
1583 RF_UNLOCK_MUTEX(raidPtr->mutex);
1584
1585 /*
1586 * Everything is async.
1587 */
1588 do_async = 1;
1589
1590 /* don't ever condition on bp->b_flags & B_WRITE.
1591 * always condition on B_READ instead */
1592
1593 /* XXX we're still at splbio() here... do we *really*
1594 need to be? */
1595
1596
1597 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1598 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1599 do_async, raid_addr, num_blocks,
1600 bp->b_un.b_addr, bp, NULL, NULL,
1601 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1602
1603
1604 RF_LOCK_MUTEX(raidPtr->mutex);
1605 }
1606 RF_UNLOCK_MUTEX(raidPtr->mutex);
1607 }
1608
1609
1610
1611
1612 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1613
1614 int
1615 rf_DispatchKernelIO(queue, req)
1616 RF_DiskQueue_t *queue;
1617 RF_DiskQueueData_t *req;
1618 {
1619 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1620 struct buf *bp;
1621 struct raidbuf *raidbp = NULL;
1622 struct raid_softc *rs;
1623 int unit;
1624 int s;
1625
1626 s=0;
1627 /* s = splbio();*/ /* want to test this */
1628 /* XXX along with the vnode, we also need the softc associated with
1629 * this device.. */
1630
1631 req->queue = queue;
1632
1633 unit = queue->raidPtr->raidid;
1634
1635 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1636
1637 if (unit >= numraid) {
1638 printf("Invalid unit number: %d %d\n", unit, numraid);
1639 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1640 }
1641 rs = &raid_softc[unit];
1642
1643 /* XXX is this the right place? */
1644 disk_busy(&rs->sc_dkdev);
1645
1646 bp = req->bp;
1647 #if 1
1648 /* XXX when there is a physical disk failure, someone is passing us a
1649 * buffer that contains old stuff!! Attempt to deal with this problem
1650 * without taking a performance hit... (not sure where the real bug
1651 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1652
1653 if (bp->b_flags & B_ERROR) {
1654 bp->b_flags &= ~B_ERROR;
1655 }
1656 if (bp->b_error != 0) {
1657 bp->b_error = 0;
1658 }
1659 #endif
1660 raidbp = RAIDGETBUF(rs);
1661
1662 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1663
1664 /*
1665 * context for raidiodone
1666 */
1667 raidbp->rf_obp = bp;
1668 raidbp->req = req;
1669
1670 LIST_INIT(&raidbp->rf_buf.b_dep);
1671
1672 switch (req->type) {
1673 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1674 /* XXX need to do something extra here.. */
1675 /* I'm leaving this in, as I've never actually seen it used,
1676 * and I'd like folks to report it... GO */
1677 printf(("WAKEUP CALLED\n"));
1678 queue->numOutstanding++;
1679
1680 /* XXX need to glue the original buffer into this?? */
1681
1682 KernelWakeupFunc(&raidbp->rf_buf);
1683 break;
1684
1685 case RF_IO_TYPE_READ:
1686 case RF_IO_TYPE_WRITE:
1687
1688 if (req->tracerec) {
1689 RF_ETIMER_START(req->tracerec->timer);
1690 }
1691 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1692 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1693 req->sectorOffset, req->numSector,
1694 req->buf, KernelWakeupFunc, (void *) req,
1695 queue->raidPtr->logBytesPerSector, req->b_proc);
1696
1697 if (rf_debugKernelAccess) {
1698 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1699 (long) bp->b_blkno));
1700 }
1701 queue->numOutstanding++;
1702 queue->last_deq_sector = req->sectorOffset;
1703 /* acc wouldn't have been let in if there were any pending
1704 * reqs at any other priority */
1705 queue->curPriority = req->priority;
1706
1707 db1_printf(("Going for %c to unit %d row %d col %d\n",
1708 req->type, unit, queue->row, queue->col));
1709 db1_printf(("sector %d count %d (%d bytes) %d\n",
1710 (int) req->sectorOffset, (int) req->numSector,
1711 (int) (req->numSector <<
1712 queue->raidPtr->logBytesPerSector),
1713 (int) queue->raidPtr->logBytesPerSector));
1714 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1715 raidbp->rf_buf.b_vp->v_numoutput++;
1716 }
1717 VOP_STRATEGY(&raidbp->rf_buf);
1718
1719 break;
1720
1721 default:
1722 panic("bad req->type in rf_DispatchKernelIO");
1723 }
1724 db1_printf(("Exiting from DispatchKernelIO\n"));
1725 /* splx(s); */ /* want to test this */
1726 return (0);
1727 }
1728 /* this is the callback function associated with a I/O invoked from
1729 kernel code.
1730 */
1731 static void
1732 KernelWakeupFunc(vbp)
1733 struct buf *vbp;
1734 {
1735 RF_DiskQueueData_t *req = NULL;
1736 RF_DiskQueue_t *queue;
1737 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1738 struct buf *bp;
1739 struct raid_softc *rs;
1740 int unit;
1741 register int s;
1742
1743 s = splbio();
1744 db1_printf(("recovering the request queue:\n"));
1745 req = raidbp->req;
1746
1747 bp = raidbp->rf_obp;
1748
1749 queue = (RF_DiskQueue_t *) req->queue;
1750
1751 if (raidbp->rf_buf.b_flags & B_ERROR) {
1752 bp->b_flags |= B_ERROR;
1753 bp->b_error = raidbp->rf_buf.b_error ?
1754 raidbp->rf_buf.b_error : EIO;
1755 }
1756
1757 /* XXX methinks this could be wrong... */
1758 #if 1
1759 bp->b_resid = raidbp->rf_buf.b_resid;
1760 #endif
1761
1762 if (req->tracerec) {
1763 RF_ETIMER_STOP(req->tracerec->timer);
1764 RF_ETIMER_EVAL(req->tracerec->timer);
1765 RF_LOCK_MUTEX(rf_tracing_mutex);
1766 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1767 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1768 req->tracerec->num_phys_ios++;
1769 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1770 }
1771 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1772
1773 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1774
1775
1776 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1777 * ballistic, and mark the component as hosed... */
1778
1779 if (bp->b_flags & B_ERROR) {
1780 /* Mark the disk as dead */
1781 /* but only mark it once... */
1782 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1783 rf_ds_optimal) {
1784 printf("raid%d: IO Error. Marking %s as failed.\n",
1785 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1786 queue->raidPtr->Disks[queue->row][queue->col].status =
1787 rf_ds_failed;
1788 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1789 queue->raidPtr->numFailures++;
1790 queue->raidPtr->numNewFailures++;
1791 /* XXX here we should bump the version number for each component, and write that data out */
1792 } else { /* Disk is already dead... */
1793 /* printf("Disk already marked as dead!\n"); */
1794 }
1795
1796 }
1797
1798 rs = &raid_softc[unit];
1799 RAIDPUTBUF(rs, raidbp);
1800
1801
1802 if (bp->b_resid == 0) {
1803 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1804 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1805 }
1806
1807 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1808 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1809
1810 splx(s);
1811 }
1812
1813
1814
1815 /*
1816 * initialize a buf structure for doing an I/O in the kernel.
1817 */
1818 static void
1819 InitBP(
1820 struct buf * bp,
1821 struct vnode * b_vp,
1822 unsigned rw_flag,
1823 dev_t dev,
1824 RF_SectorNum_t startSect,
1825 RF_SectorCount_t numSect,
1826 caddr_t buf,
1827 void (*cbFunc) (struct buf *),
1828 void *cbArg,
1829 int logBytesPerSector,
1830 struct proc * b_proc)
1831 {
1832 /* bp->b_flags = B_PHYS | rw_flag; */
1833 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1834 bp->b_bcount = numSect << logBytesPerSector;
1835 bp->b_bufsize = bp->b_bcount;
1836 bp->b_error = 0;
1837 bp->b_dev = dev;
1838 bp->b_un.b_addr = buf;
1839 bp->b_blkno = startSect;
1840 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1841 if (bp->b_bcount == 0) {
1842 panic("bp->b_bcount is zero in InitBP!!\n");
1843 }
1844 bp->b_proc = b_proc;
1845 bp->b_iodone = cbFunc;
1846 bp->b_vp = b_vp;
1847
1848 }
1849
1850 static void
1851 raidgetdefaultlabel(raidPtr, rs, lp)
1852 RF_Raid_t *raidPtr;
1853 struct raid_softc *rs;
1854 struct disklabel *lp;
1855 {
1856 db1_printf(("Building a default label...\n"));
1857 bzero(lp, sizeof(*lp));
1858
1859 /* fabricate a label... */
1860 lp->d_secperunit = raidPtr->totalSectors;
1861 lp->d_secsize = raidPtr->bytesPerSector;
1862 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1863 lp->d_ntracks = 1;
1864 lp->d_ncylinders = raidPtr->totalSectors /
1865 (lp->d_nsectors * lp->d_ntracks);
1866 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1867
1868 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1869 lp->d_type = DTYPE_RAID;
1870 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1871 lp->d_rpm = 3600;
1872 lp->d_interleave = 1;
1873 lp->d_flags = 0;
1874
1875 lp->d_partitions[RAW_PART].p_offset = 0;
1876 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1877 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1878 lp->d_npartitions = RAW_PART + 1;
1879
1880 lp->d_magic = DISKMAGIC;
1881 lp->d_magic2 = DISKMAGIC;
1882 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1883
1884 }
1885 /*
1886 * Read the disklabel from the raid device. If one is not present, fake one
1887 * up.
1888 */
1889 static void
1890 raidgetdisklabel(dev)
1891 dev_t dev;
1892 {
1893 int unit = raidunit(dev);
1894 struct raid_softc *rs = &raid_softc[unit];
1895 char *errstring;
1896 struct disklabel *lp = rs->sc_dkdev.dk_label;
1897 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1898 RF_Raid_t *raidPtr;
1899
1900 db1_printf(("Getting the disklabel...\n"));
1901
1902 bzero(clp, sizeof(*clp));
1903
1904 raidPtr = raidPtrs[unit];
1905
1906 raidgetdefaultlabel(raidPtr, rs, lp);
1907
1908 /*
1909 * Call the generic disklabel extraction routine.
1910 */
1911 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1912 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1913 if (errstring)
1914 raidmakedisklabel(rs);
1915 else {
1916 int i;
1917 struct partition *pp;
1918
1919 /*
1920 * Sanity check whether the found disklabel is valid.
1921 *
1922 * This is necessary since total size of the raid device
1923 * may vary when an interleave is changed even though exactly
1924 * same componets are used, and old disklabel may used
1925 * if that is found.
1926 */
1927 if (lp->d_secperunit != rs->sc_size)
1928 printf("WARNING: %s: "
1929 "total sector size in disklabel (%d) != "
1930 "the size of raid (%ld)\n", rs->sc_xname,
1931 lp->d_secperunit, (long) rs->sc_size);
1932 for (i = 0; i < lp->d_npartitions; i++) {
1933 pp = &lp->d_partitions[i];
1934 if (pp->p_offset + pp->p_size > rs->sc_size)
1935 printf("WARNING: %s: end of partition `%c' "
1936 "exceeds the size of raid (%ld)\n",
1937 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1938 }
1939 }
1940
1941 }
1942 /*
1943 * Take care of things one might want to take care of in the event
1944 * that a disklabel isn't present.
1945 */
1946 static void
1947 raidmakedisklabel(rs)
1948 struct raid_softc *rs;
1949 {
1950 struct disklabel *lp = rs->sc_dkdev.dk_label;
1951 db1_printf(("Making a label..\n"));
1952
1953 /*
1954 * For historical reasons, if there's no disklabel present
1955 * the raw partition must be marked FS_BSDFFS.
1956 */
1957
1958 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1959
1960 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1961
1962 lp->d_checksum = dkcksum(lp);
1963 }
1964 /*
1965 * Lookup the provided name in the filesystem. If the file exists,
1966 * is a valid block device, and isn't being used by anyone else,
1967 * set *vpp to the file's vnode.
1968 * You'll find the original of this in ccd.c
1969 */
1970 int
1971 raidlookup(path, p, vpp)
1972 char *path;
1973 struct proc *p;
1974 struct vnode **vpp; /* result */
1975 {
1976 struct nameidata nd;
1977 struct vnode *vp;
1978 struct vattr va;
1979 int error;
1980
1981 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1982 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1983 #ifdef DEBUG
1984 printf("RAIDframe: vn_open returned %d\n", error);
1985 #endif
1986 return (error);
1987 }
1988 vp = nd.ni_vp;
1989 if (vp->v_usecount > 1) {
1990 VOP_UNLOCK(vp, 0);
1991 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1992 return (EBUSY);
1993 }
1994 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1995 VOP_UNLOCK(vp, 0);
1996 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1997 return (error);
1998 }
1999 /* XXX: eventually we should handle VREG, too. */
2000 if (va.va_type != VBLK) {
2001 VOP_UNLOCK(vp, 0);
2002 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2003 return (ENOTBLK);
2004 }
2005 VOP_UNLOCK(vp, 0);
2006 *vpp = vp;
2007 return (0);
2008 }
2009 /*
2010 * Wait interruptibly for an exclusive lock.
2011 *
2012 * XXX
2013 * Several drivers do this; it should be abstracted and made MP-safe.
2014 * (Hmm... where have we seen this warning before :-> GO )
2015 */
2016 static int
2017 raidlock(rs)
2018 struct raid_softc *rs;
2019 {
2020 int error;
2021
2022 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2023 rs->sc_flags |= RAIDF_WANTED;
2024 if ((error =
2025 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2026 return (error);
2027 }
2028 rs->sc_flags |= RAIDF_LOCKED;
2029 return (0);
2030 }
2031 /*
2032 * Unlock and wake up any waiters.
2033 */
2034 static void
2035 raidunlock(rs)
2036 struct raid_softc *rs;
2037 {
2038
2039 rs->sc_flags &= ~RAIDF_LOCKED;
2040 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2041 rs->sc_flags &= ~RAIDF_WANTED;
2042 wakeup(rs);
2043 }
2044 }
2045
2046
2047 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2048 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2049
2050 int
2051 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2052 {
2053 RF_ComponentLabel_t clabel;
2054 raidread_component_label(dev, b_vp, &clabel);
2055 clabel.mod_counter = mod_counter;
2056 clabel.clean = RF_RAID_CLEAN;
2057 raidwrite_component_label(dev, b_vp, &clabel);
2058 return(0);
2059 }
2060
2061
2062 int
2063 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2064 {
2065 RF_ComponentLabel_t clabel;
2066 raidread_component_label(dev, b_vp, &clabel);
2067 clabel.mod_counter = mod_counter;
2068 clabel.clean = RF_RAID_DIRTY;
2069 raidwrite_component_label(dev, b_vp, &clabel);
2070 return(0);
2071 }
2072
2073 /* ARGSUSED */
2074 int
2075 raidread_component_label(dev, b_vp, clabel)
2076 dev_t dev;
2077 struct vnode *b_vp;
2078 RF_ComponentLabel_t *clabel;
2079 {
2080 struct buf *bp;
2081 int error;
2082
2083 /* XXX should probably ensure that we don't try to do this if
2084 someone has changed rf_protected_sectors. */
2085
2086 /* get a block of the appropriate size... */
2087 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2088 bp->b_dev = dev;
2089
2090 /* get our ducks in a row for the read */
2091 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2092 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2093 bp->b_flags = B_BUSY | B_READ;
2094 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2095
2096 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2097
2098 error = biowait(bp);
2099
2100 if (!error) {
2101 memcpy(clabel, bp->b_un.b_addr,
2102 sizeof(RF_ComponentLabel_t));
2103 #if 0
2104 rf_print_component_label( clabel );
2105 #endif
2106 } else {
2107 #if 0
2108 printf("Failed to read RAID component label!\n");
2109 #endif
2110 }
2111
2112 bp->b_flags = B_INVAL | B_AGE;
2113 brelse(bp);
2114 return(error);
2115 }
2116 /* ARGSUSED */
2117 int
2118 raidwrite_component_label(dev, b_vp, clabel)
2119 dev_t dev;
2120 struct vnode *b_vp;
2121 RF_ComponentLabel_t *clabel;
2122 {
2123 struct buf *bp;
2124 int error;
2125
2126 /* get a block of the appropriate size... */
2127 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2128 bp->b_dev = dev;
2129
2130 /* get our ducks in a row for the write */
2131 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2132 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2133 bp->b_flags = B_BUSY | B_WRITE;
2134 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2135
2136 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2137
2138 memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
2139
2140 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2141 error = biowait(bp);
2142 bp->b_flags = B_INVAL | B_AGE;
2143 brelse(bp);
2144 if (error) {
2145 #if 1
2146 printf("Failed to write RAID component info!\n");
2147 #endif
2148 }
2149
2150 return(error);
2151 }
2152
2153 void
2154 rf_markalldirty( raidPtr )
2155 RF_Raid_t *raidPtr;
2156 {
2157 RF_ComponentLabel_t clabel;
2158 int r,c;
2159
2160 raidPtr->mod_counter++;
2161 for (r = 0; r < raidPtr->numRow; r++) {
2162 for (c = 0; c < raidPtr->numCol; c++) {
2163 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2164 raidread_component_label(
2165 raidPtr->Disks[r][c].dev,
2166 raidPtr->raid_cinfo[r][c].ci_vp,
2167 &clabel);
2168 if (clabel.status == rf_ds_spared) {
2169 /* XXX do something special...
2170 but whatever you do, don't
2171 try to access it!! */
2172 } else {
2173 #if 0
2174 clabel.status =
2175 raidPtr->Disks[r][c].status;
2176 raidwrite_component_label(
2177 raidPtr->Disks[r][c].dev,
2178 raidPtr->raid_cinfo[r][c].ci_vp,
2179 &clabel);
2180 #endif
2181 raidmarkdirty(
2182 raidPtr->Disks[r][c].dev,
2183 raidPtr->raid_cinfo[r][c].ci_vp,
2184 raidPtr->mod_counter);
2185 }
2186 }
2187 }
2188 }
2189 /* printf("Component labels marked dirty.\n"); */
2190 #if 0
2191 for( c = 0; c < raidPtr->numSpare ; c++) {
2192 sparecol = raidPtr->numCol + c;
2193 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2194 /*
2195
2196 XXX this is where we get fancy and map this spare
2197 into it's correct spot in the array.
2198
2199 */
2200 /*
2201
2202 we claim this disk is "optimal" if it's
2203 rf_ds_used_spare, as that means it should be
2204 directly substitutable for the disk it replaced.
2205 We note that too...
2206
2207 */
2208
2209 for(i=0;i<raidPtr->numRow;i++) {
2210 for(j=0;j<raidPtr->numCol;j++) {
2211 if ((raidPtr->Disks[i][j].spareRow ==
2212 r) &&
2213 (raidPtr->Disks[i][j].spareCol ==
2214 sparecol)) {
2215 srow = r;
2216 scol = sparecol;
2217 break;
2218 }
2219 }
2220 }
2221
2222 raidread_component_label(
2223 raidPtr->Disks[r][sparecol].dev,
2224 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2225 &clabel);
2226 /* make sure status is noted */
2227 clabel.version = RF_COMPONENT_LABEL_VERSION;
2228 clabel.mod_counter = raidPtr->mod_counter;
2229 clabel.serial_number = raidPtr->serial_number;
2230 clabel.row = srow;
2231 clabel.column = scol;
2232 clabel.num_rows = raidPtr->numRow;
2233 clabel.num_columns = raidPtr->numCol;
2234 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2235 clabel.status = rf_ds_optimal;
2236 raidwrite_component_label(
2237 raidPtr->Disks[r][sparecol].dev,
2238 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2239 &clabel);
2240 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2241 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2242 }
2243 }
2244
2245 #endif
2246 }
2247
2248
2249 void
2250 rf_update_component_labels( raidPtr )
2251 RF_Raid_t *raidPtr;
2252 {
2253 RF_ComponentLabel_t clabel;
2254 int sparecol;
2255 int r,c;
2256 int i,j;
2257 int srow, scol;
2258
2259 srow = -1;
2260 scol = -1;
2261
2262 /* XXX should do extra checks to make sure things really are clean,
2263 rather than blindly setting the clean bit... */
2264
2265 raidPtr->mod_counter++;
2266
2267 for (r = 0; r < raidPtr->numRow; r++) {
2268 for (c = 0; c < raidPtr->numCol; c++) {
2269 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2270 raidread_component_label(
2271 raidPtr->Disks[r][c].dev,
2272 raidPtr->raid_cinfo[r][c].ci_vp,
2273 &clabel);
2274 /* make sure status is noted */
2275 clabel.status = rf_ds_optimal;
2276 /* bump the counter */
2277 clabel.mod_counter = raidPtr->mod_counter;
2278
2279 raidwrite_component_label(
2280 raidPtr->Disks[r][c].dev,
2281 raidPtr->raid_cinfo[r][c].ci_vp,
2282 &clabel);
2283 }
2284 /* else we don't touch it.. */
2285 }
2286 }
2287
2288 for( c = 0; c < raidPtr->numSpare ; c++) {
2289 sparecol = raidPtr->numCol + c;
2290 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2291 /*
2292
2293 we claim this disk is "optimal" if it's
2294 rf_ds_used_spare, as that means it should be
2295 directly substitutable for the disk it replaced.
2296 We note that too...
2297
2298 */
2299
2300 for(i=0;i<raidPtr->numRow;i++) {
2301 for(j=0;j<raidPtr->numCol;j++) {
2302 if ((raidPtr->Disks[i][j].spareRow ==
2303 0) &&
2304 (raidPtr->Disks[i][j].spareCol ==
2305 sparecol)) {
2306 srow = i;
2307 scol = j;
2308 break;
2309 }
2310 }
2311 }
2312
2313 /* XXX shouldn't *really* need this... */
2314 raidread_component_label(
2315 raidPtr->Disks[0][sparecol].dev,
2316 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2317 &clabel);
2318 /* make sure status is noted */
2319
2320 raid_init_component_label(raidPtr, &clabel);
2321
2322 clabel.mod_counter = raidPtr->mod_counter;
2323 clabel.row = srow;
2324 clabel.column = scol;
2325 clabel.status = rf_ds_optimal;
2326
2327 raidwrite_component_label(
2328 raidPtr->Disks[0][sparecol].dev,
2329 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2330 &clabel);
2331 }
2332 }
2333 /* printf("Component labels updated\n"); */
2334 }
2335
2336
2337 void
2338 rf_final_update_component_labels( raidPtr )
2339 RF_Raid_t *raidPtr;
2340 {
2341 RF_ComponentLabel_t clabel;
2342 int sparecol;
2343 int r,c;
2344 int i,j;
2345 int srow, scol;
2346
2347 srow = -1;
2348 scol = -1;
2349
2350 /* XXX should do extra checks to make sure things really are clean,
2351 rather than blindly setting the clean bit... */
2352
2353 raidPtr->mod_counter++;
2354
2355 for (r = 0; r < raidPtr->numRow; r++) {
2356 for (c = 0; c < raidPtr->numCol; c++) {
2357 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2358 raidread_component_label(
2359 raidPtr->Disks[r][c].dev,
2360 raidPtr->raid_cinfo[r][c].ci_vp,
2361 &clabel);
2362 /* make sure status is noted */
2363 clabel.status = rf_ds_optimal;
2364 /* bump the counter */
2365 clabel.mod_counter = raidPtr->mod_counter;
2366
2367 raidwrite_component_label(
2368 raidPtr->Disks[r][c].dev,
2369 raidPtr->raid_cinfo[r][c].ci_vp,
2370 &clabel);
2371 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2372 raidmarkclean(
2373 raidPtr->Disks[r][c].dev,
2374 raidPtr->raid_cinfo[r][c].ci_vp,
2375 raidPtr->mod_counter);
2376 }
2377 }
2378 /* else we don't touch it.. */
2379 }
2380 }
2381
2382 for( c = 0; c < raidPtr->numSpare ; c++) {
2383 sparecol = raidPtr->numCol + c;
2384 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2385 /*
2386
2387 we claim this disk is "optimal" if it's
2388 rf_ds_used_spare, as that means it should be
2389 directly substitutable for the disk it replaced.
2390 We note that too...
2391
2392 */
2393
2394 for(i=0;i<raidPtr->numRow;i++) {
2395 for(j=0;j<raidPtr->numCol;j++) {
2396 if ((raidPtr->Disks[i][j].spareRow ==
2397 0) &&
2398 (raidPtr->Disks[i][j].spareCol ==
2399 sparecol)) {
2400 srow = i;
2401 scol = j;
2402 break;
2403 }
2404 }
2405 }
2406
2407 /* XXX shouldn't *really* need this... */
2408 raidread_component_label(
2409 raidPtr->Disks[0][sparecol].dev,
2410 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2411 &clabel);
2412 /* make sure status is noted */
2413
2414 raid_init_component_label(raidPtr, &clabel);
2415
2416 clabel.mod_counter = raidPtr->mod_counter;
2417 clabel.row = srow;
2418 clabel.column = scol;
2419 clabel.status = rf_ds_optimal;
2420
2421 raidwrite_component_label(
2422 raidPtr->Disks[0][sparecol].dev,
2423 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2424 &clabel);
2425 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2426 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2427 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2428 raidPtr->mod_counter);
2429 }
2430 }
2431 }
2432 /* printf("Component labels updated\n"); */
2433 }
2434
2435 void
2436 rf_close_component( raidPtr, vp, auto_configured )
2437 RF_Raid_t *raidPtr;
2438 struct vnode *vp;
2439 int auto_configured;
2440 {
2441 struct proc *p;
2442
2443 p = raidPtr->engine_thread;
2444
2445 if (vp != NULL) {
2446 if (auto_configured == 1) {
2447 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2448 vput(vp);
2449
2450 } else {
2451 VOP_UNLOCK(vp, 0);
2452 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2453 }
2454 } else {
2455 printf("vnode was NULL\n");
2456 }
2457 }
2458
2459
2460 void
2461 rf_UnconfigureVnodes( raidPtr )
2462 RF_Raid_t *raidPtr;
2463 {
2464 int r,c;
2465 struct proc *p;
2466 struct vnode *vp;
2467 int acd;
2468
2469
2470 /* We take this opportunity to close the vnodes like we should.. */
2471
2472 p = raidPtr->engine_thread;
2473
2474 for (r = 0; r < raidPtr->numRow; r++) {
2475 for (c = 0; c < raidPtr->numCol; c++) {
2476 printf("Closing vnode for row: %d col: %d\n", r, c);
2477 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2478 acd = raidPtr->Disks[r][c].auto_configured;
2479 rf_close_component(raidPtr, vp, acd);
2480 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2481 raidPtr->Disks[r][c].auto_configured = 0;
2482 }
2483 }
2484 for (r = 0; r < raidPtr->numSpare; r++) {
2485 printf("Closing vnode for spare: %d\n", r);
2486 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2487 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2488 rf_close_component(raidPtr, vp, acd);
2489 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2490 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2491 }
2492 }
2493
2494
2495 void
2496 rf_ReconThread(req)
2497 struct rf_recon_req *req;
2498 {
2499 int s;
2500 RF_Raid_t *raidPtr;
2501
2502 s = splbio();
2503 raidPtr = (RF_Raid_t *) req->raidPtr;
2504 raidPtr->recon_in_progress = 1;
2505
2506 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2507 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2508
2509 /* XXX get rid of this! we don't need it at all.. */
2510 RF_Free(req, sizeof(*req));
2511
2512 raidPtr->recon_in_progress = 0;
2513 splx(s);
2514
2515 /* That's all... */
2516 kthread_exit(0); /* does not return */
2517 }
2518
2519 void
2520 rf_RewriteParityThread(raidPtr)
2521 RF_Raid_t *raidPtr;
2522 {
2523 int retcode;
2524 int s;
2525
2526 raidPtr->parity_rewrite_in_progress = 1;
2527 s = splbio();
2528 retcode = rf_RewriteParity(raidPtr);
2529 splx(s);
2530 if (retcode) {
2531 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2532 } else {
2533 /* set the clean bit! If we shutdown correctly,
2534 the clean bit on each component label will get
2535 set */
2536 raidPtr->parity_good = RF_RAID_CLEAN;
2537 }
2538 raidPtr->parity_rewrite_in_progress = 0;
2539
2540 /* That's all... */
2541 kthread_exit(0); /* does not return */
2542 }
2543
2544
2545 void
2546 rf_CopybackThread(raidPtr)
2547 RF_Raid_t *raidPtr;
2548 {
2549 int s;
2550
2551 raidPtr->copyback_in_progress = 1;
2552 s = splbio();
2553 rf_CopybackReconstructedData(raidPtr);
2554 splx(s);
2555 raidPtr->copyback_in_progress = 0;
2556
2557 /* That's all... */
2558 kthread_exit(0); /* does not return */
2559 }
2560
2561
2562 void
2563 rf_ReconstructInPlaceThread(req)
2564 struct rf_recon_req *req;
2565 {
2566 int retcode;
2567 int s;
2568 RF_Raid_t *raidPtr;
2569
2570 s = splbio();
2571 raidPtr = req->raidPtr;
2572 raidPtr->recon_in_progress = 1;
2573 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2574 RF_Free(req, sizeof(*req));
2575 raidPtr->recon_in_progress = 0;
2576 splx(s);
2577
2578 /* That's all... */
2579 kthread_exit(0); /* does not return */
2580 }
2581
/*
 * Mount-root hook for RAID devices.  Intentionally empty: the device
 * argument is accepted and ignored.
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* nothing to do */
}
2588
2589
2590 RF_AutoConfig_t *
2591 rf_find_raid_components()
2592 {
2593 struct devnametobdevmaj *dtobdm;
2594 struct vnode *vp;
2595 struct disklabel label;
2596 struct device *dv;
2597 char *cd_name;
2598 dev_t dev;
2599 int error;
2600 int i;
2601 int good_one;
2602 RF_ComponentLabel_t *clabel;
2603 RF_AutoConfig_t *ac_list;
2604 RF_AutoConfig_t *ac;
2605
2606
2607 /* initialize the AutoConfig list */
2608 ac_list = NULL;
2609
2610 if (raidautoconfig) {
2611
2612 /* we begin by trolling through *all* the devices on the system */
2613
2614 for (dv = alldevs.tqh_first; dv != NULL;
2615 dv = dv->dv_list.tqe_next) {
2616
2617 /* we are only interested in disks... */
2618 if (dv->dv_class != DV_DISK)
2619 continue;
2620
2621 /* we don't care about floppies... */
2622 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2623 continue;
2624 }
2625
2626 /* need to find the device_name_to_block_device_major stuff */
2627 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2628 dtobdm = dev_name2blk;
2629 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2630 dtobdm++;
2631 }
2632
2633 /* get a vnode for the raw partition of this disk */
2634
2635 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2636 if (bdevvp(dev, &vp))
2637 panic("RAID can't alloc vnode");
2638
2639 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2640
2641 if (error) {
2642 /* "Who cares." Continue looking
2643 for something that exists*/
2644 vput(vp);
2645 continue;
2646 }
2647
2648 /* Ok, the disk exists. Go get the disklabel. */
2649 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2650 FREAD, NOCRED, 0);
2651 if (error) {
2652 /*
2653 * XXX can't happen - open() would
2654 * have errored out (or faked up one)
2655 */
2656 printf("can't get label for dev %s%c (%d)!?!?\n",
2657 dv->dv_xname, 'a' + RAW_PART, error);
2658 }
2659
2660 /* don't need this any more. We'll allocate it again
2661 a little later if we really do... */
2662 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2663 vput(vp);
2664
2665 for (i=0; i < label.d_npartitions; i++) {
2666 /* We only support partitions marked as RAID */
2667 if (label.d_partitions[i].p_fstype != FS_RAID)
2668 continue;
2669
2670 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2671 if (bdevvp(dev, &vp))
2672 panic("RAID can't alloc vnode");
2673
2674 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2675 if (error) {
2676 /* Whatever... */
2677 vput(vp);
2678 continue;
2679 }
2680
2681 good_one = 0;
2682
2683 clabel = (RF_ComponentLabel_t *)
2684 malloc(sizeof(RF_ComponentLabel_t),
2685 M_RAIDFRAME, M_NOWAIT);
2686 if (clabel == NULL) {
2687 /* XXX CLEANUP HERE */
2688 printf("RAID auto config: out of memory!\n");
2689 return(NULL); /* XXX probably should panic? */
2690 }
2691
2692 if (!raidread_component_label(dev, vp, clabel)) {
2693 /* Got the label. Does it look reasonable? */
2694 if (rf_reasonable_label(clabel) &&
2695 (clabel->partitionSize <=
2696 label.d_partitions[i].p_size)) {
2697 #if DEBUG
2698 printf("Component on: %s%c: %d\n",
2699 dv->dv_xname, 'a'+i,
2700 label.d_partitions[i].p_size);
2701 rf_print_component_label(clabel);
2702 #endif
2703 /* if it's reasonable, add it,
2704 else ignore it. */
2705 ac = (RF_AutoConfig_t *)
2706 malloc(sizeof(RF_AutoConfig_t),
2707 M_RAIDFRAME,
2708 M_NOWAIT);
2709 if (ac == NULL) {
2710 /* XXX should panic?? */
2711 return(NULL);
2712 }
2713
2714 sprintf(ac->devname, "%s%c",
2715 dv->dv_xname, 'a'+i);
2716 ac->dev = dev;
2717 ac->vp = vp;
2718 ac->clabel = clabel;
2719 ac->next = ac_list;
2720 ac_list = ac;
2721 good_one = 1;
2722 }
2723 }
2724 if (!good_one) {
2725 /* cleanup */
2726 free(clabel, M_RAIDFRAME);
2727 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2728 vput(vp);
2729 }
2730 }
2731 }
2732 }
2733 return(ac_list);
2734 }
2735
2736 static int
2737 rf_reasonable_label(clabel)
2738 RF_ComponentLabel_t *clabel;
2739 {
2740
2741 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2742 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2743 ((clabel->clean == RF_RAID_CLEAN) ||
2744 (clabel->clean == RF_RAID_DIRTY)) &&
2745 clabel->row >=0 &&
2746 clabel->column >= 0 &&
2747 clabel->num_rows > 0 &&
2748 clabel->num_columns > 0 &&
2749 clabel->row < clabel->num_rows &&
2750 clabel->column < clabel->num_columns &&
2751 clabel->blockSize > 0 &&
2752 clabel->numBlocks > 0) {
2753 /* label looks reasonable enough... */
2754 return(1);
2755 }
2756 return(0);
2757 }
2758
2759
2760 void
2761 rf_print_component_label(clabel)
2762 RF_ComponentLabel_t *clabel;
2763 {
2764 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2765 clabel->row, clabel->column,
2766 clabel->num_rows, clabel->num_columns);
2767 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2768 clabel->version, clabel->serial_number,
2769 clabel->mod_counter);
2770 printf(" Clean: %s Status: %d\n",
2771 clabel->clean ? "Yes" : "No", clabel->status );
2772 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2773 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2774 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2775 (char) clabel->parityConfig, clabel->blockSize,
2776 clabel->numBlocks);
2777 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2778 printf(" Last configured as: raid%d\n", clabel->last_unit );
2779 #if 0
2780 printf(" Config order: %d\n", clabel->config_order);
2781 #endif
2782
2783 }
2784
2785 RF_ConfigSet_t *
2786 rf_create_auto_sets(ac_list)
2787 RF_AutoConfig_t *ac_list;
2788 {
2789 RF_AutoConfig_t *ac;
2790 RF_ConfigSet_t *config_sets;
2791 RF_ConfigSet_t *cset;
2792 RF_AutoConfig_t *ac_next;
2793
2794
2795 config_sets = NULL;
2796
2797 /* Go through the AutoConfig list, and figure out which components
2798 belong to what sets. */
2799 ac = ac_list;
2800 while(ac!=NULL) {
2801 /* we're going to putz with ac->next, so save it here
2802 for use at the end of the loop */
2803 ac_next = ac->next;
2804
2805 if (config_sets == NULL) {
2806 /* will need at least this one... */
2807 config_sets = (RF_ConfigSet_t *)
2808 malloc(sizeof(RF_ConfigSet_t),
2809 M_RAIDFRAME, M_NOWAIT);
2810 if (config_sets == NULL) {
2811 panic("rf_create_auto_sets: No memory!\n");
2812 }
2813 /* this one is easy :) */
2814 config_sets->ac = ac;
2815 config_sets->next = NULL;
2816 config_sets->rootable = 0;
2817 ac->next = NULL;
2818 } else {
2819 /* which set does this component fit into? */
2820 cset = config_sets;
2821 while(cset!=NULL) {
2822 if (rf_does_it_fit(cset, ac)) {
2823 /* looks like it matches */
2824 ac->next = cset->ac;
2825 cset->ac = ac;
2826 break;
2827 }
2828 cset = cset->next;
2829 }
2830 if (cset==NULL) {
2831 /* didn't find a match above... new set..*/
2832 cset = (RF_ConfigSet_t *)
2833 malloc(sizeof(RF_ConfigSet_t),
2834 M_RAIDFRAME, M_NOWAIT);
2835 if (cset == NULL) {
2836 panic("rf_create_auto_sets: No memory!\n");
2837 }
2838 cset->ac = ac;
2839 ac->next = NULL;
2840 cset->next = config_sets;
2841 cset->rootable = 0;
2842 config_sets = cset;
2843 }
2844 }
2845 ac = ac_next;
2846 }
2847
2848
2849 return(config_sets);
2850 }
2851
2852 static int
2853 rf_does_it_fit(cset, ac)
2854 RF_ConfigSet_t *cset;
2855 RF_AutoConfig_t *ac;
2856 {
2857 RF_ComponentLabel_t *clabel1, *clabel2;
2858
2859 /* If this one matches the *first* one in the set, that's good
2860 enough, since the other members of the set would have been
2861 through here too... */
2862 /* note that we are not checking partitionSize here..
2863
2864 Note that we are also not checking the mod_counters here.
2865 If everything else matches execpt the mod_counter, that's
2866 good enough for this test. We will deal with the mod_counters
2867 a little later in the autoconfiguration process.
2868
2869 (clabel1->mod_counter == clabel2->mod_counter) &&
2870
2871 */
2872
2873 clabel1 = cset->ac->clabel;
2874 clabel2 = ac->clabel;
2875 if ((clabel1->version == clabel2->version) &&
2876 (clabel1->serial_number == clabel2->serial_number) &&
2877 (clabel1->num_rows == clabel2->num_rows) &&
2878 (clabel1->num_columns == clabel2->num_columns) &&
2879 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2880 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2881 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2882 (clabel1->parityConfig == clabel2->parityConfig) &&
2883 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2884 (clabel1->blockSize == clabel2->blockSize) &&
2885 (clabel1->numBlocks == clabel2->numBlocks) &&
2886 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2887 (clabel1->root_partition == clabel2->root_partition) &&
2888 (clabel1->last_unit == clabel2->last_unit) &&
2889 (clabel1->config_order == clabel2->config_order)) {
2890 /* if it get's here, it almost *has* to be a match */
2891 } else {
2892 /* it's not consistent with somebody in the set..
2893 punt */
2894 return(0);
2895 }
2896 /* all was fine.. it must fit... */
2897 return(1);
2898 }
2899
2900 int
2901 rf_have_enough_components(cset)
2902 RF_ConfigSet_t *cset;
2903 {
2904 RF_AutoConfig_t *ac;
2905 RF_AutoConfig_t *auto_config;
2906 RF_ComponentLabel_t *clabel;
2907 int r,c;
2908 int num_rows;
2909 int num_cols;
2910 int num_missing;
2911
2912 /* check to see that we have enough 'live' components
2913 of this set. If so, we can configure it if necessary */
2914
2915 num_rows = cset->ac->clabel->num_rows;
2916 num_cols = cset->ac->clabel->num_columns;
2917
2918 /* XXX Check for duplicate components!?!?!? */
2919
2920 num_missing = 0;
2921 auto_config = cset->ac;
2922
2923 for(r=0; r<num_rows; r++) {
2924 for(c=0; c<num_cols; c++) {
2925 ac = auto_config;
2926 while(ac!=NULL) {
2927 if (ac->clabel==NULL) {
2928 /* big-time bad news. */
2929 goto fail;
2930 }
2931 if ((ac->clabel->row == r) &&
2932 (ac->clabel->column == c)) {
2933 /* it's this one... */
2934 #if DEBUG
2935 printf("Found: %s at %d,%d\n",
2936 ac->devname,r,c);
2937 #endif
2938 break;
2939 }
2940 ac=ac->next;
2941 }
2942 if (ac==NULL) {
2943 /* Didn't find one here! */
2944 num_missing++;
2945 }
2946 }
2947 }
2948
2949 clabel = cset->ac->clabel;
2950
2951 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
2952 ((clabel->parityConfig == '1') && (num_missing > 1)) ||
2953 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
2954 ((clabel->parityConfig == '5') && (num_missing > 1))) {
2955 /* XXX this needs to be made *much* more general */
2956 /* Too many failures */
2957 return(0);
2958 }
2959 /* otherwise, all is well, and we've got enough to take a kick
2960 at autoconfiguring this set */
2961 return(1);
2962 fail:
2963 return(0);
2964
2965 }
2966
2967 void
2968 rf_create_configuration(ac,config,raidPtr)
2969 RF_AutoConfig_t *ac;
2970 RF_Config_t *config;
2971 RF_Raid_t *raidPtr;
2972 {
2973 RF_ComponentLabel_t *clabel;
2974
2975 clabel = ac->clabel;
2976
2977 /* 1. Fill in the common stuff */
2978 config->numRow = clabel->num_rows;
2979 config->numCol = clabel->num_columns;
2980 config->numSpare = 0; /* XXX should this be set here? */
2981 config->sectPerSU = clabel->sectPerSU;
2982 config->SUsPerPU = clabel->SUsPerPU;
2983 config->SUsPerRU = clabel->SUsPerRU;
2984 config->parityConfig = clabel->parityConfig;
2985 /* XXX... */
2986 strcpy(config->diskQueueType,"fifo");
2987 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
2988 config->layoutSpecificSize = 0; /* XXX ?? */
2989
2990 while(ac!=NULL) {
2991 /* row/col values will be in range due to the checks
2992 in reasonable_label() */
2993 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
2994 ac->devname);
2995 ac = ac->next;
2996 }
2997
2998 }
2999
3000 int
3001 rf_set_autoconfig(raidPtr, new_value)
3002 RF_Raid_t *raidPtr;
3003 int new_value;
3004 {
3005 RF_ComponentLabel_t clabel;
3006 struct vnode *vp;
3007 dev_t dev;
3008 int row, column;
3009
3010 raidPtr->autoconfigure = new_value;
3011 for(row=0; row<raidPtr->numRow; row++) {
3012 for(column=0; column<raidPtr->numCol; column++) {
3013 dev = raidPtr->Disks[row][column].dev;
3014 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3015 raidread_component_label(dev, vp, &clabel);
3016 clabel.autoconfigure = new_value;
3017 raidwrite_component_label(dev, vp, &clabel);
3018 }
3019 }
3020 return(new_value);
3021 }
3022
3023 int
3024 rf_set_rootpartition(raidPtr, new_value)
3025 RF_Raid_t *raidPtr;
3026 int new_value;
3027 {
3028 RF_ComponentLabel_t clabel;
3029 struct vnode *vp;
3030 dev_t dev;
3031 int row, column;
3032
3033 raidPtr->root_partition = new_value;
3034 for(row=0; row<raidPtr->numRow; row++) {
3035 for(column=0; column<raidPtr->numCol; column++) {
3036 dev = raidPtr->Disks[row][column].dev;
3037 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3038 raidread_component_label(dev, vp, &clabel);
3039 clabel.root_partition = new_value;
3040 raidwrite_component_label(dev, vp, &clabel);
3041 }
3042 }
3043 return(new_value);
3044 }
3045
3046 void
3047 rf_release_all_vps(cset)
3048 RF_ConfigSet_t *cset;
3049 {
3050 RF_AutoConfig_t *ac;
3051
3052 ac = cset->ac;
3053 while(ac!=NULL) {
3054 /* Close the vp, and give it back */
3055 if (ac->vp) {
3056 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3057 vput(ac->vp);
3058 }
3059 ac = ac->next;
3060 }
3061 }
3062
3063
3064 void
3065 rf_cleanup_config_set(cset)
3066 RF_ConfigSet_t *cset;
3067 {
3068 RF_AutoConfig_t *ac;
3069 RF_AutoConfig_t *next_ac;
3070
3071 ac = cset->ac;
3072 while(ac!=NULL) {
3073 next_ac = ac->next;
3074 /* nuke the label */
3075 free(ac->clabel, M_RAIDFRAME);
3076 /* cleanup the config structure */
3077 free(ac, M_RAIDFRAME);
3078 /* "next.." */
3079 ac = next_ac;
3080 }
3081 /* and, finally, nuke the config set */
3082 free(cset, M_RAIDFRAME);
3083 }
3084
3085
3086 void
3087 raid_init_component_label(raidPtr, clabel)
3088 RF_Raid_t *raidPtr;
3089 RF_ComponentLabel_t *clabel;
3090 {
3091 /* current version number */
3092 clabel->version = RF_COMPONENT_LABEL_VERSION;
3093 clabel->serial_number = raidPtr->serial_number;
3094 clabel->mod_counter = raidPtr->mod_counter;
3095 clabel->num_rows = raidPtr->numRow;
3096 clabel->num_columns = raidPtr->numCol;
3097 clabel->clean = RF_RAID_DIRTY; /* not clean */
3098 clabel->status = rf_ds_optimal; /* "It's good!" */
3099
3100 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3101 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3102 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3103
3104 clabel->blockSize = raidPtr->bytesPerSector;
3105 clabel->numBlocks = raidPtr->sectorsPerDisk;
3106
3107 /* XXX not portable */
3108 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3109 clabel->maxOutstanding = raidPtr->maxOutstanding;
3110 clabel->autoconfigure = raidPtr->autoconfigure;
3111 clabel->root_partition = raidPtr->root_partition;
3112 clabel->last_unit = raidPtr->raidid;
3113 clabel->config_order = raidPtr->config_order;
3114 }
3115
3116 int
3117 rf_auto_config_set(cset,unit)
3118 RF_ConfigSet_t *cset;
3119 int *unit;
3120 {
3121 RF_Raid_t *raidPtr;
3122 RF_Config_t *config;
3123 int raidID;
3124 int retcode;
3125
3126 printf("Starting autoconfigure on raid%d\n",raidID);
3127
3128 retcode = 0;
3129 *unit = -1;
3130
3131 /* 1. Create a config structure */
3132
3133 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3134 M_RAIDFRAME,
3135 M_NOWAIT);
3136 if (config==NULL) {
3137 printf("Out of mem!?!?\n");
3138 /* XXX do something more intelligent here. */
3139 return(1);
3140 }
3141 /* XXX raidID needs to be set correctly.. */
3142
3143 /*
3144 2. Figure out what RAID ID this one is supposed to live at
3145 See if we can get the same RAID dev that it was configured
3146 on last time..
3147 */
3148
3149 raidID = cset->ac->clabel->last_unit;
3150 if ((raidID < 0) || (raidID >= numraid)) {
3151 /* let's not wander off into lala land. */
3152 raidID = numraid - 1;
3153 }
3154 if (raidPtrs[raidID]->valid != 0) {
3155
3156 /*
3157 Nope... Go looking for an alternative...
3158 Start high so we don't immediately use raid0 if that's
3159 not taken.
3160 */
3161
3162 for(raidID = numraid; raidID >= 0; raidID--) {
3163 if (raidPtrs[raidID]->valid == 0) {
3164 /* can use this one! */
3165 break;
3166 }
3167 }
3168 }
3169
3170 if (raidID < 0) {
3171 /* punt... */
3172 printf("Unable to auto configure this set!\n");
3173 printf("(Out of RAID devs!)\n");
3174 return(1);
3175 }
3176
3177 raidPtr = raidPtrs[raidID];
3178
3179 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3180 raidPtr->raidid = raidID;
3181 raidPtr->openings = RAIDOUTSTANDING;
3182
3183 /* 3. Build the configuration structure */
3184 rf_create_configuration(cset->ac, config, raidPtr);
3185
3186 /* 4. Do the configuration */
3187 retcode = rf_Configure(raidPtr, config, cset->ac);
3188
3189 if (retcode == 0) {
3190
3191 raidinit(raidPtrs[raidID]);
3192
3193 rf_markalldirty(raidPtrs[raidID]);
3194 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3195 if (cset->ac->clabel->root_partition==1) {
3196 /* everything configured just fine. Make a note
3197 that this set is eligible to be root. */
3198 cset->rootable = 1;
3199 /* XXX do this here? */
3200 raidPtrs[raidID]->root_partition = 1;
3201 }
3202 }
3203
3204 /* 5. Cleanup */
3205 free(config, M_RAIDFRAME);
3206
3207 *unit = raidID;
3208 return(retcode);
3209 }
3210