rf_netbsdkintf.c revision 1.65 1 /* $NetBSD: rf_netbsdkintf.c,v 1.65 2000/03/03 01:46:36 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include "raid.h"
139 #include "opt_raid_autoconfig.h"
140 #include "rf_raid.h"
141 #include "rf_raidframe.h"
142 #include "rf_copyback.h"
143 #include "rf_dag.h"
144 #include "rf_dagflags.h"
145 #include "rf_diskqueue.h"
146 #include "rf_acctrace.h"
147 #include "rf_etimer.h"
148 #include "rf_general.h"
149 #include "rf_debugMem.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_debugprint.h"
155 #include "rf_threadstuff.h"
156 #include "rf_configure.h"
157
/*
 * Verbosity knob for the kernel-interface debug output below: on DEBUG
 * kernels db1_printf() prints only while this is greater than zero.
 */
int rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- level-1 debug printf.
 *
 * The macro takes ONE argument: a fully parenthesized printf() argument
 * list, hence the double parentheses at every call site.
 *
 * Both variants expand to a single statement that needs the caller's
 * trailing semicolon (do/while(0) idiom).  This keeps the macro safe in
 * unbraced if/else bodies: a bare "if (...) printf a" would capture a
 * following "else", and a bare "{ }" followed by ";" would terminate the
 * enclosing "if" before its "else".
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else /* DEBUG */
#define db1_printf(a) do { } while (0)
#endif /* DEBUG */
165
166 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
167
168 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
169
170 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
171 * spare table */
172 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
173 * installation process */
174
175 /* prototypes */
176 static void KernelWakeupFunc(struct buf * bp);
177 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
178 dev_t dev, RF_SectorNum_t startSect,
179 RF_SectorCount_t numSect, caddr_t buf,
180 void (*cbFunc) (struct buf *), void *cbArg,
181 int logBytesPerSector, struct proc * b_proc);
182 static void raidinit __P((RF_Raid_t *));
183
184 void raidattach __P((int));
185 int raidsize __P((dev_t));
186 int raidopen __P((dev_t, int, int, struct proc *));
187 int raidclose __P((dev_t, int, int, struct proc *));
188 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
189 int raidwrite __P((dev_t, struct uio *, int));
190 int raidread __P((dev_t, struct uio *, int));
191 void raidstrategy __P((struct buf *));
192 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
193
194 /*
195 * Pilfered from ccd.c
196 */
197
198 struct raidbuf {
199 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
200 struct buf *rf_obp; /* ptr. to original I/O buf */
201 int rf_flags; /* misc. flags */
202 RF_DiskQueueData_t *req;/* the request that this was part of.. */
203 };
204
205
206 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
207 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
208
209 /* XXX Not sure if the following should be replacing the raidPtrs above,
210 or if it should be used in conjunction with that...
211 */
212
213 struct raid_softc {
214 int sc_flags; /* flags */
215 int sc_cflags; /* configuration flags */
216 size_t sc_size; /* size of the raid device */
217 char sc_xname[20]; /* XXX external name */
218 struct disk sc_dkdev; /* generic disk device info */
219 struct pool sc_cbufpool; /* component buffer pool */
220 struct buf_queue buf_queue; /* used for the device queue */
221 };
222 /* sc_flags */
223 #define RAIDF_INITED 0x01 /* unit has been initialized */
224 #define RAIDF_WLABEL 0x02 /* label area is writable */
225 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
226 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
227 #define RAIDF_LOCKED 0x80 /* unit is locked */
228
229 #define raidunit(x) DISKUNIT(x)
230 int numraid = 0;
231
232 /*
233 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
234 * Be aware that large numbers can allow the driver to consume a lot of
235 * kernel memory, especially on writes, and in degraded mode reads.
236 *
237 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
238 * a single 64K write will typically require 64K for the old data,
239 * 64K for the old parity, and 64K for the new parity, for a total
240 * of 192K (if the parity buffer is not re-used immediately).
241 * Even if it is used immediately, that's still 128K, which when multiplied
242 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
243 *
244 * Now in degraded mode, for example, a 64K read on the above setup may
245 * require data reconstruction, which will require *all* of the 4 remaining
246 * disks to participate -- 4 * 32K/disk == 128K again.
247 */
248
249 #ifndef RAIDOUTSTANDING
250 #define RAIDOUTSTANDING 6
251 #endif
252
253 #define RAIDLABELDEV(dev) \
254 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
255
256 /* declared here, and made public, for the benefit of KVM stuff.. */
257 struct raid_softc *raid_softc;
258
259 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
260 struct disklabel *));
261 static void raidgetdisklabel __P((dev_t));
262 static void raidmakedisklabel __P((struct raid_softc *));
263
264 static int raidlock __P((struct raid_softc *));
265 static void raidunlock __P((struct raid_softc *));
266
267 static void rf_markalldirty __P((RF_Raid_t *));
268 void rf_mountroot_hook __P((struct device *));
269
270 struct device *raidrootdev;
271
272 void rf_ReconThread __P((struct rf_recon_req *));
273 /* XXX what I want is: */
274 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
275 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
276 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
277 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
278 void rf_buildroothack __P((void *));
279
280 RF_AutoConfig_t *rf_find_raid_components __P((void));
281 void print_component_label __P((RF_ComponentLabel_t *));
282 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
283 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
284 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
285 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
286 RF_Raid_t *));
287 int rf_set_autoconfig __P((RF_Raid_t *, int));
288 int rf_set_rootpartition __P((RF_Raid_t *, int));
289 void rf_release_all_vps __P((RF_ConfigSet_t *));
290 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
291 int rf_have_enough_components __P((RF_ConfigSet_t *));
292 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
293
294 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
295 allow autoconfig to take place.
296 Note that this is overridden by having
297 RAID_AUTOCONFIG as an option in the
298 kernel config file. */
299 extern struct device *booted_device;
300
301 void
302 raidattach(num)
303 int num;
304 {
305 int raidID;
306 int i, rc;
307 RF_AutoConfig_t *ac_list; /* autoconfig list */
308 RF_ConfigSet_t *config_sets;
309
310 #ifdef DEBUG
311 printf("raidattach: Asked for %d units\n", num);
312 #endif
313
314 if (num <= 0) {
315 #ifdef DIAGNOSTIC
316 panic("raidattach: count <= 0");
317 #endif
318 return;
319 }
320 /* This is where all the initialization stuff gets done. */
321
322 numraid = num;
323
324 /* Make some space for requested number of units... */
325
326 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
327 if (raidPtrs == NULL) {
328 panic("raidPtrs is NULL!!\n");
329 }
330
331 rc = rf_mutex_init(&rf_sparet_wait_mutex);
332 if (rc) {
333 RF_PANIC();
334 }
335
336 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
337
338 for (i = 0; i < num; i++)
339 raidPtrs[i] = NULL;
340 rc = rf_BootRaidframe();
341 if (rc == 0)
342 printf("Kernelized RAIDframe activated\n");
343 else
344 panic("Serious error booting RAID!!\n");
345
346 /* put together some datastructures like the CCD device does.. This
347 * lets us lock the device and what-not when it gets opened. */
348
349 raid_softc = (struct raid_softc *)
350 malloc(num * sizeof(struct raid_softc),
351 M_RAIDFRAME, M_NOWAIT);
352 if (raid_softc == NULL) {
353 printf("WARNING: no memory for RAIDframe driver\n");
354 return;
355 }
356
357 bzero(raid_softc, num * sizeof(struct raid_softc));
358
359 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
360 M_RAIDFRAME, M_NOWAIT);
361 if (raidrootdev == NULL) {
362 panic("No memory for RAIDframe driver!!?!?!\n");
363 }
364
365 for (raidID = 0; raidID < num; raidID++) {
366 BUFQ_INIT(&raid_softc[raidID].buf_queue);
367
368 raidrootdev[raidID].dv_class = DV_DISK;
369 raidrootdev[raidID].dv_cfdata = NULL;
370 raidrootdev[raidID].dv_unit = raidID;
371 raidrootdev[raidID].dv_parent = NULL;
372 raidrootdev[raidID].dv_flags = 0;
373 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
374
375 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
376 (RF_Raid_t *));
377 if (raidPtrs[raidID] == NULL) {
378 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
379 numraid = raidID;
380 return;
381 }
382 }
383
384 #if RAID_AUTOCONFIG
385 raidautoconfig = 1;
386 #endif
387
388 if (raidautoconfig) {
389 /* 1. locate all RAID components on the system */
390
391 #if DEBUG
392 printf("Searching for raid components...\n");
393 #endif
394 ac_list = rf_find_raid_components();
395
396 /* 2. sort them into their respective sets */
397
398 config_sets = rf_create_auto_sets(ac_list);
399
400 /* 3. evaluate each set and configure the valid ones
401 This gets done in rf_buildroothack() */
402
403 /* schedule the creation of the thread to do the
404 "/ on RAID" stuff */
405
406 kthread_create(rf_buildroothack,config_sets);
407
408 #if 0
409 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
410 #endif
411 }
412
413 }
414
415 void
416 rf_buildroothack(arg)
417 void *arg;
418 {
419 RF_ConfigSet_t *config_sets = arg;
420 RF_ConfigSet_t *cset;
421 RF_ConfigSet_t *next_cset;
422 int retcode;
423 int raidID;
424 int rootID;
425 int num_root;
426
427 num_root = 0;
428 cset = config_sets;
429 while(cset != NULL ) {
430 next_cset = cset->next;
431 if (rf_have_enough_components(cset) &&
432 cset->ac->clabel->autoconfigure==1) {
433 retcode = rf_auto_config_set(cset,&raidID);
434 if (!retcode) {
435 if (cset->rootable) {
436 rootID = raidID;
437 num_root++;
438 }
439 } else {
440 /* The autoconfig didn't work :( */
441 #if DEBUG
442 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
443 #endif
444 rf_release_all_vps(cset);
445 }
446 } else {
447 /* we're not autoconfiguring this set...
448 release the associated resources */
449 rf_release_all_vps(cset);
450 }
451 /* cleanup */
452 rf_cleanup_config_set(cset);
453 cset = next_cset;
454 }
455 if (boothowto & RB_ASKNAME) {
456 /* We don't auto-config... */
457 } else {
458 /* They didn't ask, and we found something bootable... */
459
460 if (num_root == 1) {
461 booted_device = &raidrootdev[rootID];
462 } else if (num_root > 1) {
463 /* we can't guess.. require the user to answer... */
464 boothowto |= RB_ASKNAME;
465 }
466 }
467 }
468
469
470 int
471 raidsize(dev)
472 dev_t dev;
473 {
474 struct raid_softc *rs;
475 struct disklabel *lp;
476 int part, unit, omask, size;
477
478 unit = raidunit(dev);
479 if (unit >= numraid)
480 return (-1);
481 rs = &raid_softc[unit];
482
483 if ((rs->sc_flags & RAIDF_INITED) == 0)
484 return (-1);
485
486 part = DISKPART(dev);
487 omask = rs->sc_dkdev.dk_openmask & (1 << part);
488 lp = rs->sc_dkdev.dk_label;
489
490 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
491 return (-1);
492
493 if (lp->d_partitions[part].p_fstype != FS_SWAP)
494 size = -1;
495 else
496 size = lp->d_partitions[part].p_size *
497 (lp->d_secsize / DEV_BSIZE);
498
499 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
500 return (-1);
501
502 return (size);
503
504 }
505
506 int
507 raiddump(dev, blkno, va, size)
508 dev_t dev;
509 daddr_t blkno;
510 caddr_t va;
511 size_t size;
512 {
513 /* Not implemented. */
514 return ENXIO;
515 }
516 /* ARGSUSED */
517 int
518 raidopen(dev, flags, fmt, p)
519 dev_t dev;
520 int flags, fmt;
521 struct proc *p;
522 {
523 int unit = raidunit(dev);
524 struct raid_softc *rs;
525 struct disklabel *lp;
526 int part, pmask;
527 int error = 0;
528
529 if (unit >= numraid)
530 return (ENXIO);
531 rs = &raid_softc[unit];
532
533 if ((error = raidlock(rs)) != 0)
534 return (error);
535 lp = rs->sc_dkdev.dk_label;
536
537 part = DISKPART(dev);
538 pmask = (1 << part);
539
540 db1_printf(("Opening raid device number: %d partition: %d\n",
541 unit, part));
542
543
544 if ((rs->sc_flags & RAIDF_INITED) &&
545 (rs->sc_dkdev.dk_openmask == 0))
546 raidgetdisklabel(dev);
547
548 /* make sure that this partition exists */
549
550 if (part != RAW_PART) {
551 db1_printf(("Not a raw partition..\n"));
552 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
553 ((part >= lp->d_npartitions) ||
554 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
555 error = ENXIO;
556 raidunlock(rs);
557 db1_printf(("Bailing out...\n"));
558 return (error);
559 }
560 }
561 /* Prevent this unit from being unconfigured while open. */
562 switch (fmt) {
563 case S_IFCHR:
564 rs->sc_dkdev.dk_copenmask |= pmask;
565 break;
566
567 case S_IFBLK:
568 rs->sc_dkdev.dk_bopenmask |= pmask;
569 break;
570 }
571
572 if ((rs->sc_dkdev.dk_openmask == 0) &&
573 ((rs->sc_flags & RAIDF_INITED) != 0)) {
574 /* First one... mark things as dirty... Note that we *MUST*
575 have done a configure before this. I DO NOT WANT TO BE
576 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
577 THAT THEY BELONG TOGETHER!!!!! */
578 /* XXX should check to see if we're only open for reading
579 here... If so, we needn't do this, but then need some
580 other way of keeping track of what's happened.. */
581
582 rf_markalldirty( raidPtrs[unit] );
583 }
584
585
586 rs->sc_dkdev.dk_openmask =
587 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
588
589 raidunlock(rs);
590
591 return (error);
592
593
594 }
595 /* ARGSUSED */
596 int
597 raidclose(dev, flags, fmt, p)
598 dev_t dev;
599 int flags, fmt;
600 struct proc *p;
601 {
602 int unit = raidunit(dev);
603 struct raid_softc *rs;
604 int error = 0;
605 int part;
606
607 if (unit >= numraid)
608 return (ENXIO);
609 rs = &raid_softc[unit];
610
611 if ((error = raidlock(rs)) != 0)
612 return (error);
613
614 part = DISKPART(dev);
615
616 /* ...that much closer to allowing unconfiguration... */
617 switch (fmt) {
618 case S_IFCHR:
619 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
620 break;
621
622 case S_IFBLK:
623 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
624 break;
625 }
626 rs->sc_dkdev.dk_openmask =
627 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
628
629 if ((rs->sc_dkdev.dk_openmask == 0) &&
630 ((rs->sc_flags & RAIDF_INITED) != 0)) {
631 /* Last one... device is not unconfigured yet.
632 Device shutdown has taken care of setting the
633 clean bits if RAIDF_INITED is not set
634 mark things as clean... */
635 #if 0
636 printf("Last one on raid%d. Updating status.\n",unit);
637 #endif
638 rf_final_update_component_labels( raidPtrs[unit] );
639 }
640
641 raidunlock(rs);
642 return (0);
643
644 }
645
646 void
647 raidstrategy(bp)
648 register struct buf *bp;
649 {
650 register int s;
651
652 unsigned int raidID = raidunit(bp->b_dev);
653 RF_Raid_t *raidPtr;
654 struct raid_softc *rs = &raid_softc[raidID];
655 struct disklabel *lp;
656 int wlabel;
657
658 if ((rs->sc_flags & RAIDF_INITED) ==0) {
659 bp->b_error = ENXIO;
660 bp->b_flags = B_ERROR;
661 bp->b_resid = bp->b_bcount;
662 biodone(bp);
663 return;
664 }
665 if (raidID >= numraid || !raidPtrs[raidID]) {
666 bp->b_error = ENODEV;
667 bp->b_flags |= B_ERROR;
668 bp->b_resid = bp->b_bcount;
669 biodone(bp);
670 return;
671 }
672 raidPtr = raidPtrs[raidID];
673 if (!raidPtr->valid) {
674 bp->b_error = ENODEV;
675 bp->b_flags |= B_ERROR;
676 bp->b_resid = bp->b_bcount;
677 biodone(bp);
678 return;
679 }
680 if (bp->b_bcount == 0) {
681 db1_printf(("b_bcount is zero..\n"));
682 biodone(bp);
683 return;
684 }
685 lp = rs->sc_dkdev.dk_label;
686
687 /*
688 * Do bounds checking and adjust transfer. If there's an
689 * error, the bounds check will flag that for us.
690 */
691
692 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
693 if (DISKPART(bp->b_dev) != RAW_PART)
694 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
695 db1_printf(("Bounds check failed!!:%d %d\n",
696 (int) bp->b_blkno, (int) wlabel));
697 biodone(bp);
698 return;
699 }
700 s = splbio();
701
702 bp->b_resid = 0;
703
704 /* stuff it onto our queue */
705 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
706
707 raidstart(raidPtrs[raidID]);
708
709 splx(s);
710 }
711 /* ARGSUSED */
712 int
713 raidread(dev, uio, flags)
714 dev_t dev;
715 struct uio *uio;
716 int flags;
717 {
718 int unit = raidunit(dev);
719 struct raid_softc *rs;
720 int part;
721
722 if (unit >= numraid)
723 return (ENXIO);
724 rs = &raid_softc[unit];
725
726 if ((rs->sc_flags & RAIDF_INITED) == 0)
727 return (ENXIO);
728 part = DISKPART(dev);
729
730 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
731
732 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
733
734 }
735 /* ARGSUSED */
736 int
737 raidwrite(dev, uio, flags)
738 dev_t dev;
739 struct uio *uio;
740 int flags;
741 {
742 int unit = raidunit(dev);
743 struct raid_softc *rs;
744
745 if (unit >= numraid)
746 return (ENXIO);
747 rs = &raid_softc[unit];
748
749 if ((rs->sc_flags & RAIDF_INITED) == 0)
750 return (ENXIO);
751 db1_printf(("raidwrite\n"));
752 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
753
754 }
755
756 int
757 raidioctl(dev, cmd, data, flag, p)
758 dev_t dev;
759 u_long cmd;
760 caddr_t data;
761 int flag;
762 struct proc *p;
763 {
764 int unit = raidunit(dev);
765 int error = 0;
766 int part, pmask;
767 struct raid_softc *rs;
768 RF_Config_t *k_cfg, *u_cfg;
769 RF_Raid_t *raidPtr;
770 RF_RaidDisk_t *diskPtr;
771 RF_AccTotals_t *totals;
772 RF_DeviceConfig_t *d_cfg, **ucfgp;
773 u_char *specific_buf;
774 int retcode = 0;
775 int row;
776 int column;
777 struct rf_recon_req *rrcopy, *rr;
778 RF_ComponentLabel_t *clabel;
779 RF_ComponentLabel_t ci_label;
780 RF_ComponentLabel_t **clabel_ptr;
781 RF_SingleComponent_t *sparePtr,*componentPtr;
782 RF_SingleComponent_t hot_spare;
783 RF_SingleComponent_t component;
784 int i, j, d;
785
786 if (unit >= numraid)
787 return (ENXIO);
788 rs = &raid_softc[unit];
789 raidPtr = raidPtrs[unit];
790
791 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
792 (int) DISKPART(dev), (int) unit, (int) cmd));
793
794 /* Must be open for writes for these commands... */
795 switch (cmd) {
796 case DIOCSDINFO:
797 case DIOCWDINFO:
798 case DIOCWLABEL:
799 if ((flag & FWRITE) == 0)
800 return (EBADF);
801 }
802
803 /* Must be initialized for these... */
804 switch (cmd) {
805 case DIOCGDINFO:
806 case DIOCSDINFO:
807 case DIOCWDINFO:
808 case DIOCGPART:
809 case DIOCWLABEL:
810 case DIOCGDEFLABEL:
811 case RAIDFRAME_SHUTDOWN:
812 case RAIDFRAME_REWRITEPARITY:
813 case RAIDFRAME_GET_INFO:
814 case RAIDFRAME_RESET_ACCTOTALS:
815 case RAIDFRAME_GET_ACCTOTALS:
816 case RAIDFRAME_KEEP_ACCTOTALS:
817 case RAIDFRAME_GET_SIZE:
818 case RAIDFRAME_FAIL_DISK:
819 case RAIDFRAME_COPYBACK:
820 case RAIDFRAME_CHECK_RECON_STATUS:
821 case RAIDFRAME_GET_COMPONENT_LABEL:
822 case RAIDFRAME_SET_COMPONENT_LABEL:
823 case RAIDFRAME_ADD_HOT_SPARE:
824 case RAIDFRAME_REMOVE_HOT_SPARE:
825 case RAIDFRAME_INIT_LABELS:
826 case RAIDFRAME_REBUILD_IN_PLACE:
827 case RAIDFRAME_CHECK_PARITY:
828 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
829 case RAIDFRAME_CHECK_COPYBACK_STATUS:
830 case RAIDFRAME_SET_AUTOCONFIG:
831 case RAIDFRAME_SET_ROOT:
832 if ((rs->sc_flags & RAIDF_INITED) == 0)
833 return (ENXIO);
834 }
835
836 switch (cmd) {
837
838 /* configure the system */
839 case RAIDFRAME_CONFIGURE:
840
841 if (raidPtr->valid) {
842 /* There is a valid RAID set running on this unit! */
843 printf("raid%d: Device already configured!\n",unit);
844 }
845
846 /* copy-in the configuration information */
847 /* data points to a pointer to the configuration structure */
848
849 u_cfg = *((RF_Config_t **) data);
850 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
851 if (k_cfg == NULL) {
852 return (ENOMEM);
853 }
854 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
855 sizeof(RF_Config_t));
856 if (retcode) {
857 RF_Free(k_cfg, sizeof(RF_Config_t));
858 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
859 retcode));
860 return (retcode);
861 }
862 /* allocate a buffer for the layout-specific data, and copy it
863 * in */
864 if (k_cfg->layoutSpecificSize) {
865 if (k_cfg->layoutSpecificSize > 10000) {
866 /* sanity check */
867 RF_Free(k_cfg, sizeof(RF_Config_t));
868 return (EINVAL);
869 }
870 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
871 (u_char *));
872 if (specific_buf == NULL) {
873 RF_Free(k_cfg, sizeof(RF_Config_t));
874 return (ENOMEM);
875 }
876 retcode = copyin(k_cfg->layoutSpecific,
877 (caddr_t) specific_buf,
878 k_cfg->layoutSpecificSize);
879 if (retcode) {
880 RF_Free(k_cfg, sizeof(RF_Config_t));
881 RF_Free(specific_buf,
882 k_cfg->layoutSpecificSize);
883 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
884 retcode));
885 return (retcode);
886 }
887 } else
888 specific_buf = NULL;
889 k_cfg->layoutSpecific = specific_buf;
890
891 /* should do some kind of sanity check on the configuration.
892 * Store the sum of all the bytes in the last byte? */
893
894 /* configure the system */
895
896 /*
897 * Clear the entire RAID descriptor, just to make sure
898 * there is no stale data left in the case of a
899 * reconfiguration
900 */
901 bzero((char *) raidPtr, sizeof(RF_Raid_t));
902 raidPtr->raidid = unit;
903
904 retcode = rf_Configure(raidPtr, k_cfg, NULL);
905
906 if (retcode == 0) {
907
908 /* allow this many simultaneous IO's to
909 this RAID device */
910 raidPtr->openings = RAIDOUTSTANDING;
911
912 raidinit(raidPtr);
913 rf_markalldirty(raidPtr);
914 }
915 /* free the buffers. No return code here. */
916 if (k_cfg->layoutSpecificSize) {
917 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
918 }
919 RF_Free(k_cfg, sizeof(RF_Config_t));
920
921 return (retcode);
922
923 /* shutdown the system */
924 case RAIDFRAME_SHUTDOWN:
925
926 if ((error = raidlock(rs)) != 0)
927 return (error);
928
929 /*
930 * If somebody has a partition mounted, we shouldn't
931 * shutdown.
932 */
933
934 part = DISKPART(dev);
935 pmask = (1 << part);
936 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
937 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
938 (rs->sc_dkdev.dk_copenmask & pmask))) {
939 raidunlock(rs);
940 return (EBUSY);
941 }
942
943 retcode = rf_Shutdown(raidPtr);
944
945 pool_destroy(&rs->sc_cbufpool);
946
947 /* It's no longer initialized... */
948 rs->sc_flags &= ~RAIDF_INITED;
949
950 /* Detach the disk. */
951 disk_detach(&rs->sc_dkdev);
952
953 raidunlock(rs);
954
955 return (retcode);
956 case RAIDFRAME_GET_COMPONENT_LABEL:
957 clabel_ptr = (RF_ComponentLabel_t **) data;
958 /* need to read the component label for the disk indicated
959 by row,column in clabel */
960
961 /* For practice, let's get it directly fromdisk, rather
962 than from the in-core copy */
963 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
964 (RF_ComponentLabel_t *));
965 if (clabel == NULL)
966 return (ENOMEM);
967
968 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
969
970 retcode = copyin( *clabel_ptr, clabel,
971 sizeof(RF_ComponentLabel_t));
972
973 if (retcode) {
974 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
975 return(retcode);
976 }
977
978 row = clabel->row;
979 column = clabel->column;
980
981 if ((row < 0) || (row >= raidPtr->numRow) ||
982 (column < 0) || (column >= raidPtr->numCol)) {
983 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
984 return(EINVAL);
985 }
986
987 raidread_component_label(raidPtr->Disks[row][column].dev,
988 raidPtr->raid_cinfo[row][column].ci_vp,
989 clabel );
990
991 retcode = copyout((caddr_t) clabel,
992 (caddr_t) *clabel_ptr,
993 sizeof(RF_ComponentLabel_t));
994 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
995 return (retcode);
996
997 case RAIDFRAME_SET_COMPONENT_LABEL:
998 clabel = (RF_ComponentLabel_t *) data;
999
1000 /* XXX check the label for valid stuff... */
1001 /* Note that some things *should not* get modified --
1002 the user should be re-initing the labels instead of
1003 trying to patch things.
1004 */
1005
1006 printf("Got component label:\n");
1007 printf("Version: %d\n",clabel->version);
1008 printf("Serial Number: %d\n",clabel->serial_number);
1009 printf("Mod counter: %d\n",clabel->mod_counter);
1010 printf("Row: %d\n", clabel->row);
1011 printf("Column: %d\n", clabel->column);
1012 printf("Num Rows: %d\n", clabel->num_rows);
1013 printf("Num Columns: %d\n", clabel->num_columns);
1014 printf("Clean: %d\n", clabel->clean);
1015 printf("Status: %d\n", clabel->status);
1016
1017 row = clabel->row;
1018 column = clabel->column;
1019
1020 if ((row < 0) || (row >= raidPtr->numRow) ||
1021 (column < 0) || (column >= raidPtr->numCol)) {
1022 return(EINVAL);
1023 }
1024
1025 /* XXX this isn't allowed to do anything for now :-) */
1026
1027 /* XXX and before it is, we need to fill in the rest
1028 of the fields!?!?!?! */
1029 #if 0
1030 raidwrite_component_label(
1031 raidPtr->Disks[row][column].dev,
1032 raidPtr->raid_cinfo[row][column].ci_vp,
1033 clabel );
1034 #endif
1035 return (0);
1036
1037 case RAIDFRAME_INIT_LABELS:
1038 clabel = (RF_ComponentLabel_t *) data;
1039 /*
1040 we only want the serial number from
1041 the above. We get all the rest of the information
1042 from the config that was used to create this RAID
1043 set.
1044 */
1045
1046 raidPtr->serial_number = clabel->serial_number;
1047
1048 raid_init_component_label(raidPtr, &ci_label);
1049 ci_label.serial_number = clabel->serial_number;
1050
1051 for(row=0;row<raidPtr->numRow;row++) {
1052 ci_label.row = row;
1053 for(column=0;column<raidPtr->numCol;column++) {
1054 diskPtr = &raidPtr->Disks[row][column];
1055 ci_label.partitionSize = diskPtr->partitionSize;
1056 ci_label.column = column;
1057 raidwrite_component_label(
1058 raidPtr->Disks[row][column].dev,
1059 raidPtr->raid_cinfo[row][column].ci_vp,
1060 &ci_label );
1061 }
1062 }
1063
1064 return (retcode);
1065 case RAIDFRAME_SET_AUTOCONFIG:
1066 d = rf_set_autoconfig(raidPtr, *data);
1067 printf("New autoconfig value is: %d\n", d);
1068 *data = d;
1069 return (retcode);
1070
1071 case RAIDFRAME_SET_ROOT:
1072 d = rf_set_rootpartition(raidPtr, *data);
1073 printf("New rootpartition value is: %d\n", d);
1074 *data = d;
1075 return (retcode);
1076
1077 /* initialize all parity */
1078 case RAIDFRAME_REWRITEPARITY:
1079
1080 if (raidPtr->Layout.map->faultsTolerated == 0) {
1081 /* Parity for RAID 0 is trivially correct */
1082 raidPtr->parity_good = RF_RAID_CLEAN;
1083 return(0);
1084 }
1085
1086 if (raidPtr->parity_rewrite_in_progress == 1) {
1087 /* Re-write is already in progress! */
1088 return(EINVAL);
1089 }
1090
1091 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1092 rf_RewriteParityThread,
1093 raidPtr,"raid_parity");
1094 return (retcode);
1095
1096
1097 case RAIDFRAME_ADD_HOT_SPARE:
1098 sparePtr = (RF_SingleComponent_t *) data;
1099 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1100 printf("Adding spare\n");
1101 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1102 return(retcode);
1103
1104 case RAIDFRAME_REMOVE_HOT_SPARE:
1105 return(retcode);
1106
1107 case RAIDFRAME_REBUILD_IN_PLACE:
1108
1109 if (raidPtr->Layout.map->faultsTolerated == 0) {
1110 /* Can't do this on a RAID 0!! */
1111 return(EINVAL);
1112 }
1113
1114 if (raidPtr->recon_in_progress == 1) {
1115 /* a reconstruct is already in progress! */
1116 return(EINVAL);
1117 }
1118
1119 componentPtr = (RF_SingleComponent_t *) data;
1120 memcpy( &component, componentPtr,
1121 sizeof(RF_SingleComponent_t));
1122 row = component.row;
1123 column = component.column;
1124 printf("Rebuild: %d %d\n",row, column);
1125 if ((row < 0) || (row >= raidPtr->numRow) ||
1126 (column < 0) || (column >= raidPtr->numCol)) {
1127 return(EINVAL);
1128 }
1129
1130 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1131 if (rrcopy == NULL)
1132 return(ENOMEM);
1133
1134 rrcopy->raidPtr = (void *) raidPtr;
1135 rrcopy->row = row;
1136 rrcopy->col = column;
1137
1138 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1139 rf_ReconstructInPlaceThread,
1140 rrcopy,"raid_reconip");
1141 return(retcode);
1142
1143 case RAIDFRAME_GET_INFO:
1144 if (!raidPtr->valid)
1145 return (ENODEV);
1146 ucfgp = (RF_DeviceConfig_t **) data;
1147 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1148 (RF_DeviceConfig_t *));
1149 if (d_cfg == NULL)
1150 return (ENOMEM);
1151 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1152 d_cfg->rows = raidPtr->numRow;
1153 d_cfg->cols = raidPtr->numCol;
1154 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1155 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1156 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1157 return (ENOMEM);
1158 }
1159 d_cfg->nspares = raidPtr->numSpare;
1160 if (d_cfg->nspares >= RF_MAX_DISKS) {
1161 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1162 return (ENOMEM);
1163 }
1164 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1165 d = 0;
1166 for (i = 0; i < d_cfg->rows; i++) {
1167 for (j = 0; j < d_cfg->cols; j++) {
1168 d_cfg->devs[d] = raidPtr->Disks[i][j];
1169 d++;
1170 }
1171 }
1172 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1173 d_cfg->spares[i] = raidPtr->Disks[0][j];
1174 }
1175 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1176 sizeof(RF_DeviceConfig_t));
1177 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1178
1179 return (retcode);
1180
1181 case RAIDFRAME_CHECK_PARITY:
1182 *(int *) data = raidPtr->parity_good;
1183 return (0);
1184
1185 case RAIDFRAME_RESET_ACCTOTALS:
1186 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1187 return (0);
1188
1189 case RAIDFRAME_GET_ACCTOTALS:
1190 totals = (RF_AccTotals_t *) data;
1191 *totals = raidPtr->acc_totals;
1192 return (0);
1193
1194 case RAIDFRAME_KEEP_ACCTOTALS:
1195 raidPtr->keep_acc_totals = *(int *)data;
1196 return (0);
1197
1198 case RAIDFRAME_GET_SIZE:
1199 *(int *) data = raidPtr->totalSectors;
1200 return (0);
1201
1202 /* fail a disk & optionally start reconstruction */
1203 case RAIDFRAME_FAIL_DISK:
1204
1205 if (raidPtr->Layout.map->faultsTolerated == 0) {
1206 /* Can't do this on a RAID 0!! */
1207 return(EINVAL);
1208 }
1209
1210 rr = (struct rf_recon_req *) data;
1211
1212 if (rr->row < 0 || rr->row >= raidPtr->numRow
1213 || rr->col < 0 || rr->col >= raidPtr->numCol)
1214 return (EINVAL);
1215
1216 printf("raid%d: Failing the disk: row: %d col: %d\n",
1217 unit, rr->row, rr->col);
1218
1219 /* make a copy of the recon request so that we don't rely on
1220 * the user's buffer */
1221 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1222 if (rrcopy == NULL)
1223 return(ENOMEM);
1224 bcopy(rr, rrcopy, sizeof(*rr));
1225 rrcopy->raidPtr = (void *) raidPtr;
1226
1227 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1228 rf_ReconThread,
1229 rrcopy,"raid_recon");
1230 return (0);
1231
1232 /* invoke a copyback operation after recon on whatever disk
1233 * needs it, if any */
1234 case RAIDFRAME_COPYBACK:
1235
1236 if (raidPtr->Layout.map->faultsTolerated == 0) {
1237 /* This makes no sense on a RAID 0!! */
1238 return(EINVAL);
1239 }
1240
1241 if (raidPtr->copyback_in_progress == 1) {
1242 /* Copyback is already in progress! */
1243 return(EINVAL);
1244 }
1245
1246 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1247 rf_CopybackThread,
1248 raidPtr,"raid_copyback");
1249 return (retcode);
1250
1251 /* return the percentage completion of reconstruction */
1252 case RAIDFRAME_CHECK_RECON_STATUS:
1253 if (raidPtr->Layout.map->faultsTolerated == 0) {
1254 /* This makes no sense on a RAID 0 */
1255 return(EINVAL);
1256 }
1257 row = 0; /* XXX we only consider a single row... */
1258 if (raidPtr->status[row] != rf_rs_reconstructing)
1259 *(int *) data = 100;
1260 else
1261 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1262 return (0);
1263
1264 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1265 if (raidPtr->Layout.map->faultsTolerated == 0) {
1266 /* This makes no sense on a RAID 0 */
1267 return(EINVAL);
1268 }
1269 if (raidPtr->parity_rewrite_in_progress == 1) {
1270 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1271 } else {
1272 *(int *) data = 100;
1273 }
1274 return (0);
1275
1276 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1277 if (raidPtr->Layout.map->faultsTolerated == 0) {
1278 /* This makes no sense on a RAID 0 */
1279 return(EINVAL);
1280 }
1281 if (raidPtr->copyback_in_progress == 1) {
1282 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1283 raidPtr->Layout.numStripe;
1284 } else {
1285 *(int *) data = 100;
1286 }
1287 return (0);
1288
1289
1290 /* the sparetable daemon calls this to wait for the kernel to
1291 * need a spare table. this ioctl does not return until a
1292 * spare table is needed. XXX -- calling mpsleep here in the
1293 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1294 * -- I should either compute the spare table in the kernel,
1295 * or have a different -- XXX XXX -- interface (a different
1296 * character device) for delivering the table -- XXX */
1297 #if 0
1298 case RAIDFRAME_SPARET_WAIT:
1299 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1300 while (!rf_sparet_wait_queue)
1301 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1302 waitreq = rf_sparet_wait_queue;
1303 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1304 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1305
1306 /* structure assignment */
1307 *((RF_SparetWait_t *) data) = *waitreq;
1308
1309 RF_Free(waitreq, sizeof(*waitreq));
1310 return (0);
1311
1312 /* wakes up a process waiting on SPARET_WAIT and puts an error
1313 * code in it that will cause the dameon to exit */
1314 case RAIDFRAME_ABORT_SPARET_WAIT:
1315 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1316 waitreq->fcol = -1;
1317 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1318 waitreq->next = rf_sparet_wait_queue;
1319 rf_sparet_wait_queue = waitreq;
1320 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1321 wakeup(&rf_sparet_wait_queue);
1322 return (0);
1323
1324 /* used by the spare table daemon to deliver a spare table
1325 * into the kernel */
1326 case RAIDFRAME_SEND_SPARET:
1327
1328 /* install the spare table */
1329 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1330
1331 /* respond to the requestor. the return status of the spare
1332 * table installation is passed in the "fcol" field */
1333 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1334 waitreq->fcol = retcode;
1335 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1336 waitreq->next = rf_sparet_resp_queue;
1337 rf_sparet_resp_queue = waitreq;
1338 wakeup(&rf_sparet_resp_queue);
1339 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1340
1341 return (retcode);
1342 #endif
1343
1344 default:
1345 break; /* fall through to the os-specific code below */
1346
1347 }
1348
1349 if (!raidPtr->valid)
1350 return (EINVAL);
1351
1352 /*
1353 * Add support for "regular" device ioctls here.
1354 */
1355
1356 switch (cmd) {
1357 case DIOCGDINFO:
1358 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1359 break;
1360
1361 case DIOCGPART:
1362 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1363 ((struct partinfo *) data)->part =
1364 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1365 break;
1366
1367 case DIOCWDINFO:
1368 case DIOCSDINFO:
1369 if ((error = raidlock(rs)) != 0)
1370 return (error);
1371
1372 rs->sc_flags |= RAIDF_LABELLING;
1373
1374 error = setdisklabel(rs->sc_dkdev.dk_label,
1375 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1376 if (error == 0) {
1377 if (cmd == DIOCWDINFO)
1378 error = writedisklabel(RAIDLABELDEV(dev),
1379 raidstrategy, rs->sc_dkdev.dk_label,
1380 rs->sc_dkdev.dk_cpulabel);
1381 }
1382 rs->sc_flags &= ~RAIDF_LABELLING;
1383
1384 raidunlock(rs);
1385
1386 if (error)
1387 return (error);
1388 break;
1389
1390 case DIOCWLABEL:
1391 if (*(int *) data != 0)
1392 rs->sc_flags |= RAIDF_WLABEL;
1393 else
1394 rs->sc_flags &= ~RAIDF_WLABEL;
1395 break;
1396
1397 case DIOCGDEFLABEL:
1398 raidgetdefaultlabel(raidPtr, rs,
1399 (struct disklabel *) data);
1400 break;
1401
1402 default:
1403 retcode = ENOTTY;
1404 }
1405 return (retcode);
1406
1407 }
1408
1409
1410 /* raidinit -- complete the rest of the initialization for the
1411 RAIDframe device. */
1412
1413
1414 static void
1415 raidinit(raidPtr)
1416 RF_Raid_t *raidPtr;
1417 {
1418 struct raid_softc *rs;
1419 int unit;
1420
1421 unit = raidPtr->raidid;
1422
1423 rs = &raid_softc[unit];
1424 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1425 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1426
1427
1428 /* XXX should check return code first... */
1429 rs->sc_flags |= RAIDF_INITED;
1430
1431 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1432
1433 rs->sc_dkdev.dk_name = rs->sc_xname;
1434
1435 /* disk_attach actually creates space for the CPU disklabel, among
1436 * other things, so it's critical to call this *BEFORE* we try putzing
1437 * with disklabels. */
1438
1439 disk_attach(&rs->sc_dkdev);
1440
1441 /* XXX There may be a weird interaction here between this, and
1442 * protectedSectors, as used in RAIDframe. */
1443
1444 rs->sc_size = raidPtr->totalSectors;
1445
1446 }
1447
1448 /* wake up the daemon & tell it to get us a spare table
1449 * XXX
1450 * the entries in the queues should be tagged with the raidPtr
1451 * so that in the extremely rare case that two recons happen at once,
1452 * we know for which device were requesting a spare table
1453 * XXX
1454 *
1455 * XXX This code is not currently used. GO
1456 */
1457 int
1458 rf_GetSpareTableFromDaemon(req)
1459 RF_SparetWait_t *req;
1460 {
1461 int retcode;
1462
1463 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1464 req->next = rf_sparet_wait_queue;
1465 rf_sparet_wait_queue = req;
1466 wakeup(&rf_sparet_wait_queue);
1467
1468 /* mpsleep unlocks the mutex */
1469 while (!rf_sparet_resp_queue) {
1470 tsleep(&rf_sparet_resp_queue, PRIBIO,
1471 "raidframe getsparetable", 0);
1472 }
1473 req = rf_sparet_resp_queue;
1474 rf_sparet_resp_queue = req->next;
1475 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1476
1477 retcode = req->fcol;
1478 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1479 * alloc'd */
1480 return (retcode);
1481 }
1482
1483 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1484 * bp & passes it down.
1485 * any calls originating in the kernel must use non-blocking I/O
1486 * do some extra sanity checking to return "appropriate" error values for
1487 * certain conditions (to make some standard utilities work)
1488 *
1489 * Formerly known as: rf_DoAccessKernel
1490 */
1491 void
1492 raidstart(raidPtr)
1493 RF_Raid_t *raidPtr;
1494 {
1495 RF_SectorCount_t num_blocks, pb, sum;
1496 RF_RaidAddr_t raid_addr;
1497 int retcode;
1498 struct partition *pp;
1499 daddr_t blocknum;
1500 int unit;
1501 struct raid_softc *rs;
1502 int do_async;
1503 struct buf *bp;
1504
1505 unit = raidPtr->raidid;
1506 rs = &raid_softc[unit];
1507
1508 /* quick check to see if anything has died recently */
1509 RF_LOCK_MUTEX(raidPtr->mutex);
1510 if (raidPtr->numNewFailures > 0) {
1511 rf_update_component_labels(raidPtr);
1512 raidPtr->numNewFailures--;
1513 }
1514 RF_UNLOCK_MUTEX(raidPtr->mutex);
1515
1516 /* Check to see if we're at the limit... */
1517 RF_LOCK_MUTEX(raidPtr->mutex);
1518 while (raidPtr->openings > 0) {
1519 RF_UNLOCK_MUTEX(raidPtr->mutex);
1520
1521 /* get the next item, if any, from the queue */
1522 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1523 /* nothing more to do */
1524 return;
1525 }
1526 BUFQ_REMOVE(&rs->buf_queue, bp);
1527
1528 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1529 * partition.. Need to make it absolute to the underlying
1530 * device.. */
1531
1532 blocknum = bp->b_blkno;
1533 if (DISKPART(bp->b_dev) != RAW_PART) {
1534 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1535 blocknum += pp->p_offset;
1536 }
1537
1538 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1539 (int) blocknum));
1540
1541 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1542 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1543
1544 /* *THIS* is where we adjust what block we're going to...
1545 * but DO NOT TOUCH bp->b_blkno!!! */
1546 raid_addr = blocknum;
1547
1548 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1549 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1550 sum = raid_addr + num_blocks + pb;
1551 if (1 || rf_debugKernelAccess) {
1552 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1553 (int) raid_addr, (int) sum, (int) num_blocks,
1554 (int) pb, (int) bp->b_resid));
1555 }
1556 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1557 || (sum < num_blocks) || (sum < pb)) {
1558 bp->b_error = ENOSPC;
1559 bp->b_flags |= B_ERROR;
1560 bp->b_resid = bp->b_bcount;
1561 biodone(bp);
1562 RF_LOCK_MUTEX(raidPtr->mutex);
1563 continue;
1564 }
1565 /*
1566 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1567 */
1568
1569 if (bp->b_bcount & raidPtr->sectorMask) {
1570 bp->b_error = EINVAL;
1571 bp->b_flags |= B_ERROR;
1572 bp->b_resid = bp->b_bcount;
1573 biodone(bp);
1574 RF_LOCK_MUTEX(raidPtr->mutex);
1575 continue;
1576
1577 }
1578 db1_printf(("Calling DoAccess..\n"));
1579
1580
1581 RF_LOCK_MUTEX(raidPtr->mutex);
1582 raidPtr->openings--;
1583 RF_UNLOCK_MUTEX(raidPtr->mutex);
1584
1585 /*
1586 * Everything is async.
1587 */
1588 do_async = 1;
1589
1590 /* don't ever condition on bp->b_flags & B_WRITE.
1591 * always condition on B_READ instead */
1592
1593 /* XXX we're still at splbio() here... do we *really*
1594 need to be? */
1595
1596
1597 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1598 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1599 do_async, raid_addr, num_blocks,
1600 bp->b_un.b_addr, bp, NULL, NULL,
1601 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1602
1603
1604 RF_LOCK_MUTEX(raidPtr->mutex);
1605 }
1606 RF_UNLOCK_MUTEX(raidPtr->mutex);
1607 }
1608
1609
1610
1611
1612 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1613
1614 int
1615 rf_DispatchKernelIO(queue, req)
1616 RF_DiskQueue_t *queue;
1617 RF_DiskQueueData_t *req;
1618 {
1619 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1620 struct buf *bp;
1621 struct raidbuf *raidbp = NULL;
1622 struct raid_softc *rs;
1623 int unit;
1624 int s;
1625
1626 s=0;
1627 /* s = splbio();*/ /* want to test this */
1628 /* XXX along with the vnode, we also need the softc associated with
1629 * this device.. */
1630
1631 req->queue = queue;
1632
1633 unit = queue->raidPtr->raidid;
1634
1635 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1636
1637 if (unit >= numraid) {
1638 printf("Invalid unit number: %d %d\n", unit, numraid);
1639 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1640 }
1641 rs = &raid_softc[unit];
1642
1643 /* XXX is this the right place? */
1644 disk_busy(&rs->sc_dkdev);
1645
1646 bp = req->bp;
1647 #if 1
1648 /* XXX when there is a physical disk failure, someone is passing us a
1649 * buffer that contains old stuff!! Attempt to deal with this problem
1650 * without taking a performance hit... (not sure where the real bug
1651 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1652
1653 if (bp->b_flags & B_ERROR) {
1654 bp->b_flags &= ~B_ERROR;
1655 }
1656 if (bp->b_error != 0) {
1657 bp->b_error = 0;
1658 }
1659 #endif
1660 raidbp = RAIDGETBUF(rs);
1661
1662 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1663
1664 /*
1665 * context for raidiodone
1666 */
1667 raidbp->rf_obp = bp;
1668 raidbp->req = req;
1669
1670 LIST_INIT(&raidbp->rf_buf.b_dep);
1671
1672 switch (req->type) {
1673 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1674 /* XXX need to do something extra here.. */
1675 /* I'm leaving this in, as I've never actually seen it used,
1676 * and I'd like folks to report it... GO */
1677 printf(("WAKEUP CALLED\n"));
1678 queue->numOutstanding++;
1679
1680 /* XXX need to glue the original buffer into this?? */
1681
1682 KernelWakeupFunc(&raidbp->rf_buf);
1683 break;
1684
1685 case RF_IO_TYPE_READ:
1686 case RF_IO_TYPE_WRITE:
1687
1688 if (req->tracerec) {
1689 RF_ETIMER_START(req->tracerec->timer);
1690 }
1691 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1692 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1693 req->sectorOffset, req->numSector,
1694 req->buf, KernelWakeupFunc, (void *) req,
1695 queue->raidPtr->logBytesPerSector, req->b_proc);
1696
1697 if (rf_debugKernelAccess) {
1698 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1699 (long) bp->b_blkno));
1700 }
1701 queue->numOutstanding++;
1702 queue->last_deq_sector = req->sectorOffset;
1703 /* acc wouldn't have been let in if there were any pending
1704 * reqs at any other priority */
1705 queue->curPriority = req->priority;
1706
1707 db1_printf(("Going for %c to unit %d row %d col %d\n",
1708 req->type, unit, queue->row, queue->col));
1709 db1_printf(("sector %d count %d (%d bytes) %d\n",
1710 (int) req->sectorOffset, (int) req->numSector,
1711 (int) (req->numSector <<
1712 queue->raidPtr->logBytesPerSector),
1713 (int) queue->raidPtr->logBytesPerSector));
1714 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1715 raidbp->rf_buf.b_vp->v_numoutput++;
1716 }
1717 VOP_STRATEGY(&raidbp->rf_buf);
1718
1719 break;
1720
1721 default:
1722 panic("bad req->type in rf_DispatchKernelIO");
1723 }
1724 db1_printf(("Exiting from DispatchKernelIO\n"));
1725 /* splx(s); */ /* want to test this */
1726 return (0);
1727 }
1728 /* this is the callback function associated with a I/O invoked from
1729 kernel code.
1730 */
1731 static void
1732 KernelWakeupFunc(vbp)
1733 struct buf *vbp;
1734 {
1735 RF_DiskQueueData_t *req = NULL;
1736 RF_DiskQueue_t *queue;
1737 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1738 struct buf *bp;
1739 struct raid_softc *rs;
1740 int unit;
1741 register int s;
1742
1743 s = splbio();
1744 db1_printf(("recovering the request queue:\n"));
1745 req = raidbp->req;
1746
1747 bp = raidbp->rf_obp;
1748
1749 queue = (RF_DiskQueue_t *) req->queue;
1750
1751 if (raidbp->rf_buf.b_flags & B_ERROR) {
1752 bp->b_flags |= B_ERROR;
1753 bp->b_error = raidbp->rf_buf.b_error ?
1754 raidbp->rf_buf.b_error : EIO;
1755 }
1756
1757 /* XXX methinks this could be wrong... */
1758 #if 1
1759 bp->b_resid = raidbp->rf_buf.b_resid;
1760 #endif
1761
1762 if (req->tracerec) {
1763 RF_ETIMER_STOP(req->tracerec->timer);
1764 RF_ETIMER_EVAL(req->tracerec->timer);
1765 RF_LOCK_MUTEX(rf_tracing_mutex);
1766 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1767 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1768 req->tracerec->num_phys_ios++;
1769 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1770 }
1771 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1772
1773 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1774
1775
1776 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1777 * ballistic, and mark the component as hosed... */
1778
1779 if (bp->b_flags & B_ERROR) {
1780 /* Mark the disk as dead */
1781 /* but only mark it once... */
1782 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1783 rf_ds_optimal) {
1784 printf("raid%d: IO Error. Marking %s as failed.\n",
1785 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1786 queue->raidPtr->Disks[queue->row][queue->col].status =
1787 rf_ds_failed;
1788 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1789 queue->raidPtr->numFailures++;
1790 queue->raidPtr->numNewFailures++;
1791 /* XXX here we should bump the version number for each component, and write that data out */
1792 } else { /* Disk is already dead... */
1793 /* printf("Disk already marked as dead!\n"); */
1794 }
1795
1796 }
1797
1798 rs = &raid_softc[unit];
1799 RAIDPUTBUF(rs, raidbp);
1800
1801
1802 if (bp->b_resid == 0) {
1803 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1804 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1805 }
1806
1807 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1808 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1809
1810 splx(s);
1811 }
1812
1813
1814
1815 /*
1816 * initialize a buf structure for doing an I/O in the kernel.
1817 */
1818 static void
1819 InitBP(
1820 struct buf * bp,
1821 struct vnode * b_vp,
1822 unsigned rw_flag,
1823 dev_t dev,
1824 RF_SectorNum_t startSect,
1825 RF_SectorCount_t numSect,
1826 caddr_t buf,
1827 void (*cbFunc) (struct buf *),
1828 void *cbArg,
1829 int logBytesPerSector,
1830 struct proc * b_proc)
1831 {
1832 /* bp->b_flags = B_PHYS | rw_flag; */
1833 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1834 bp->b_bcount = numSect << logBytesPerSector;
1835 bp->b_bufsize = bp->b_bcount;
1836 bp->b_error = 0;
1837 bp->b_dev = dev;
1838 bp->b_un.b_addr = buf;
1839 bp->b_blkno = startSect;
1840 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1841 if (bp->b_bcount == 0) {
1842 panic("bp->b_bcount is zero in InitBP!!\n");
1843 }
1844 bp->b_proc = b_proc;
1845 bp->b_iodone = cbFunc;
1846 bp->b_vp = b_vp;
1847
1848 }
1849
1850 static void
1851 raidgetdefaultlabel(raidPtr, rs, lp)
1852 RF_Raid_t *raidPtr;
1853 struct raid_softc *rs;
1854 struct disklabel *lp;
1855 {
1856 db1_printf(("Building a default label...\n"));
1857 bzero(lp, sizeof(*lp));
1858
1859 /* fabricate a label... */
1860 lp->d_secperunit = raidPtr->totalSectors;
1861 lp->d_secsize = raidPtr->bytesPerSector;
1862 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1863 lp->d_ntracks = 1;
1864 lp->d_ncylinders = raidPtr->totalSectors /
1865 (lp->d_nsectors * lp->d_ntracks);
1866 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1867
1868 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1869 lp->d_type = DTYPE_RAID;
1870 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1871 lp->d_rpm = 3600;
1872 lp->d_interleave = 1;
1873 lp->d_flags = 0;
1874
1875 lp->d_partitions[RAW_PART].p_offset = 0;
1876 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1877 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1878 lp->d_npartitions = RAW_PART + 1;
1879
1880 lp->d_magic = DISKMAGIC;
1881 lp->d_magic2 = DISKMAGIC;
1882 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1883
1884 }
1885 /*
1886 * Read the disklabel from the raid device. If one is not present, fake one
1887 * up.
1888 */
1889 static void
1890 raidgetdisklabel(dev)
1891 dev_t dev;
1892 {
1893 int unit = raidunit(dev);
1894 struct raid_softc *rs = &raid_softc[unit];
1895 char *errstring;
1896 struct disklabel *lp = rs->sc_dkdev.dk_label;
1897 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1898 RF_Raid_t *raidPtr;
1899
1900 db1_printf(("Getting the disklabel...\n"));
1901
1902 bzero(clp, sizeof(*clp));
1903
1904 raidPtr = raidPtrs[unit];
1905
1906 raidgetdefaultlabel(raidPtr, rs, lp);
1907
1908 /*
1909 * Call the generic disklabel extraction routine.
1910 */
1911 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1912 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1913 if (errstring)
1914 raidmakedisklabel(rs);
1915 else {
1916 int i;
1917 struct partition *pp;
1918
1919 /*
1920 * Sanity check whether the found disklabel is valid.
1921 *
1922 * This is necessary since total size of the raid device
1923 * may vary when an interleave is changed even though exactly
1924 * same componets are used, and old disklabel may used
1925 * if that is found.
1926 */
1927 if (lp->d_secperunit != rs->sc_size)
1928 printf("WARNING: %s: "
1929 "total sector size in disklabel (%d) != "
1930 "the size of raid (%ld)\n", rs->sc_xname,
1931 lp->d_secperunit, (long) rs->sc_size);
1932 for (i = 0; i < lp->d_npartitions; i++) {
1933 pp = &lp->d_partitions[i];
1934 if (pp->p_offset + pp->p_size > rs->sc_size)
1935 printf("WARNING: %s: end of partition `%c' "
1936 "exceeds the size of raid (%ld)\n",
1937 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1938 }
1939 }
1940
1941 }
1942 /*
1943 * Take care of things one might want to take care of in the event
1944 * that a disklabel isn't present.
1945 */
1946 static void
1947 raidmakedisklabel(rs)
1948 struct raid_softc *rs;
1949 {
1950 struct disklabel *lp = rs->sc_dkdev.dk_label;
1951 db1_printf(("Making a label..\n"));
1952
1953 /*
1954 * For historical reasons, if there's no disklabel present
1955 * the raw partition must be marked FS_BSDFFS.
1956 */
1957
1958 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1959
1960 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1961
1962 lp->d_checksum = dkcksum(lp);
1963 }
1964 /*
1965 * Lookup the provided name in the filesystem. If the file exists,
1966 * is a valid block device, and isn't being used by anyone else,
1967 * set *vpp to the file's vnode.
1968 * You'll find the original of this in ccd.c
1969 */
1970 int
1971 raidlookup(path, p, vpp)
1972 char *path;
1973 struct proc *p;
1974 struct vnode **vpp; /* result */
1975 {
1976 struct nameidata nd;
1977 struct vnode *vp;
1978 struct vattr va;
1979 int error;
1980
1981 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1982 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1983 #ifdef DEBUG
1984 printf("RAIDframe: vn_open returned %d\n", error);
1985 #endif
1986 return (error);
1987 }
1988 vp = nd.ni_vp;
1989 if (vp->v_usecount > 1) {
1990 VOP_UNLOCK(vp, 0);
1991 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1992 return (EBUSY);
1993 }
1994 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1995 VOP_UNLOCK(vp, 0);
1996 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1997 return (error);
1998 }
1999 /* XXX: eventually we should handle VREG, too. */
2000 if (va.va_type != VBLK) {
2001 VOP_UNLOCK(vp, 0);
2002 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2003 return (ENOTBLK);
2004 }
2005 VOP_UNLOCK(vp, 0);
2006 *vpp = vp;
2007 return (0);
2008 }
2009 /*
2010 * Wait interruptibly for an exclusive lock.
2011 *
2012 * XXX
2013 * Several drivers do this; it should be abstracted and made MP-safe.
2014 * (Hmm... where have we seen this warning before :-> GO )
2015 */
2016 static int
2017 raidlock(rs)
2018 struct raid_softc *rs;
2019 {
2020 int error;
2021
2022 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2023 rs->sc_flags |= RAIDF_WANTED;
2024 if ((error =
2025 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2026 return (error);
2027 }
2028 rs->sc_flags |= RAIDF_LOCKED;
2029 return (0);
2030 }
2031 /*
2032 * Unlock and wake up any waiters.
2033 */
2034 static void
2035 raidunlock(rs)
2036 struct raid_softc *rs;
2037 {
2038
2039 rs->sc_flags &= ~RAIDF_LOCKED;
2040 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2041 rs->sc_flags &= ~RAIDF_WANTED;
2042 wakeup(rs);
2043 }
2044 }
2045
2046
/* Byte offset and byte length used for component label I/O. */
#define RF_COMPONENT_INFO_OFFSET	(16 * 1024)	/* bytes */
#define RF_COMPONENT_INFO_SIZE		(1024)		/* bytes */
2049
2050 int
2051 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2052 {
2053 RF_ComponentLabel_t clabel;
2054 raidread_component_label(dev, b_vp, &clabel);
2055 clabel.mod_counter = mod_counter;
2056 clabel.clean = RF_RAID_CLEAN;
2057 raidwrite_component_label(dev, b_vp, &clabel);
2058 return(0);
2059 }
2060
2061
2062 int
2063 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2064 {
2065 RF_ComponentLabel_t clabel;
2066 raidread_component_label(dev, b_vp, &clabel);
2067 clabel.mod_counter = mod_counter;
2068 clabel.clean = RF_RAID_DIRTY;
2069 raidwrite_component_label(dev, b_vp, &clabel);
2070 return(0);
2071 }
2072
2073 /* ARGSUSED */
2074 int
2075 raidread_component_label(dev, b_vp, clabel)
2076 dev_t dev;
2077 struct vnode *b_vp;
2078 RF_ComponentLabel_t *clabel;
2079 {
2080 struct buf *bp;
2081 int error;
2082
2083 /* XXX should probably ensure that we don't try to do this if
2084 someone has changed rf_protected_sectors. */
2085
2086 /* get a block of the appropriate size... */
2087 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2088 bp->b_dev = dev;
2089
2090 /* get our ducks in a row for the read */
2091 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2092 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2093 bp->b_flags = B_BUSY | B_READ;
2094 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2095
2096 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2097
2098 error = biowait(bp);
2099
2100 if (!error) {
2101 memcpy(clabel, bp->b_un.b_addr,
2102 sizeof(RF_ComponentLabel_t));
2103 #if 0
2104 print_component_label( clabel );
2105 #endif
2106 } else {
2107 #if 0
2108 printf("Failed to read RAID component label!\n");
2109 #endif
2110 }
2111
2112 bp->b_flags = B_INVAL | B_AGE;
2113 brelse(bp);
2114 return(error);
2115 }
2116 /* ARGSUSED */
2117 int
2118 raidwrite_component_label(dev, b_vp, clabel)
2119 dev_t dev;
2120 struct vnode *b_vp;
2121 RF_ComponentLabel_t *clabel;
2122 {
2123 struct buf *bp;
2124 int error;
2125
2126 /* get a block of the appropriate size... */
2127 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2128 bp->b_dev = dev;
2129
2130 /* get our ducks in a row for the write */
2131 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2132 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2133 bp->b_flags = B_BUSY | B_WRITE;
2134 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2135
2136 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2137
2138 memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
2139
2140 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2141 error = biowait(bp);
2142 bp->b_flags = B_INVAL | B_AGE;
2143 brelse(bp);
2144 if (error) {
2145 #if 1
2146 printf("Failed to write RAID component info!\n");
2147 #endif
2148 }
2149
2150 return(error);
2151 }
2152
2153 void
2154 rf_markalldirty( raidPtr )
2155 RF_Raid_t *raidPtr;
2156 {
2157 RF_ComponentLabel_t clabel;
2158 int r,c;
2159
2160 raidPtr->mod_counter++;
2161 for (r = 0; r < raidPtr->numRow; r++) {
2162 for (c = 0; c < raidPtr->numCol; c++) {
2163 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2164 raidread_component_label(
2165 raidPtr->Disks[r][c].dev,
2166 raidPtr->raid_cinfo[r][c].ci_vp,
2167 &clabel);
2168 if (clabel.status == rf_ds_spared) {
2169 /* XXX do something special...
2170 but whatever you do, don't
2171 try to access it!! */
2172 } else {
2173 #if 0
2174 clabel.status =
2175 raidPtr->Disks[r][c].status;
2176 raidwrite_component_label(
2177 raidPtr->Disks[r][c].dev,
2178 raidPtr->raid_cinfo[r][c].ci_vp,
2179 &clabel);
2180 #endif
2181 raidmarkdirty(
2182 raidPtr->Disks[r][c].dev,
2183 raidPtr->raid_cinfo[r][c].ci_vp,
2184 raidPtr->mod_counter);
2185 }
2186 }
2187 }
2188 }
2189 /* printf("Component labels marked dirty.\n"); */
2190 #if 0
2191 for( c = 0; c < raidPtr->numSpare ; c++) {
2192 sparecol = raidPtr->numCol + c;
2193 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2194 /*
2195
2196 XXX this is where we get fancy and map this spare
2197 into it's correct spot in the array.
2198
2199 */
2200 /*
2201
2202 we claim this disk is "optimal" if it's
2203 rf_ds_used_spare, as that means it should be
2204 directly substitutable for the disk it replaced.
2205 We note that too...
2206
2207 */
2208
2209 for(i=0;i<raidPtr->numRow;i++) {
2210 for(j=0;j<raidPtr->numCol;j++) {
2211 if ((raidPtr->Disks[i][j].spareRow ==
2212 r) &&
2213 (raidPtr->Disks[i][j].spareCol ==
2214 sparecol)) {
2215 srow = r;
2216 scol = sparecol;
2217 break;
2218 }
2219 }
2220 }
2221
2222 raidread_component_label(
2223 raidPtr->Disks[r][sparecol].dev,
2224 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2225 &clabel);
2226 /* make sure status is noted */
2227 clabel.version = RF_COMPONENT_LABEL_VERSION;
2228 clabel.mod_counter = raidPtr->mod_counter;
2229 clabel.serial_number = raidPtr->serial_number;
2230 clabel.row = srow;
2231 clabel.column = scol;
2232 clabel.num_rows = raidPtr->numRow;
2233 clabel.num_columns = raidPtr->numCol;
2234 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2235 clabel.status = rf_ds_optimal;
2236 raidwrite_component_label(
2237 raidPtr->Disks[r][sparecol].dev,
2238 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2239 &clabel);
2240 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2241 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2242 }
2243 }
2244
2245 #endif
2246 }
2247
2248
2249 void
2250 rf_update_component_labels( raidPtr )
2251 RF_Raid_t *raidPtr;
2252 {
2253 RF_ComponentLabel_t clabel;
2254 int sparecol;
2255 int r,c;
2256 int i,j;
2257 int srow, scol;
2258
2259 srow = -1;
2260 scol = -1;
2261
2262 /* XXX should do extra checks to make sure things really are clean,
2263 rather than blindly setting the clean bit... */
2264
2265 raidPtr->mod_counter++;
2266
2267 for (r = 0; r < raidPtr->numRow; r++) {
2268 for (c = 0; c < raidPtr->numCol; c++) {
2269 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2270 raidread_component_label(
2271 raidPtr->Disks[r][c].dev,
2272 raidPtr->raid_cinfo[r][c].ci_vp,
2273 &clabel);
2274 /* make sure status is noted */
2275 clabel.status = rf_ds_optimal;
2276 /* bump the counter */
2277 clabel.mod_counter = raidPtr->mod_counter;
2278
2279 raidwrite_component_label(
2280 raidPtr->Disks[r][c].dev,
2281 raidPtr->raid_cinfo[r][c].ci_vp,
2282 &clabel);
2283 }
2284 /* else we don't touch it.. */
2285 }
2286 }
2287
2288 for( c = 0; c < raidPtr->numSpare ; c++) {
2289 sparecol = raidPtr->numCol + c;
2290 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2291 /*
2292
2293 we claim this disk is "optimal" if it's
2294 rf_ds_used_spare, as that means it should be
2295 directly substitutable for the disk it replaced.
2296 We note that too...
2297
2298 */
2299
2300 for(i=0;i<raidPtr->numRow;i++) {
2301 for(j=0;j<raidPtr->numCol;j++) {
2302 if ((raidPtr->Disks[i][j].spareRow ==
2303 0) &&
2304 (raidPtr->Disks[i][j].spareCol ==
2305 sparecol)) {
2306 srow = i;
2307 scol = j;
2308 break;
2309 }
2310 }
2311 }
2312
2313 /* XXX shouldn't *really* need this... */
2314 raidread_component_label(
2315 raidPtr->Disks[0][sparecol].dev,
2316 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2317 &clabel);
2318 /* make sure status is noted */
2319
2320 raid_init_component_label(raidPtr, &clabel);
2321
2322 clabel.mod_counter = raidPtr->mod_counter;
2323 clabel.row = srow;
2324 clabel.column = scol;
2325 clabel.status = rf_ds_optimal;
2326
2327 raidwrite_component_label(
2328 raidPtr->Disks[0][sparecol].dev,
2329 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2330 &clabel);
2331 }
2332 }
2333 /* printf("Component labels updated\n"); */
2334 }
2335
2336
2337 void
2338 rf_final_update_component_labels( raidPtr )
2339 RF_Raid_t *raidPtr;
2340 {
2341 RF_ComponentLabel_t clabel;
2342 int sparecol;
2343 int r,c;
2344 int i,j;
2345 int srow, scol;
2346
2347 srow = -1;
2348 scol = -1;
2349
2350 /* XXX should do extra checks to make sure things really are clean,
2351 rather than blindly setting the clean bit... */
2352
2353 raidPtr->mod_counter++;
2354
2355 for (r = 0; r < raidPtr->numRow; r++) {
2356 for (c = 0; c < raidPtr->numCol; c++) {
2357 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2358 raidread_component_label(
2359 raidPtr->Disks[r][c].dev,
2360 raidPtr->raid_cinfo[r][c].ci_vp,
2361 &clabel);
2362 /* make sure status is noted */
2363 clabel.status = rf_ds_optimal;
2364 /* bump the counter */
2365 clabel.mod_counter = raidPtr->mod_counter;
2366
2367 raidwrite_component_label(
2368 raidPtr->Disks[r][c].dev,
2369 raidPtr->raid_cinfo[r][c].ci_vp,
2370 &clabel);
2371 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2372 raidmarkclean(
2373 raidPtr->Disks[r][c].dev,
2374 raidPtr->raid_cinfo[r][c].ci_vp,
2375 raidPtr->mod_counter);
2376 }
2377 }
2378 /* else we don't touch it.. */
2379 }
2380 }
2381
2382 for( c = 0; c < raidPtr->numSpare ; c++) {
2383 sparecol = raidPtr->numCol + c;
2384 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2385 /*
2386
2387 we claim this disk is "optimal" if it's
2388 rf_ds_used_spare, as that means it should be
2389 directly substitutable for the disk it replaced.
2390 We note that too...
2391
2392 */
2393
2394 for(i=0;i<raidPtr->numRow;i++) {
2395 for(j=0;j<raidPtr->numCol;j++) {
2396 if ((raidPtr->Disks[i][j].spareRow ==
2397 0) &&
2398 (raidPtr->Disks[i][j].spareCol ==
2399 sparecol)) {
2400 srow = i;
2401 scol = j;
2402 break;
2403 }
2404 }
2405 }
2406
2407 /* XXX shouldn't *really* need this... */
2408 raidread_component_label(
2409 raidPtr->Disks[0][sparecol].dev,
2410 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2411 &clabel);
2412 /* make sure status is noted */
2413
2414 raid_init_component_label(raidPtr, &clabel);
2415
2416 clabel.mod_counter = raidPtr->mod_counter;
2417 clabel.row = srow;
2418 clabel.column = scol;
2419 clabel.status = rf_ds_optimal;
2420
2421 raidwrite_component_label(
2422 raidPtr->Disks[0][sparecol].dev,
2423 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2424 &clabel);
2425 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2426 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2427 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2428 raidPtr->mod_counter);
2429 }
2430 }
2431 }
2432 /* printf("Component labels updated\n"); */
2433 }
2434
2435
2436 void
2437 rf_ReconThread(req)
2438 struct rf_recon_req *req;
2439 {
2440 int s;
2441 RF_Raid_t *raidPtr;
2442
2443 s = splbio();
2444 raidPtr = (RF_Raid_t *) req->raidPtr;
2445 raidPtr->recon_in_progress = 1;
2446
2447 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2448 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2449
2450 /* XXX get rid of this! we don't need it at all.. */
2451 RF_Free(req, sizeof(*req));
2452
2453 raidPtr->recon_in_progress = 0;
2454 splx(s);
2455
2456 /* That's all... */
2457 kthread_exit(0); /* does not return */
2458 }
2459
2460 void
2461 rf_RewriteParityThread(raidPtr)
2462 RF_Raid_t *raidPtr;
2463 {
2464 int retcode;
2465 int s;
2466
2467 raidPtr->parity_rewrite_in_progress = 1;
2468 s = splbio();
2469 retcode = rf_RewriteParity(raidPtr);
2470 splx(s);
2471 if (retcode) {
2472 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2473 } else {
2474 /* set the clean bit! If we shutdown correctly,
2475 the clean bit on each component label will get
2476 set */
2477 raidPtr->parity_good = RF_RAID_CLEAN;
2478 }
2479 raidPtr->parity_rewrite_in_progress = 0;
2480
2481 /* That's all... */
2482 kthread_exit(0); /* does not return */
2483 }
2484
2485
2486 void
2487 rf_CopybackThread(raidPtr)
2488 RF_Raid_t *raidPtr;
2489 {
2490 int s;
2491
2492 raidPtr->copyback_in_progress = 1;
2493 s = splbio();
2494 rf_CopybackReconstructedData(raidPtr);
2495 splx(s);
2496 raidPtr->copyback_in_progress = 0;
2497
2498 /* That's all... */
2499 kthread_exit(0); /* does not return */
2500 }
2501
2502
2503 void
2504 rf_ReconstructInPlaceThread(req)
2505 struct rf_recon_req *req;
2506 {
2507 int retcode;
2508 int s;
2509 RF_Raid_t *raidPtr;
2510
2511 s = splbio();
2512 raidPtr = req->raidPtr;
2513 raidPtr->recon_in_progress = 1;
2514 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2515 RF_Free(req, sizeof(*req));
2516 raidPtr->recon_in_progress = 0;
2517 splx(s);
2518
2519 /* That's all... */
2520 kthread_exit(0); /* does not return */
2521 }
2522
/*
 * Mount-root hook.  Intentionally does nothing; `dev' is not used.
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* nothing to do */
}
2529
2530
2531 RF_AutoConfig_t *
2532 rf_find_raid_components()
2533 {
2534 struct devnametobdevmaj *dtobdm;
2535 struct vnode *vp;
2536 struct disklabel label;
2537 struct device *dv;
2538 char *cd_name;
2539 dev_t dev;
2540 int error;
2541 int i;
2542 int good_one;
2543 RF_ComponentLabel_t *clabel;
2544 RF_AutoConfig_t *ac_list;
2545 RF_AutoConfig_t *ac;
2546
2547
2548 /* initialize the AutoConfig list */
2549 ac_list = NULL;
2550
2551 if (raidautoconfig) {
2552
2553 /* we begin by trolling through *all* the devices on the system */
2554
2555 for (dv = alldevs.tqh_first; dv != NULL;
2556 dv = dv->dv_list.tqe_next) {
2557
2558 /* we are only interested in disks... */
2559 if (dv->dv_class != DV_DISK)
2560 continue;
2561
2562 /* we don't care about floppies... */
2563 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2564 continue;
2565 }
2566
2567 /* need to find the device_name_to_block_device_major stuff */
2568 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2569 dtobdm = dev_name2blk;
2570 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2571 dtobdm++;
2572 }
2573
2574 /* get a vnode for the raw partition of this disk */
2575
2576 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2577 if (bdevvp(dev, &vp))
2578 panic("RAID can't alloc vnode");
2579
2580 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2581
2582 if (error) {
2583 /* "Who cares." Continue looking
2584 for something that exists*/
2585 vput(vp);
2586 continue;
2587 }
2588
2589 /* Ok, the disk exists. Go get the disklabel. */
2590 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2591 FREAD, NOCRED, 0);
2592 if (error) {
2593 /*
2594 * XXX can't happen - open() would
2595 * have errored out (or faked up one)
2596 */
2597 printf("can't get label for dev %s%c (%d)!?!?\n",
2598 dv->dv_xname, 'a' + RAW_PART, error);
2599 }
2600
2601 /* don't need this any more. We'll allocate it again
2602 a little later if we really do... */
2603 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2604 vput(vp);
2605
2606 for (i=0; i < label.d_npartitions; i++) {
2607 /* We only support partitions marked as RAID */
2608 if (label.d_partitions[i].p_fstype != FS_RAID)
2609 continue;
2610
2611 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2612 if (bdevvp(dev, &vp))
2613 panic("RAID can't alloc vnode");
2614
2615 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2616 if (error) {
2617 /* Whatever... */
2618 vput(vp);
2619 continue;
2620 }
2621
2622 good_one = 0;
2623
2624 clabel = (RF_ComponentLabel_t *)
2625 malloc(sizeof(RF_ComponentLabel_t),
2626 M_RAIDFRAME, M_NOWAIT);
2627 if (clabel == NULL) {
2628 /* XXX CLEANUP HERE */
2629 printf("RAID auto config: out of memory!\n");
2630 return(NULL); /* XXX probably should panic? */
2631 }
2632
2633 if (!raidread_component_label(dev, vp, clabel)) {
2634 /* Got the label. Does it look reasonable? */
2635 if (rf_reasonable_label(clabel) &&
2636 (clabel->partitionSize <=
2637 label.d_partitions[i].p_size)) {
2638 #if DEBUG
2639 printf("Component on: %s%c: %d\n",
2640 dv->dv_xname, 'a'+i,
2641 label.d_partitions[i].p_size);
2642 print_component_label(clabel);
2643 #endif
2644 /* if it's reasonable, add it,
2645 else ignore it. */
2646 ac = (RF_AutoConfig_t *)
2647 malloc(sizeof(RF_AutoConfig_t),
2648 M_RAIDFRAME,
2649 M_NOWAIT);
2650 if (ac == NULL) {
2651 /* XXX should panic?? */
2652 return(NULL);
2653 }
2654
2655 sprintf(ac->devname, "%s%c",
2656 dv->dv_xname, 'a'+i);
2657 ac->dev = dev;
2658 ac->vp = vp;
2659 ac->clabel = clabel;
2660 ac->next = ac_list;
2661 ac_list = ac;
2662 good_one = 1;
2663 }
2664 }
2665 if (!good_one) {
2666 /* cleanup */
2667 free(clabel, M_RAIDFRAME);
2668 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2669 vput(vp);
2670 }
2671 }
2672 }
2673 }
2674 return(ac_list);
2675 }
2676
2677 static int
2678 rf_reasonable_label(clabel)
2679 RF_ComponentLabel_t *clabel;
2680 {
2681
2682 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2683 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2684 ((clabel->clean == RF_RAID_CLEAN) ||
2685 (clabel->clean == RF_RAID_DIRTY)) &&
2686 clabel->row >=0 &&
2687 clabel->column >= 0 &&
2688 clabel->num_rows > 0 &&
2689 clabel->num_columns > 0 &&
2690 clabel->row < clabel->num_rows &&
2691 clabel->column < clabel->num_columns &&
2692 clabel->blockSize > 0 &&
2693 clabel->numBlocks > 0) {
2694 /* label looks reasonable enough... */
2695 return(1);
2696 }
2697 return(0);
2698 }
2699
2700
2701 void
2702 print_component_label(clabel)
2703 RF_ComponentLabel_t *clabel;
2704 {
2705 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2706 clabel->row, clabel->column,
2707 clabel->num_rows, clabel->num_columns);
2708 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2709 clabel->version, clabel->serial_number,
2710 clabel->mod_counter);
2711 printf(" Clean: %s Status: %d\n",
2712 clabel->clean ? "Yes" : "No", clabel->status );
2713 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2714 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2715 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2716 (char) clabel->parityConfig, clabel->blockSize,
2717 clabel->numBlocks);
2718 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2719 printf(" Last configured as: raid%d\n", clabel->last_unit );
2720 #if 0
2721 printf(" Config order: %d\n", clabel->config_order);
2722 #endif
2723
2724 }
2725
2726 RF_ConfigSet_t *
2727 rf_create_auto_sets(ac_list)
2728 RF_AutoConfig_t *ac_list;
2729 {
2730 RF_AutoConfig_t *ac;
2731 RF_ConfigSet_t *config_sets;
2732 RF_ConfigSet_t *cset;
2733 RF_AutoConfig_t *ac_next;
2734
2735
2736 config_sets = NULL;
2737
2738 /* Go through the AutoConfig list, and figure out which components
2739 belong to what sets. */
2740 ac = ac_list;
2741 while(ac!=NULL) {
2742 /* we're going to putz with ac->next, so save it here
2743 for use at the end of the loop */
2744 ac_next = ac->next;
2745
2746 if (config_sets == NULL) {
2747 /* will need at least this one... */
2748 config_sets = (RF_ConfigSet_t *)
2749 malloc(sizeof(RF_ConfigSet_t),
2750 M_RAIDFRAME, M_NOWAIT);
2751 if (config_sets == NULL) {
2752 panic("rf_create_auto_sets: No memory!\n");
2753 }
2754 /* this one is easy :) */
2755 config_sets->ac = ac;
2756 config_sets->next = NULL;
2757 config_sets->rootable = 0;
2758 ac->next = NULL;
2759 } else {
2760 /* which set does this component fit into? */
2761 cset = config_sets;
2762 while(cset!=NULL) {
2763 if (rf_does_it_fit(cset, ac)) {
2764 /* looks like it matches */
2765 ac->next = cset->ac;
2766 cset->ac = ac;
2767 break;
2768 }
2769 cset = cset->next;
2770 }
2771 if (cset==NULL) {
2772 /* didn't find a match above... new set..*/
2773 cset = (RF_ConfigSet_t *)
2774 malloc(sizeof(RF_ConfigSet_t),
2775 M_RAIDFRAME, M_NOWAIT);
2776 if (cset == NULL) {
2777 panic("rf_create_auto_sets: No memory!\n");
2778 }
2779 cset->ac = ac;
2780 ac->next = NULL;
2781 cset->next = config_sets;
2782 cset->rootable = 0;
2783 config_sets = cset;
2784 }
2785 }
2786 ac = ac_next;
2787 }
2788
2789
2790 return(config_sets);
2791 }
2792
2793 static int
2794 rf_does_it_fit(cset, ac)
2795 RF_ConfigSet_t *cset;
2796 RF_AutoConfig_t *ac;
2797 {
2798 RF_ComponentLabel_t *clabel1, *clabel2;
2799
2800 /* If this one matches the *first* one in the set, that's good
2801 enough, since the other members of the set would have been
2802 through here too... */
2803 /* note that we are not checking partitionSize here..
2804
2805 Note that we are also not checking the mod_counters here.
2806 If everything else matches execpt the mod_counter, that's
2807 good enough for this test. We will deal with the mod_counters
2808 a little later in the autoconfiguration process.
2809
2810 (clabel1->mod_counter == clabel2->mod_counter) &&
2811
2812 */
2813
2814 clabel1 = cset->ac->clabel;
2815 clabel2 = ac->clabel;
2816 if ((clabel1->version == clabel2->version) &&
2817 (clabel1->serial_number == clabel2->serial_number) &&
2818 (clabel1->num_rows == clabel2->num_rows) &&
2819 (clabel1->num_columns == clabel2->num_columns) &&
2820 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2821 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2822 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2823 (clabel1->parityConfig == clabel2->parityConfig) &&
2824 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2825 (clabel1->blockSize == clabel2->blockSize) &&
2826 (clabel1->numBlocks == clabel2->numBlocks) &&
2827 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2828 (clabel1->root_partition == clabel2->root_partition) &&
2829 (clabel1->last_unit == clabel2->last_unit) &&
2830 (clabel1->config_order == clabel2->config_order)) {
2831 /* if it get's here, it almost *has* to be a match */
2832 } else {
2833 /* it's not consistent with somebody in the set..
2834 punt */
2835 return(0);
2836 }
2837 /* all was fine.. it must fit... */
2838 return(1);
2839 }
2840
2841 int
2842 rf_have_enough_components(cset)
2843 RF_ConfigSet_t *cset;
2844 {
2845 RF_AutoConfig_t *ac;
2846 RF_AutoConfig_t *auto_config;
2847 RF_ComponentLabel_t *clabel;
2848 int r,c;
2849 int num_rows;
2850 int num_cols;
2851 int num_missing;
2852
2853 /* check to see that we have enough 'live' components
2854 of this set. If so, we can configure it if necessary */
2855
2856 num_rows = cset->ac->clabel->num_rows;
2857 num_cols = cset->ac->clabel->num_columns;
2858
2859 /* XXX Check for duplicate components!?!?!? */
2860
2861 num_missing = 0;
2862 auto_config = cset->ac;
2863
2864 for(r=0; r<num_rows; r++) {
2865 for(c=0; c<num_cols; c++) {
2866 ac = auto_config;
2867 while(ac!=NULL) {
2868 if (ac->clabel==NULL) {
2869 /* big-time bad news. */
2870 goto fail;
2871 }
2872 if ((ac->clabel->row == r) &&
2873 (ac->clabel->column == c)) {
2874 /* it's this one... */
2875 #if DEBUG
2876 printf("Found: %s at %d,%d\n",
2877 ac->devname,r,c);
2878 #endif
2879 break;
2880 }
2881 ac=ac->next;
2882 }
2883 if (ac==NULL) {
2884 /* Didn't find one here! */
2885 num_missing++;
2886 }
2887 }
2888 }
2889
2890 clabel = cset->ac->clabel;
2891
2892 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
2893 ((clabel->parityConfig == '1') && (num_missing > 1)) ||
2894 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
2895 ((clabel->parityConfig == '5') && (num_missing > 1))) {
2896 /* XXX this needs to be made *much* more general */
2897 /* Too many failures */
2898 return(0);
2899 }
2900 /* otherwise, all is well, and we've got enough to take a kick
2901 at autoconfiguring this set */
2902 return(1);
2903 fail:
2904 return(0);
2905
2906 }
2907
2908 void
2909 rf_create_configuration(ac,config,raidPtr)
2910 RF_AutoConfig_t *ac;
2911 RF_Config_t *config;
2912 RF_Raid_t *raidPtr;
2913 {
2914 RF_ComponentLabel_t *clabel;
2915
2916 clabel = ac->clabel;
2917
2918 /* 1. Fill in the common stuff */
2919 config->numRow = clabel->num_rows;
2920 config->numCol = clabel->num_columns;
2921 config->numSpare = 0; /* XXX should this be set here? */
2922 config->sectPerSU = clabel->sectPerSU;
2923 config->SUsPerPU = clabel->SUsPerPU;
2924 config->SUsPerRU = clabel->SUsPerRU;
2925 config->parityConfig = clabel->parityConfig;
2926 /* XXX... */
2927 strcpy(config->diskQueueType,"fifo");
2928 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
2929 config->layoutSpecificSize = 0; /* XXX ?? */
2930
2931 while(ac!=NULL) {
2932 /* row/col values will be in range due to the checks
2933 in reasonable_label() */
2934 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
2935 ac->devname);
2936 ac = ac->next;
2937 }
2938
2939 }
2940
2941 int
2942 rf_set_autoconfig(raidPtr, new_value)
2943 RF_Raid_t *raidPtr;
2944 int new_value;
2945 {
2946 RF_ComponentLabel_t clabel;
2947 struct vnode *vp;
2948 dev_t dev;
2949 int row, column;
2950
2951 raidPtr->autoconfigure = new_value;
2952 for(row=0; row<raidPtr->numRow; row++) {
2953 for(column=0; column<raidPtr->numCol; column++) {
2954 dev = raidPtr->Disks[row][column].dev;
2955 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2956 raidread_component_label(dev, vp, &clabel);
2957 clabel.autoconfigure = new_value;
2958 raidwrite_component_label(dev, vp, &clabel);
2959 }
2960 }
2961 return(new_value);
2962 }
2963
2964 int
2965 rf_set_rootpartition(raidPtr, new_value)
2966 RF_Raid_t *raidPtr;
2967 int new_value;
2968 {
2969 RF_ComponentLabel_t clabel;
2970 struct vnode *vp;
2971 dev_t dev;
2972 int row, column;
2973
2974 raidPtr->root_partition = new_value;
2975 for(row=0; row<raidPtr->numRow; row++) {
2976 for(column=0; column<raidPtr->numCol; column++) {
2977 dev = raidPtr->Disks[row][column].dev;
2978 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2979 raidread_component_label(dev, vp, &clabel);
2980 clabel.root_partition = new_value;
2981 raidwrite_component_label(dev, vp, &clabel);
2982 }
2983 }
2984 return(new_value);
2985 }
2986
2987 void
2988 rf_release_all_vps(cset)
2989 RF_ConfigSet_t *cset;
2990 {
2991 RF_AutoConfig_t *ac;
2992
2993 ac = cset->ac;
2994 while(ac!=NULL) {
2995 /* Close the vp, and give it back */
2996 if (ac->vp) {
2997 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
2998 vput(ac->vp);
2999 }
3000 ac = ac->next;
3001 }
3002 }
3003
3004
3005 void
3006 rf_cleanup_config_set(cset)
3007 RF_ConfigSet_t *cset;
3008 {
3009 RF_AutoConfig_t *ac;
3010 RF_AutoConfig_t *next_ac;
3011
3012 ac = cset->ac;
3013 while(ac!=NULL) {
3014 next_ac = ac->next;
3015 /* nuke the label */
3016 free(ac->clabel, M_RAIDFRAME);
3017 /* cleanup the config structure */
3018 free(ac, M_RAIDFRAME);
3019 /* "next.." */
3020 ac = next_ac;
3021 }
3022 /* and, finally, nuke the config set */
3023 free(cset, M_RAIDFRAME);
3024 }
3025
3026
3027 void
3028 raid_init_component_label(raidPtr, clabel)
3029 RF_Raid_t *raidPtr;
3030 RF_ComponentLabel_t *clabel;
3031 {
3032 /* current version number */
3033 clabel->version = RF_COMPONENT_LABEL_VERSION;
3034 clabel->serial_number = raidPtr->serial_number;
3035 clabel->mod_counter = raidPtr->mod_counter;
3036 clabel->num_rows = raidPtr->numRow;
3037 clabel->num_columns = raidPtr->numCol;
3038 clabel->clean = RF_RAID_DIRTY; /* not clean */
3039 clabel->status = rf_ds_optimal; /* "It's good!" */
3040
3041 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3042 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3043 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3044
3045 clabel->blockSize = raidPtr->bytesPerSector;
3046 clabel->numBlocks = raidPtr->sectorsPerDisk;
3047
3048 /* XXX not portable */
3049 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3050 clabel->maxOutstanding = raidPtr->maxOutstanding;
3051 clabel->autoconfigure = raidPtr->autoconfigure;
3052 clabel->root_partition = raidPtr->root_partition;
3053 clabel->last_unit = raidPtr->raidid;
3054 clabel->config_order = raidPtr->config_order;
3055 }
3056
3057 int
3058 rf_auto_config_set(cset,unit)
3059 RF_ConfigSet_t *cset;
3060 int *unit;
3061 {
3062 RF_Raid_t *raidPtr;
3063 RF_Config_t *config;
3064 int raidID;
3065 int retcode;
3066
3067 printf("Starting autoconfigure on raid%d\n",raidID);
3068
3069 retcode = 0;
3070 *unit = -1;
3071
3072 /* 1. Create a config structure */
3073
3074 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3075 M_RAIDFRAME,
3076 M_NOWAIT);
3077 if (config==NULL) {
3078 printf("Out of mem!?!?\n");
3079 /* XXX do something more intelligent here. */
3080 return(1);
3081 }
3082 /* XXX raidID needs to be set correctly.. */
3083
3084 /*
3085 2. Figure out what RAID ID this one is supposed to live at
3086 See if we can get the same RAID dev that it was configured
3087 on last time..
3088 */
3089
3090 raidID = cset->ac->clabel->last_unit;
3091 if ((raidID < 0) || (raidID >= numraid)) {
3092 /* let's not wander off into lala land. */
3093 raidID = numraid - 1;
3094 }
3095 if (raidPtrs[raidID]->valid != 0) {
3096
3097 /*
3098 Nope... Go looking for an alternative...
3099 Start high so we don't immediately use raid0 if that's
3100 not taken.
3101 */
3102
3103 for(raidID = numraid; raidID >= 0; raidID--) {
3104 if (raidPtrs[raidID]->valid == 0) {
3105 /* can use this one! */
3106 break;
3107 }
3108 }
3109 }
3110
3111 if (raidID < 0) {
3112 /* punt... */
3113 printf("Unable to auto configure this set!\n");
3114 printf("(Out of RAID devs!)\n");
3115 return(1);
3116 }
3117
3118 raidPtr = raidPtrs[raidID];
3119
3120 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3121 raidPtr->raidid = raidID;
3122 raidPtr->openings = RAIDOUTSTANDING;
3123
3124 /* 3. Build the configuration structure */
3125 rf_create_configuration(cset->ac, config, raidPtr);
3126
3127 /* 4. Do the configuration */
3128 retcode = rf_Configure(raidPtr, config, cset->ac);
3129
3130 if (retcode == 0) {
3131
3132 raidinit(raidPtrs[raidID]);
3133
3134 rf_markalldirty(raidPtrs[raidID]);
3135 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3136 if (cset->ac->clabel->root_partition==1) {
3137 /* everything configured just fine. Make a note
3138 that this set is eligible to be root. */
3139 cset->rootable = 1;
3140 /* XXX do this here? */
3141 raidPtrs[raidID]->root_partition = 1;
3142 }
3143 }
3144
3145 /* 5. Cleanup */
3146 free(config, M_RAIDFRAME);
3147
3148 *unit = raidID;
3149 return(retcode);
3150 }
3151