rf_netbsdkintf.c revision 1.60 1 /* $NetBSD: rf_netbsdkintf.c,v 1.60 2000/02/25 19:56:56 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_copyback.h"
141 #include "rf_dag.h"
142 #include "rf_dagflags.h"
143 #include "rf_diskqueue.h"
144 #include "rf_acctrace.h"
145 #include "rf_etimer.h"
146 #include "rf_general.h"
147 #include "rf_debugMem.h"
148 #include "rf_kintf.h"
149 #include "rf_options.h"
150 #include "rf_driver.h"
151 #include "rf_parityscan.h"
152 #include "rf_debugprint.h"
153 #include "rf_threadstuff.h"
154
/* Debug verbosity for this file; db1_printf() output appears when > 0. */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- debugging printf.  The argument is a
 * complete, parenthesized printf argument list.
 *
 * Both variants expand to exactly one statement (do { } while (0)), so
 * the macro can be followed by ';' inside an unbraced if/else without
 * capturing the else or producing a stray empty statement.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
162
163 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
164
165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
166
167 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
168 * spare table */
169 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
170 * installation process */
171
172 /* prototypes */
173 static void KernelWakeupFunc(struct buf * bp);
174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
175 dev_t dev, RF_SectorNum_t startSect,
176 RF_SectorCount_t numSect, caddr_t buf,
177 void (*cbFunc) (struct buf *), void *cbArg,
178 int logBytesPerSector, struct proc * b_proc);
179 static void raidinit __P((RF_Raid_t *));
180
181 void raidattach __P((int));
182 int raidsize __P((dev_t));
183 int raidopen __P((dev_t, int, int, struct proc *));
184 int raidclose __P((dev_t, int, int, struct proc *));
185 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
186 int raidwrite __P((dev_t, struct uio *, int));
187 int raidread __P((dev_t, struct uio *, int));
188 void raidstrategy __P((struct buf *));
189 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
190
191 /*
192 * Pilfered from ccd.c
193 */
194
195 struct raidbuf {
196 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
197 struct buf *rf_obp; /* ptr. to original I/O buf */
198 int rf_flags; /* misc. flags */
199 RF_DiskQueueData_t *req;/* the request that this was part of.. */
200 };
201
202
203 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
204 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
205
206 /* XXX Not sure if the following should be replacing the raidPtrs above,
207 or if it should be used in conjunction with that...
208 */
209
210 struct raid_softc {
211 int sc_flags; /* flags */
212 int sc_cflags; /* configuration flags */
213 size_t sc_size; /* size of the raid device */
214 char sc_xname[20]; /* XXX external name */
215 struct disk sc_dkdev; /* generic disk device info */
216 struct pool sc_cbufpool; /* component buffer pool */
217 struct buf_queue buf_queue; /* used for the device queue */
218 };
219 /* sc_flags */
220 #define RAIDF_INITED 0x01 /* unit has been initialized */
221 #define RAIDF_WLABEL 0x02 /* label area is writable */
222 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
223 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
224 #define RAIDF_LOCKED 0x80 /* unit is locked */
225
226 #define raidunit(x) DISKUNIT(x)
227 int numraid = 0;
228
229 /*
230 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
231 * Be aware that large numbers can allow the driver to consume a lot of
232 * kernel memory, especially on writes, and in degraded mode reads.
233 *
234 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
235 * a single 64K write will typically require 64K for the old data,
236 * 64K for the old parity, and 64K for the new parity, for a total
237 * of 192K (if the parity buffer is not re-used immediately).
238 * Even if it is used immediately, that's still 128K, which when multiplied
239 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
240 *
241 * Now in degraded mode, for example, a 64K read on the above setup may
242 * require data reconstruction, which will require *all* of the 4 remaining
243 * disks to participate -- 4 * 32K/disk == 128K again.
244 */
245
246 #ifndef RAIDOUTSTANDING
247 #define RAIDOUTSTANDING 6
248 #endif
249
250 #define RAIDLABELDEV(dev) \
251 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
252
253 /* declared here, and made public, for the benefit of KVM stuff.. */
254 struct raid_softc *raid_softc;
255
256 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
257 struct disklabel *));
258 static void raidgetdisklabel __P((dev_t));
259 static void raidmakedisklabel __P((struct raid_softc *));
260
261 static int raidlock __P((struct raid_softc *));
262 static void raidunlock __P((struct raid_softc *));
263
264 static void rf_markalldirty __P((RF_Raid_t *));
265 void rf_mountroot_hook __P((struct device *));
266
267 struct device *raidrootdev;
268 struct cfdata cf_raidrootdev;
269 struct cfdriver cfdrv;
270 /* XXX these should be moved up */
271 #include "rf_configure.h"
272 #include <sys/reboot.h>
273
274 void rf_ReconThread __P((struct rf_recon_req *));
275 /* XXX what I want is: */
276 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
277 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
278 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
279 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
280 void rf_buildroothack __P((void *));
281
282 RF_AutoConfig_t *rf_find_raid_components __P((void));
283 void print_component_label __P((RF_ComponentLabel_t *));
284 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
285 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
286 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
287 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
288 RF_Raid_t *));
289 int rf_set_autoconfig __P((RF_Raid_t *, int));
290 int rf_set_rootpartition __P((RF_Raid_t *, int));
291 void rf_release_all_vps __P((RF_ConfigSet_t *));
292 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
293 int rf_have_enough_components __P((RF_ConfigSet_t *));
294 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
295
296 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
297 allow autoconfig to take place */
298 extern struct device *booted_device;
299
300 void
301 raidattach(num)
302 int num;
303 {
304 int raidID;
305 int i, rc;
306 RF_AutoConfig_t *ac_list; /* autoconfig list */
307 RF_ConfigSet_t *config_sets;
308
309 #ifdef DEBUG
310 printf("raidattach: Asked for %d units\n", num);
311 #endif
312
313 if (num <= 0) {
314 #ifdef DIAGNOSTIC
315 panic("raidattach: count <= 0");
316 #endif
317 return;
318 }
319 /* This is where all the initialization stuff gets done. */
320
321 numraid = num;
322
323 /* Make some space for requested number of units... */
324
325 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
326 if (raidPtrs == NULL) {
327 panic("raidPtrs is NULL!!\n");
328 }
329
330 rc = rf_mutex_init(&rf_sparet_wait_mutex);
331 if (rc) {
332 RF_PANIC();
333 }
334
335 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
336
337 for (i = 0; i < num; i++)
338 raidPtrs[i] = NULL;
339 rc = rf_BootRaidframe();
340 if (rc == 0)
341 printf("Kernelized RAIDframe activated\n");
342 else
343 panic("Serious error booting RAID!!\n");
344
345 /* put together some datastructures like the CCD device does.. This
346 * lets us lock the device and what-not when it gets opened. */
347
348 raid_softc = (struct raid_softc *)
349 malloc(num * sizeof(struct raid_softc),
350 M_RAIDFRAME, M_NOWAIT);
351 if (raid_softc == NULL) {
352 printf("WARNING: no memory for RAIDframe driver\n");
353 return;
354 }
355
356 bzero(raid_softc, num * sizeof(struct raid_softc));
357
358 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
359 M_RAIDFRAME, M_NOWAIT);
360 if (raidrootdev == NULL) {
361 panic("No memory for RAIDframe driver!!?!?!\n");
362 }
363
364 for (raidID = 0; raidID < num; raidID++) {
365 BUFQ_INIT(&raid_softc[raidID].buf_queue);
366
367 raidrootdev[raidID].dv_class = DV_DISK;
368 raidrootdev[raidID].dv_cfdata = NULL;
369 raidrootdev[raidID].dv_unit = raidID;
370 raidrootdev[raidID].dv_parent = NULL;
371 raidrootdev[raidID].dv_flags = 0;
372 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
373
374 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
375 (RF_Raid_t *));
376 if (raidPtrs[raidID] == NULL) {
377 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
378 numraid = raidID;
379 return;
380 }
381 }
382
383 if (raidautoconfig) {
384 /* 1. locate all RAID components on the system */
385
386 #if DEBUG
387 printf("Searching for raid components...\n");
388 #endif
389 ac_list = rf_find_raid_components();
390
391 /* 2. sort them into their respective sets */
392
393 config_sets = rf_create_auto_sets(ac_list);
394
395 /* 3. evaluate each set and configure the valid ones
396 This gets done in rf_buildroothack() */
397
398 /* schedule the creation of the thread to do the
399 "/ on RAID" stuff */
400
401 kthread_create(rf_buildroothack,config_sets);
402
403 /* 4. make sure we get our mud.. I mean root.. hooks in.. */
404 /* XXXX pick raid0 for now... and this should be only done
405 if we find something that's bootable!!! */
406 #if 0
407 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
408 #endif
409 }
410
411 }
412
413 void
414 rf_buildroothack(arg)
415 void *arg;
416 {
417 RF_ConfigSet_t *config_sets = arg;
418 RF_ConfigSet_t *cset;
419 RF_ConfigSet_t *next_cset;
420 int retcode;
421 int raidID;
422 int rootID;
423 int num_root;
424
425 num_root = 0;
426 cset = config_sets;
427 while(cset != NULL ) {
428 next_cset = cset->next;
429 if (rf_have_enough_components(cset) &&
430 cset->ac->clabel->autoconfigure==1) {
431 retcode = rf_auto_config_set(cset,&raidID);
432 if (!retcode) {
433 if (cset->rootable) {
434 rootID = raidID;
435 num_root++;
436 }
437 } else {
438 /* The autoconfig didn't work :( */
439 #if DEBUG
440 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
441 #endif
442 rf_release_all_vps(cset);
443 #if DEBUG
444 printf("Done cleanup\n");
445 #endif
446 }
447 } else {
448 /* we're not autoconfiguring this set...
449 release the associated resources */
450 #if DEBUG
451 printf("Releasing vp's\n");
452 #endif
453 rf_release_all_vps(cset);
454 #if DEBUG
455 printf("Done.\n");
456 #endif
457 }
458 /* cleanup */
459 #if DEBUG
460 printf("Cleaning up config set\n");
461 #endif
462 rf_cleanup_config_set(cset);
463 #if DEBUG
464 printf("Done cleanup\n");
465 #endif
466 cset = next_cset;
467 }
468 if (boothowto & RB_ASKNAME) {
469 /* We don't auto-config... */
470 } else {
471 /* They didn't ask, and we found something bootable... */
472 /* XXX pretend for now.. */
473 if (num_root == 1) {
474 #if 1
475 booted_device = &raidrootdev[rootID];
476 #endif
477 } else if (num_root > 1) {
478 /* we can't guess.. require the user to answer... */
479 boothowto |= RB_ASKNAME;
480 }
481 }
482 }
483
484
485 int
486 raidsize(dev)
487 dev_t dev;
488 {
489 struct raid_softc *rs;
490 struct disklabel *lp;
491 int part, unit, omask, size;
492
493 unit = raidunit(dev);
494 if (unit >= numraid)
495 return (-1);
496 rs = &raid_softc[unit];
497
498 if ((rs->sc_flags & RAIDF_INITED) == 0)
499 return (-1);
500
501 part = DISKPART(dev);
502 omask = rs->sc_dkdev.dk_openmask & (1 << part);
503 lp = rs->sc_dkdev.dk_label;
504
505 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
506 return (-1);
507
508 if (lp->d_partitions[part].p_fstype != FS_SWAP)
509 size = -1;
510 else
511 size = lp->d_partitions[part].p_size *
512 (lp->d_secsize / DEV_BSIZE);
513
514 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
515 return (-1);
516
517 return (size);
518
519 }
520
521 int
522 raiddump(dev, blkno, va, size)
523 dev_t dev;
524 daddr_t blkno;
525 caddr_t va;
526 size_t size;
527 {
528 /* Not implemented. */
529 return ENXIO;
530 }
531 /* ARGSUSED */
532 int
533 raidopen(dev, flags, fmt, p)
534 dev_t dev;
535 int flags, fmt;
536 struct proc *p;
537 {
538 int unit = raidunit(dev);
539 struct raid_softc *rs;
540 struct disklabel *lp;
541 int part, pmask;
542 int error = 0;
543
544 if (unit >= numraid)
545 return (ENXIO);
546 rs = &raid_softc[unit];
547
548 if ((error = raidlock(rs)) != 0)
549 return (error);
550 lp = rs->sc_dkdev.dk_label;
551
552 part = DISKPART(dev);
553 pmask = (1 << part);
554
555 db1_printf(("Opening raid device number: %d partition: %d\n",
556 unit, part));
557
558
559 if ((rs->sc_flags & RAIDF_INITED) &&
560 (rs->sc_dkdev.dk_openmask == 0))
561 raidgetdisklabel(dev);
562
563 /* make sure that this partition exists */
564
565 if (part != RAW_PART) {
566 db1_printf(("Not a raw partition..\n"));
567 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
568 ((part >= lp->d_npartitions) ||
569 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
570 error = ENXIO;
571 raidunlock(rs);
572 db1_printf(("Bailing out...\n"));
573 return (error);
574 }
575 }
576 /* Prevent this unit from being unconfigured while open. */
577 switch (fmt) {
578 case S_IFCHR:
579 rs->sc_dkdev.dk_copenmask |= pmask;
580 break;
581
582 case S_IFBLK:
583 rs->sc_dkdev.dk_bopenmask |= pmask;
584 break;
585 }
586
587 if ((rs->sc_dkdev.dk_openmask == 0) &&
588 ((rs->sc_flags & RAIDF_INITED) != 0)) {
589 /* First one... mark things as dirty... Note that we *MUST*
590 have done a configure before this. I DO NOT WANT TO BE
591 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
592 THAT THEY BELONG TOGETHER!!!!! */
593 /* XXX should check to see if we're only open for reading
594 here... If so, we needn't do this, but then need some
595 other way of keeping track of what's happened.. */
596
597 rf_markalldirty( raidPtrs[unit] );
598 }
599
600
601 rs->sc_dkdev.dk_openmask =
602 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
603
604 raidunlock(rs);
605
606 return (error);
607
608
609 }
610 /* ARGSUSED */
611 int
612 raidclose(dev, flags, fmt, p)
613 dev_t dev;
614 int flags, fmt;
615 struct proc *p;
616 {
617 int unit = raidunit(dev);
618 struct raid_softc *rs;
619 int error = 0;
620 int part;
621
622 if (unit >= numraid)
623 return (ENXIO);
624 rs = &raid_softc[unit];
625
626 if ((error = raidlock(rs)) != 0)
627 return (error);
628
629 part = DISKPART(dev);
630
631 /* ...that much closer to allowing unconfiguration... */
632 switch (fmt) {
633 case S_IFCHR:
634 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
635 break;
636
637 case S_IFBLK:
638 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
639 break;
640 }
641 rs->sc_dkdev.dk_openmask =
642 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
643
644 if ((rs->sc_dkdev.dk_openmask == 0) &&
645 ((rs->sc_flags & RAIDF_INITED) != 0)) {
646 /* Last one... device is not unconfigured yet.
647 Device shutdown has taken care of setting the
648 clean bits if RAIDF_INITED is not set
649 mark things as clean... */
650 #ifdef DEBUG
651 printf("Last one on raid%d. Updating status.\n",unit);
652 #endif
653 rf_update_component_labels( raidPtrs[unit] );
654 }
655
656 raidunlock(rs);
657 return (0);
658
659 }
660
661 void
662 raidstrategy(bp)
663 register struct buf *bp;
664 {
665 register int s;
666
667 unsigned int raidID = raidunit(bp->b_dev);
668 RF_Raid_t *raidPtr;
669 struct raid_softc *rs = &raid_softc[raidID];
670 struct disklabel *lp;
671 int wlabel;
672
673 if ((rs->sc_flags & RAIDF_INITED) ==0) {
674 bp->b_error = ENXIO;
675 bp->b_flags = B_ERROR;
676 bp->b_resid = bp->b_bcount;
677 biodone(bp);
678 return;
679 }
680 if (raidID >= numraid || !raidPtrs[raidID]) {
681 bp->b_error = ENODEV;
682 bp->b_flags |= B_ERROR;
683 bp->b_resid = bp->b_bcount;
684 biodone(bp);
685 return;
686 }
687 raidPtr = raidPtrs[raidID];
688 if (!raidPtr->valid) {
689 bp->b_error = ENODEV;
690 bp->b_flags |= B_ERROR;
691 bp->b_resid = bp->b_bcount;
692 biodone(bp);
693 return;
694 }
695 if (bp->b_bcount == 0) {
696 db1_printf(("b_bcount is zero..\n"));
697 biodone(bp);
698 return;
699 }
700 lp = rs->sc_dkdev.dk_label;
701
702 /*
703 * Do bounds checking and adjust transfer. If there's an
704 * error, the bounds check will flag that for us.
705 */
706
707 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
708 if (DISKPART(bp->b_dev) != RAW_PART)
709 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
710 db1_printf(("Bounds check failed!!:%d %d\n",
711 (int) bp->b_blkno, (int) wlabel));
712 biodone(bp);
713 return;
714 }
715 s = splbio();
716
717 bp->b_resid = 0;
718
719 /* stuff it onto our queue */
720 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
721
722 raidstart(raidPtrs[raidID]);
723
724 splx(s);
725 }
726 /* ARGSUSED */
727 int
728 raidread(dev, uio, flags)
729 dev_t dev;
730 struct uio *uio;
731 int flags;
732 {
733 int unit = raidunit(dev);
734 struct raid_softc *rs;
735 int part;
736
737 if (unit >= numraid)
738 return (ENXIO);
739 rs = &raid_softc[unit];
740
741 if ((rs->sc_flags & RAIDF_INITED) == 0)
742 return (ENXIO);
743 part = DISKPART(dev);
744
745 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
746
747 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
748
749 }
750 /* ARGSUSED */
751 int
752 raidwrite(dev, uio, flags)
753 dev_t dev;
754 struct uio *uio;
755 int flags;
756 {
757 int unit = raidunit(dev);
758 struct raid_softc *rs;
759
760 if (unit >= numraid)
761 return (ENXIO);
762 rs = &raid_softc[unit];
763
764 if ((rs->sc_flags & RAIDF_INITED) == 0)
765 return (ENXIO);
766 db1_printf(("raidwrite\n"));
767 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
768
769 }
770
771 int
772 raidioctl(dev, cmd, data, flag, p)
773 dev_t dev;
774 u_long cmd;
775 caddr_t data;
776 int flag;
777 struct proc *p;
778 {
779 int unit = raidunit(dev);
780 int error = 0;
781 int part, pmask;
782 struct raid_softc *rs;
783 RF_Config_t *k_cfg, *u_cfg;
784 RF_Raid_t *raidPtr;
785 RF_RaidDisk_t *diskPtr;
786 RF_AccTotals_t *totals;
787 RF_DeviceConfig_t *d_cfg, **ucfgp;
788 u_char *specific_buf;
789 int retcode = 0;
790 int row;
791 int column;
792 struct rf_recon_req *rrcopy, *rr;
793 RF_ComponentLabel_t *clabel;
794 RF_ComponentLabel_t ci_label;
795 RF_ComponentLabel_t **clabel_ptr;
796 RF_SingleComponent_t *sparePtr,*componentPtr;
797 RF_SingleComponent_t hot_spare;
798 RF_SingleComponent_t component;
799 int i, j, d;
800
801 if (unit >= numraid)
802 return (ENXIO);
803 rs = &raid_softc[unit];
804 raidPtr = raidPtrs[unit];
805
806 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
807 (int) DISKPART(dev), (int) unit, (int) cmd));
808
809 /* Must be open for writes for these commands... */
810 switch (cmd) {
811 case DIOCSDINFO:
812 case DIOCWDINFO:
813 case DIOCWLABEL:
814 if ((flag & FWRITE) == 0)
815 return (EBADF);
816 }
817
818 /* Must be initialized for these... */
819 switch (cmd) {
820 case DIOCGDINFO:
821 case DIOCSDINFO:
822 case DIOCWDINFO:
823 case DIOCGPART:
824 case DIOCWLABEL:
825 case DIOCGDEFLABEL:
826 case RAIDFRAME_SHUTDOWN:
827 case RAIDFRAME_REWRITEPARITY:
828 case RAIDFRAME_GET_INFO:
829 case RAIDFRAME_RESET_ACCTOTALS:
830 case RAIDFRAME_GET_ACCTOTALS:
831 case RAIDFRAME_KEEP_ACCTOTALS:
832 case RAIDFRAME_GET_SIZE:
833 case RAIDFRAME_FAIL_DISK:
834 case RAIDFRAME_COPYBACK:
835 case RAIDFRAME_CHECK_RECON_STATUS:
836 case RAIDFRAME_GET_COMPONENT_LABEL:
837 case RAIDFRAME_SET_COMPONENT_LABEL:
838 case RAIDFRAME_ADD_HOT_SPARE:
839 case RAIDFRAME_REMOVE_HOT_SPARE:
840 case RAIDFRAME_INIT_LABELS:
841 case RAIDFRAME_REBUILD_IN_PLACE:
842 case RAIDFRAME_CHECK_PARITY:
843 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
844 case RAIDFRAME_CHECK_COPYBACK_STATUS:
845 case RAIDFRAME_SET_AUTOCONFIG:
846 case RAIDFRAME_SET_ROOT:
847 if ((rs->sc_flags & RAIDF_INITED) == 0)
848 return (ENXIO);
849 }
850
851 switch (cmd) {
852
853 /* configure the system */
854 case RAIDFRAME_CONFIGURE:
855
856 if (raidPtr->valid) {
857 /* There is a valid RAID set running on this unit! */
858 printf("raid%d: Device already configured!\n",unit);
859 }
860
861 /* copy-in the configuration information */
862 /* data points to a pointer to the configuration structure */
863
864 u_cfg = *((RF_Config_t **) data);
865 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
866 if (k_cfg == NULL) {
867 return (ENOMEM);
868 }
869 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
870 sizeof(RF_Config_t));
871 if (retcode) {
872 RF_Free(k_cfg, sizeof(RF_Config_t));
873 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
874 retcode));
875 return (retcode);
876 }
877 /* allocate a buffer for the layout-specific data, and copy it
878 * in */
879 if (k_cfg->layoutSpecificSize) {
880 if (k_cfg->layoutSpecificSize > 10000) {
881 /* sanity check */
882 RF_Free(k_cfg, sizeof(RF_Config_t));
883 return (EINVAL);
884 }
885 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
886 (u_char *));
887 if (specific_buf == NULL) {
888 RF_Free(k_cfg, sizeof(RF_Config_t));
889 return (ENOMEM);
890 }
891 retcode = copyin(k_cfg->layoutSpecific,
892 (caddr_t) specific_buf,
893 k_cfg->layoutSpecificSize);
894 if (retcode) {
895 RF_Free(k_cfg, sizeof(RF_Config_t));
896 RF_Free(specific_buf,
897 k_cfg->layoutSpecificSize);
898 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
899 retcode));
900 return (retcode);
901 }
902 } else
903 specific_buf = NULL;
904 k_cfg->layoutSpecific = specific_buf;
905
906 /* should do some kind of sanity check on the configuration.
907 * Store the sum of all the bytes in the last byte? */
908
909 /* configure the system */
910
911 /*
912 * Clear the entire RAID descriptor, just to make sure
913 * there is no stale data left in the case of a
914 * reconfiguration
915 */
916 bzero((char *) raidPtr, sizeof(RF_Raid_t));
917 raidPtr->raidid = unit;
918
919 retcode = rf_Configure(raidPtr, k_cfg, NULL);
920
921 if (retcode == 0) {
922
923 /* allow this many simultaneous IO's to
924 this RAID device */
925 raidPtr->openings = RAIDOUTSTANDING;
926
927 raidinit(raidPtr);
928 rf_markalldirty(raidPtr);
929 }
930 /* free the buffers. No return code here. */
931 if (k_cfg->layoutSpecificSize) {
932 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
933 }
934 RF_Free(k_cfg, sizeof(RF_Config_t));
935
936 return (retcode);
937
938 /* shutdown the system */
939 case RAIDFRAME_SHUTDOWN:
940
941 if ((error = raidlock(rs)) != 0)
942 return (error);
943
944 /*
945 * If somebody has a partition mounted, we shouldn't
946 * shutdown.
947 */
948
949 part = DISKPART(dev);
950 pmask = (1 << part);
951 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
952 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
953 (rs->sc_dkdev.dk_copenmask & pmask))) {
954 raidunlock(rs);
955 return (EBUSY);
956 }
957
958 retcode = rf_Shutdown(raidPtr);
959
960 pool_destroy(&rs->sc_cbufpool);
961
962 /* It's no longer initialized... */
963 rs->sc_flags &= ~RAIDF_INITED;
964
965 /* Detach the disk. */
966 disk_detach(&rs->sc_dkdev);
967
968 raidunlock(rs);
969
970 return (retcode);
971 case RAIDFRAME_GET_COMPONENT_LABEL:
972 clabel_ptr = (RF_ComponentLabel_t **) data;
973 /* need to read the component label for the disk indicated
974 by row,column in clabel */
975
976 /* For practice, let's get it directly fromdisk, rather
977 than from the in-core copy */
978 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
979 (RF_ComponentLabel_t *));
980 if (clabel == NULL)
981 return (ENOMEM);
982
983 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
984
985 retcode = copyin( *clabel_ptr, clabel,
986 sizeof(RF_ComponentLabel_t));
987
988 if (retcode) {
989 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
990 return(retcode);
991 }
992
993 row = clabel->row;
994 column = clabel->column;
995
996 if ((row < 0) || (row >= raidPtr->numRow) ||
997 (column < 0) || (column >= raidPtr->numCol)) {
998 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
999 return(EINVAL);
1000 }
1001
1002 raidread_component_label(raidPtr->Disks[row][column].dev,
1003 raidPtr->raid_cinfo[row][column].ci_vp,
1004 clabel );
1005
1006 retcode = copyout((caddr_t) clabel,
1007 (caddr_t) *clabel_ptr,
1008 sizeof(RF_ComponentLabel_t));
1009 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1010 return (retcode);
1011
1012 case RAIDFRAME_SET_COMPONENT_LABEL:
1013 clabel = (RF_ComponentLabel_t *) data;
1014
1015 /* XXX check the label for valid stuff... */
1016 /* Note that some things *should not* get modified --
1017 the user should be re-initing the labels instead of
1018 trying to patch things.
1019 */
1020
1021 printf("Got component label:\n");
1022 printf("Version: %d\n",clabel->version);
1023 printf("Serial Number: %d\n",clabel->serial_number);
1024 printf("Mod counter: %d\n",clabel->mod_counter);
1025 printf("Row: %d\n", clabel->row);
1026 printf("Column: %d\n", clabel->column);
1027 printf("Num Rows: %d\n", clabel->num_rows);
1028 printf("Num Columns: %d\n", clabel->num_columns);
1029 printf("Clean: %d\n", clabel->clean);
1030 printf("Status: %d\n", clabel->status);
1031
1032 row = clabel->row;
1033 column = clabel->column;
1034
1035 if ((row < 0) || (row >= raidPtr->numRow) ||
1036 (column < 0) || (column >= raidPtr->numCol)) {
1037 return(EINVAL);
1038 }
1039
1040 /* XXX this isn't allowed to do anything for now :-) */
1041
1042 /* XXX and before it is, we need to fill in the rest
1043 of the fields!?!?!?! */
1044 #if 0
1045 raidwrite_component_label(
1046 raidPtr->Disks[row][column].dev,
1047 raidPtr->raid_cinfo[row][column].ci_vp,
1048 clabel );
1049 #endif
1050 return (0);
1051
1052 case RAIDFRAME_INIT_LABELS:
1053 clabel = (RF_ComponentLabel_t *) data;
1054 /*
1055 we only want the serial number from
1056 the above. We get all the rest of the information
1057 from the config that was used to create this RAID
1058 set.
1059 */
1060
1061 raidPtr->serial_number = clabel->serial_number;
1062
1063 raid_init_component_label(raidPtr, &ci_label);
1064 ci_label.serial_number = clabel->serial_number;
1065
1066 for(row=0;row<raidPtr->numRow;row++) {
1067 ci_label.row = row;
1068 for(column=0;column<raidPtr->numCol;column++) {
1069 diskPtr = &raidPtr->Disks[row][column];
1070 ci_label.partitionSize = diskPtr->partitionSize;
1071 ci_label.column = column;
1072 raidwrite_component_label(
1073 raidPtr->Disks[row][column].dev,
1074 raidPtr->raid_cinfo[row][column].ci_vp,
1075 &ci_label );
1076 }
1077 }
1078
1079 return (retcode);
1080 case RAIDFRAME_SET_AUTOCONFIG:
1081 d = rf_set_autoconfig(raidPtr, *data);
1082 printf("New autoconfig value is: %d\n", d);
1083 *data = d;
1084 return (retcode);
1085
1086 case RAIDFRAME_SET_ROOT:
1087 d = rf_set_rootpartition(raidPtr, *data);
1088 printf("New rootpartition value is: %d\n", d);
1089 *data = d;
1090 return (retcode);
1091
1092 /* initialize all parity */
1093 case RAIDFRAME_REWRITEPARITY:
1094
1095 if (raidPtr->Layout.map->faultsTolerated == 0) {
1096 /* Parity for RAID 0 is trivially correct */
1097 raidPtr->parity_good = RF_RAID_CLEAN;
1098 return(0);
1099 }
1100
1101 if (raidPtr->parity_rewrite_in_progress == 1) {
1102 /* Re-write is already in progress! */
1103 return(EINVAL);
1104 }
1105
1106 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1107 rf_RewriteParityThread,
1108 raidPtr,"raid_parity");
1109 return (retcode);
1110
1111
1112 case RAIDFRAME_ADD_HOT_SPARE:
1113 sparePtr = (RF_SingleComponent_t *) data;
1114 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1115 printf("Adding spare\n");
1116 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1117 return(retcode);
1118
1119 case RAIDFRAME_REMOVE_HOT_SPARE:
1120 return(retcode);
1121
1122 case RAIDFRAME_REBUILD_IN_PLACE:
1123
1124 if (raidPtr->Layout.map->faultsTolerated == 0) {
1125 /* Can't do this on a RAID 0!! */
1126 return(EINVAL);
1127 }
1128
1129 if (raidPtr->recon_in_progress == 1) {
1130 /* a reconstruct is already in progress! */
1131 return(EINVAL);
1132 }
1133
1134 componentPtr = (RF_SingleComponent_t *) data;
1135 memcpy( &component, componentPtr,
1136 sizeof(RF_SingleComponent_t));
1137 row = component.row;
1138 column = component.column;
1139 printf("Rebuild: %d %d\n",row, column);
1140 if ((row < 0) || (row >= raidPtr->numRow) ||
1141 (column < 0) || (column >= raidPtr->numCol)) {
1142 return(EINVAL);
1143 }
1144
1145 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1146 if (rrcopy == NULL)
1147 return(ENOMEM);
1148
1149 rrcopy->raidPtr = (void *) raidPtr;
1150 rrcopy->row = row;
1151 rrcopy->col = column;
1152
1153 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1154 rf_ReconstructInPlaceThread,
1155 rrcopy,"raid_reconip");
1156 return(retcode);
1157
1158 case RAIDFRAME_GET_INFO:
1159 if (!raidPtr->valid)
1160 return (ENODEV);
1161 ucfgp = (RF_DeviceConfig_t **) data;
1162 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1163 (RF_DeviceConfig_t *));
1164 if (d_cfg == NULL)
1165 return (ENOMEM);
1166 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1167 d_cfg->rows = raidPtr->numRow;
1168 d_cfg->cols = raidPtr->numCol;
1169 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1170 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1171 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1172 return (ENOMEM);
1173 }
1174 d_cfg->nspares = raidPtr->numSpare;
1175 if (d_cfg->nspares >= RF_MAX_DISKS) {
1176 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1177 return (ENOMEM);
1178 }
1179 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1180 d = 0;
1181 for (i = 0; i < d_cfg->rows; i++) {
1182 for (j = 0; j < d_cfg->cols; j++) {
1183 d_cfg->devs[d] = raidPtr->Disks[i][j];
1184 d++;
1185 }
1186 }
1187 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1188 d_cfg->spares[i] = raidPtr->Disks[0][j];
1189 }
1190 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1191 sizeof(RF_DeviceConfig_t));
1192 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1193
1194 return (retcode);
1195
1196 case RAIDFRAME_CHECK_PARITY:
1197 *(int *) data = raidPtr->parity_good;
1198 return (0);
1199
1200 case RAIDFRAME_RESET_ACCTOTALS:
1201 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1202 return (0);
1203
1204 case RAIDFRAME_GET_ACCTOTALS:
1205 totals = (RF_AccTotals_t *) data;
1206 *totals = raidPtr->acc_totals;
1207 return (0);
1208
1209 case RAIDFRAME_KEEP_ACCTOTALS:
1210 raidPtr->keep_acc_totals = *(int *)data;
1211 return (0);
1212
1213 case RAIDFRAME_GET_SIZE:
1214 *(int *) data = raidPtr->totalSectors;
1215 return (0);
1216
1217 /* fail a disk & optionally start reconstruction */
1218 case RAIDFRAME_FAIL_DISK:
1219
1220 if (raidPtr->Layout.map->faultsTolerated == 0) {
1221 /* Can't do this on a RAID 0!! */
1222 return(EINVAL);
1223 }
1224
1225 rr = (struct rf_recon_req *) data;
1226
1227 if (rr->row < 0 || rr->row >= raidPtr->numRow
1228 || rr->col < 0 || rr->col >= raidPtr->numCol)
1229 return (EINVAL);
1230
1231 printf("raid%d: Failing the disk: row: %d col: %d\n",
1232 unit, rr->row, rr->col);
1233
1234 /* make a copy of the recon request so that we don't rely on
1235 * the user's buffer */
1236 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1237 if (rrcopy == NULL)
1238 return(ENOMEM);
1239 bcopy(rr, rrcopy, sizeof(*rr));
1240 rrcopy->raidPtr = (void *) raidPtr;
1241
1242 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1243 rf_ReconThread,
1244 rrcopy,"raid_recon");
1245 return (0);
1246
1247 /* invoke a copyback operation after recon on whatever disk
1248 * needs it, if any */
1249 case RAIDFRAME_COPYBACK:
1250
1251 if (raidPtr->Layout.map->faultsTolerated == 0) {
1252 /* This makes no sense on a RAID 0!! */
1253 return(EINVAL);
1254 }
1255
1256 if (raidPtr->copyback_in_progress == 1) {
1257 /* Copyback is already in progress! */
1258 return(EINVAL);
1259 }
1260
1261 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1262 rf_CopybackThread,
1263 raidPtr,"raid_copyback");
1264 return (retcode);
1265
1266 /* return the percentage completion of reconstruction */
1267 case RAIDFRAME_CHECK_RECON_STATUS:
1268 if (raidPtr->Layout.map->faultsTolerated == 0) {
1269 /* This makes no sense on a RAID 0 */
1270 return(EINVAL);
1271 }
1272 row = 0; /* XXX we only consider a single row... */
1273 if (raidPtr->status[row] != rf_rs_reconstructing)
1274 *(int *) data = 100;
1275 else
1276 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1277 return (0);
1278
1279 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1280 if (raidPtr->Layout.map->faultsTolerated == 0) {
1281 /* This makes no sense on a RAID 0 */
1282 return(EINVAL);
1283 }
1284 if (raidPtr->parity_rewrite_in_progress == 1) {
1285 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1286 } else {
1287 *(int *) data = 100;
1288 }
1289 return (0);
1290
1291 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1292 if (raidPtr->Layout.map->faultsTolerated == 0) {
1293 /* This makes no sense on a RAID 0 */
1294 return(EINVAL);
1295 }
1296 if (raidPtr->copyback_in_progress == 1) {
1297 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1298 raidPtr->Layout.numStripe;
1299 } else {
1300 *(int *) data = 100;
1301 }
1302 return (0);
1303
1304
1305 /* the sparetable daemon calls this to wait for the kernel to
1306 * need a spare table. this ioctl does not return until a
1307 * spare table is needed. XXX -- calling mpsleep here in the
1308 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1309 * -- I should either compute the spare table in the kernel,
1310 * or have a different -- XXX XXX -- interface (a different
1311 * character device) for delivering the table -- XXX */
1312 #if 0
1313 case RAIDFRAME_SPARET_WAIT:
1314 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1315 while (!rf_sparet_wait_queue)
1316 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1317 waitreq = rf_sparet_wait_queue;
1318 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1319 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1320
1321 /* structure assignment */
1322 *((RF_SparetWait_t *) data) = *waitreq;
1323
1324 RF_Free(waitreq, sizeof(*waitreq));
1325 return (0);
1326
1327 /* wakes up a process waiting on SPARET_WAIT and puts an error
1328 * code in it that will cause the daemon to exit */
1329 case RAIDFRAME_ABORT_SPARET_WAIT:
1330 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1331 waitreq->fcol = -1;
1332 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1333 waitreq->next = rf_sparet_wait_queue;
1334 rf_sparet_wait_queue = waitreq;
1335 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1336 wakeup(&rf_sparet_wait_queue);
1337 return (0);
1338
1339 /* used by the spare table daemon to deliver a spare table
1340 * into the kernel */
1341 case RAIDFRAME_SEND_SPARET:
1342
1343 /* install the spare table */
1344 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1345
1346 /* respond to the requestor. the return status of the spare
1347 * table installation is passed in the "fcol" field */
1348 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1349 waitreq->fcol = retcode;
1350 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1351 waitreq->next = rf_sparet_resp_queue;
1352 rf_sparet_resp_queue = waitreq;
1353 wakeup(&rf_sparet_resp_queue);
1354 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1355
1356 return (retcode);
1357 #endif
1358
1359 default:
1360 break; /* fall through to the os-specific code below */
1361
1362 }
1363
1364 if (!raidPtr->valid)
1365 return (EINVAL);
1366
1367 /*
1368 * Add support for "regular" device ioctls here.
1369 */
1370
1371 switch (cmd) {
1372 case DIOCGDINFO:
1373 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1374 break;
1375
1376 case DIOCGPART:
1377 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1378 ((struct partinfo *) data)->part =
1379 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1380 break;
1381
1382 case DIOCWDINFO:
1383 case DIOCSDINFO:
1384 if ((error = raidlock(rs)) != 0)
1385 return (error);
1386
1387 rs->sc_flags |= RAIDF_LABELLING;
1388
1389 error = setdisklabel(rs->sc_dkdev.dk_label,
1390 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1391 if (error == 0) {
1392 if (cmd == DIOCWDINFO)
1393 error = writedisklabel(RAIDLABELDEV(dev),
1394 raidstrategy, rs->sc_dkdev.dk_label,
1395 rs->sc_dkdev.dk_cpulabel);
1396 }
1397 rs->sc_flags &= ~RAIDF_LABELLING;
1398
1399 raidunlock(rs);
1400
1401 if (error)
1402 return (error);
1403 break;
1404
1405 case DIOCWLABEL:
1406 if (*(int *) data != 0)
1407 rs->sc_flags |= RAIDF_WLABEL;
1408 else
1409 rs->sc_flags &= ~RAIDF_WLABEL;
1410 break;
1411
1412 case DIOCGDEFLABEL:
1413 raidgetdefaultlabel(raidPtr, rs,
1414 (struct disklabel *) data);
1415 break;
1416
1417 default:
1418 retcode = ENOTTY;
1419 }
1420 return (retcode);
1421
1422 }
1423
1424
1425 /* raidinit -- complete the rest of the initialization for the
1426 RAIDframe device. */
1427
1428
1429 static void
1430 raidinit(raidPtr)
1431 RF_Raid_t *raidPtr;
1432 {
1433 struct raid_softc *rs;
1434 int unit;
1435
1436 unit = raidPtr->raidid;
1437
1438 rs = &raid_softc[unit];
1439 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1440 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1441
1442
1443 /* XXX should check return code first... */
1444 rs->sc_flags |= RAIDF_INITED;
1445
1446 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1447
1448 rs->sc_dkdev.dk_name = rs->sc_xname;
1449
1450 /* disk_attach actually creates space for the CPU disklabel, among
1451 * other things, so it's critical to call this *BEFORE* we try putzing
1452 * with disklabels. */
1453
1454 disk_attach(&rs->sc_dkdev);
1455
1456 /* XXX There may be a weird interaction here between this, and
1457 * protectedSectors, as used in RAIDframe. */
1458
1459 rs->sc_size = raidPtr->totalSectors;
1460
1461 }
1462
1463 /* wake up the daemon & tell it to get us a spare table
1464 * XXX
1465 * the entries in the queues should be tagged with the raidPtr
1466 * so that in the extremely rare case that two recons happen at once,
1467 * we know for which device were requesting a spare table
1468 * XXX
1469 *
1470 * XXX This code is not currently used. GO
1471 */
1472 int
1473 rf_GetSpareTableFromDaemon(req)
1474 RF_SparetWait_t *req;
1475 {
1476 int retcode;
1477
1478 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1479 req->next = rf_sparet_wait_queue;
1480 rf_sparet_wait_queue = req;
1481 wakeup(&rf_sparet_wait_queue);
1482
1483 /* mpsleep unlocks the mutex */
1484 while (!rf_sparet_resp_queue) {
1485 tsleep(&rf_sparet_resp_queue, PRIBIO,
1486 "raidframe getsparetable", 0);
1487 }
1488 req = rf_sparet_resp_queue;
1489 rf_sparet_resp_queue = req->next;
1490 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1491
1492 retcode = req->fcol;
1493 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1494 * alloc'd */
1495 return (retcode);
1496 }
1497
1498 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1499 * bp & passes it down.
1500 * any calls originating in the kernel must use non-blocking I/O
1501 * do some extra sanity checking to return "appropriate" error values for
1502 * certain conditions (to make some standard utilities work)
1503 *
1504 * Formerly known as: rf_DoAccessKernel
1505 */
1506 void
1507 raidstart(raidPtr)
1508 RF_Raid_t *raidPtr;
1509 {
1510 RF_SectorCount_t num_blocks, pb, sum;
1511 RF_RaidAddr_t raid_addr;
1512 int retcode;
1513 struct partition *pp;
1514 daddr_t blocknum;
1515 int unit;
1516 struct raid_softc *rs;
1517 int do_async;
1518 struct buf *bp;
1519
1520 unit = raidPtr->raidid;
1521 rs = &raid_softc[unit];
1522
1523 /* quick check to see if anything has died recently */
1524 RF_LOCK_MUTEX(raidPtr->mutex);
1525 if (raidPtr->numNewFailures > 0) {
1526 rf_update_component_labels(raidPtr);
1527 raidPtr->numNewFailures--;
1528 }
1529 RF_UNLOCK_MUTEX(raidPtr->mutex);
1530
1531 /* Check to see if we're at the limit... */
1532 RF_LOCK_MUTEX(raidPtr->mutex);
1533 while (raidPtr->openings > 0) {
1534 RF_UNLOCK_MUTEX(raidPtr->mutex);
1535
1536 /* get the next item, if any, from the queue */
1537 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1538 /* nothing more to do */
1539 return;
1540 }
1541 BUFQ_REMOVE(&rs->buf_queue, bp);
1542
1543 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1544 * partition.. Need to make it absolute to the underlying
1545 * device.. */
1546
1547 blocknum = bp->b_blkno;
1548 if (DISKPART(bp->b_dev) != RAW_PART) {
1549 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1550 blocknum += pp->p_offset;
1551 }
1552
1553 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1554 (int) blocknum));
1555
1556 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1557 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1558
1559 /* *THIS* is where we adjust what block we're going to...
1560 * but DO NOT TOUCH bp->b_blkno!!! */
1561 raid_addr = blocknum;
1562
1563 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1564 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1565 sum = raid_addr + num_blocks + pb;
1566 if (1 || rf_debugKernelAccess) {
1567 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1568 (int) raid_addr, (int) sum, (int) num_blocks,
1569 (int) pb, (int) bp->b_resid));
1570 }
1571 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1572 || (sum < num_blocks) || (sum < pb)) {
1573 bp->b_error = ENOSPC;
1574 bp->b_flags |= B_ERROR;
1575 bp->b_resid = bp->b_bcount;
1576 biodone(bp);
1577 RF_LOCK_MUTEX(raidPtr->mutex);
1578 continue;
1579 }
1580 /*
1581 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1582 */
1583
1584 if (bp->b_bcount & raidPtr->sectorMask) {
1585 bp->b_error = EINVAL;
1586 bp->b_flags |= B_ERROR;
1587 bp->b_resid = bp->b_bcount;
1588 biodone(bp);
1589 RF_LOCK_MUTEX(raidPtr->mutex);
1590 continue;
1591
1592 }
1593 db1_printf(("Calling DoAccess..\n"));
1594
1595
1596 RF_LOCK_MUTEX(raidPtr->mutex);
1597 raidPtr->openings--;
1598 RF_UNLOCK_MUTEX(raidPtr->mutex);
1599
1600 /*
1601 * Everything is async.
1602 */
1603 do_async = 1;
1604
1605 /* don't ever condition on bp->b_flags & B_WRITE.
1606 * always condition on B_READ instead */
1607
1608 /* XXX we're still at splbio() here... do we *really*
1609 need to be? */
1610
1611
1612 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1613 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1614 do_async, raid_addr, num_blocks,
1615 bp->b_un.b_addr, bp, NULL, NULL,
1616 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1617
1618
1619 RF_LOCK_MUTEX(raidPtr->mutex);
1620 }
1621 RF_UNLOCK_MUTEX(raidPtr->mutex);
1622 }
1623
1624
1625
1626
1627 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1628
1629 int
1630 rf_DispatchKernelIO(queue, req)
1631 RF_DiskQueue_t *queue;
1632 RF_DiskQueueData_t *req;
1633 {
1634 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1635 struct buf *bp;
1636 struct raidbuf *raidbp = NULL;
1637 struct raid_softc *rs;
1638 int unit;
1639 int s;
1640
1641 s=0;
1642 /* s = splbio();*/ /* want to test this */
1643 /* XXX along with the vnode, we also need the softc associated with
1644 * this device.. */
1645
1646 req->queue = queue;
1647
1648 unit = queue->raidPtr->raidid;
1649
1650 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1651
1652 if (unit >= numraid) {
1653 printf("Invalid unit number: %d %d\n", unit, numraid);
1654 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1655 }
1656 rs = &raid_softc[unit];
1657
1658 /* XXX is this the right place? */
1659 disk_busy(&rs->sc_dkdev);
1660
1661 bp = req->bp;
1662 #if 1
1663 /* XXX when there is a physical disk failure, someone is passing us a
1664 * buffer that contains old stuff!! Attempt to deal with this problem
1665 * without taking a performance hit... (not sure where the real bug
1666 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1667
1668 if (bp->b_flags & B_ERROR) {
1669 bp->b_flags &= ~B_ERROR;
1670 }
1671 if (bp->b_error != 0) {
1672 bp->b_error = 0;
1673 }
1674 #endif
1675 raidbp = RAIDGETBUF(rs);
1676
1677 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1678
1679 /*
1680 * context for raidiodone
1681 */
1682 raidbp->rf_obp = bp;
1683 raidbp->req = req;
1684
1685 LIST_INIT(&raidbp->rf_buf.b_dep);
1686
1687 switch (req->type) {
1688 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1689 /* XXX need to do something extra here.. */
1690 /* I'm leaving this in, as I've never actually seen it used,
1691 * and I'd like folks to report it... GO */
1692 printf(("WAKEUP CALLED\n"));
1693 queue->numOutstanding++;
1694
1695 /* XXX need to glue the original buffer into this?? */
1696
1697 KernelWakeupFunc(&raidbp->rf_buf);
1698 break;
1699
1700 case RF_IO_TYPE_READ:
1701 case RF_IO_TYPE_WRITE:
1702
1703 if (req->tracerec) {
1704 RF_ETIMER_START(req->tracerec->timer);
1705 }
1706 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1707 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1708 req->sectorOffset, req->numSector,
1709 req->buf, KernelWakeupFunc, (void *) req,
1710 queue->raidPtr->logBytesPerSector, req->b_proc);
1711
1712 if (rf_debugKernelAccess) {
1713 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1714 (long) bp->b_blkno));
1715 }
1716 queue->numOutstanding++;
1717 queue->last_deq_sector = req->sectorOffset;
1718 /* acc wouldn't have been let in if there were any pending
1719 * reqs at any other priority */
1720 queue->curPriority = req->priority;
1721
1722 db1_printf(("Going for %c to unit %d row %d col %d\n",
1723 req->type, unit, queue->row, queue->col));
1724 db1_printf(("sector %d count %d (%d bytes) %d\n",
1725 (int) req->sectorOffset, (int) req->numSector,
1726 (int) (req->numSector <<
1727 queue->raidPtr->logBytesPerSector),
1728 (int) queue->raidPtr->logBytesPerSector));
1729 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1730 raidbp->rf_buf.b_vp->v_numoutput++;
1731 }
1732 VOP_STRATEGY(&raidbp->rf_buf);
1733
1734 break;
1735
1736 default:
1737 panic("bad req->type in rf_DispatchKernelIO");
1738 }
1739 db1_printf(("Exiting from DispatchKernelIO\n"));
1740 /* splx(s); */ /* want to test this */
1741 return (0);
1742 }
1743 /* this is the callback function associated with a I/O invoked from
1744 kernel code.
1745 */
1746 static void
1747 KernelWakeupFunc(vbp)
1748 struct buf *vbp;
1749 {
1750 RF_DiskQueueData_t *req = NULL;
1751 RF_DiskQueue_t *queue;
1752 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1753 struct buf *bp;
1754 struct raid_softc *rs;
1755 int unit;
1756 register int s;
1757
1758 s = splbio();
1759 db1_printf(("recovering the request queue:\n"));
1760 req = raidbp->req;
1761
1762 bp = raidbp->rf_obp;
1763
1764 queue = (RF_DiskQueue_t *) req->queue;
1765
1766 if (raidbp->rf_buf.b_flags & B_ERROR) {
1767 bp->b_flags |= B_ERROR;
1768 bp->b_error = raidbp->rf_buf.b_error ?
1769 raidbp->rf_buf.b_error : EIO;
1770 }
1771
1772 /* XXX methinks this could be wrong... */
1773 #if 1
1774 bp->b_resid = raidbp->rf_buf.b_resid;
1775 #endif
1776
1777 if (req->tracerec) {
1778 RF_ETIMER_STOP(req->tracerec->timer);
1779 RF_ETIMER_EVAL(req->tracerec->timer);
1780 RF_LOCK_MUTEX(rf_tracing_mutex);
1781 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1782 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1783 req->tracerec->num_phys_ios++;
1784 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1785 }
1786 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1787
1788 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1789
1790
1791 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1792 * ballistic, and mark the component as hosed... */
1793
1794 if (bp->b_flags & B_ERROR) {
1795 /* Mark the disk as dead */
1796 /* but only mark it once... */
1797 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1798 rf_ds_optimal) {
1799 printf("raid%d: IO Error. Marking %s as failed.\n",
1800 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1801 queue->raidPtr->Disks[queue->row][queue->col].status =
1802 rf_ds_failed;
1803 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1804 queue->raidPtr->numFailures++;
1805 queue->raidPtr->numNewFailures++;
1806 /* XXX here we should bump the version number for each component, and write that data out */
1807 } else { /* Disk is already dead... */
1808 /* printf("Disk already marked as dead!\n"); */
1809 }
1810
1811 }
1812
1813 rs = &raid_softc[unit];
1814 RAIDPUTBUF(rs, raidbp);
1815
1816
1817 if (bp->b_resid == 0) {
1818 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1819 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1820 }
1821
1822 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1823 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1824
1825 splx(s);
1826 }
1827
1828
1829
1830 /*
1831 * initialize a buf structure for doing an I/O in the kernel.
1832 */
1833 static void
1834 InitBP(
1835 struct buf * bp,
1836 struct vnode * b_vp,
1837 unsigned rw_flag,
1838 dev_t dev,
1839 RF_SectorNum_t startSect,
1840 RF_SectorCount_t numSect,
1841 caddr_t buf,
1842 void (*cbFunc) (struct buf *),
1843 void *cbArg,
1844 int logBytesPerSector,
1845 struct proc * b_proc)
1846 {
1847 /* bp->b_flags = B_PHYS | rw_flag; */
1848 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1849 bp->b_bcount = numSect << logBytesPerSector;
1850 bp->b_bufsize = bp->b_bcount;
1851 bp->b_error = 0;
1852 bp->b_dev = dev;
1853 bp->b_un.b_addr = buf;
1854 bp->b_blkno = startSect;
1855 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1856 if (bp->b_bcount == 0) {
1857 panic("bp->b_bcount is zero in InitBP!!\n");
1858 }
1859 bp->b_proc = b_proc;
1860 bp->b_iodone = cbFunc;
1861 bp->b_vp = b_vp;
1862
1863 }
1864
1865 static void
1866 raidgetdefaultlabel(raidPtr, rs, lp)
1867 RF_Raid_t *raidPtr;
1868 struct raid_softc *rs;
1869 struct disklabel *lp;
1870 {
1871 db1_printf(("Building a default label...\n"));
1872 bzero(lp, sizeof(*lp));
1873
1874 /* fabricate a label... */
1875 lp->d_secperunit = raidPtr->totalSectors;
1876 lp->d_secsize = raidPtr->bytesPerSector;
1877 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1878 lp->d_ntracks = 1;
1879 lp->d_ncylinders = raidPtr->totalSectors /
1880 (lp->d_nsectors * lp->d_ntracks);
1881 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1882
1883 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1884 lp->d_type = DTYPE_RAID;
1885 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1886 lp->d_rpm = 3600;
1887 lp->d_interleave = 1;
1888 lp->d_flags = 0;
1889
1890 lp->d_partitions[RAW_PART].p_offset = 0;
1891 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1892 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1893 lp->d_npartitions = RAW_PART + 1;
1894
1895 lp->d_magic = DISKMAGIC;
1896 lp->d_magic2 = DISKMAGIC;
1897 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1898
1899 }
1900 /*
1901 * Read the disklabel from the raid device. If one is not present, fake one
1902 * up.
1903 */
1904 static void
1905 raidgetdisklabel(dev)
1906 dev_t dev;
1907 {
1908 int unit = raidunit(dev);
1909 struct raid_softc *rs = &raid_softc[unit];
1910 char *errstring;
1911 struct disklabel *lp = rs->sc_dkdev.dk_label;
1912 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1913 RF_Raid_t *raidPtr;
1914
1915 db1_printf(("Getting the disklabel...\n"));
1916
1917 bzero(clp, sizeof(*clp));
1918
1919 raidPtr = raidPtrs[unit];
1920
1921 raidgetdefaultlabel(raidPtr, rs, lp);
1922
1923 /*
1924 * Call the generic disklabel extraction routine.
1925 */
1926 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1927 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1928 if (errstring)
1929 raidmakedisklabel(rs);
1930 else {
1931 int i;
1932 struct partition *pp;
1933
1934 /*
1935 * Sanity check whether the found disklabel is valid.
1936 *
1937 * This is necessary since total size of the raid device
1938 * may vary when an interleave is changed even though exactly
1939 * same componets are used, and old disklabel may used
1940 * if that is found.
1941 */
1942 if (lp->d_secperunit != rs->sc_size)
1943 printf("WARNING: %s: "
1944 "total sector size in disklabel (%d) != "
1945 "the size of raid (%ld)\n", rs->sc_xname,
1946 lp->d_secperunit, (long) rs->sc_size);
1947 for (i = 0; i < lp->d_npartitions; i++) {
1948 pp = &lp->d_partitions[i];
1949 if (pp->p_offset + pp->p_size > rs->sc_size)
1950 printf("WARNING: %s: end of partition `%c' "
1951 "exceeds the size of raid (%ld)\n",
1952 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1953 }
1954 }
1955
1956 }
1957 /*
1958 * Take care of things one might want to take care of in the event
1959 * that a disklabel isn't present.
1960 */
1961 static void
1962 raidmakedisklabel(rs)
1963 struct raid_softc *rs;
1964 {
1965 struct disklabel *lp = rs->sc_dkdev.dk_label;
1966 db1_printf(("Making a label..\n"));
1967
1968 /*
1969 * For historical reasons, if there's no disklabel present
1970 * the raw partition must be marked FS_BSDFFS.
1971 */
1972
1973 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1974
1975 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1976
1977 lp->d_checksum = dkcksum(lp);
1978 }
1979 /*
1980 * Lookup the provided name in the filesystem. If the file exists,
1981 * is a valid block device, and isn't being used by anyone else,
1982 * set *vpp to the file's vnode.
1983 * You'll find the original of this in ccd.c
1984 */
1985 int
1986 raidlookup(path, p, vpp)
1987 char *path;
1988 struct proc *p;
1989 struct vnode **vpp; /* result */
1990 {
1991 struct nameidata nd;
1992 struct vnode *vp;
1993 struct vattr va;
1994 int error;
1995
1996 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1997 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1998 #ifdef DEBUG
1999 printf("RAIDframe: vn_open returned %d\n", error);
2000 #endif
2001 return (error);
2002 }
2003 vp = nd.ni_vp;
2004 if (vp->v_usecount > 1) {
2005 VOP_UNLOCK(vp, 0);
2006 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2007 return (EBUSY);
2008 }
2009 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2010 VOP_UNLOCK(vp, 0);
2011 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2012 return (error);
2013 }
2014 /* XXX: eventually we should handle VREG, too. */
2015 if (va.va_type != VBLK) {
2016 VOP_UNLOCK(vp, 0);
2017 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2018 return (ENOTBLK);
2019 }
2020 VOP_UNLOCK(vp, 0);
2021 *vpp = vp;
2022 return (0);
2023 }
2024 /*
2025 * Wait interruptibly for an exclusive lock.
2026 *
2027 * XXX
2028 * Several drivers do this; it should be abstracted and made MP-safe.
2029 * (Hmm... where have we seen this warning before :-> GO )
2030 */
2031 static int
2032 raidlock(rs)
2033 struct raid_softc *rs;
2034 {
2035 int error;
2036
2037 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2038 rs->sc_flags |= RAIDF_WANTED;
2039 if ((error =
2040 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2041 return (error);
2042 }
2043 rs->sc_flags |= RAIDF_LOCKED;
2044 return (0);
2045 }
2046 /*
2047 * Unlock and wake up any waiters.
2048 */
2049 static void
2050 raidunlock(rs)
2051 struct raid_softc *rs;
2052 {
2053
2054 rs->sc_flags &= ~RAIDF_LOCKED;
2055 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2056 rs->sc_flags &= ~RAIDF_WANTED;
2057 wakeup(rs);
2058 }
2059 }
2060
2061
/*
 * Byte offset and byte length of the component label area, as used by
 * raidread_component_label() and raidwrite_component_label().
 */
#define RF_COMPONENT_INFO_OFFSET	(16 * 1024)	/* bytes (16384) */
#define RF_COMPONENT_INFO_SIZE		1024		/* bytes */
2064
2065 int
2066 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2067 {
2068 RF_ComponentLabel_t clabel;
2069 raidread_component_label(dev, b_vp, &clabel);
2070 clabel.mod_counter = mod_counter;
2071 clabel.clean = RF_RAID_CLEAN;
2072 raidwrite_component_label(dev, b_vp, &clabel);
2073 return(0);
2074 }
2075
2076
2077 int
2078 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2079 {
2080 RF_ComponentLabel_t clabel;
2081 raidread_component_label(dev, b_vp, &clabel);
2082 clabel.mod_counter = mod_counter;
2083 clabel.clean = RF_RAID_DIRTY;
2084 raidwrite_component_label(dev, b_vp, &clabel);
2085 return(0);
2086 }
2087
2088 /* ARGSUSED */
2089 int
2090 raidread_component_label(dev, b_vp, clabel)
2091 dev_t dev;
2092 struct vnode *b_vp;
2093 RF_ComponentLabel_t *clabel;
2094 {
2095 struct buf *bp;
2096 int error;
2097
2098 /* XXX should probably ensure that we don't try to do this if
2099 someone has changed rf_protected_sectors. */
2100
2101 /* get a block of the appropriate size... */
2102 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2103 bp->b_dev = dev;
2104
2105 /* get our ducks in a row for the read */
2106 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2107 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2108 bp->b_flags = B_BUSY | B_READ;
2109 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2110
2111 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2112
2113 error = biowait(bp);
2114
2115 if (!error) {
2116 memcpy(clabel, bp->b_un.b_addr,
2117 sizeof(RF_ComponentLabel_t));
2118 #if 0
2119 print_component_label( clabel );
2120 #endif
2121 } else {
2122 #if 0
2123 printf("Failed to read RAID component label!\n");
2124 #endif
2125 }
2126
2127 bp->b_flags = B_INVAL | B_AGE;
2128 brelse(bp);
2129 return(error);
2130 }
2131 /* ARGSUSED */
2132 int
2133 raidwrite_component_label(dev, b_vp, clabel)
2134 dev_t dev;
2135 struct vnode *b_vp;
2136 RF_ComponentLabel_t *clabel;
2137 {
2138 struct buf *bp;
2139 int error;
2140
2141 /* get a block of the appropriate size... */
2142 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2143 bp->b_dev = dev;
2144
2145 /* get our ducks in a row for the write */
2146 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2147 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2148 bp->b_flags = B_BUSY | B_WRITE;
2149 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2150
2151 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2152
2153 memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
2154
2155 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2156 error = biowait(bp);
2157 bp->b_flags = B_INVAL | B_AGE;
2158 brelse(bp);
2159 if (error) {
2160 #if 1
2161 printf("Failed to write RAID component info!\n");
2162 #endif
2163 }
2164
2165 return(error);
2166 }
2167
2168 void
2169 rf_markalldirty( raidPtr )
2170 RF_Raid_t *raidPtr;
2171 {
2172 RF_ComponentLabel_t clabel;
2173 int r,c;
2174
2175 raidPtr->mod_counter++;
2176 for (r = 0; r < raidPtr->numRow; r++) {
2177 for (c = 0; c < raidPtr->numCol; c++) {
2178 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2179 raidread_component_label(
2180 raidPtr->Disks[r][c].dev,
2181 raidPtr->raid_cinfo[r][c].ci_vp,
2182 &clabel);
2183 if (clabel.status == rf_ds_spared) {
2184 /* XXX do something special...
2185 but whatever you do, don't
2186 try to access it!! */
2187 } else {
2188 #if 0
2189 clabel.status =
2190 raidPtr->Disks[r][c].status;
2191 raidwrite_component_label(
2192 raidPtr->Disks[r][c].dev,
2193 raidPtr->raid_cinfo[r][c].ci_vp,
2194 &clabel);
2195 #endif
2196 raidmarkdirty(
2197 raidPtr->Disks[r][c].dev,
2198 raidPtr->raid_cinfo[r][c].ci_vp,
2199 raidPtr->mod_counter);
2200 }
2201 }
2202 }
2203 }
2204 /* printf("Component labels marked dirty.\n"); */
2205 #if 0
2206 for( c = 0; c < raidPtr->numSpare ; c++) {
2207 sparecol = raidPtr->numCol + c;
2208 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2209 /*
2210
2211 XXX this is where we get fancy and map this spare
2212 into it's correct spot in the array.
2213
2214 */
2215 /*
2216
2217 we claim this disk is "optimal" if it's
2218 rf_ds_used_spare, as that means it should be
2219 directly substitutable for the disk it replaced.
2220 We note that too...
2221
2222 */
2223
2224 for(i=0;i<raidPtr->numRow;i++) {
2225 for(j=0;j<raidPtr->numCol;j++) {
2226 if ((raidPtr->Disks[i][j].spareRow ==
2227 r) &&
2228 (raidPtr->Disks[i][j].spareCol ==
2229 sparecol)) {
2230 srow = r;
2231 scol = sparecol;
2232 break;
2233 }
2234 }
2235 }
2236
2237 raidread_component_label(
2238 raidPtr->Disks[r][sparecol].dev,
2239 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2240 &clabel);
2241 /* make sure status is noted */
2242 clabel.version = RF_COMPONENT_LABEL_VERSION;
2243 clabel.mod_counter = raidPtr->mod_counter;
2244 clabel.serial_number = raidPtr->serial_number;
2245 clabel.row = srow;
2246 clabel.column = scol;
2247 clabel.num_rows = raidPtr->numRow;
2248 clabel.num_columns = raidPtr->numCol;
2249 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2250 clabel.status = rf_ds_optimal;
2251 raidwrite_component_label(
2252 raidPtr->Disks[r][sparecol].dev,
2253 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2254 &clabel);
2255 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2256 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2257 }
2258 }
2259
2260 #endif
2261 }
2262
2263
2264 void
2265 rf_update_component_labels( raidPtr )
2266 RF_Raid_t *raidPtr;
2267 {
2268 RF_ComponentLabel_t clabel;
2269 int sparecol;
2270 int r,c;
2271 int i,j;
2272 int srow, scol;
2273
2274 srow = -1;
2275 scol = -1;
2276
2277 /* XXX should do extra checks to make sure things really are clean,
2278 rather than blindly setting the clean bit... */
2279
2280 raidPtr->mod_counter++;
2281
2282 for (r = 0; r < raidPtr->numRow; r++) {
2283 for (c = 0; c < raidPtr->numCol; c++) {
2284 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2285 raidread_component_label(
2286 raidPtr->Disks[r][c].dev,
2287 raidPtr->raid_cinfo[r][c].ci_vp,
2288 &clabel);
2289 /* make sure status is noted */
2290 clabel.status = rf_ds_optimal;
2291 /* bump the counter */
2292 #if 0
2293 clabel.mod_counter++;
2294 #endif
2295 clabel.mod_counter = raidPtr->mod_counter;
2296 #if 0
2297 /* note where this set was configured last */
2298 clabel.last_unit = raidPtr->raidid;
2299 #endif
2300 #if DEBUG
2301 if (raidPtr->mod_counter !=
2302 clabel.mod_counter) {
2303 printf("raid%d: mod_counter for row: %d col: %d not in sync\n", raidPtr->raidid, r, c);
2304 }
2305 #endif
2306
2307 raidwrite_component_label(
2308 raidPtr->Disks[r][c].dev,
2309 raidPtr->raid_cinfo[r][c].ci_vp,
2310 &clabel);
2311 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2312 raidmarkclean(
2313 raidPtr->Disks[r][c].dev,
2314 raidPtr->raid_cinfo[r][c].ci_vp,
2315 raidPtr->mod_counter);
2316 }
2317 }
2318 /* else we don't touch it.. */
2319 #if 0
2320 else if (raidPtr->Disks[r][c].status !=
2321 rf_ds_failed) {
2322 raidread_component_label(
2323 raidPtr->Disks[r][c].dev,
2324 raidPtr->raid_cinfo[r][c].ci_vp,
2325 &clabel);
2326 /* make sure status is noted */
2327 clabel.status =
2328 raidPtr->Disks[r][c].status;
2329 raidwrite_component_label(
2330 raidPtr->Disks[r][c].dev,
2331 raidPtr->raid_cinfo[r][c].ci_vp,
2332 &clabel);
2333 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2334 raidmarkclean(
2335 raidPtr->Disks[r][c].dev,
2336 raidPtr->raid_cinfo[r][c].ci_vp,
2337 raidPtr->mod_counter);
2338 }
2339 }
2340 #endif
2341 }
2342 }
2343
2344 for( c = 0; c < raidPtr->numSpare ; c++) {
2345 sparecol = raidPtr->numCol + c;
2346 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2347 /*
2348
2349 we claim this disk is "optimal" if it's
2350 rf_ds_used_spare, as that means it should be
2351 directly substitutable for the disk it replaced.
2352 We note that too...
2353
2354 */
2355
2356 for(i=0;i<raidPtr->numRow;i++) {
2357 for(j=0;j<raidPtr->numCol;j++) {
2358 if ((raidPtr->Disks[i][j].spareRow ==
2359 0) &&
2360 (raidPtr->Disks[i][j].spareCol ==
2361 sparecol)) {
2362 srow = i;
2363 scol = j;
2364 break;
2365 }
2366 }
2367 }
2368
2369 /* XXX shouldn't *really* need this... */
2370 raidread_component_label(
2371 raidPtr->Disks[0][sparecol].dev,
2372 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2373 &clabel);
2374 /* make sure status is noted */
2375
2376 raid_init_component_label(raidPtr, &clabel);
2377
2378 clabel.mod_counter = raidPtr->mod_counter;
2379 clabel.row = srow;
2380 clabel.column = scol;
2381 clabel.status = rf_ds_optimal;
2382
2383 raidwrite_component_label(
2384 raidPtr->Disks[0][sparecol].dev,
2385 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2386 &clabel);
2387 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2388 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2389 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2390 raidPtr->mod_counter);
2391 }
2392 }
2393 }
2394 /* printf("Component labels updated\n"); */
2395 }
2396
2397 void
2398 rf_ReconThread(req)
2399 struct rf_recon_req *req;
2400 {
2401 int s;
2402 RF_Raid_t *raidPtr;
2403
2404 s = splbio();
2405 raidPtr = (RF_Raid_t *) req->raidPtr;
2406 raidPtr->recon_in_progress = 1;
2407
2408 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2409 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2410
2411 /* XXX get rid of this! we don't need it at all.. */
2412 RF_Free(req, sizeof(*req));
2413
2414 raidPtr->recon_in_progress = 0;
2415 splx(s);
2416
2417 /* That's all... */
2418 kthread_exit(0); /* does not return */
2419 }
2420
2421 void
2422 rf_RewriteParityThread(raidPtr)
2423 RF_Raid_t *raidPtr;
2424 {
2425 int retcode;
2426 int s;
2427
2428 raidPtr->parity_rewrite_in_progress = 1;
2429 s = splbio();
2430 retcode = rf_RewriteParity(raidPtr);
2431 splx(s);
2432 if (retcode) {
2433 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2434 } else {
2435 /* set the clean bit! If we shutdown correctly,
2436 the clean bit on each component label will get
2437 set */
2438 raidPtr->parity_good = RF_RAID_CLEAN;
2439 }
2440 raidPtr->parity_rewrite_in_progress = 0;
2441
2442 /* That's all... */
2443 kthread_exit(0); /* does not return */
2444 }
2445
2446
2447 void
2448 rf_CopybackThread(raidPtr)
2449 RF_Raid_t *raidPtr;
2450 {
2451 int s;
2452
2453 raidPtr->copyback_in_progress = 1;
2454 s = splbio();
2455 rf_CopybackReconstructedData(raidPtr);
2456 splx(s);
2457 raidPtr->copyback_in_progress = 0;
2458
2459 /* That's all... */
2460 kthread_exit(0); /* does not return */
2461 }
2462
2463
2464 void
2465 rf_ReconstructInPlaceThread(req)
2466 struct rf_recon_req *req;
2467 {
2468 int retcode;
2469 int s;
2470 RF_Raid_t *raidPtr;
2471
2472 s = splbio();
2473 raidPtr = req->raidPtr;
2474 raidPtr->recon_in_progress = 1;
2475 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2476 RF_Free(req, sizeof(*req));
2477 raidPtr->recon_in_progress = 0;
2478 splx(s);
2479
2480 /* That's all... */
2481 kthread_exit(0); /* does not return */
2482 }
2483
/*
 * rf_mountroot_hook --
 *	Takes a device pointer and performs no work with it.
 *
 *	dev: unused.
 */
/* ARGSUSED */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* nothing is done here */
}
2490
2491
2492 RF_AutoConfig_t *
2493 rf_find_raid_components()
2494 {
2495 struct devnametobdevmaj *dtobdm;
2496 struct vnode *vp;
2497 struct disklabel label;
2498 struct device *dv;
2499 char *cd_name;
2500 dev_t dev;
2501 int error;
2502 int i;
2503 int good_one;
2504 RF_ComponentLabel_t *clabel;
2505 RF_AutoConfig_t *ac_list;
2506 RF_AutoConfig_t *ac;
2507
2508
2509 /* initialize the AutoConfig list */
2510 ac_list = NULL;
2511
2512 if (raidautoconfig) {
2513
2514 /* we begin by trolling through *all* the devices on the system */
2515
2516 for (dv = alldevs.tqh_first; dv != NULL;
2517 dv = dv->dv_list.tqe_next) {
2518
2519 /* we are only interested in disks... */
2520 if (dv->dv_class != DV_DISK)
2521 continue;
2522
2523 /* we don't care about floppies... */
2524 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2525 continue;
2526 }
2527
2528 /* need to find the device_name_to_block_device_major stuff */
2529 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2530 dtobdm = dev_name2blk;
2531 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2532 dtobdm++;
2533 }
2534
2535 /* get a vnode for the raw partition of this disk */
2536
2537 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2538 if (bdevvp(dev, &vp))
2539 panic("RAID can't alloc vnode");
2540
2541 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2542
2543 if (error) {
2544 /* "Who cares." Continue looking
2545 for something that exists*/
2546 vput(vp);
2547 continue;
2548 }
2549
2550 /* Ok, the disk exists. Go get the disklabel. */
2551 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2552 FREAD, NOCRED, 0);
2553 if (error) {
2554 /*
2555 * XXX can't happen - open() would
2556 * have errored out (or faked up one)
2557 */
2558 printf("can't get label for dev %s%c (%d)!?!?\n",
2559 dv->dv_xname, 'a' + RAW_PART, error);
2560 }
2561
2562 /* don't need this any more. We'll allocate it again
2563 a little later if we really do... */
2564 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2565 vput(vp);
2566
2567 for (i=0; i < label.d_npartitions; i++) {
2568 /* We only support partitions marked as RAID */
2569 if (label.d_partitions[i].p_fstype != FS_RAID)
2570 continue;
2571
2572 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2573 if (bdevvp(dev, &vp))
2574 panic("RAID can't alloc vnode");
2575
2576 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2577 if (error) {
2578 /* Whatever... */
2579 vput(vp);
2580 continue;
2581 }
2582
2583 good_one = 0;
2584
2585 clabel = (RF_ComponentLabel_t *)
2586 malloc(sizeof(RF_ComponentLabel_t),
2587 M_RAIDFRAME, M_NOWAIT);
2588 if (clabel == NULL) {
2589 /* XXX CLEANUP HERE */
2590 printf("RAID auto config: out of memory!\n");
2591 return(NULL); /* XXX probably should panic? */
2592 }
2593
2594 if (!raidread_component_label(dev, vp, clabel)) {
2595 /* Got the label. Does it look reasonable? */
2596 if (rf_reasonable_label(clabel) &&
2597 (clabel->partitionSize <=
2598 label.d_partitions[i].p_size)) {
2599 #if DEBUG
2600 printf("Component on: %s%c: %d\n",
2601 dv->dv_xname, 'a'+i,
2602 label.d_partitions[i].p_size);
2603 print_component_label(clabel);
2604 #endif
2605 /* if it's reasonable, add it,
2606 else ignore it. */
2607 ac = (RF_AutoConfig_t *)
2608 malloc(sizeof(RF_AutoConfig_t),
2609 M_RAIDFRAME,
2610 M_NOWAIT);
2611 if (ac == NULL) {
2612 /* XXX should panic?? */
2613 return(NULL);
2614 }
2615
2616 sprintf(ac->devname, "%s%c",
2617 dv->dv_xname, 'a'+i);
2618 ac->dev = dev;
2619 ac->vp = vp;
2620 ac->clabel = clabel;
2621 ac->next = ac_list;
2622 ac_list = ac;
2623 good_one = 1;
2624 }
2625 }
2626 if (!good_one) {
2627 /* cleanup */
2628 free(clabel, M_RAIDFRAME);
2629 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2630 vput(vp);
2631 }
2632 }
2633 }
2634 }
2635 return(ac_list);
2636 }
2637
2638 static int
2639 rf_reasonable_label(clabel)
2640 RF_ComponentLabel_t *clabel;
2641 {
2642
2643 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2644 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2645 ((clabel->clean == RF_RAID_CLEAN) ||
2646 (clabel->clean == RF_RAID_DIRTY)) &&
2647 clabel->row >=0 &&
2648 clabel->column >= 0 &&
2649 clabel->num_rows > 0 &&
2650 clabel->num_columns > 0 &&
2651 clabel->row < clabel->num_rows &&
2652 clabel->column < clabel->num_columns &&
2653 clabel->blockSize > 0 &&
2654 clabel->numBlocks > 0) {
2655 /* label looks reasonable enough... */
2656 return(1);
2657 }
2658 return(0);
2659 }
2660
2661
2662 void
2663 print_component_label(clabel)
2664 RF_ComponentLabel_t *clabel;
2665 {
2666 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2667 clabel->row, clabel->column,
2668 clabel->num_rows, clabel->num_columns);
2669 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2670 clabel->version, clabel->serial_number,
2671 clabel->mod_counter);
2672 printf(" Clean: %s Status: %d\n",
2673 clabel->clean ? "Yes" : "No", clabel->status );
2674 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2675 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2676 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2677 (char) clabel->parityConfig, clabel->blockSize,
2678 clabel->numBlocks);
2679 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2680 printf(" Last configured as: raid%d\n", clabel->last_unit );
2681 #if 0
2682 printf(" Config order: %d\n", clabel->config_order);
2683 #endif
2684
2685 }
2686
2687 RF_ConfigSet_t *
2688 rf_create_auto_sets(ac_list)
2689 RF_AutoConfig_t *ac_list;
2690 {
2691 RF_AutoConfig_t *ac;
2692 RF_ConfigSet_t *config_sets;
2693 RF_ConfigSet_t *cset;
2694 RF_AutoConfig_t *ac_next;
2695
2696
2697 config_sets = NULL;
2698
2699 /* Go through the AutoConfig list, and figure out which components
2700 belong to what sets. */
2701 ac = ac_list;
2702 while(ac!=NULL) {
2703 /* we're going to putz with ac->next, so save it here
2704 for use at the end of the loop */
2705 ac_next = ac->next;
2706
2707 if (config_sets == NULL) {
2708 /* will need at least this one... */
2709 config_sets = (RF_ConfigSet_t *)
2710 malloc(sizeof(RF_ConfigSet_t),
2711 M_RAIDFRAME, M_NOWAIT);
2712 if (config_sets == NULL) {
2713 panic("rf_create_auto_sets: No memory!\n");
2714 }
2715 /* this one is easy :) */
2716 config_sets->ac = ac;
2717 config_sets->next = NULL;
2718 config_sets->rootable = 0;
2719 ac->next = NULL;
2720 } else {
2721 /* which set does this component fit into? */
2722 cset = config_sets;
2723 while(cset!=NULL) {
2724 if (rf_does_it_fit(cset, ac)) {
2725 /* looks like it matches */
2726 ac->next = cset->ac;
2727 cset->ac = ac;
2728 break;
2729 }
2730 cset = cset->next;
2731 }
2732 if (cset==NULL) {
2733 /* didn't find a match above... new set..*/
2734 cset = (RF_ConfigSet_t *)
2735 malloc(sizeof(RF_ConfigSet_t),
2736 M_RAIDFRAME, M_NOWAIT);
2737 if (cset == NULL) {
2738 panic("rf_create_auto_sets: No memory!\n");
2739 }
2740 cset->ac = ac;
2741 ac->next = NULL;
2742 cset->next = config_sets;
2743 cset->rootable = 0;
2744 config_sets = cset;
2745 }
2746 }
2747 ac = ac_next;
2748 }
2749
2750
2751 return(config_sets);
2752 }
2753
2754 static int
2755 rf_does_it_fit(cset, ac)
2756 RF_ConfigSet_t *cset;
2757 RF_AutoConfig_t *ac;
2758 {
2759 RF_ComponentLabel_t *clabel1, *clabel2;
2760
2761 /* If this one matches the *first* one in the set, that's good
2762 enough, since the other members of the set would have been
2763 through here too... */
2764 /* note that we are not checking partitionSize here..
2765
2766 Note that we are also not checking the mod_counters here.
2767 If everything else matches execpt the mod_counter, that's
2768 good enough for this test. We will deal with the mod_counters
2769 a little later in the autoconfiguration process.
2770
2771 (clabel1->mod_counter == clabel2->mod_counter) &&
2772
2773 */
2774
2775 clabel1 = cset->ac->clabel;
2776 clabel2 = ac->clabel;
2777 if ((clabel1->version == clabel2->version) &&
2778 (clabel1->serial_number == clabel2->serial_number) &&
2779 (clabel1->num_rows == clabel2->num_rows) &&
2780 (clabel1->num_columns == clabel2->num_columns) &&
2781 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2782 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2783 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2784 (clabel1->parityConfig == clabel2->parityConfig) &&
2785 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2786 (clabel1->blockSize == clabel2->blockSize) &&
2787 (clabel1->numBlocks == clabel2->numBlocks) &&
2788 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2789 (clabel1->root_partition == clabel2->root_partition) &&
2790 (clabel1->last_unit == clabel2->last_unit) &&
2791 (clabel1->config_order == clabel2->config_order)) {
2792 /* if it get's here, it almost *has* to be a match */
2793 } else {
2794 /* it's not consistent with somebody in the set..
2795 punt */
2796 return(0);
2797 }
2798 /* all was fine.. it must fit... */
2799 return(1);
2800 }
2801
2802 int
2803 rf_have_enough_components(cset)
2804 RF_ConfigSet_t *cset;
2805 {
2806 RF_AutoConfig_t *ac;
2807 RF_AutoConfig_t *auto_config;
2808 RF_ComponentLabel_t *clabel;
2809 int r,c;
2810 int num_rows;
2811 int num_cols;
2812 int num_missing;
2813
2814 /* check to see that we have enough 'live' components
2815 of this set. If so, we can configure it if necessary */
2816
2817 num_rows = cset->ac->clabel->num_rows;
2818 num_cols = cset->ac->clabel->num_columns;
2819
2820 /* XXX Check for duplicate components!?!?!? */
2821
2822 num_missing = 0;
2823 auto_config = cset->ac;
2824
2825 for(r=0; r<num_rows; r++) {
2826 for(c=0; c<num_cols; c++) {
2827 ac = auto_config;
2828 while(ac!=NULL) {
2829 if (ac->clabel==NULL) {
2830 /* big-time bad news. */
2831 goto fail;
2832 }
2833 if ((ac->clabel->row == r) &&
2834 (ac->clabel->column == c)) {
2835 /* it's this one... */
2836 #if DEBUG
2837 printf("Found: %s at %d,%d\n",
2838 ac->devname,r,c);
2839 #endif
2840 break;
2841 }
2842 ac=ac->next;
2843 }
2844 if (ac==NULL) {
2845 /* Didn't find one here! */
2846 num_missing++;
2847 }
2848 }
2849 }
2850
2851 clabel = cset->ac->clabel;
2852
2853 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
2854 ((clabel->parityConfig == '1') && (num_missing > 1)) ||
2855 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
2856 ((clabel->parityConfig == '5') && (num_missing > 1))) {
2857 /* XXX this needs to be made *much* more general */
2858 /* Too many failures */
2859 return(0);
2860 }
2861 /* otherwise, all is well, and we've got enough to take a kick
2862 at autoconfiguring this set */
2863 return(1);
2864 fail:
2865 return(0);
2866
2867 }
2868
2869 void
2870 rf_create_configuration(ac,config,raidPtr)
2871 RF_AutoConfig_t *ac;
2872 RF_Config_t *config;
2873 RF_Raid_t *raidPtr;
2874 {
2875 RF_ComponentLabel_t *clabel;
2876
2877 clabel = ac->clabel;
2878
2879 /* 1. Fill in the common stuff */
2880 config->numRow = clabel->num_rows;
2881 config->numCol = clabel->num_columns;
2882 config->numSpare = 0; /* XXX should this be set here? */
2883 config->sectPerSU = clabel->sectPerSU;
2884 config->SUsPerPU = clabel->SUsPerPU;
2885 config->SUsPerRU = clabel->SUsPerRU;
2886 config->parityConfig = clabel->parityConfig;
2887 /* XXX... */
2888 strcpy(config->diskQueueType,"fifo");
2889 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
2890 config->layoutSpecificSize = 0; /* XXX ?? */
2891
2892 while(ac!=NULL) {
2893 /* row/col values will be in range due to the checks
2894 in reasonable_label() */
2895 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
2896 ac->devname);
2897 ac = ac->next;
2898 }
2899
2900 }
2901
2902 int
2903 rf_set_autoconfig(raidPtr, new_value)
2904 RF_Raid_t *raidPtr;
2905 int new_value;
2906 {
2907 RF_ComponentLabel_t clabel;
2908 struct vnode *vp;
2909 dev_t dev;
2910 int row, column;
2911
2912 raidPtr->autoconfigure = new_value;
2913 for(row=0; row<raidPtr->numRow; row++) {
2914 for(column=0; column<raidPtr->numCol; column++) {
2915 dev = raidPtr->Disks[row][column].dev;
2916 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2917 raidread_component_label(dev, vp, &clabel);
2918 clabel.autoconfigure = new_value;
2919 raidwrite_component_label(dev, vp, &clabel);
2920 }
2921 }
2922 return(new_value);
2923 }
2924
2925 int
2926 rf_set_rootpartition(raidPtr, new_value)
2927 RF_Raid_t *raidPtr;
2928 int new_value;
2929 {
2930 RF_ComponentLabel_t clabel;
2931 struct vnode *vp;
2932 dev_t dev;
2933 int row, column;
2934
2935 raidPtr->root_partition = new_value;
2936 for(row=0; row<raidPtr->numRow; row++) {
2937 for(column=0; column<raidPtr->numCol; column++) {
2938 dev = raidPtr->Disks[row][column].dev;
2939 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2940 raidread_component_label(dev, vp, &clabel);
2941 clabel.root_partition = new_value;
2942 raidwrite_component_label(dev, vp, &clabel);
2943 }
2944 }
2945 return(new_value);
2946 }
2947
2948 void
2949 rf_release_all_vps(cset)
2950 RF_ConfigSet_t *cset;
2951 {
2952 RF_AutoConfig_t *ac;
2953
2954 ac = cset->ac;
2955 while(ac!=NULL) {
2956 /* Close the vp, and give it back */
2957 if (ac->vp) {
2958 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
2959 vput(ac->vp);
2960 }
2961 ac = ac->next;
2962 }
2963 }
2964
2965
2966 void
2967 rf_cleanup_config_set(cset)
2968 RF_ConfigSet_t *cset;
2969 {
2970 RF_AutoConfig_t *ac;
2971 RF_AutoConfig_t *next_ac;
2972
2973 ac = cset->ac;
2974 while(ac!=NULL) {
2975 next_ac = ac->next;
2976 /* nuke the label */
2977 free(ac->clabel, M_RAIDFRAME);
2978 /* cleanup the config structure */
2979 free(ac, M_RAIDFRAME);
2980 /* "next.." */
2981 ac = next_ac;
2982 }
2983 /* and, finally, nuke the config set */
2984 free(cset, M_RAIDFRAME);
2985 }
2986
2987
2988 void
2989 raid_init_component_label(raidPtr, clabel)
2990 RF_Raid_t *raidPtr;
2991 RF_ComponentLabel_t *clabel;
2992 {
2993 /* current version number */
2994 clabel->version = RF_COMPONENT_LABEL_VERSION;
2995 clabel->serial_number = raidPtr->serial_number;
2996 clabel->mod_counter = raidPtr->mod_counter;
2997 clabel->num_rows = raidPtr->numRow;
2998 clabel->num_columns = raidPtr->numCol;
2999 clabel->clean = RF_RAID_DIRTY; /* not clean */
3000 clabel->status = rf_ds_optimal; /* "It's good!" */
3001
3002 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3003 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3004 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3005
3006 clabel->blockSize = raidPtr->bytesPerSector;
3007 clabel->numBlocks = raidPtr->sectorsPerDisk;
3008
3009 /* XXX not portable */
3010 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3011 clabel->maxOutstanding = raidPtr->maxOutstanding;
3012 clabel->autoconfigure = raidPtr->autoconfigure;
3013 clabel->root_partition = raidPtr->root_partition;
3014 clabel->last_unit = raidPtr->raidid;
3015 clabel->config_order = raidPtr->config_order;
3016 }
3017
3018 int
3019 rf_auto_config_set(cset,unit)
3020 RF_ConfigSet_t *cset;
3021 int *unit;
3022 {
3023 RF_Raid_t *raidPtr;
3024 RF_Config_t *config;
3025 int raidID;
3026 int retcode;
3027
3028 printf("Starting autoconfigure on raid%d\n",raidID);
3029
3030 retcode = 0;
3031 *unit = -1;
3032
3033 /* 1. Create a config structure */
3034
3035 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3036 M_RAIDFRAME,
3037 M_NOWAIT);
3038 if (config==NULL) {
3039 printf("Out of mem!?!?\n");
3040 /* XXX do something more intelligent here. */
3041 return(1);
3042 }
3043 /* XXX raidID needs to be set correctly.. */
3044
3045 /*
3046 2. Figure out what RAID ID this one is supposed to live at
3047 See if we can get the same RAID dev that it was configured
3048 on last time..
3049 */
3050
3051 raidID = cset->ac->clabel->last_unit;
3052 if ((raidID < 0) || (raidID >= numraid)) {
3053 /* let's not wander off into lala land. */
3054 raidID = numraid - 1;
3055 }
3056 if (raidPtrs[raidID]->valid != 0) {
3057
3058 /*
3059 Nope... Go looking for an alternative...
3060 Start high so we don't immediately use raid0 if that's
3061 not taken.
3062 */
3063
3064 for(raidID = numraid; raidID >= 0; raidID--) {
3065 if (raidPtrs[raidID]->valid == 0) {
3066 /* can use this one! */
3067 break;
3068 }
3069 }
3070 }
3071
3072 if (raidID < 0) {
3073 /* punt... */
3074 printf("Unable to auto configure this set!\n");
3075 printf("(Out of RAID devs!)\n");
3076 return(1);
3077 }
3078
3079 raidPtr = raidPtrs[raidID];
3080
3081 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3082 raidPtr->raidid = raidID;
3083 raidPtr->openings = RAIDOUTSTANDING;
3084
3085 /* 3. Build the configuration structure */
3086 rf_create_configuration(cset->ac, config, raidPtr);
3087
3088 /* 4. Do the configuration */
3089 retcode = rf_Configure(raidPtr, config, cset->ac);
3090
3091 if (retcode == 0) {
3092 #if DEBUG
3093 printf("Calling raidinit()\n");
3094 #endif
3095 /* XXX the 0 below is bogus! */
3096 raidinit(raidPtrs[raidID]);
3097
3098 rf_markalldirty(raidPtrs[raidID]);
3099 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3100 if (cset->ac->clabel->root_partition==1) {
3101 /* everything configured just fine. Make a note
3102 that this set is eligible to be root. */
3103 cset->rootable = 1;
3104 /* XXX do this here? */
3105 raidPtrs[raidID]->root_partition = 1;
3106 }
3107 }
3108
3109 /* 5. Cleanup */
3110 free(config, M_RAIDFRAME);
3111
3112 *unit = raidID;
3113 return(retcode);
3114 }
3115