/*	$NetBSD: rf_netbsdkintf.c,v 1.70 2000/03/07 03:09:47 oster Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include "raid.h"
139 #include "opt_raid_autoconfig.h"
140 #include "rf_raid.h"
141 #include "rf_raidframe.h"
142 #include "rf_copyback.h"
143 #include "rf_dag.h"
144 #include "rf_dagflags.h"
145 #include "rf_diskqueue.h"
146 #include "rf_acctrace.h"
147 #include "rf_etimer.h"
148 #include "rf_general.h"
149 #include "rf_debugMem.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_debugprint.h"
155 #include "rf_threadstuff.h"
156 #include "rf_configure.h"
157
/* Debug verbosity for this file; db1_printf() prints only when it is > 0. */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- level-1 debug printf.
 *
 * The argument is a complete, parenthesized printf argument list, hence
 * the double parentheses at every call site.
 *
 * Both variants expand to a single do { } while (0) statement so that the
 * macro behaves like an ordinary function call: it needs its trailing
 * semicolon, it cannot capture an `else' that follows it, and it may be
 * used as the unbraced body of an if/else.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
165
/*
 * Table of per-unit RAID descriptors, indexed by unit number.  The table
 * and each descriptor are allocated in raidattach().
 */
static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/* Initialized with rf_mutex_init() in raidattach(). */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

/* Both queue heads are reset to NULL in raidattach(). */
static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
174
175 /* prototypes */
/* File-local helpers; their definitions are not in the visible part of the file. */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);
static void raidinit __P((RF_Raid_t *));

/* Externally visible driver entry points defined in this file. */
void raidattach __P((int));
int raidsize __P((dev_t));
int raidopen __P((dev_t, int, int, struct proc *));
int raidclose __P((dev_t, int, int, struct proc *));
int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int raidwrite __P((dev_t, struct uio *, int));
int raidread __P((dev_t, struct uio *, int));
void raidstrategy __P((struct buf *));
int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
193
194 /*
195 * Pilfered from ccd.c
196 */
197
/*
 * Per-component I/O buffer wrapper.
 * NOTE(review): rf_buf presumably has to be the first member so that a
 * struct buf pointer can be converted back to the enclosing struct
 * raidbuf -- confirm against the code that uses it.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


/*
 * Take a raidbuf from / return a raidbuf to the unit's sc_cbufpool.
 * RAIDGETBUF passes PR_NOWAIT to pool_get().
 * NOTE(review): presumably this means it can yield NULL -- confirm that
 * callers check the result.
 */
#define	RAIDGETBUF(rs)		pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp)	pool_put(&(rs)->sc_cbufpool, cbp)
208
209 /* XXX Not sure if the following should be replacing the raidPtrs above,
210 or if it should be used in conjunction with that...
211 */
212
/*
 * Per-unit driver state.  One entry per unit is allocated (and zeroed)
 * in raidattach(); the entry points below index the array with
 * raidunit(dev).
 */
struct raid_softc {
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf_queue buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Unit number encoded in a dev_t (thin alias for DISKUNIT). */
#define	raidunit(x)	DISKUNIT(x)
/*
 * Number of usable units.  Set in raidattach(); every entry point
 * rejects unit numbers >= numraid.
 */
int numraid = 0;
231
232 /*
233 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
234 * Be aware that large numbers can allow the driver to consume a lot of
235 * kernel memory, especially on writes, and in degraded mode reads.
236 *
237 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
238 * a single 64K write will typically require 64K for the old data,
239 * 64K for the old parity, and 64K for the new parity, for a total
240 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
242 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
243 *
244 * Now in degraded mode, for example, a 64K read on the above setup may
245 * require data reconstruction, which will require *all* of the 4 remaining
246 * disks to participate -- 4 * 32K/disk == 128K again.
247 */
248
/*
 * Default number of simultaneous I/Os per RAID device.  The #ifndef
 * lets a definition made before this point take precedence.
 */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* dev_t of the raw partition (RAW_PART) on the same major/unit as dev. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
255
/* declared here, and made public, for the benefit of KVM stuff.. */
/* Array of numraid entries, allocated in raidattach(). */
struct raid_softc *raid_softc;

/* Disklabel helpers (definitions are outside the visible part of the file). */
static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
				     struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

/* Per-unit lock taken around open, close and shutdown. */
static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));

static void rf_markalldirty __P((RF_Raid_t *));
void rf_mountroot_hook __P((struct device *));

/*
 * One struct device per unit, filled in by raidattach().
 * rf_buildroothack() points booted_device at one of these entries.
 */
struct device *raidrootdev;

/* Thread bodies. */
void rf_ReconThread __P((struct rf_recon_req *));
/* XXX what I want is: */
/*void rf_ReconThread __P((RF_Raid_t *raidPtr));  */
void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
void rf_CopybackThread __P((RF_Raid_t *raidPtr));
void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
void rf_buildroothack __P((void *));

/* Autoconfiguration support. */
RF_AutoConfig_t *rf_find_raid_components __P((void));
RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
static int rf_reasonable_label __P((RF_ComponentLabel_t *));
void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
				  RF_Raid_t *));
int rf_set_autoconfig __P((RF_Raid_t *, int));
int rf_set_rootpartition __P((RF_Raid_t *, int));
void rf_release_all_vps __P((RF_ConfigSet_t *));
void rf_cleanup_config_set __P((RF_ConfigSet_t *));
int rf_have_enough_components __P((RF_ConfigSet_t *));
int rf_auto_config_set __P((RF_ConfigSet_t *, int *));

/* raidattach() forces this to 1 when RAID_AUTOCONFIG is non-zero. */
static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
				  allow autoconfig to take place.
			          Note that this is overridden by having
			          RAID_AUTOCONFIG as an option in the
			          kernel config file.  */
extern struct device *booted_device;
299
300 void
301 raidattach(num)
302 int num;
303 {
304 int raidID;
305 int i, rc;
306 RF_AutoConfig_t *ac_list; /* autoconfig list */
307 RF_ConfigSet_t *config_sets;
308
309 #ifdef DEBUG
310 printf("raidattach: Asked for %d units\n", num);
311 #endif
312
313 if (num <= 0) {
314 #ifdef DIAGNOSTIC
315 panic("raidattach: count <= 0");
316 #endif
317 return;
318 }
319 /* This is where all the initialization stuff gets done. */
320
321 numraid = num;
322
323 /* Make some space for requested number of units... */
324
325 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
326 if (raidPtrs == NULL) {
327 panic("raidPtrs is NULL!!\n");
328 }
329
330 rc = rf_mutex_init(&rf_sparet_wait_mutex);
331 if (rc) {
332 RF_PANIC();
333 }
334
335 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
336
337 for (i = 0; i < num; i++)
338 raidPtrs[i] = NULL;
339 rc = rf_BootRaidframe();
340 if (rc == 0)
341 printf("Kernelized RAIDframe activated\n");
342 else
343 panic("Serious error booting RAID!!\n");
344
345 /* put together some datastructures like the CCD device does.. This
346 * lets us lock the device and what-not when it gets opened. */
347
348 raid_softc = (struct raid_softc *)
349 malloc(num * sizeof(struct raid_softc),
350 M_RAIDFRAME, M_NOWAIT);
351 if (raid_softc == NULL) {
352 printf("WARNING: no memory for RAIDframe driver\n");
353 return;
354 }
355
356 bzero(raid_softc, num * sizeof(struct raid_softc));
357
358 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
359 M_RAIDFRAME, M_NOWAIT);
360 if (raidrootdev == NULL) {
361 panic("No memory for RAIDframe driver!!?!?!\n");
362 }
363
364 for (raidID = 0; raidID < num; raidID++) {
365 BUFQ_INIT(&raid_softc[raidID].buf_queue);
366
367 raidrootdev[raidID].dv_class = DV_DISK;
368 raidrootdev[raidID].dv_cfdata = NULL;
369 raidrootdev[raidID].dv_unit = raidID;
370 raidrootdev[raidID].dv_parent = NULL;
371 raidrootdev[raidID].dv_flags = 0;
372 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
373
374 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
375 (RF_Raid_t *));
376 if (raidPtrs[raidID] == NULL) {
377 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
378 numraid = raidID;
379 return;
380 }
381 }
382
383 #if RAID_AUTOCONFIG
384 raidautoconfig = 1;
385 #endif
386
387 if (raidautoconfig) {
388 /* 1. locate all RAID components on the system */
389
390 #if DEBUG
391 printf("Searching for raid components...\n");
392 #endif
393 ac_list = rf_find_raid_components();
394
395 /* 2. sort them into their respective sets */
396
397 config_sets = rf_create_auto_sets(ac_list);
398
399 /* 3. evaluate each set and configure the valid ones
400 This gets done in rf_buildroothack() */
401
402 /* schedule the creation of the thread to do the
403 "/ on RAID" stuff */
404
405 kthread_create(rf_buildroothack,config_sets);
406
407 #if 0
408 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
409 #endif
410 }
411
412 }
413
414 void
415 rf_buildroothack(arg)
416 void *arg;
417 {
418 RF_ConfigSet_t *config_sets = arg;
419 RF_ConfigSet_t *cset;
420 RF_ConfigSet_t *next_cset;
421 int retcode;
422 int raidID;
423 int rootID;
424 int num_root;
425
426 num_root = 0;
427 cset = config_sets;
428 while(cset != NULL ) {
429 next_cset = cset->next;
430 if (rf_have_enough_components(cset) &&
431 cset->ac->clabel->autoconfigure==1) {
432 retcode = rf_auto_config_set(cset,&raidID);
433 if (!retcode) {
434 if (cset->rootable) {
435 rootID = raidID;
436 num_root++;
437 }
438 } else {
439 /* The autoconfig didn't work :( */
440 #if DEBUG
441 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
442 #endif
443 rf_release_all_vps(cset);
444 }
445 } else {
446 /* we're not autoconfiguring this set...
447 release the associated resources */
448 rf_release_all_vps(cset);
449 }
450 /* cleanup */
451 rf_cleanup_config_set(cset);
452 cset = next_cset;
453 }
454 if (boothowto & RB_ASKNAME) {
455 /* We don't auto-config... */
456 } else {
457 /* They didn't ask, and we found something bootable... */
458
459 if (num_root == 1) {
460 booted_device = &raidrootdev[rootID];
461 } else if (num_root > 1) {
462 /* we can't guess.. require the user to answer... */
463 boothowto |= RB_ASKNAME;
464 }
465 }
466 }
467
468
469 int
470 raidsize(dev)
471 dev_t dev;
472 {
473 struct raid_softc *rs;
474 struct disklabel *lp;
475 int part, unit, omask, size;
476
477 unit = raidunit(dev);
478 if (unit >= numraid)
479 return (-1);
480 rs = &raid_softc[unit];
481
482 if ((rs->sc_flags & RAIDF_INITED) == 0)
483 return (-1);
484
485 part = DISKPART(dev);
486 omask = rs->sc_dkdev.dk_openmask & (1 << part);
487 lp = rs->sc_dkdev.dk_label;
488
489 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
490 return (-1);
491
492 if (lp->d_partitions[part].p_fstype != FS_SWAP)
493 size = -1;
494 else
495 size = lp->d_partitions[part].p_size *
496 (lp->d_secsize / DEV_BSIZE);
497
498 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
499 return (-1);
500
501 return (size);
502
503 }
504
505 int
506 raiddump(dev, blkno, va, size)
507 dev_t dev;
508 daddr_t blkno;
509 caddr_t va;
510 size_t size;
511 {
512 /* Not implemented. */
513 return ENXIO;
514 }
515 /* ARGSUSED */
516 int
517 raidopen(dev, flags, fmt, p)
518 dev_t dev;
519 int flags, fmt;
520 struct proc *p;
521 {
522 int unit = raidunit(dev);
523 struct raid_softc *rs;
524 struct disklabel *lp;
525 int part, pmask;
526 int error = 0;
527
528 if (unit >= numraid)
529 return (ENXIO);
530 rs = &raid_softc[unit];
531
532 if ((error = raidlock(rs)) != 0)
533 return (error);
534 lp = rs->sc_dkdev.dk_label;
535
536 part = DISKPART(dev);
537 pmask = (1 << part);
538
539 db1_printf(("Opening raid device number: %d partition: %d\n",
540 unit, part));
541
542
543 if ((rs->sc_flags & RAIDF_INITED) &&
544 (rs->sc_dkdev.dk_openmask == 0))
545 raidgetdisklabel(dev);
546
547 /* make sure that this partition exists */
548
549 if (part != RAW_PART) {
550 db1_printf(("Not a raw partition..\n"));
551 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
552 ((part >= lp->d_npartitions) ||
553 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
554 error = ENXIO;
555 raidunlock(rs);
556 db1_printf(("Bailing out...\n"));
557 return (error);
558 }
559 }
560 /* Prevent this unit from being unconfigured while open. */
561 switch (fmt) {
562 case S_IFCHR:
563 rs->sc_dkdev.dk_copenmask |= pmask;
564 break;
565
566 case S_IFBLK:
567 rs->sc_dkdev.dk_bopenmask |= pmask;
568 break;
569 }
570
571 if ((rs->sc_dkdev.dk_openmask == 0) &&
572 ((rs->sc_flags & RAIDF_INITED) != 0)) {
573 /* First one... mark things as dirty... Note that we *MUST*
574 have done a configure before this. I DO NOT WANT TO BE
575 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
576 THAT THEY BELONG TOGETHER!!!!! */
577 /* XXX should check to see if we're only open for reading
578 here... If so, we needn't do this, but then need some
579 other way of keeping track of what's happened.. */
580
581 rf_markalldirty( raidPtrs[unit] );
582 }
583
584
585 rs->sc_dkdev.dk_openmask =
586 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
587
588 raidunlock(rs);
589
590 return (error);
591
592
593 }
594 /* ARGSUSED */
595 int
596 raidclose(dev, flags, fmt, p)
597 dev_t dev;
598 int flags, fmt;
599 struct proc *p;
600 {
601 int unit = raidunit(dev);
602 struct raid_softc *rs;
603 int error = 0;
604 int part;
605
606 if (unit >= numraid)
607 return (ENXIO);
608 rs = &raid_softc[unit];
609
610 if ((error = raidlock(rs)) != 0)
611 return (error);
612
613 part = DISKPART(dev);
614
615 /* ...that much closer to allowing unconfiguration... */
616 switch (fmt) {
617 case S_IFCHR:
618 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
619 break;
620
621 case S_IFBLK:
622 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
623 break;
624 }
625 rs->sc_dkdev.dk_openmask =
626 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
627
628 if ((rs->sc_dkdev.dk_openmask == 0) &&
629 ((rs->sc_flags & RAIDF_INITED) != 0)) {
630 /* Last one... device is not unconfigured yet.
631 Device shutdown has taken care of setting the
632 clean bits if RAIDF_INITED is not set
633 mark things as clean... */
634 #if 0
635 printf("Last one on raid%d. Updating status.\n",unit);
636 #endif
637 rf_final_update_component_labels( raidPtrs[unit] );
638 }
639
640 raidunlock(rs);
641 return (0);
642
643 }
644
645 void
646 raidstrategy(bp)
647 register struct buf *bp;
648 {
649 register int s;
650
651 unsigned int raidID = raidunit(bp->b_dev);
652 RF_Raid_t *raidPtr;
653 struct raid_softc *rs = &raid_softc[raidID];
654 struct disklabel *lp;
655 int wlabel;
656
657 if ((rs->sc_flags & RAIDF_INITED) ==0) {
658 bp->b_error = ENXIO;
659 bp->b_flags = B_ERROR;
660 bp->b_resid = bp->b_bcount;
661 biodone(bp);
662 return;
663 }
664 if (raidID >= numraid || !raidPtrs[raidID]) {
665 bp->b_error = ENODEV;
666 bp->b_flags |= B_ERROR;
667 bp->b_resid = bp->b_bcount;
668 biodone(bp);
669 return;
670 }
671 raidPtr = raidPtrs[raidID];
672 if (!raidPtr->valid) {
673 bp->b_error = ENODEV;
674 bp->b_flags |= B_ERROR;
675 bp->b_resid = bp->b_bcount;
676 biodone(bp);
677 return;
678 }
679 if (bp->b_bcount == 0) {
680 db1_printf(("b_bcount is zero..\n"));
681 biodone(bp);
682 return;
683 }
684 lp = rs->sc_dkdev.dk_label;
685
686 /*
687 * Do bounds checking and adjust transfer. If there's an
688 * error, the bounds check will flag that for us.
689 */
690
691 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
692 if (DISKPART(bp->b_dev) != RAW_PART)
693 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
694 db1_printf(("Bounds check failed!!:%d %d\n",
695 (int) bp->b_blkno, (int) wlabel));
696 biodone(bp);
697 return;
698 }
699 s = splbio();
700
701 bp->b_resid = 0;
702
703 /* stuff it onto our queue */
704 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
705
706 raidstart(raidPtrs[raidID]);
707
708 splx(s);
709 }
710 /* ARGSUSED */
711 int
712 raidread(dev, uio, flags)
713 dev_t dev;
714 struct uio *uio;
715 int flags;
716 {
717 int unit = raidunit(dev);
718 struct raid_softc *rs;
719 int part;
720
721 if (unit >= numraid)
722 return (ENXIO);
723 rs = &raid_softc[unit];
724
725 if ((rs->sc_flags & RAIDF_INITED) == 0)
726 return (ENXIO);
727 part = DISKPART(dev);
728
729 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
730
731 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
732
733 }
734 /* ARGSUSED */
735 int
736 raidwrite(dev, uio, flags)
737 dev_t dev;
738 struct uio *uio;
739 int flags;
740 {
741 int unit = raidunit(dev);
742 struct raid_softc *rs;
743
744 if (unit >= numraid)
745 return (ENXIO);
746 rs = &raid_softc[unit];
747
748 if ((rs->sc_flags & RAIDF_INITED) == 0)
749 return (ENXIO);
750 db1_printf(("raidwrite\n"));
751 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
752
753 }
754
755 int
756 raidioctl(dev, cmd, data, flag, p)
757 dev_t dev;
758 u_long cmd;
759 caddr_t data;
760 int flag;
761 struct proc *p;
762 {
763 int unit = raidunit(dev);
764 int error = 0;
765 int part, pmask;
766 struct raid_softc *rs;
767 RF_Config_t *k_cfg, *u_cfg;
768 RF_Raid_t *raidPtr;
769 RF_RaidDisk_t *diskPtr;
770 RF_AccTotals_t *totals;
771 RF_DeviceConfig_t *d_cfg, **ucfgp;
772 u_char *specific_buf;
773 int retcode = 0;
774 int row;
775 int column;
776 struct rf_recon_req *rrcopy, *rr;
777 RF_ComponentLabel_t *clabel;
778 RF_ComponentLabel_t ci_label;
779 RF_ComponentLabel_t **clabel_ptr;
780 RF_SingleComponent_t *sparePtr,*componentPtr;
781 RF_SingleComponent_t hot_spare;
782 RF_SingleComponent_t component;
783 int i, j, d;
784
785 if (unit >= numraid)
786 return (ENXIO);
787 rs = &raid_softc[unit];
788 raidPtr = raidPtrs[unit];
789
790 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
791 (int) DISKPART(dev), (int) unit, (int) cmd));
792
793 /* Must be open for writes for these commands... */
794 switch (cmd) {
795 case DIOCSDINFO:
796 case DIOCWDINFO:
797 case DIOCWLABEL:
798 if ((flag & FWRITE) == 0)
799 return (EBADF);
800 }
801
802 /* Must be initialized for these... */
803 switch (cmd) {
804 case DIOCGDINFO:
805 case DIOCSDINFO:
806 case DIOCWDINFO:
807 case DIOCGPART:
808 case DIOCWLABEL:
809 case DIOCGDEFLABEL:
810 case RAIDFRAME_SHUTDOWN:
811 case RAIDFRAME_REWRITEPARITY:
812 case RAIDFRAME_GET_INFO:
813 case RAIDFRAME_RESET_ACCTOTALS:
814 case RAIDFRAME_GET_ACCTOTALS:
815 case RAIDFRAME_KEEP_ACCTOTALS:
816 case RAIDFRAME_GET_SIZE:
817 case RAIDFRAME_FAIL_DISK:
818 case RAIDFRAME_COPYBACK:
819 case RAIDFRAME_CHECK_RECON_STATUS:
820 case RAIDFRAME_GET_COMPONENT_LABEL:
821 case RAIDFRAME_SET_COMPONENT_LABEL:
822 case RAIDFRAME_ADD_HOT_SPARE:
823 case RAIDFRAME_REMOVE_HOT_SPARE:
824 case RAIDFRAME_INIT_LABELS:
825 case RAIDFRAME_REBUILD_IN_PLACE:
826 case RAIDFRAME_CHECK_PARITY:
827 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
828 case RAIDFRAME_CHECK_COPYBACK_STATUS:
829 case RAIDFRAME_SET_AUTOCONFIG:
830 case RAIDFRAME_SET_ROOT:
831 if ((rs->sc_flags & RAIDF_INITED) == 0)
832 return (ENXIO);
833 }
834
835 switch (cmd) {
836
837 /* configure the system */
838 case RAIDFRAME_CONFIGURE:
839
840 if (raidPtr->valid) {
841 /* There is a valid RAID set running on this unit! */
842 printf("raid%d: Device already configured!\n",unit);
843 return(EINVAL);
844 }
845
846 /* copy-in the configuration information */
847 /* data points to a pointer to the configuration structure */
848
849 u_cfg = *((RF_Config_t **) data);
850 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
851 if (k_cfg == NULL) {
852 return (ENOMEM);
853 }
854 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
855 sizeof(RF_Config_t));
856 if (retcode) {
857 RF_Free(k_cfg, sizeof(RF_Config_t));
858 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
859 retcode));
860 return (retcode);
861 }
862 /* allocate a buffer for the layout-specific data, and copy it
863 * in */
864 if (k_cfg->layoutSpecificSize) {
865 if (k_cfg->layoutSpecificSize > 10000) {
866 /* sanity check */
867 RF_Free(k_cfg, sizeof(RF_Config_t));
868 return (EINVAL);
869 }
870 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
871 (u_char *));
872 if (specific_buf == NULL) {
873 RF_Free(k_cfg, sizeof(RF_Config_t));
874 return (ENOMEM);
875 }
876 retcode = copyin(k_cfg->layoutSpecific,
877 (caddr_t) specific_buf,
878 k_cfg->layoutSpecificSize);
879 if (retcode) {
880 RF_Free(k_cfg, sizeof(RF_Config_t));
881 RF_Free(specific_buf,
882 k_cfg->layoutSpecificSize);
883 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
884 retcode));
885 return (retcode);
886 }
887 } else
888 specific_buf = NULL;
889 k_cfg->layoutSpecific = specific_buf;
890
891 /* should do some kind of sanity check on the configuration.
892 * Store the sum of all the bytes in the last byte? */
893
894 /* configure the system */
895
896 /*
897 * Clear the entire RAID descriptor, just to make sure
898 * there is no stale data left in the case of a
899 * reconfiguration
900 */
901 bzero((char *) raidPtr, sizeof(RF_Raid_t));
902 raidPtr->raidid = unit;
903
904 retcode = rf_Configure(raidPtr, k_cfg, NULL);
905
906 if (retcode == 0) {
907
908 /* allow this many simultaneous IO's to
909 this RAID device */
910 raidPtr->openings = RAIDOUTSTANDING;
911
912 raidinit(raidPtr);
913 rf_markalldirty(raidPtr);
914 }
915 /* free the buffers. No return code here. */
916 if (k_cfg->layoutSpecificSize) {
917 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
918 }
919 RF_Free(k_cfg, sizeof(RF_Config_t));
920
921 return (retcode);
922
923 /* shutdown the system */
924 case RAIDFRAME_SHUTDOWN:
925
926 if ((error = raidlock(rs)) != 0)
927 return (error);
928
929 /*
930 * If somebody has a partition mounted, we shouldn't
931 * shutdown.
932 */
933
934 part = DISKPART(dev);
935 pmask = (1 << part);
936 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
937 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
938 (rs->sc_dkdev.dk_copenmask & pmask))) {
939 raidunlock(rs);
940 return (EBUSY);
941 }
942
943 retcode = rf_Shutdown(raidPtr);
944
945 pool_destroy(&rs->sc_cbufpool);
946
947 /* It's no longer initialized... */
948 rs->sc_flags &= ~RAIDF_INITED;
949
950 /* Detach the disk. */
951 disk_detach(&rs->sc_dkdev);
952
953 raidunlock(rs);
954
955 return (retcode);
956 case RAIDFRAME_GET_COMPONENT_LABEL:
957 clabel_ptr = (RF_ComponentLabel_t **) data;
958 /* need to read the component label for the disk indicated
959 by row,column in clabel */
960
961 /* For practice, let's get it directly fromdisk, rather
962 than from the in-core copy */
963 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
964 (RF_ComponentLabel_t *));
965 if (clabel == NULL)
966 return (ENOMEM);
967
968 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
969
970 retcode = copyin( *clabel_ptr, clabel,
971 sizeof(RF_ComponentLabel_t));
972
973 if (retcode) {
974 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
975 return(retcode);
976 }
977
978 row = clabel->row;
979 column = clabel->column;
980
981 if ((row < 0) || (row >= raidPtr->numRow) ||
982 (column < 0) || (column >= raidPtr->numCol)) {
983 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
984 return(EINVAL);
985 }
986
987 raidread_component_label(raidPtr->Disks[row][column].dev,
988 raidPtr->raid_cinfo[row][column].ci_vp,
989 clabel );
990
991 retcode = copyout((caddr_t) clabel,
992 (caddr_t) *clabel_ptr,
993 sizeof(RF_ComponentLabel_t));
994 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
995 return (retcode);
996
997 case RAIDFRAME_SET_COMPONENT_LABEL:
998 clabel = (RF_ComponentLabel_t *) data;
999
1000 /* XXX check the label for valid stuff... */
1001 /* Note that some things *should not* get modified --
1002 the user should be re-initing the labels instead of
1003 trying to patch things.
1004 */
1005
1006 printf("Got component label:\n");
1007 printf("Version: %d\n",clabel->version);
1008 printf("Serial Number: %d\n",clabel->serial_number);
1009 printf("Mod counter: %d\n",clabel->mod_counter);
1010 printf("Row: %d\n", clabel->row);
1011 printf("Column: %d\n", clabel->column);
1012 printf("Num Rows: %d\n", clabel->num_rows);
1013 printf("Num Columns: %d\n", clabel->num_columns);
1014 printf("Clean: %d\n", clabel->clean);
1015 printf("Status: %d\n", clabel->status);
1016
1017 row = clabel->row;
1018 column = clabel->column;
1019
1020 if ((row < 0) || (row >= raidPtr->numRow) ||
1021 (column < 0) || (column >= raidPtr->numCol)) {
1022 return(EINVAL);
1023 }
1024
1025 /* XXX this isn't allowed to do anything for now :-) */
1026
1027 /* XXX and before it is, we need to fill in the rest
1028 of the fields!?!?!?! */
1029 #if 0
1030 raidwrite_component_label(
1031 raidPtr->Disks[row][column].dev,
1032 raidPtr->raid_cinfo[row][column].ci_vp,
1033 clabel );
1034 #endif
1035 return (0);
1036
1037 case RAIDFRAME_INIT_LABELS:
1038 clabel = (RF_ComponentLabel_t *) data;
1039 /*
1040 we only want the serial number from
1041 the above. We get all the rest of the information
1042 from the config that was used to create this RAID
1043 set.
1044 */
1045
1046 raidPtr->serial_number = clabel->serial_number;
1047
1048 raid_init_component_label(raidPtr, &ci_label);
1049 ci_label.serial_number = clabel->serial_number;
1050
1051 for(row=0;row<raidPtr->numRow;row++) {
1052 ci_label.row = row;
1053 for(column=0;column<raidPtr->numCol;column++) {
1054 diskPtr = &raidPtr->Disks[row][column];
1055 ci_label.partitionSize = diskPtr->partitionSize;
1056 ci_label.column = column;
1057 raidwrite_component_label(
1058 raidPtr->Disks[row][column].dev,
1059 raidPtr->raid_cinfo[row][column].ci_vp,
1060 &ci_label );
1061 }
1062 }
1063
1064 return (retcode);
1065 case RAIDFRAME_SET_AUTOCONFIG:
1066 d = rf_set_autoconfig(raidPtr, *data);
1067 printf("New autoconfig value is: %d\n", d);
1068 *data = d;
1069 return (retcode);
1070
1071 case RAIDFRAME_SET_ROOT:
1072 d = rf_set_rootpartition(raidPtr, *data);
1073 printf("New rootpartition value is: %d\n", d);
1074 *data = d;
1075 return (retcode);
1076
1077 /* initialize all parity */
1078 case RAIDFRAME_REWRITEPARITY:
1079
1080 if (raidPtr->Layout.map->faultsTolerated == 0) {
1081 /* Parity for RAID 0 is trivially correct */
1082 raidPtr->parity_good = RF_RAID_CLEAN;
1083 return(0);
1084 }
1085
1086 if (raidPtr->parity_rewrite_in_progress == 1) {
1087 /* Re-write is already in progress! */
1088 return(EINVAL);
1089 }
1090
1091 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1092 rf_RewriteParityThread,
1093 raidPtr,"raid_parity");
1094 return (retcode);
1095
1096
1097 case RAIDFRAME_ADD_HOT_SPARE:
1098 sparePtr = (RF_SingleComponent_t *) data;
1099 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1100 printf("Adding spare\n");
1101 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1102 return(retcode);
1103
1104 case RAIDFRAME_REMOVE_HOT_SPARE:
1105 return(retcode);
1106
1107 case RAIDFRAME_REBUILD_IN_PLACE:
1108
1109 if (raidPtr->Layout.map->faultsTolerated == 0) {
1110 /* Can't do this on a RAID 0!! */
1111 return(EINVAL);
1112 }
1113
1114 if (raidPtr->recon_in_progress == 1) {
1115 /* a reconstruct is already in progress! */
1116 return(EINVAL);
1117 }
1118
1119 componentPtr = (RF_SingleComponent_t *) data;
1120 memcpy( &component, componentPtr,
1121 sizeof(RF_SingleComponent_t));
1122 row = component.row;
1123 column = component.column;
1124 printf("Rebuild: %d %d\n",row, column);
1125 if ((row < 0) || (row >= raidPtr->numRow) ||
1126 (column < 0) || (column >= raidPtr->numCol)) {
1127 return(EINVAL);
1128 }
1129
1130 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1131 if (rrcopy == NULL)
1132 return(ENOMEM);
1133
1134 rrcopy->raidPtr = (void *) raidPtr;
1135 rrcopy->row = row;
1136 rrcopy->col = column;
1137
1138 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1139 rf_ReconstructInPlaceThread,
1140 rrcopy,"raid_reconip");
1141 return(retcode);
1142
1143 case RAIDFRAME_GET_INFO:
1144 if (!raidPtr->valid)
1145 return (ENODEV);
1146 ucfgp = (RF_DeviceConfig_t **) data;
1147 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1148 (RF_DeviceConfig_t *));
1149 if (d_cfg == NULL)
1150 return (ENOMEM);
1151 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1152 d_cfg->rows = raidPtr->numRow;
1153 d_cfg->cols = raidPtr->numCol;
1154 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1155 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1156 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1157 return (ENOMEM);
1158 }
1159 d_cfg->nspares = raidPtr->numSpare;
1160 if (d_cfg->nspares >= RF_MAX_DISKS) {
1161 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1162 return (ENOMEM);
1163 }
1164 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1165 d = 0;
1166 for (i = 0; i < d_cfg->rows; i++) {
1167 for (j = 0; j < d_cfg->cols; j++) {
1168 d_cfg->devs[d] = raidPtr->Disks[i][j];
1169 d++;
1170 }
1171 }
1172 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1173 d_cfg->spares[i] = raidPtr->Disks[0][j];
1174 }
1175 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1176 sizeof(RF_DeviceConfig_t));
1177 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1178
1179 return (retcode);
1180
1181 case RAIDFRAME_CHECK_PARITY:
1182 *(int *) data = raidPtr->parity_good;
1183 return (0);
1184
1185 case RAIDFRAME_RESET_ACCTOTALS:
1186 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1187 return (0);
1188
1189 case RAIDFRAME_GET_ACCTOTALS:
1190 totals = (RF_AccTotals_t *) data;
1191 *totals = raidPtr->acc_totals;
1192 return (0);
1193
1194 case RAIDFRAME_KEEP_ACCTOTALS:
1195 raidPtr->keep_acc_totals = *(int *)data;
1196 return (0);
1197
1198 case RAIDFRAME_GET_SIZE:
1199 *(int *) data = raidPtr->totalSectors;
1200 return (0);
1201
1202 /* fail a disk & optionally start reconstruction */
1203 case RAIDFRAME_FAIL_DISK:
1204
1205 if (raidPtr->Layout.map->faultsTolerated == 0) {
1206 /* Can't do this on a RAID 0!! */
1207 return(EINVAL);
1208 }
1209
1210 rr = (struct rf_recon_req *) data;
1211
1212 if (rr->row < 0 || rr->row >= raidPtr->numRow
1213 || rr->col < 0 || rr->col >= raidPtr->numCol)
1214 return (EINVAL);
1215
1216 printf("raid%d: Failing the disk: row: %d col: %d\n",
1217 unit, rr->row, rr->col);
1218
1219 /* make a copy of the recon request so that we don't rely on
1220 * the user's buffer */
1221 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1222 if (rrcopy == NULL)
1223 return(ENOMEM);
1224 bcopy(rr, rrcopy, sizeof(*rr));
1225 rrcopy->raidPtr = (void *) raidPtr;
1226
1227 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1228 rf_ReconThread,
1229 rrcopy,"raid_recon");
1230 return (0);
1231
1232 /* invoke a copyback operation after recon on whatever disk
1233 * needs it, if any */
1234 case RAIDFRAME_COPYBACK:
1235
1236 if (raidPtr->Layout.map->faultsTolerated == 0) {
1237 /* This makes no sense on a RAID 0!! */
1238 return(EINVAL);
1239 }
1240
1241 if (raidPtr->copyback_in_progress == 1) {
1242 /* Copyback is already in progress! */
1243 return(EINVAL);
1244 }
1245
1246 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1247 rf_CopybackThread,
1248 raidPtr,"raid_copyback");
1249 return (retcode);
1250
1251 /* return the percentage completion of reconstruction */
1252 case RAIDFRAME_CHECK_RECON_STATUS:
1253 if (raidPtr->Layout.map->faultsTolerated == 0) {
1254 /* This makes no sense on a RAID 0 */
1255 return(EINVAL);
1256 }
1257 row = 0; /* XXX we only consider a single row... */
1258 if (raidPtr->status[row] != rf_rs_reconstructing)
1259 *(int *) data = 100;
1260 else
1261 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1262 return (0);
1263
1264 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1265 if (raidPtr->Layout.map->faultsTolerated == 0) {
1266 /* This makes no sense on a RAID 0 */
1267 return(EINVAL);
1268 }
1269 if (raidPtr->parity_rewrite_in_progress == 1) {
1270 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1271 } else {
1272 *(int *) data = 100;
1273 }
1274 return (0);
1275
1276 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1277 if (raidPtr->Layout.map->faultsTolerated == 0) {
1278 /* This makes no sense on a RAID 0 */
1279 return(EINVAL);
1280 }
1281 if (raidPtr->copyback_in_progress == 1) {
1282 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1283 raidPtr->Layout.numStripe;
1284 } else {
1285 *(int *) data = 100;
1286 }
1287 return (0);
1288
1289
1290 /* the sparetable daemon calls this to wait for the kernel to
1291 * need a spare table. this ioctl does not return until a
1292 * spare table is needed. XXX -- calling mpsleep here in the
1293 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1294 * -- I should either compute the spare table in the kernel,
1295 * or have a different -- XXX XXX -- interface (a different
1296 * character device) for delivering the table -- XXX */
1297 #if 0
1298 case RAIDFRAME_SPARET_WAIT:
1299 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1300 while (!rf_sparet_wait_queue)
1301 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1302 waitreq = rf_sparet_wait_queue;
1303 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1304 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1305
1306 /* structure assignment */
1307 *((RF_SparetWait_t *) data) = *waitreq;
1308
1309 RF_Free(waitreq, sizeof(*waitreq));
1310 return (0);
1311
1312 /* wakes up a process waiting on SPARET_WAIT and puts an error
1313 * code in it that will cause the dameon to exit */
1314 case RAIDFRAME_ABORT_SPARET_WAIT:
1315 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1316 waitreq->fcol = -1;
1317 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1318 waitreq->next = rf_sparet_wait_queue;
1319 rf_sparet_wait_queue = waitreq;
1320 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1321 wakeup(&rf_sparet_wait_queue);
1322 return (0);
1323
1324 /* used by the spare table daemon to deliver a spare table
1325 * into the kernel */
1326 case RAIDFRAME_SEND_SPARET:
1327
1328 /* install the spare table */
1329 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1330
1331 /* respond to the requestor. the return status of the spare
1332 * table installation is passed in the "fcol" field */
1333 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1334 waitreq->fcol = retcode;
1335 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1336 waitreq->next = rf_sparet_resp_queue;
1337 rf_sparet_resp_queue = waitreq;
1338 wakeup(&rf_sparet_resp_queue);
1339 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1340
1341 return (retcode);
1342 #endif
1343
1344 default:
1345 break; /* fall through to the os-specific code below */
1346
1347 }
1348
1349 if (!raidPtr->valid)
1350 return (EINVAL);
1351
1352 /*
1353 * Add support for "regular" device ioctls here.
1354 */
1355
1356 switch (cmd) {
1357 case DIOCGDINFO:
1358 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1359 break;
1360
1361 case DIOCGPART:
1362 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1363 ((struct partinfo *) data)->part =
1364 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1365 break;
1366
1367 case DIOCWDINFO:
1368 case DIOCSDINFO:
1369 if ((error = raidlock(rs)) != 0)
1370 return (error);
1371
1372 rs->sc_flags |= RAIDF_LABELLING;
1373
1374 error = setdisklabel(rs->sc_dkdev.dk_label,
1375 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1376 if (error == 0) {
1377 if (cmd == DIOCWDINFO)
1378 error = writedisklabel(RAIDLABELDEV(dev),
1379 raidstrategy, rs->sc_dkdev.dk_label,
1380 rs->sc_dkdev.dk_cpulabel);
1381 }
1382 rs->sc_flags &= ~RAIDF_LABELLING;
1383
1384 raidunlock(rs);
1385
1386 if (error)
1387 return (error);
1388 break;
1389
1390 case DIOCWLABEL:
1391 if (*(int *) data != 0)
1392 rs->sc_flags |= RAIDF_WLABEL;
1393 else
1394 rs->sc_flags &= ~RAIDF_WLABEL;
1395 break;
1396
1397 case DIOCGDEFLABEL:
1398 raidgetdefaultlabel(raidPtr, rs,
1399 (struct disklabel *) data);
1400 break;
1401
1402 default:
1403 retcode = ENOTTY;
1404 }
1405 return (retcode);
1406
1407 }
1408
1409
1410 /* raidinit -- complete the rest of the initialization for the
1411 RAIDframe device. */
1412
1413
1414 static void
1415 raidinit(raidPtr)
1416 RF_Raid_t *raidPtr;
1417 {
1418 struct raid_softc *rs;
1419 int unit;
1420
1421 unit = raidPtr->raidid;
1422
1423 rs = &raid_softc[unit];
1424 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1425 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1426
1427
1428 /* XXX should check return code first... */
1429 rs->sc_flags |= RAIDF_INITED;
1430
1431 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1432
1433 rs->sc_dkdev.dk_name = rs->sc_xname;
1434
1435 /* disk_attach actually creates space for the CPU disklabel, among
1436 * other things, so it's critical to call this *BEFORE* we try putzing
1437 * with disklabels. */
1438
1439 disk_attach(&rs->sc_dkdev);
1440
1441 /* XXX There may be a weird interaction here between this, and
1442 * protectedSectors, as used in RAIDframe. */
1443
1444 rs->sc_size = raidPtr->totalSectors;
1445
1446 }
1447
1448 /* wake up the daemon & tell it to get us a spare table
1449 * XXX
1450 * the entries in the queues should be tagged with the raidPtr
1451 * so that in the extremely rare case that two recons happen at once,
1452 * we know for which device were requesting a spare table
1453 * XXX
1454 *
1455 * XXX This code is not currently used. GO
1456 */
1457 int
1458 rf_GetSpareTableFromDaemon(req)
1459 RF_SparetWait_t *req;
1460 {
1461 int retcode;
1462
1463 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1464 req->next = rf_sparet_wait_queue;
1465 rf_sparet_wait_queue = req;
1466 wakeup(&rf_sparet_wait_queue);
1467
1468 /* mpsleep unlocks the mutex */
1469 while (!rf_sparet_resp_queue) {
1470 tsleep(&rf_sparet_resp_queue, PRIBIO,
1471 "raidframe getsparetable", 0);
1472 }
1473 req = rf_sparet_resp_queue;
1474 rf_sparet_resp_queue = req->next;
1475 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1476
1477 retcode = req->fcol;
1478 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1479 * alloc'd */
1480 return (retcode);
1481 }
1482
1483 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1484 * bp & passes it down.
1485 * any calls originating in the kernel must use non-blocking I/O
1486 * do some extra sanity checking to return "appropriate" error values for
1487 * certain conditions (to make some standard utilities work)
1488 *
1489 * Formerly known as: rf_DoAccessKernel
1490 */
1491 void
1492 raidstart(raidPtr)
1493 RF_Raid_t *raidPtr;
1494 {
1495 RF_SectorCount_t num_blocks, pb, sum;
1496 RF_RaidAddr_t raid_addr;
1497 int retcode;
1498 struct partition *pp;
1499 daddr_t blocknum;
1500 int unit;
1501 struct raid_softc *rs;
1502 int do_async;
1503 struct buf *bp;
1504
1505 unit = raidPtr->raidid;
1506 rs = &raid_softc[unit];
1507
1508 /* quick check to see if anything has died recently */
1509 RF_LOCK_MUTEX(raidPtr->mutex);
1510 if (raidPtr->numNewFailures > 0) {
1511 rf_update_component_labels(raidPtr);
1512 raidPtr->numNewFailures--;
1513 }
1514 RF_UNLOCK_MUTEX(raidPtr->mutex);
1515
1516 /* Check to see if we're at the limit... */
1517 RF_LOCK_MUTEX(raidPtr->mutex);
1518 while (raidPtr->openings > 0) {
1519 RF_UNLOCK_MUTEX(raidPtr->mutex);
1520
1521 /* get the next item, if any, from the queue */
1522 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1523 /* nothing more to do */
1524 return;
1525 }
1526 BUFQ_REMOVE(&rs->buf_queue, bp);
1527
1528 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1529 * partition.. Need to make it absolute to the underlying
1530 * device.. */
1531
1532 blocknum = bp->b_blkno;
1533 if (DISKPART(bp->b_dev) != RAW_PART) {
1534 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1535 blocknum += pp->p_offset;
1536 }
1537
1538 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1539 (int) blocknum));
1540
1541 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1542 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1543
1544 /* *THIS* is where we adjust what block we're going to...
1545 * but DO NOT TOUCH bp->b_blkno!!! */
1546 raid_addr = blocknum;
1547
1548 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1549 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1550 sum = raid_addr + num_blocks + pb;
1551 if (1 || rf_debugKernelAccess) {
1552 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1553 (int) raid_addr, (int) sum, (int) num_blocks,
1554 (int) pb, (int) bp->b_resid));
1555 }
1556 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1557 || (sum < num_blocks) || (sum < pb)) {
1558 bp->b_error = ENOSPC;
1559 bp->b_flags |= B_ERROR;
1560 bp->b_resid = bp->b_bcount;
1561 biodone(bp);
1562 RF_LOCK_MUTEX(raidPtr->mutex);
1563 continue;
1564 }
1565 /*
1566 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1567 */
1568
1569 if (bp->b_bcount & raidPtr->sectorMask) {
1570 bp->b_error = EINVAL;
1571 bp->b_flags |= B_ERROR;
1572 bp->b_resid = bp->b_bcount;
1573 biodone(bp);
1574 RF_LOCK_MUTEX(raidPtr->mutex);
1575 continue;
1576
1577 }
1578 db1_printf(("Calling DoAccess..\n"));
1579
1580
1581 RF_LOCK_MUTEX(raidPtr->mutex);
1582 raidPtr->openings--;
1583 RF_UNLOCK_MUTEX(raidPtr->mutex);
1584
1585 /*
1586 * Everything is async.
1587 */
1588 do_async = 1;
1589
1590 /* don't ever condition on bp->b_flags & B_WRITE.
1591 * always condition on B_READ instead */
1592
1593 /* XXX we're still at splbio() here... do we *really*
1594 need to be? */
1595
1596
1597 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1598 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1599 do_async, raid_addr, num_blocks,
1600 bp->b_un.b_addr, bp, NULL, NULL,
1601 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1602
1603
1604 RF_LOCK_MUTEX(raidPtr->mutex);
1605 }
1606 RF_UNLOCK_MUTEX(raidPtr->mutex);
1607 }
1608
1609
1610
1611
1612 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1613
1614 int
1615 rf_DispatchKernelIO(queue, req)
1616 RF_DiskQueue_t *queue;
1617 RF_DiskQueueData_t *req;
1618 {
1619 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1620 struct buf *bp;
1621 struct raidbuf *raidbp = NULL;
1622 struct raid_softc *rs;
1623 int unit;
1624 int s;
1625
1626 s=0;
1627 /* s = splbio();*/ /* want to test this */
1628 /* XXX along with the vnode, we also need the softc associated with
1629 * this device.. */
1630
1631 req->queue = queue;
1632
1633 unit = queue->raidPtr->raidid;
1634
1635 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1636
1637 if (unit >= numraid) {
1638 printf("Invalid unit number: %d %d\n", unit, numraid);
1639 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1640 }
1641 rs = &raid_softc[unit];
1642
1643 /* XXX is this the right place? */
1644 disk_busy(&rs->sc_dkdev);
1645
1646 bp = req->bp;
1647 #if 1
1648 /* XXX when there is a physical disk failure, someone is passing us a
1649 * buffer that contains old stuff!! Attempt to deal with this problem
1650 * without taking a performance hit... (not sure where the real bug
1651 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1652
1653 if (bp->b_flags & B_ERROR) {
1654 bp->b_flags &= ~B_ERROR;
1655 }
1656 if (bp->b_error != 0) {
1657 bp->b_error = 0;
1658 }
1659 #endif
1660 raidbp = RAIDGETBUF(rs);
1661
1662 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1663
1664 /*
1665 * context for raidiodone
1666 */
1667 raidbp->rf_obp = bp;
1668 raidbp->req = req;
1669
1670 LIST_INIT(&raidbp->rf_buf.b_dep);
1671
1672 switch (req->type) {
1673 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1674 /* XXX need to do something extra here.. */
1675 /* I'm leaving this in, as I've never actually seen it used,
1676 * and I'd like folks to report it... GO */
1677 printf(("WAKEUP CALLED\n"));
1678 queue->numOutstanding++;
1679
1680 /* XXX need to glue the original buffer into this?? */
1681
1682 KernelWakeupFunc(&raidbp->rf_buf);
1683 break;
1684
1685 case RF_IO_TYPE_READ:
1686 case RF_IO_TYPE_WRITE:
1687
1688 if (req->tracerec) {
1689 RF_ETIMER_START(req->tracerec->timer);
1690 }
1691 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1692 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1693 req->sectorOffset, req->numSector,
1694 req->buf, KernelWakeupFunc, (void *) req,
1695 queue->raidPtr->logBytesPerSector, req->b_proc);
1696
1697 if (rf_debugKernelAccess) {
1698 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1699 (long) bp->b_blkno));
1700 }
1701 queue->numOutstanding++;
1702 queue->last_deq_sector = req->sectorOffset;
1703 /* acc wouldn't have been let in if there were any pending
1704 * reqs at any other priority */
1705 queue->curPriority = req->priority;
1706
1707 db1_printf(("Going for %c to unit %d row %d col %d\n",
1708 req->type, unit, queue->row, queue->col));
1709 db1_printf(("sector %d count %d (%d bytes) %d\n",
1710 (int) req->sectorOffset, (int) req->numSector,
1711 (int) (req->numSector <<
1712 queue->raidPtr->logBytesPerSector),
1713 (int) queue->raidPtr->logBytesPerSector));
1714 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1715 raidbp->rf_buf.b_vp->v_numoutput++;
1716 }
1717 VOP_STRATEGY(&raidbp->rf_buf);
1718
1719 break;
1720
1721 default:
1722 panic("bad req->type in rf_DispatchKernelIO");
1723 }
1724 db1_printf(("Exiting from DispatchKernelIO\n"));
1725 /* splx(s); */ /* want to test this */
1726 return (0);
1727 }
1728 /* this is the callback function associated with a I/O invoked from
1729 kernel code.
1730 */
1731 static void
1732 KernelWakeupFunc(vbp)
1733 struct buf *vbp;
1734 {
1735 RF_DiskQueueData_t *req = NULL;
1736 RF_DiskQueue_t *queue;
1737 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1738 struct buf *bp;
1739 struct raid_softc *rs;
1740 int unit;
1741 register int s;
1742
1743 s = splbio();
1744 db1_printf(("recovering the request queue:\n"));
1745 req = raidbp->req;
1746
1747 bp = raidbp->rf_obp;
1748
1749 queue = (RF_DiskQueue_t *) req->queue;
1750
1751 if (raidbp->rf_buf.b_flags & B_ERROR) {
1752 bp->b_flags |= B_ERROR;
1753 bp->b_error = raidbp->rf_buf.b_error ?
1754 raidbp->rf_buf.b_error : EIO;
1755 }
1756
1757 /* XXX methinks this could be wrong... */
1758 #if 1
1759 bp->b_resid = raidbp->rf_buf.b_resid;
1760 #endif
1761
1762 if (req->tracerec) {
1763 RF_ETIMER_STOP(req->tracerec->timer);
1764 RF_ETIMER_EVAL(req->tracerec->timer);
1765 RF_LOCK_MUTEX(rf_tracing_mutex);
1766 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1767 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1768 req->tracerec->num_phys_ios++;
1769 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1770 }
1771 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1772
1773 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1774
1775
1776 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1777 * ballistic, and mark the component as hosed... */
1778
1779 if (bp->b_flags & B_ERROR) {
1780 /* Mark the disk as dead */
1781 /* but only mark it once... */
1782 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1783 rf_ds_optimal) {
1784 printf("raid%d: IO Error. Marking %s as failed.\n",
1785 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1786 queue->raidPtr->Disks[queue->row][queue->col].status =
1787 rf_ds_failed;
1788 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1789 queue->raidPtr->numFailures++;
1790 queue->raidPtr->numNewFailures++;
1791 /* XXX here we should bump the version number for each component, and write that data out */
1792 } else { /* Disk is already dead... */
1793 /* printf("Disk already marked as dead!\n"); */
1794 }
1795
1796 }
1797
1798 rs = &raid_softc[unit];
1799 RAIDPUTBUF(rs, raidbp);
1800
1801
1802 if (bp->b_resid == 0) {
1803 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1804 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1805 }
1806
1807 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1808 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1809
1810 splx(s);
1811 }
1812
1813
1814
1815 /*
1816 * initialize a buf structure for doing an I/O in the kernel.
1817 */
1818 static void
1819 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1820 logBytesPerSector, b_proc)
1821 struct buf *bp;
1822 struct vnode *b_vp;
1823 unsigned rw_flag;
1824 dev_t dev;
1825 RF_SectorNum_t startSect;
1826 RF_SectorCount_t numSect;
1827 caddr_t buf;
1828 void (*cbFunc) (struct buf *);
1829 void *cbArg;
1830 int logBytesPerSector;
1831 struct proc *b_proc;
1832 {
1833 /* bp->b_flags = B_PHYS | rw_flag; */
1834 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1835 bp->b_bcount = numSect << logBytesPerSector;
1836 bp->b_bufsize = bp->b_bcount;
1837 bp->b_error = 0;
1838 bp->b_dev = dev;
1839 bp->b_un.b_addr = buf;
1840 bp->b_blkno = startSect;
1841 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1842 if (bp->b_bcount == 0) {
1843 panic("bp->b_bcount is zero in InitBP!!\n");
1844 }
1845 bp->b_proc = b_proc;
1846 bp->b_iodone = cbFunc;
1847 bp->b_vp = b_vp;
1848
1849 }
1850
1851 static void
1852 raidgetdefaultlabel(raidPtr, rs, lp)
1853 RF_Raid_t *raidPtr;
1854 struct raid_softc *rs;
1855 struct disklabel *lp;
1856 {
1857 db1_printf(("Building a default label...\n"));
1858 bzero(lp, sizeof(*lp));
1859
1860 /* fabricate a label... */
1861 lp->d_secperunit = raidPtr->totalSectors;
1862 lp->d_secsize = raidPtr->bytesPerSector;
1863 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1864 lp->d_ntracks = 1;
1865 lp->d_ncylinders = raidPtr->totalSectors /
1866 (lp->d_nsectors * lp->d_ntracks);
1867 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1868
1869 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1870 lp->d_type = DTYPE_RAID;
1871 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1872 lp->d_rpm = 3600;
1873 lp->d_interleave = 1;
1874 lp->d_flags = 0;
1875
1876 lp->d_partitions[RAW_PART].p_offset = 0;
1877 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1878 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1879 lp->d_npartitions = RAW_PART + 1;
1880
1881 lp->d_magic = DISKMAGIC;
1882 lp->d_magic2 = DISKMAGIC;
1883 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1884
1885 }
1886 /*
1887 * Read the disklabel from the raid device. If one is not present, fake one
1888 * up.
1889 */
1890 static void
1891 raidgetdisklabel(dev)
1892 dev_t dev;
1893 {
1894 int unit = raidunit(dev);
1895 struct raid_softc *rs = &raid_softc[unit];
1896 char *errstring;
1897 struct disklabel *lp = rs->sc_dkdev.dk_label;
1898 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1899 RF_Raid_t *raidPtr;
1900
1901 db1_printf(("Getting the disklabel...\n"));
1902
1903 bzero(clp, sizeof(*clp));
1904
1905 raidPtr = raidPtrs[unit];
1906
1907 raidgetdefaultlabel(raidPtr, rs, lp);
1908
1909 /*
1910 * Call the generic disklabel extraction routine.
1911 */
1912 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1913 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1914 if (errstring)
1915 raidmakedisklabel(rs);
1916 else {
1917 int i;
1918 struct partition *pp;
1919
1920 /*
1921 * Sanity check whether the found disklabel is valid.
1922 *
1923 * This is necessary since total size of the raid device
1924 * may vary when an interleave is changed even though exactly
1925 * same componets are used, and old disklabel may used
1926 * if that is found.
1927 */
1928 if (lp->d_secperunit != rs->sc_size)
1929 printf("WARNING: %s: "
1930 "total sector size in disklabel (%d) != "
1931 "the size of raid (%ld)\n", rs->sc_xname,
1932 lp->d_secperunit, (long) rs->sc_size);
1933 for (i = 0; i < lp->d_npartitions; i++) {
1934 pp = &lp->d_partitions[i];
1935 if (pp->p_offset + pp->p_size > rs->sc_size)
1936 printf("WARNING: %s: end of partition `%c' "
1937 "exceeds the size of raid (%ld)\n",
1938 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1939 }
1940 }
1941
1942 }
1943 /*
1944 * Take care of things one might want to take care of in the event
1945 * that a disklabel isn't present.
1946 */
1947 static void
1948 raidmakedisklabel(rs)
1949 struct raid_softc *rs;
1950 {
1951 struct disklabel *lp = rs->sc_dkdev.dk_label;
1952 db1_printf(("Making a label..\n"));
1953
1954 /*
1955 * For historical reasons, if there's no disklabel present
1956 * the raw partition must be marked FS_BSDFFS.
1957 */
1958
1959 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1960
1961 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1962
1963 lp->d_checksum = dkcksum(lp);
1964 }
1965 /*
1966 * Lookup the provided name in the filesystem. If the file exists,
1967 * is a valid block device, and isn't being used by anyone else,
1968 * set *vpp to the file's vnode.
1969 * You'll find the original of this in ccd.c
1970 */
1971 int
1972 raidlookup(path, p, vpp)
1973 char *path;
1974 struct proc *p;
1975 struct vnode **vpp; /* result */
1976 {
1977 struct nameidata nd;
1978 struct vnode *vp;
1979 struct vattr va;
1980 int error;
1981
1982 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1983 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1984 #ifdef DEBUG
1985 printf("RAIDframe: vn_open returned %d\n", error);
1986 #endif
1987 return (error);
1988 }
1989 vp = nd.ni_vp;
1990 if (vp->v_usecount > 1) {
1991 VOP_UNLOCK(vp, 0);
1992 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1993 return (EBUSY);
1994 }
1995 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1996 VOP_UNLOCK(vp, 0);
1997 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1998 return (error);
1999 }
2000 /* XXX: eventually we should handle VREG, too. */
2001 if (va.va_type != VBLK) {
2002 VOP_UNLOCK(vp, 0);
2003 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2004 return (ENOTBLK);
2005 }
2006 VOP_UNLOCK(vp, 0);
2007 *vpp = vp;
2008 return (0);
2009 }
2010 /*
2011 * Wait interruptibly for an exclusive lock.
2012 *
2013 * XXX
2014 * Several drivers do this; it should be abstracted and made MP-safe.
2015 * (Hmm... where have we seen this warning before :-> GO )
2016 */
2017 static int
2018 raidlock(rs)
2019 struct raid_softc *rs;
2020 {
2021 int error;
2022
2023 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2024 rs->sc_flags |= RAIDF_WANTED;
2025 if ((error =
2026 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2027 return (error);
2028 }
2029 rs->sc_flags |= RAIDF_LOCKED;
2030 return (0);
2031 }
2032 /*
2033 * Unlock and wake up any waiters.
2034 */
2035 static void
2036 raidunlock(rs)
2037 struct raid_softc *rs;
2038 {
2039
2040 rs->sc_flags &= ~RAIDF_LOCKED;
2041 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2042 rs->sc_flags &= ~RAIDF_WANTED;
2043 wakeup(rs);
2044 }
2045 }
2046
2047
/*
 * Location and size of the component label area on each component.
 * The label read/write routines in this file divide the offset by
 * DEV_BSIZE to obtain the block number, and use the size both for the
 * buffer they allocate and for the transfer byte count.
 */
#define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2050
2051 int
2052 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2053 {
2054 RF_ComponentLabel_t clabel;
2055 raidread_component_label(dev, b_vp, &clabel);
2056 clabel.mod_counter = mod_counter;
2057 clabel.clean = RF_RAID_CLEAN;
2058 raidwrite_component_label(dev, b_vp, &clabel);
2059 return(0);
2060 }
2061
2062
2063 int
2064 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2065 {
2066 RF_ComponentLabel_t clabel;
2067 raidread_component_label(dev, b_vp, &clabel);
2068 clabel.mod_counter = mod_counter;
2069 clabel.clean = RF_RAID_DIRTY;
2070 raidwrite_component_label(dev, b_vp, &clabel);
2071 return(0);
2072 }
2073
2074 /* ARGSUSED */
2075 int
2076 raidread_component_label(dev, b_vp, clabel)
2077 dev_t dev;
2078 struct vnode *b_vp;
2079 RF_ComponentLabel_t *clabel;
2080 {
2081 struct buf *bp;
2082 int error;
2083
2084 /* XXX should probably ensure that we don't try to do this if
2085 someone has changed rf_protected_sectors. */
2086
2087 /* get a block of the appropriate size... */
2088 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2089 bp->b_dev = dev;
2090
2091 /* get our ducks in a row for the read */
2092 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2093 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2094 bp->b_flags = B_BUSY | B_READ;
2095 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2096
2097 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2098
2099 error = biowait(bp);
2100
2101 if (!error) {
2102 memcpy(clabel, bp->b_un.b_addr,
2103 sizeof(RF_ComponentLabel_t));
2104 #if 0
2105 rf_print_component_label( clabel );
2106 #endif
2107 } else {
2108 #if 0
2109 printf("Failed to read RAID component label!\n");
2110 #endif
2111 }
2112
2113 bp->b_flags = B_INVAL | B_AGE;
2114 brelse(bp);
2115 return(error);
2116 }
2117 /* ARGSUSED */
2118 int
2119 raidwrite_component_label(dev, b_vp, clabel)
2120 dev_t dev;
2121 struct vnode *b_vp;
2122 RF_ComponentLabel_t *clabel;
2123 {
2124 struct buf *bp;
2125 int error;
2126
2127 /* get a block of the appropriate size... */
2128 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2129 bp->b_dev = dev;
2130
2131 /* get our ducks in a row for the write */
2132 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2133 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2134 bp->b_flags = B_BUSY | B_WRITE;
2135 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2136
2137 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2138
2139 memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
2140
2141 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2142 error = biowait(bp);
2143 bp->b_flags = B_INVAL | B_AGE;
2144 brelse(bp);
2145 if (error) {
2146 #if 1
2147 printf("Failed to write RAID component info!\n");
2148 #endif
2149 }
2150
2151 return(error);
2152 }
2153
2154 void
2155 rf_markalldirty(raidPtr)
2156 RF_Raid_t *raidPtr;
2157 {
2158 RF_ComponentLabel_t clabel;
2159 int r,c;
2160
2161 raidPtr->mod_counter++;
2162 for (r = 0; r < raidPtr->numRow; r++) {
2163 for (c = 0; c < raidPtr->numCol; c++) {
2164 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2165 raidread_component_label(
2166 raidPtr->Disks[r][c].dev,
2167 raidPtr->raid_cinfo[r][c].ci_vp,
2168 &clabel);
2169 if (clabel.status == rf_ds_spared) {
2170 /* XXX do something special...
2171 but whatever you do, don't
2172 try to access it!! */
2173 } else {
2174 #if 0
2175 clabel.status =
2176 raidPtr->Disks[r][c].status;
2177 raidwrite_component_label(
2178 raidPtr->Disks[r][c].dev,
2179 raidPtr->raid_cinfo[r][c].ci_vp,
2180 &clabel);
2181 #endif
2182 raidmarkdirty(
2183 raidPtr->Disks[r][c].dev,
2184 raidPtr->raid_cinfo[r][c].ci_vp,
2185 raidPtr->mod_counter);
2186 }
2187 }
2188 }
2189 }
2190 /* printf("Component labels marked dirty.\n"); */
2191 #if 0
2192 for( c = 0; c < raidPtr->numSpare ; c++) {
2193 sparecol = raidPtr->numCol + c;
2194 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2195 /*
2196
2197 XXX this is where we get fancy and map this spare
2198 into it's correct spot in the array.
2199
2200 */
2201 /*
2202
2203 we claim this disk is "optimal" if it's
2204 rf_ds_used_spare, as that means it should be
2205 directly substitutable for the disk it replaced.
2206 We note that too...
2207
2208 */
2209
2210 for(i=0;i<raidPtr->numRow;i++) {
2211 for(j=0;j<raidPtr->numCol;j++) {
2212 if ((raidPtr->Disks[i][j].spareRow ==
2213 r) &&
2214 (raidPtr->Disks[i][j].spareCol ==
2215 sparecol)) {
2216 srow = r;
2217 scol = sparecol;
2218 break;
2219 }
2220 }
2221 }
2222
2223 raidread_component_label(
2224 raidPtr->Disks[r][sparecol].dev,
2225 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2226 &clabel);
2227 /* make sure status is noted */
2228 clabel.version = RF_COMPONENT_LABEL_VERSION;
2229 clabel.mod_counter = raidPtr->mod_counter;
2230 clabel.serial_number = raidPtr->serial_number;
2231 clabel.row = srow;
2232 clabel.column = scol;
2233 clabel.num_rows = raidPtr->numRow;
2234 clabel.num_columns = raidPtr->numCol;
2235 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2236 clabel.status = rf_ds_optimal;
2237 raidwrite_component_label(
2238 raidPtr->Disks[r][sparecol].dev,
2239 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2240 &clabel);
2241 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2242 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2243 }
2244 }
2245
2246 #endif
2247 }
2248
2249
2250 void
2251 rf_update_component_labels(raidPtr)
2252 RF_Raid_t *raidPtr;
2253 {
2254 RF_ComponentLabel_t clabel;
2255 int sparecol;
2256 int r,c;
2257 int i,j;
2258 int srow, scol;
2259
2260 srow = -1;
2261 scol = -1;
2262
2263 /* XXX should do extra checks to make sure things really are clean,
2264 rather than blindly setting the clean bit... */
2265
2266 raidPtr->mod_counter++;
2267
2268 for (r = 0; r < raidPtr->numRow; r++) {
2269 for (c = 0; c < raidPtr->numCol; c++) {
2270 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2271 raidread_component_label(
2272 raidPtr->Disks[r][c].dev,
2273 raidPtr->raid_cinfo[r][c].ci_vp,
2274 &clabel);
2275 /* make sure status is noted */
2276 clabel.status = rf_ds_optimal;
2277 /* bump the counter */
2278 clabel.mod_counter = raidPtr->mod_counter;
2279
2280 raidwrite_component_label(
2281 raidPtr->Disks[r][c].dev,
2282 raidPtr->raid_cinfo[r][c].ci_vp,
2283 &clabel);
2284 }
2285 /* else we don't touch it.. */
2286 }
2287 }
2288
2289 for( c = 0; c < raidPtr->numSpare ; c++) {
2290 sparecol = raidPtr->numCol + c;
2291 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2292 /*
2293
2294 we claim this disk is "optimal" if it's
2295 rf_ds_used_spare, as that means it should be
2296 directly substitutable for the disk it replaced.
2297 We note that too...
2298
2299 */
2300
2301 for(i=0;i<raidPtr->numRow;i++) {
2302 for(j=0;j<raidPtr->numCol;j++) {
2303 if ((raidPtr->Disks[i][j].spareRow ==
2304 0) &&
2305 (raidPtr->Disks[i][j].spareCol ==
2306 sparecol)) {
2307 srow = i;
2308 scol = j;
2309 break;
2310 }
2311 }
2312 }
2313
2314 /* XXX shouldn't *really* need this... */
2315 raidread_component_label(
2316 raidPtr->Disks[0][sparecol].dev,
2317 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2318 &clabel);
2319 /* make sure status is noted */
2320
2321 raid_init_component_label(raidPtr, &clabel);
2322
2323 clabel.mod_counter = raidPtr->mod_counter;
2324 clabel.row = srow;
2325 clabel.column = scol;
2326 clabel.status = rf_ds_optimal;
2327
2328 raidwrite_component_label(
2329 raidPtr->Disks[0][sparecol].dev,
2330 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2331 &clabel);
2332 }
2333 }
2334 /* printf("Component labels updated\n"); */
2335 }
2336
2337
2338 void
2339 rf_final_update_component_labels(raidPtr)
2340 RF_Raid_t *raidPtr;
2341 {
2342 RF_ComponentLabel_t clabel;
2343 int sparecol;
2344 int r,c;
2345 int i,j;
2346 int srow, scol;
2347
2348 srow = -1;
2349 scol = -1;
2350
2351 /* XXX should do extra checks to make sure things really are clean,
2352 rather than blindly setting the clean bit... */
2353
2354 raidPtr->mod_counter++;
2355
2356 for (r = 0; r < raidPtr->numRow; r++) {
2357 for (c = 0; c < raidPtr->numCol; c++) {
2358 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2359 raidread_component_label(
2360 raidPtr->Disks[r][c].dev,
2361 raidPtr->raid_cinfo[r][c].ci_vp,
2362 &clabel);
2363 /* make sure status is noted */
2364 clabel.status = rf_ds_optimal;
2365 /* bump the counter */
2366 clabel.mod_counter = raidPtr->mod_counter;
2367
2368 raidwrite_component_label(
2369 raidPtr->Disks[r][c].dev,
2370 raidPtr->raid_cinfo[r][c].ci_vp,
2371 &clabel);
2372 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2373 raidmarkclean(
2374 raidPtr->Disks[r][c].dev,
2375 raidPtr->raid_cinfo[r][c].ci_vp,
2376 raidPtr->mod_counter);
2377 }
2378 }
2379 /* else we don't touch it.. */
2380 }
2381 }
2382
2383 for( c = 0; c < raidPtr->numSpare ; c++) {
2384 sparecol = raidPtr->numCol + c;
2385 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2386 /*
2387
2388 we claim this disk is "optimal" if it's
2389 rf_ds_used_spare, as that means it should be
2390 directly substitutable for the disk it replaced.
2391 We note that too...
2392
2393 */
2394
2395 for(i=0;i<raidPtr->numRow;i++) {
2396 for(j=0;j<raidPtr->numCol;j++) {
2397 if ((raidPtr->Disks[i][j].spareRow ==
2398 0) &&
2399 (raidPtr->Disks[i][j].spareCol ==
2400 sparecol)) {
2401 srow = i;
2402 scol = j;
2403 break;
2404 }
2405 }
2406 }
2407
2408 /* XXX shouldn't *really* need this... */
2409 raidread_component_label(
2410 raidPtr->Disks[0][sparecol].dev,
2411 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2412 &clabel);
2413 /* make sure status is noted */
2414
2415 raid_init_component_label(raidPtr, &clabel);
2416
2417 clabel.mod_counter = raidPtr->mod_counter;
2418 clabel.row = srow;
2419 clabel.column = scol;
2420 clabel.status = rf_ds_optimal;
2421
2422 raidwrite_component_label(
2423 raidPtr->Disks[0][sparecol].dev,
2424 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2425 &clabel);
2426 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2427 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2428 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2429 raidPtr->mod_counter);
2430 }
2431 }
2432 }
2433 /* printf("Component labels updated\n"); */
2434 }
2435
2436 void
2437 rf_close_component(raidPtr, vp, auto_configured)
2438 RF_Raid_t *raidPtr;
2439 struct vnode *vp;
2440 int auto_configured;
2441 {
2442 struct proc *p;
2443
2444 p = raidPtr->engine_thread;
2445
2446 if (vp != NULL) {
2447 if (auto_configured == 1) {
2448 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2449 vput(vp);
2450
2451 } else {
2452 VOP_UNLOCK(vp, 0);
2453 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2454 }
2455 } else {
2456 printf("vnode was NULL\n");
2457 }
2458 }
2459
2460
2461 void
2462 rf_UnconfigureVnodes(raidPtr)
2463 RF_Raid_t *raidPtr;
2464 {
2465 int r,c;
2466 struct proc *p;
2467 struct vnode *vp;
2468 int acd;
2469
2470
2471 /* We take this opportunity to close the vnodes like we should.. */
2472
2473 p = raidPtr->engine_thread;
2474
2475 for (r = 0; r < raidPtr->numRow; r++) {
2476 for (c = 0; c < raidPtr->numCol; c++) {
2477 printf("Closing vnode for row: %d col: %d\n", r, c);
2478 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2479 acd = raidPtr->Disks[r][c].auto_configured;
2480 rf_close_component(raidPtr, vp, acd);
2481 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2482 raidPtr->Disks[r][c].auto_configured = 0;
2483 }
2484 }
2485 for (r = 0; r < raidPtr->numSpare; r++) {
2486 printf("Closing vnode for spare: %d\n", r);
2487 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2488 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2489 rf_close_component(raidPtr, vp, acd);
2490 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2491 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2492 }
2493 }
2494
2495
2496 void
2497 rf_ReconThread(req)
2498 struct rf_recon_req *req;
2499 {
2500 int s;
2501 RF_Raid_t *raidPtr;
2502
2503 s = splbio();
2504 raidPtr = (RF_Raid_t *) req->raidPtr;
2505 raidPtr->recon_in_progress = 1;
2506
2507 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2508 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2509
2510 /* XXX get rid of this! we don't need it at all.. */
2511 RF_Free(req, sizeof(*req));
2512
2513 raidPtr->recon_in_progress = 0;
2514 splx(s);
2515
2516 /* That's all... */
2517 kthread_exit(0); /* does not return */
2518 }
2519
2520 void
2521 rf_RewriteParityThread(raidPtr)
2522 RF_Raid_t *raidPtr;
2523 {
2524 int retcode;
2525 int s;
2526
2527 raidPtr->parity_rewrite_in_progress = 1;
2528 s = splbio();
2529 retcode = rf_RewriteParity(raidPtr);
2530 splx(s);
2531 if (retcode) {
2532 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2533 } else {
2534 /* set the clean bit! If we shutdown correctly,
2535 the clean bit on each component label will get
2536 set */
2537 raidPtr->parity_good = RF_RAID_CLEAN;
2538 }
2539 raidPtr->parity_rewrite_in_progress = 0;
2540
2541 /* That's all... */
2542 kthread_exit(0); /* does not return */
2543 }
2544
2545
2546 void
2547 rf_CopybackThread(raidPtr)
2548 RF_Raid_t *raidPtr;
2549 {
2550 int s;
2551
2552 raidPtr->copyback_in_progress = 1;
2553 s = splbio();
2554 rf_CopybackReconstructedData(raidPtr);
2555 splx(s);
2556 raidPtr->copyback_in_progress = 0;
2557
2558 /* That's all... */
2559 kthread_exit(0); /* does not return */
2560 }
2561
2562
2563 void
2564 rf_ReconstructInPlaceThread(req)
2565 struct rf_recon_req *req;
2566 {
2567 int retcode;
2568 int s;
2569 RF_Raid_t *raidPtr;
2570
2571 s = splbio();
2572 raidPtr = req->raidPtr;
2573 raidPtr->recon_in_progress = 1;
2574 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2575 RF_Free(req, sizeof(*req));
2576 raidPtr->recon_in_progress = 0;
2577 splx(s);
2578
2579 /* That's all... */
2580 kthread_exit(0); /* does not return */
2581 }
2582
/*
 * rf_mountroot_hook: mount-root hook taking the device `dev'.
 * Currently a no-op: the body is empty.
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* nothing to do */
}
2589
2590
2591 RF_AutoConfig_t *
2592 rf_find_raid_components()
2593 {
2594 struct devnametobdevmaj *dtobdm;
2595 struct vnode *vp;
2596 struct disklabel label;
2597 struct device *dv;
2598 char *cd_name;
2599 dev_t dev;
2600 int error;
2601 int i;
2602 int good_one;
2603 RF_ComponentLabel_t *clabel;
2604 RF_AutoConfig_t *ac_list;
2605 RF_AutoConfig_t *ac;
2606
2607
2608 /* initialize the AutoConfig list */
2609 ac_list = NULL;
2610
2611 if (raidautoconfig) {
2612
2613 /* we begin by trolling through *all* the devices on the system */
2614
2615 for (dv = alldevs.tqh_first; dv != NULL;
2616 dv = dv->dv_list.tqe_next) {
2617
2618 /* we are only interested in disks... */
2619 if (dv->dv_class != DV_DISK)
2620 continue;
2621
2622 /* we don't care about floppies... */
2623 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2624 continue;
2625 }
2626
2627 /* need to find the device_name_to_block_device_major stuff */
2628 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2629 dtobdm = dev_name2blk;
2630 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2631 dtobdm++;
2632 }
2633
2634 /* get a vnode for the raw partition of this disk */
2635
2636 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2637 if (bdevvp(dev, &vp))
2638 panic("RAID can't alloc vnode");
2639
2640 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2641
2642 if (error) {
2643 /* "Who cares." Continue looking
2644 for something that exists*/
2645 vput(vp);
2646 continue;
2647 }
2648
2649 /* Ok, the disk exists. Go get the disklabel. */
2650 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2651 FREAD, NOCRED, 0);
2652 if (error) {
2653 /*
2654 * XXX can't happen - open() would
2655 * have errored out (or faked up one)
2656 */
2657 printf("can't get label for dev %s%c (%d)!?!?\n",
2658 dv->dv_xname, 'a' + RAW_PART, error);
2659 }
2660
2661 /* don't need this any more. We'll allocate it again
2662 a little later if we really do... */
2663 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2664 vput(vp);
2665
2666 for (i=0; i < label.d_npartitions; i++) {
2667 /* We only support partitions marked as RAID */
2668 if (label.d_partitions[i].p_fstype != FS_RAID)
2669 continue;
2670
2671 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2672 if (bdevvp(dev, &vp))
2673 panic("RAID can't alloc vnode");
2674
2675 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2676 if (error) {
2677 /* Whatever... */
2678 vput(vp);
2679 continue;
2680 }
2681
2682 good_one = 0;
2683
2684 clabel = (RF_ComponentLabel_t *)
2685 malloc(sizeof(RF_ComponentLabel_t),
2686 M_RAIDFRAME, M_NOWAIT);
2687 if (clabel == NULL) {
2688 /* XXX CLEANUP HERE */
2689 printf("RAID auto config: out of memory!\n");
2690 return(NULL); /* XXX probably should panic? */
2691 }
2692
2693 if (!raidread_component_label(dev, vp, clabel)) {
2694 /* Got the label. Does it look reasonable? */
2695 if (rf_reasonable_label(clabel) &&
2696 (clabel->partitionSize <=
2697 label.d_partitions[i].p_size)) {
2698 #if DEBUG
2699 printf("Component on: %s%c: %d\n",
2700 dv->dv_xname, 'a'+i,
2701 label.d_partitions[i].p_size);
2702 rf_print_component_label(clabel);
2703 #endif
2704 /* if it's reasonable, add it,
2705 else ignore it. */
2706 ac = (RF_AutoConfig_t *)
2707 malloc(sizeof(RF_AutoConfig_t),
2708 M_RAIDFRAME,
2709 M_NOWAIT);
2710 if (ac == NULL) {
2711 /* XXX should panic?? */
2712 return(NULL);
2713 }
2714
2715 sprintf(ac->devname, "%s%c",
2716 dv->dv_xname, 'a'+i);
2717 ac->dev = dev;
2718 ac->vp = vp;
2719 ac->clabel = clabel;
2720 ac->next = ac_list;
2721 ac_list = ac;
2722 good_one = 1;
2723 }
2724 }
2725 if (!good_one) {
2726 /* cleanup */
2727 free(clabel, M_RAIDFRAME);
2728 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2729 vput(vp);
2730 }
2731 }
2732 }
2733 }
2734 return(ac_list);
2735 }
2736
2737 static int
2738 rf_reasonable_label(clabel)
2739 RF_ComponentLabel_t *clabel;
2740 {
2741
2742 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2743 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2744 ((clabel->clean == RF_RAID_CLEAN) ||
2745 (clabel->clean == RF_RAID_DIRTY)) &&
2746 clabel->row >=0 &&
2747 clabel->column >= 0 &&
2748 clabel->num_rows > 0 &&
2749 clabel->num_columns > 0 &&
2750 clabel->row < clabel->num_rows &&
2751 clabel->column < clabel->num_columns &&
2752 clabel->blockSize > 0 &&
2753 clabel->numBlocks > 0) {
2754 /* label looks reasonable enough... */
2755 return(1);
2756 }
2757 return(0);
2758 }
2759
2760
2761 void
2762 rf_print_component_label(clabel)
2763 RF_ComponentLabel_t *clabel;
2764 {
2765 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2766 clabel->row, clabel->column,
2767 clabel->num_rows, clabel->num_columns);
2768 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2769 clabel->version, clabel->serial_number,
2770 clabel->mod_counter);
2771 printf(" Clean: %s Status: %d\n",
2772 clabel->clean ? "Yes" : "No", clabel->status );
2773 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2774 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2775 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2776 (char) clabel->parityConfig, clabel->blockSize,
2777 clabel->numBlocks);
2778 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2779 printf(" Last configured as: raid%d\n", clabel->last_unit );
2780 #if 0
2781 printf(" Config order: %d\n", clabel->config_order);
2782 #endif
2783
2784 }
2785
2786 RF_ConfigSet_t *
2787 rf_create_auto_sets(ac_list)
2788 RF_AutoConfig_t *ac_list;
2789 {
2790 RF_AutoConfig_t *ac;
2791 RF_ConfigSet_t *config_sets;
2792 RF_ConfigSet_t *cset;
2793 RF_AutoConfig_t *ac_next;
2794
2795
2796 config_sets = NULL;
2797
2798 /* Go through the AutoConfig list, and figure out which components
2799 belong to what sets. */
2800 ac = ac_list;
2801 while(ac!=NULL) {
2802 /* we're going to putz with ac->next, so save it here
2803 for use at the end of the loop */
2804 ac_next = ac->next;
2805
2806 if (config_sets == NULL) {
2807 /* will need at least this one... */
2808 config_sets = (RF_ConfigSet_t *)
2809 malloc(sizeof(RF_ConfigSet_t),
2810 M_RAIDFRAME, M_NOWAIT);
2811 if (config_sets == NULL) {
2812 panic("rf_create_auto_sets: No memory!\n");
2813 }
2814 /* this one is easy :) */
2815 config_sets->ac = ac;
2816 config_sets->next = NULL;
2817 config_sets->rootable = 0;
2818 ac->next = NULL;
2819 } else {
2820 /* which set does this component fit into? */
2821 cset = config_sets;
2822 while(cset!=NULL) {
2823 if (rf_does_it_fit(cset, ac)) {
2824 /* looks like it matches */
2825 ac->next = cset->ac;
2826 cset->ac = ac;
2827 break;
2828 }
2829 cset = cset->next;
2830 }
2831 if (cset==NULL) {
2832 /* didn't find a match above... new set..*/
2833 cset = (RF_ConfigSet_t *)
2834 malloc(sizeof(RF_ConfigSet_t),
2835 M_RAIDFRAME, M_NOWAIT);
2836 if (cset == NULL) {
2837 panic("rf_create_auto_sets: No memory!\n");
2838 }
2839 cset->ac = ac;
2840 ac->next = NULL;
2841 cset->next = config_sets;
2842 cset->rootable = 0;
2843 config_sets = cset;
2844 }
2845 }
2846 ac = ac_next;
2847 }
2848
2849
2850 return(config_sets);
2851 }
2852
2853 static int
2854 rf_does_it_fit(cset, ac)
2855 RF_ConfigSet_t *cset;
2856 RF_AutoConfig_t *ac;
2857 {
2858 RF_ComponentLabel_t *clabel1, *clabel2;
2859
2860 /* If this one matches the *first* one in the set, that's good
2861 enough, since the other members of the set would have been
2862 through here too... */
2863 /* note that we are not checking partitionSize here..
2864
2865 Note that we are also not checking the mod_counters here.
2866 If everything else matches execpt the mod_counter, that's
2867 good enough for this test. We will deal with the mod_counters
2868 a little later in the autoconfiguration process.
2869
2870 (clabel1->mod_counter == clabel2->mod_counter) &&
2871
2872 */
2873
2874 clabel1 = cset->ac->clabel;
2875 clabel2 = ac->clabel;
2876 if ((clabel1->version == clabel2->version) &&
2877 (clabel1->serial_number == clabel2->serial_number) &&
2878 (clabel1->num_rows == clabel2->num_rows) &&
2879 (clabel1->num_columns == clabel2->num_columns) &&
2880 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2881 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2882 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2883 (clabel1->parityConfig == clabel2->parityConfig) &&
2884 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2885 (clabel1->blockSize == clabel2->blockSize) &&
2886 (clabel1->numBlocks == clabel2->numBlocks) &&
2887 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2888 (clabel1->root_partition == clabel2->root_partition) &&
2889 (clabel1->last_unit == clabel2->last_unit) &&
2890 (clabel1->config_order == clabel2->config_order)) {
2891 /* if it get's here, it almost *has* to be a match */
2892 } else {
2893 /* it's not consistent with somebody in the set..
2894 punt */
2895 return(0);
2896 }
2897 /* all was fine.. it must fit... */
2898 return(1);
2899 }
2900
2901 int
2902 rf_have_enough_components(cset)
2903 RF_ConfigSet_t *cset;
2904 {
2905 RF_AutoConfig_t *ac;
2906 RF_AutoConfig_t *auto_config;
2907 RF_ComponentLabel_t *clabel;
2908 int r,c;
2909 int num_rows;
2910 int num_cols;
2911 int num_missing;
2912
2913 /* check to see that we have enough 'live' components
2914 of this set. If so, we can configure it if necessary */
2915
2916 num_rows = cset->ac->clabel->num_rows;
2917 num_cols = cset->ac->clabel->num_columns;
2918
2919 /* XXX Check for duplicate components!?!?!? */
2920
2921 num_missing = 0;
2922 auto_config = cset->ac;
2923
2924 for(r=0; r<num_rows; r++) {
2925 for(c=0; c<num_cols; c++) {
2926 ac = auto_config;
2927 while(ac!=NULL) {
2928 if (ac->clabel==NULL) {
2929 /* big-time bad news. */
2930 goto fail;
2931 }
2932 if ((ac->clabel->row == r) &&
2933 (ac->clabel->column == c)) {
2934 /* it's this one... */
2935 #if DEBUG
2936 printf("Found: %s at %d,%d\n",
2937 ac->devname,r,c);
2938 #endif
2939 break;
2940 }
2941 ac=ac->next;
2942 }
2943 if (ac==NULL) {
2944 /* Didn't find one here! */
2945 num_missing++;
2946 }
2947 }
2948 }
2949
2950 clabel = cset->ac->clabel;
2951
2952 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
2953 ((clabel->parityConfig == '1') && (num_missing > 1)) ||
2954 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
2955 ((clabel->parityConfig == '5') && (num_missing > 1))) {
2956 /* XXX this needs to be made *much* more general */
2957 /* Too many failures */
2958 return(0);
2959 }
2960 /* otherwise, all is well, and we've got enough to take a kick
2961 at autoconfiguring this set */
2962 return(1);
2963 fail:
2964 return(0);
2965
2966 }
2967
2968 void
2969 rf_create_configuration(ac,config,raidPtr)
2970 RF_AutoConfig_t *ac;
2971 RF_Config_t *config;
2972 RF_Raid_t *raidPtr;
2973 {
2974 RF_ComponentLabel_t *clabel;
2975
2976 clabel = ac->clabel;
2977
2978 /* 1. Fill in the common stuff */
2979 config->numRow = clabel->num_rows;
2980 config->numCol = clabel->num_columns;
2981 config->numSpare = 0; /* XXX should this be set here? */
2982 config->sectPerSU = clabel->sectPerSU;
2983 config->SUsPerPU = clabel->SUsPerPU;
2984 config->SUsPerRU = clabel->SUsPerRU;
2985 config->parityConfig = clabel->parityConfig;
2986 /* XXX... */
2987 strcpy(config->diskQueueType,"fifo");
2988 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
2989 config->layoutSpecificSize = 0; /* XXX ?? */
2990
2991 while(ac!=NULL) {
2992 /* row/col values will be in range due to the checks
2993 in reasonable_label() */
2994 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
2995 ac->devname);
2996 ac = ac->next;
2997 }
2998
2999 }
3000
3001 int
3002 rf_set_autoconfig(raidPtr, new_value)
3003 RF_Raid_t *raidPtr;
3004 int new_value;
3005 {
3006 RF_ComponentLabel_t clabel;
3007 struct vnode *vp;
3008 dev_t dev;
3009 int row, column;
3010
3011 raidPtr->autoconfigure = new_value;
3012 for(row=0; row<raidPtr->numRow; row++) {
3013 for(column=0; column<raidPtr->numCol; column++) {
3014 dev = raidPtr->Disks[row][column].dev;
3015 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3016 raidread_component_label(dev, vp, &clabel);
3017 clabel.autoconfigure = new_value;
3018 raidwrite_component_label(dev, vp, &clabel);
3019 }
3020 }
3021 return(new_value);
3022 }
3023
3024 int
3025 rf_set_rootpartition(raidPtr, new_value)
3026 RF_Raid_t *raidPtr;
3027 int new_value;
3028 {
3029 RF_ComponentLabel_t clabel;
3030 struct vnode *vp;
3031 dev_t dev;
3032 int row, column;
3033
3034 raidPtr->root_partition = new_value;
3035 for(row=0; row<raidPtr->numRow; row++) {
3036 for(column=0; column<raidPtr->numCol; column++) {
3037 dev = raidPtr->Disks[row][column].dev;
3038 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3039 raidread_component_label(dev, vp, &clabel);
3040 clabel.root_partition = new_value;
3041 raidwrite_component_label(dev, vp, &clabel);
3042 }
3043 }
3044 return(new_value);
3045 }
3046
3047 void
3048 rf_release_all_vps(cset)
3049 RF_ConfigSet_t *cset;
3050 {
3051 RF_AutoConfig_t *ac;
3052
3053 ac = cset->ac;
3054 while(ac!=NULL) {
3055 /* Close the vp, and give it back */
3056 if (ac->vp) {
3057 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3058 vput(ac->vp);
3059 }
3060 ac = ac->next;
3061 }
3062 }
3063
3064
3065 void
3066 rf_cleanup_config_set(cset)
3067 RF_ConfigSet_t *cset;
3068 {
3069 RF_AutoConfig_t *ac;
3070 RF_AutoConfig_t *next_ac;
3071
3072 ac = cset->ac;
3073 while(ac!=NULL) {
3074 next_ac = ac->next;
3075 /* nuke the label */
3076 free(ac->clabel, M_RAIDFRAME);
3077 /* cleanup the config structure */
3078 free(ac, M_RAIDFRAME);
3079 /* "next.." */
3080 ac = next_ac;
3081 }
3082 /* and, finally, nuke the config set */
3083 free(cset, M_RAIDFRAME);
3084 }
3085
3086
3087 void
3088 raid_init_component_label(raidPtr, clabel)
3089 RF_Raid_t *raidPtr;
3090 RF_ComponentLabel_t *clabel;
3091 {
3092 /* current version number */
3093 clabel->version = RF_COMPONENT_LABEL_VERSION;
3094 clabel->serial_number = raidPtr->serial_number;
3095 clabel->mod_counter = raidPtr->mod_counter;
3096 clabel->num_rows = raidPtr->numRow;
3097 clabel->num_columns = raidPtr->numCol;
3098 clabel->clean = RF_RAID_DIRTY; /* not clean */
3099 clabel->status = rf_ds_optimal; /* "It's good!" */
3100
3101 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3102 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3103 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3104
3105 clabel->blockSize = raidPtr->bytesPerSector;
3106 clabel->numBlocks = raidPtr->sectorsPerDisk;
3107
3108 /* XXX not portable */
3109 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3110 clabel->maxOutstanding = raidPtr->maxOutstanding;
3111 clabel->autoconfigure = raidPtr->autoconfigure;
3112 clabel->root_partition = raidPtr->root_partition;
3113 clabel->last_unit = raidPtr->raidid;
3114 clabel->config_order = raidPtr->config_order;
3115 }
3116
3117 int
3118 rf_auto_config_set(cset,unit)
3119 RF_ConfigSet_t *cset;
3120 int *unit;
3121 {
3122 RF_Raid_t *raidPtr;
3123 RF_Config_t *config;
3124 int raidID;
3125 int retcode;
3126
3127 printf("Starting autoconfigure on raid%d\n",raidID);
3128
3129 retcode = 0;
3130 *unit = -1;
3131
3132 /* 1. Create a config structure */
3133
3134 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3135 M_RAIDFRAME,
3136 M_NOWAIT);
3137 if (config==NULL) {
3138 printf("Out of mem!?!?\n");
3139 /* XXX do something more intelligent here. */
3140 return(1);
3141 }
3142 /* XXX raidID needs to be set correctly.. */
3143
3144 /*
3145 2. Figure out what RAID ID this one is supposed to live at
3146 See if we can get the same RAID dev that it was configured
3147 on last time..
3148 */
3149
3150 raidID = cset->ac->clabel->last_unit;
3151 if ((raidID < 0) || (raidID >= numraid)) {
3152 /* let's not wander off into lala land. */
3153 raidID = numraid - 1;
3154 }
3155 if (raidPtrs[raidID]->valid != 0) {
3156
3157 /*
3158 Nope... Go looking for an alternative...
3159 Start high so we don't immediately use raid0 if that's
3160 not taken.
3161 */
3162
3163 for(raidID = numraid; raidID >= 0; raidID--) {
3164 if (raidPtrs[raidID]->valid == 0) {
3165 /* can use this one! */
3166 break;
3167 }
3168 }
3169 }
3170
3171 if (raidID < 0) {
3172 /* punt... */
3173 printf("Unable to auto configure this set!\n");
3174 printf("(Out of RAID devs!)\n");
3175 return(1);
3176 }
3177
3178 raidPtr = raidPtrs[raidID];
3179
3180 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3181 raidPtr->raidid = raidID;
3182 raidPtr->openings = RAIDOUTSTANDING;
3183
3184 /* 3. Build the configuration structure */
3185 rf_create_configuration(cset->ac, config, raidPtr);
3186
3187 /* 4. Do the configuration */
3188 retcode = rf_Configure(raidPtr, config, cset->ac);
3189
3190 if (retcode == 0) {
3191
3192 raidinit(raidPtrs[raidID]);
3193
3194 rf_markalldirty(raidPtrs[raidID]);
3195 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3196 if (cset->ac->clabel->root_partition==1) {
3197 /* everything configured just fine. Make a note
3198 that this set is eligible to be root. */
3199 cset->rootable = 1;
3200 /* XXX do this here? */
3201 raidPtrs[raidID]->root_partition = 1;
3202 }
3203 }
3204
3205 /* 5. Cleanup */
3206 free(config, M_RAIDFRAME);
3207
3208 *unit = raidID;
3209 return(retcode);
3210 }
3211