/*	$NetBSD: rf_netbsdkintf.c,v 1.78 2000/05/19 04:53:25 minoura Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include "raid.h"
139 #include "opt_raid_autoconfig.h"
140 #include "rf_raid.h"
141 #include "rf_raidframe.h"
142 #include "rf_copyback.h"
143 #include "rf_dag.h"
144 #include "rf_dagflags.h"
145 #include "rf_diskqueue.h"
146 #include "rf_acctrace.h"
147 #include "rf_etimer.h"
148 #include "rf_general.h"
149 #include "rf_debugMem.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_debugprint.h"
155 #include "rf_threadstuff.h"
156 #include "rf_configure.h"
157
/*
 * Verbosity of the kernel-interface debug output.  In DEBUG kernels
 * db1_printf() only prints when this is greater than zero.
 */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- level-1 debug printf.
 *
 * The argument list must be wrapped in an extra pair of parentheses.
 * Both variants expand to exactly one statement (do { } while (0)) so
 * that the macro can be used as the unbraced body of an if/else without
 * capturing the `else' or leaving a stray `;' behind.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
165
166 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
167
168 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
169
170 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
171 * spare table */
172 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
173 * installation process */
174
175 /* prototypes */
176 static void KernelWakeupFunc(struct buf * bp);
177 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
178 dev_t dev, RF_SectorNum_t startSect,
179 RF_SectorCount_t numSect, caddr_t buf,
180 void (*cbFunc) (struct buf *), void *cbArg,
181 int logBytesPerSector, struct proc * b_proc);
182 static void raidinit __P((RF_Raid_t *));
183
184 void raidattach __P((int));
185 int raidsize __P((dev_t));
186 int raidopen __P((dev_t, int, int, struct proc *));
187 int raidclose __P((dev_t, int, int, struct proc *));
188 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
189 int raidwrite __P((dev_t, struct uio *, int));
190 int raidread __P((dev_t, struct uio *, int));
191 void raidstrategy __P((struct buf *));
192 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
193
194 /*
195 * Pilfered from ccd.c
196 */
197
198 struct raidbuf {
199 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
200 struct buf *rf_obp; /* ptr. to original I/O buf */
201 int rf_flags; /* misc. flags */
202 RF_DiskQueueData_t *req;/* the request that this was part of.. */
203 };
204
205
206 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
207 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
208
209 /* XXX Not sure if the following should be replacing the raidPtrs above,
210 or if it should be used in conjunction with that...
211 */
212
213 struct raid_softc {
214 int sc_flags; /* flags */
215 int sc_cflags; /* configuration flags */
216 size_t sc_size; /* size of the raid device */
217 char sc_xname[20]; /* XXX external name */
218 struct disk sc_dkdev; /* generic disk device info */
219 struct pool sc_cbufpool; /* component buffer pool */
220 struct buf_queue buf_queue; /* used for the device queue */
221 };
222 /* sc_flags */
223 #define RAIDF_INITED 0x01 /* unit has been initialized */
224 #define RAIDF_WLABEL 0x02 /* label area is writable */
225 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
226 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
227 #define RAIDF_LOCKED 0x80 /* unit is locked */
228
229 #define raidunit(x) DISKUNIT(x)
230 int numraid = 0;
231
232 /*
233 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
234 * Be aware that large numbers can allow the driver to consume a lot of
235 * kernel memory, especially on writes, and in degraded mode reads.
236 *
237 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
238 * a single 64K write will typically require 64K for the old data,
239 * 64K for the old parity, and 64K for the new parity, for a total
240 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
242 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
243 *
244 * Now in degraded mode, for example, a 64K read on the above setup may
245 * require data reconstruction, which will require *all* of the 4 remaining
246 * disks to participate -- 4 * 32K/disk == 128K again.
247 */
248
/* default number of simultaneous I/Os, unless overridden at build time */
#if !defined(RAIDOUTSTANDING)
#define	RAIDOUTSTANDING	6
#endif

/* dev_t of the RAW_PART partition on the same major/unit as `dev' */
#define	RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
255
256 /* declared here, and made public, for the benefit of KVM stuff.. */
257 struct raid_softc *raid_softc;
258
259 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
260 struct disklabel *));
261 static void raidgetdisklabel __P((dev_t));
262 static void raidmakedisklabel __P((struct raid_softc *));
263
264 static int raidlock __P((struct raid_softc *));
265 static void raidunlock __P((struct raid_softc *));
266
267 static void rf_markalldirty __P((RF_Raid_t *));
268 void rf_mountroot_hook __P((struct device *));
269
270 struct device *raidrootdev;
271
272 void rf_ReconThread __P((struct rf_recon_req *));
273 /* XXX what I want is: */
274 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
275 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
276 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
277 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
278 void rf_buildroothack __P((void *));
279
280 RF_AutoConfig_t *rf_find_raid_components __P((void));
281 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
282 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
283 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
284 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
285 RF_Raid_t *));
286 int rf_set_autoconfig __P((RF_Raid_t *, int));
287 int rf_set_rootpartition __P((RF_Raid_t *, int));
288 void rf_release_all_vps __P((RF_ConfigSet_t *));
289 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
290 int rf_have_enough_components __P((RF_ConfigSet_t *));
291 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
292
293 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
294 allow autoconfig to take place.
295 Note that this is overridden by having
296 RAID_AUTOCONFIG as an option in the
297 kernel config file. */
298 extern struct device *booted_device;
299
300 void
301 raidattach(num)
302 int num;
303 {
304 int raidID;
305 int i, rc;
306 RF_AutoConfig_t *ac_list; /* autoconfig list */
307 RF_ConfigSet_t *config_sets;
308
309 #ifdef DEBUG
310 printf("raidattach: Asked for %d units\n", num);
311 #endif
312
313 if (num <= 0) {
314 #ifdef DIAGNOSTIC
315 panic("raidattach: count <= 0");
316 #endif
317 return;
318 }
319 /* This is where all the initialization stuff gets done. */
320
321 numraid = num;
322
323 /* Make some space for requested number of units... */
324
325 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
326 if (raidPtrs == NULL) {
327 panic("raidPtrs is NULL!!\n");
328 }
329
330 rc = rf_mutex_init(&rf_sparet_wait_mutex);
331 if (rc) {
332 RF_PANIC();
333 }
334
335 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
336
337 for (i = 0; i < num; i++)
338 raidPtrs[i] = NULL;
339 rc = rf_BootRaidframe();
340 if (rc == 0)
341 printf("Kernelized RAIDframe activated\n");
342 else
343 panic("Serious error booting RAID!!\n");
344
345 /* put together some datastructures like the CCD device does.. This
346 * lets us lock the device and what-not when it gets opened. */
347
348 raid_softc = (struct raid_softc *)
349 malloc(num * sizeof(struct raid_softc),
350 M_RAIDFRAME, M_NOWAIT);
351 if (raid_softc == NULL) {
352 printf("WARNING: no memory for RAIDframe driver\n");
353 return;
354 }
355
356 bzero(raid_softc, num * sizeof(struct raid_softc));
357
358 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
359 M_RAIDFRAME, M_NOWAIT);
360 if (raidrootdev == NULL) {
361 panic("No memory for RAIDframe driver!!?!?!\n");
362 }
363
364 for (raidID = 0; raidID < num; raidID++) {
365 BUFQ_INIT(&raid_softc[raidID].buf_queue);
366
367 raidrootdev[raidID].dv_class = DV_DISK;
368 raidrootdev[raidID].dv_cfdata = NULL;
369 raidrootdev[raidID].dv_unit = raidID;
370 raidrootdev[raidID].dv_parent = NULL;
371 raidrootdev[raidID].dv_flags = 0;
372 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
373
374 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
375 (RF_Raid_t *));
376 if (raidPtrs[raidID] == NULL) {
377 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
378 numraid = raidID;
379 return;
380 }
381 }
382
383 #if RAID_AUTOCONFIG
384 raidautoconfig = 1;
385 #endif
386
387 if (raidautoconfig) {
388 /* 1. locate all RAID components on the system */
389
390 #if DEBUG
391 printf("Searching for raid components...\n");
392 #endif
393 ac_list = rf_find_raid_components();
394
395 /* 2. sort them into their respective sets */
396
397 config_sets = rf_create_auto_sets(ac_list);
398
399 /* 3. evaluate each set and configure the valid ones
400 This gets done in rf_buildroothack() */
401
402 /* schedule the creation of the thread to do the
403 "/ on RAID" stuff */
404
405 kthread_create(rf_buildroothack,config_sets);
406
407 #if 0
408 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
409 #endif
410 }
411
412 }
413
414 void
415 rf_buildroothack(arg)
416 void *arg;
417 {
418 RF_ConfigSet_t *config_sets = arg;
419 RF_ConfigSet_t *cset;
420 RF_ConfigSet_t *next_cset;
421 int retcode;
422 int raidID;
423 int rootID;
424 int num_root;
425
426 num_root = 0;
427 cset = config_sets;
428 while(cset != NULL ) {
429 next_cset = cset->next;
430 if (rf_have_enough_components(cset) &&
431 cset->ac->clabel->autoconfigure==1) {
432 retcode = rf_auto_config_set(cset,&raidID);
433 if (!retcode) {
434 if (cset->rootable) {
435 rootID = raidID;
436 num_root++;
437 }
438 } else {
439 /* The autoconfig didn't work :( */
440 #if DEBUG
441 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
442 #endif
443 rf_release_all_vps(cset);
444 }
445 } else {
446 /* we're not autoconfiguring this set...
447 release the associated resources */
448 rf_release_all_vps(cset);
449 }
450 /* cleanup */
451 rf_cleanup_config_set(cset);
452 cset = next_cset;
453 }
454 if (boothowto & RB_ASKNAME) {
455 /* We don't auto-config... */
456 } else {
457 /* They didn't ask, and we found something bootable... */
458
459 if (num_root == 1) {
460 booted_device = &raidrootdev[rootID];
461 } else if (num_root > 1) {
462 /* we can't guess.. require the user to answer... */
463 boothowto |= RB_ASKNAME;
464 }
465 }
466 }
467
468
469 int
470 raidsize(dev)
471 dev_t dev;
472 {
473 struct raid_softc *rs;
474 struct disklabel *lp;
475 int part, unit, omask, size;
476
477 unit = raidunit(dev);
478 if (unit >= numraid)
479 return (-1);
480 rs = &raid_softc[unit];
481
482 if ((rs->sc_flags & RAIDF_INITED) == 0)
483 return (-1);
484
485 part = DISKPART(dev);
486 omask = rs->sc_dkdev.dk_openmask & (1 << part);
487 lp = rs->sc_dkdev.dk_label;
488
489 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
490 return (-1);
491
492 if (lp->d_partitions[part].p_fstype != FS_SWAP)
493 size = -1;
494 else
495 size = lp->d_partitions[part].p_size *
496 (lp->d_secsize / DEV_BSIZE);
497
498 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
499 return (-1);
500
501 return (size);
502
503 }
504
505 int
506 raiddump(dev, blkno, va, size)
507 dev_t dev;
508 daddr_t blkno;
509 caddr_t va;
510 size_t size;
511 {
512 /* Not implemented. */
513 return ENXIO;
514 }
515 /* ARGSUSED */
516 int
517 raidopen(dev, flags, fmt, p)
518 dev_t dev;
519 int flags, fmt;
520 struct proc *p;
521 {
522 int unit = raidunit(dev);
523 struct raid_softc *rs;
524 struct disklabel *lp;
525 int part, pmask;
526 int error = 0;
527
528 if (unit >= numraid)
529 return (ENXIO);
530 rs = &raid_softc[unit];
531
532 if ((error = raidlock(rs)) != 0)
533 return (error);
534 lp = rs->sc_dkdev.dk_label;
535
536 part = DISKPART(dev);
537 pmask = (1 << part);
538
539 db1_printf(("Opening raid device number: %d partition: %d\n",
540 unit, part));
541
542
543 if ((rs->sc_flags & RAIDF_INITED) &&
544 (rs->sc_dkdev.dk_openmask == 0))
545 raidgetdisklabel(dev);
546
547 /* make sure that this partition exists */
548
549 if (part != RAW_PART) {
550 db1_printf(("Not a raw partition..\n"));
551 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
552 ((part >= lp->d_npartitions) ||
553 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
554 error = ENXIO;
555 raidunlock(rs);
556 db1_printf(("Bailing out...\n"));
557 return (error);
558 }
559 }
560 /* Prevent this unit from being unconfigured while open. */
561 switch (fmt) {
562 case S_IFCHR:
563 rs->sc_dkdev.dk_copenmask |= pmask;
564 break;
565
566 case S_IFBLK:
567 rs->sc_dkdev.dk_bopenmask |= pmask;
568 break;
569 }
570
571 if ((rs->sc_dkdev.dk_openmask == 0) &&
572 ((rs->sc_flags & RAIDF_INITED) != 0)) {
573 /* First one... mark things as dirty... Note that we *MUST*
574 have done a configure before this. I DO NOT WANT TO BE
575 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
576 THAT THEY BELONG TOGETHER!!!!! */
577 /* XXX should check to see if we're only open for reading
578 here... If so, we needn't do this, but then need some
579 other way of keeping track of what's happened.. */
580
581 rf_markalldirty( raidPtrs[unit] );
582 }
583
584
585 rs->sc_dkdev.dk_openmask =
586 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
587
588 raidunlock(rs);
589
590 return (error);
591
592
593 }
594 /* ARGSUSED */
595 int
596 raidclose(dev, flags, fmt, p)
597 dev_t dev;
598 int flags, fmt;
599 struct proc *p;
600 {
601 int unit = raidunit(dev);
602 struct raid_softc *rs;
603 int error = 0;
604 int part;
605
606 if (unit >= numraid)
607 return (ENXIO);
608 rs = &raid_softc[unit];
609
610 if ((error = raidlock(rs)) != 0)
611 return (error);
612
613 part = DISKPART(dev);
614
615 /* ...that much closer to allowing unconfiguration... */
616 switch (fmt) {
617 case S_IFCHR:
618 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
619 break;
620
621 case S_IFBLK:
622 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
623 break;
624 }
625 rs->sc_dkdev.dk_openmask =
626 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
627
628 if ((rs->sc_dkdev.dk_openmask == 0) &&
629 ((rs->sc_flags & RAIDF_INITED) != 0)) {
630 /* Last one... device is not unconfigured yet.
631 Device shutdown has taken care of setting the
632 clean bits if RAIDF_INITED is not set
633 mark things as clean... */
634 #if 0
635 printf("Last one on raid%d. Updating status.\n",unit);
636 #endif
637 rf_final_update_component_labels( raidPtrs[unit] );
638 }
639
640 raidunlock(rs);
641 return (0);
642
643 }
644
645 void
646 raidstrategy(bp)
647 struct buf *bp;
648 {
649 int s;
650
651 unsigned int raidID = raidunit(bp->b_dev);
652 RF_Raid_t *raidPtr;
653 struct raid_softc *rs = &raid_softc[raidID];
654 struct disklabel *lp;
655 int wlabel;
656
657 if ((rs->sc_flags & RAIDF_INITED) ==0) {
658 bp->b_error = ENXIO;
659 bp->b_flags = B_ERROR;
660 bp->b_resid = bp->b_bcount;
661 biodone(bp);
662 return;
663 }
664 if (raidID >= numraid || !raidPtrs[raidID]) {
665 bp->b_error = ENODEV;
666 bp->b_flags |= B_ERROR;
667 bp->b_resid = bp->b_bcount;
668 biodone(bp);
669 return;
670 }
671 raidPtr = raidPtrs[raidID];
672 if (!raidPtr->valid) {
673 bp->b_error = ENODEV;
674 bp->b_flags |= B_ERROR;
675 bp->b_resid = bp->b_bcount;
676 biodone(bp);
677 return;
678 }
679 if (bp->b_bcount == 0) {
680 db1_printf(("b_bcount is zero..\n"));
681 biodone(bp);
682 return;
683 }
684 lp = rs->sc_dkdev.dk_label;
685
686 /*
687 * Do bounds checking and adjust transfer. If there's an
688 * error, the bounds check will flag that for us.
689 */
690
691 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
692 if (DISKPART(bp->b_dev) != RAW_PART)
693 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
694 db1_printf(("Bounds check failed!!:%d %d\n",
695 (int) bp->b_blkno, (int) wlabel));
696 biodone(bp);
697 return;
698 }
699 s = splbio();
700
701 bp->b_resid = 0;
702
703 /* stuff it onto our queue */
704 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
705
706 raidstart(raidPtrs[raidID]);
707
708 splx(s);
709 }
710 /* ARGSUSED */
711 int
712 raidread(dev, uio, flags)
713 dev_t dev;
714 struct uio *uio;
715 int flags;
716 {
717 int unit = raidunit(dev);
718 struct raid_softc *rs;
719 int part;
720
721 if (unit >= numraid)
722 return (ENXIO);
723 rs = &raid_softc[unit];
724
725 if ((rs->sc_flags & RAIDF_INITED) == 0)
726 return (ENXIO);
727 part = DISKPART(dev);
728
729 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
730
731 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
732
733 }
734 /* ARGSUSED */
735 int
736 raidwrite(dev, uio, flags)
737 dev_t dev;
738 struct uio *uio;
739 int flags;
740 {
741 int unit = raidunit(dev);
742 struct raid_softc *rs;
743
744 if (unit >= numraid)
745 return (ENXIO);
746 rs = &raid_softc[unit];
747
748 if ((rs->sc_flags & RAIDF_INITED) == 0)
749 return (ENXIO);
750 db1_printf(("raidwrite\n"));
751 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
752
753 }
754
755 int
756 raidioctl(dev, cmd, data, flag, p)
757 dev_t dev;
758 u_long cmd;
759 caddr_t data;
760 int flag;
761 struct proc *p;
762 {
763 int unit = raidunit(dev);
764 int error = 0;
765 int part, pmask;
766 struct raid_softc *rs;
767 RF_Config_t *k_cfg, *u_cfg;
768 RF_Raid_t *raidPtr;
769 RF_RaidDisk_t *diskPtr;
770 RF_AccTotals_t *totals;
771 RF_DeviceConfig_t *d_cfg, **ucfgp;
772 u_char *specific_buf;
773 int retcode = 0;
774 int row;
775 int column;
776 struct rf_recon_req *rrcopy, *rr;
777 RF_ComponentLabel_t *clabel;
778 RF_ComponentLabel_t ci_label;
779 RF_ComponentLabel_t **clabel_ptr;
780 RF_SingleComponent_t *sparePtr,*componentPtr;
781 RF_SingleComponent_t hot_spare;
782 RF_SingleComponent_t component;
783 int i, j, d;
784
785 if (unit >= numraid)
786 return (ENXIO);
787 rs = &raid_softc[unit];
788 raidPtr = raidPtrs[unit];
789
790 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
791 (int) DISKPART(dev), (int) unit, (int) cmd));
792
793 /* Must be open for writes for these commands... */
794 switch (cmd) {
795 case DIOCSDINFO:
796 case DIOCWDINFO:
797 case DIOCWLABEL:
798 if ((flag & FWRITE) == 0)
799 return (EBADF);
800 }
801
802 /* Must be initialized for these... */
803 switch (cmd) {
804 case DIOCGDINFO:
805 case DIOCSDINFO:
806 case DIOCWDINFO:
807 case DIOCGPART:
808 case DIOCWLABEL:
809 case DIOCGDEFLABEL:
810 case RAIDFRAME_SHUTDOWN:
811 case RAIDFRAME_REWRITEPARITY:
812 case RAIDFRAME_GET_INFO:
813 case RAIDFRAME_RESET_ACCTOTALS:
814 case RAIDFRAME_GET_ACCTOTALS:
815 case RAIDFRAME_KEEP_ACCTOTALS:
816 case RAIDFRAME_GET_SIZE:
817 case RAIDFRAME_FAIL_DISK:
818 case RAIDFRAME_COPYBACK:
819 case RAIDFRAME_CHECK_RECON_STATUS:
820 case RAIDFRAME_GET_COMPONENT_LABEL:
821 case RAIDFRAME_SET_COMPONENT_LABEL:
822 case RAIDFRAME_ADD_HOT_SPARE:
823 case RAIDFRAME_REMOVE_HOT_SPARE:
824 case RAIDFRAME_INIT_LABELS:
825 case RAIDFRAME_REBUILD_IN_PLACE:
826 case RAIDFRAME_CHECK_PARITY:
827 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
828 case RAIDFRAME_CHECK_COPYBACK_STATUS:
829 case RAIDFRAME_SET_AUTOCONFIG:
830 case RAIDFRAME_SET_ROOT:
831 case RAIDFRAME_DELETE_COMPONENT:
832 case RAIDFRAME_INCORPORATE_HOT_SPARE:
833 if ((rs->sc_flags & RAIDF_INITED) == 0)
834 return (ENXIO);
835 }
836
837 switch (cmd) {
838
839 /* configure the system */
840 case RAIDFRAME_CONFIGURE:
841
842 if (raidPtr->valid) {
843 /* There is a valid RAID set running on this unit! */
844 printf("raid%d: Device already configured!\n",unit);
845 return(EINVAL);
846 }
847
848 /* copy-in the configuration information */
849 /* data points to a pointer to the configuration structure */
850
851 u_cfg = *((RF_Config_t **) data);
852 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
853 if (k_cfg == NULL) {
854 return (ENOMEM);
855 }
856 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
857 sizeof(RF_Config_t));
858 if (retcode) {
859 RF_Free(k_cfg, sizeof(RF_Config_t));
860 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
861 retcode));
862 return (retcode);
863 }
864 /* allocate a buffer for the layout-specific data, and copy it
865 * in */
866 if (k_cfg->layoutSpecificSize) {
867 if (k_cfg->layoutSpecificSize > 10000) {
868 /* sanity check */
869 RF_Free(k_cfg, sizeof(RF_Config_t));
870 return (EINVAL);
871 }
872 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
873 (u_char *));
874 if (specific_buf == NULL) {
875 RF_Free(k_cfg, sizeof(RF_Config_t));
876 return (ENOMEM);
877 }
878 retcode = copyin(k_cfg->layoutSpecific,
879 (caddr_t) specific_buf,
880 k_cfg->layoutSpecificSize);
881 if (retcode) {
882 RF_Free(k_cfg, sizeof(RF_Config_t));
883 RF_Free(specific_buf,
884 k_cfg->layoutSpecificSize);
885 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
886 retcode));
887 return (retcode);
888 }
889 } else
890 specific_buf = NULL;
891 k_cfg->layoutSpecific = specific_buf;
892
893 /* should do some kind of sanity check on the configuration.
894 * Store the sum of all the bytes in the last byte? */
895
896 /* configure the system */
897
898 /*
899 * Clear the entire RAID descriptor, just to make sure
900 * there is no stale data left in the case of a
901 * reconfiguration
902 */
903 bzero((char *) raidPtr, sizeof(RF_Raid_t));
904 raidPtr->raidid = unit;
905
906 retcode = rf_Configure(raidPtr, k_cfg, NULL);
907
908 if (retcode == 0) {
909
910 /* allow this many simultaneous IO's to
911 this RAID device */
912 raidPtr->openings = RAIDOUTSTANDING;
913
914 raidinit(raidPtr);
915 rf_markalldirty(raidPtr);
916 }
917 /* free the buffers. No return code here. */
918 if (k_cfg->layoutSpecificSize) {
919 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
920 }
921 RF_Free(k_cfg, sizeof(RF_Config_t));
922
923 return (retcode);
924
925 /* shutdown the system */
926 case RAIDFRAME_SHUTDOWN:
927
928 if ((error = raidlock(rs)) != 0)
929 return (error);
930
931 /*
932 * If somebody has a partition mounted, we shouldn't
933 * shutdown.
934 */
935
936 part = DISKPART(dev);
937 pmask = (1 << part);
938 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
939 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
940 (rs->sc_dkdev.dk_copenmask & pmask))) {
941 raidunlock(rs);
942 return (EBUSY);
943 }
944
945 retcode = rf_Shutdown(raidPtr);
946
947 pool_destroy(&rs->sc_cbufpool);
948
949 /* It's no longer initialized... */
950 rs->sc_flags &= ~RAIDF_INITED;
951
952 /* Detach the disk. */
953 disk_detach(&rs->sc_dkdev);
954
955 raidunlock(rs);
956
957 return (retcode);
958 case RAIDFRAME_GET_COMPONENT_LABEL:
959 clabel_ptr = (RF_ComponentLabel_t **) data;
960 /* need to read the component label for the disk indicated
961 by row,column in clabel */
962
963 /* For practice, let's get it directly fromdisk, rather
964 than from the in-core copy */
965 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
966 (RF_ComponentLabel_t *));
967 if (clabel == NULL)
968 return (ENOMEM);
969
970 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
971
972 retcode = copyin( *clabel_ptr, clabel,
973 sizeof(RF_ComponentLabel_t));
974
975 if (retcode) {
976 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
977 return(retcode);
978 }
979
980 row = clabel->row;
981 column = clabel->column;
982
983 if ((row < 0) || (row >= raidPtr->numRow) ||
984 (column < 0) || (column >= raidPtr->numCol)) {
985 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
986 return(EINVAL);
987 }
988
989 raidread_component_label(raidPtr->Disks[row][column].dev,
990 raidPtr->raid_cinfo[row][column].ci_vp,
991 clabel );
992
993 retcode = copyout((caddr_t) clabel,
994 (caddr_t) *clabel_ptr,
995 sizeof(RF_ComponentLabel_t));
996 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
997 return (retcode);
998
999 case RAIDFRAME_SET_COMPONENT_LABEL:
1000 clabel = (RF_ComponentLabel_t *) data;
1001
1002 /* XXX check the label for valid stuff... */
1003 /* Note that some things *should not* get modified --
1004 the user should be re-initing the labels instead of
1005 trying to patch things.
1006 */
1007
1008 printf("Got component label:\n");
1009 printf("Version: %d\n",clabel->version);
1010 printf("Serial Number: %d\n",clabel->serial_number);
1011 printf("Mod counter: %d\n",clabel->mod_counter);
1012 printf("Row: %d\n", clabel->row);
1013 printf("Column: %d\n", clabel->column);
1014 printf("Num Rows: %d\n", clabel->num_rows);
1015 printf("Num Columns: %d\n", clabel->num_columns);
1016 printf("Clean: %d\n", clabel->clean);
1017 printf("Status: %d\n", clabel->status);
1018
1019 row = clabel->row;
1020 column = clabel->column;
1021
1022 if ((row < 0) || (row >= raidPtr->numRow) ||
1023 (column < 0) || (column >= raidPtr->numCol)) {
1024 return(EINVAL);
1025 }
1026
1027 /* XXX this isn't allowed to do anything for now :-) */
1028
1029 /* XXX and before it is, we need to fill in the rest
1030 of the fields!?!?!?! */
1031 #if 0
1032 raidwrite_component_label(
1033 raidPtr->Disks[row][column].dev,
1034 raidPtr->raid_cinfo[row][column].ci_vp,
1035 clabel );
1036 #endif
1037 return (0);
1038
1039 case RAIDFRAME_INIT_LABELS:
1040 clabel = (RF_ComponentLabel_t *) data;
1041 /*
1042 we only want the serial number from
1043 the above. We get all the rest of the information
1044 from the config that was used to create this RAID
1045 set.
1046 */
1047
1048 raidPtr->serial_number = clabel->serial_number;
1049
1050 raid_init_component_label(raidPtr, &ci_label);
1051 ci_label.serial_number = clabel->serial_number;
1052
1053 for(row=0;row<raidPtr->numRow;row++) {
1054 ci_label.row = row;
1055 for(column=0;column<raidPtr->numCol;column++) {
1056 diskPtr = &raidPtr->Disks[row][column];
1057 ci_label.partitionSize = diskPtr->partitionSize;
1058 ci_label.column = column;
1059 raidwrite_component_label(
1060 raidPtr->Disks[row][column].dev,
1061 raidPtr->raid_cinfo[row][column].ci_vp,
1062 &ci_label );
1063 }
1064 }
1065
1066 return (retcode);
1067 case RAIDFRAME_SET_AUTOCONFIG:
1068 d = rf_set_autoconfig(raidPtr, *(int *) data);
1069 printf("New autoconfig value is: %d\n", d);
1070 *(int *) data = d;
1071 return (retcode);
1072
1073 case RAIDFRAME_SET_ROOT:
1074 d = rf_set_rootpartition(raidPtr, *(int *) data);
1075 printf("New rootpartition value is: %d\n", d);
1076 *(int *) data = d;
1077 return (retcode);
1078
1079 /* initialize all parity */
1080 case RAIDFRAME_REWRITEPARITY:
1081
1082 if (raidPtr->Layout.map->faultsTolerated == 0) {
1083 /* Parity for RAID 0 is trivially correct */
1084 raidPtr->parity_good = RF_RAID_CLEAN;
1085 return(0);
1086 }
1087
1088 if (raidPtr->parity_rewrite_in_progress == 1) {
1089 /* Re-write is already in progress! */
1090 return(EINVAL);
1091 }
1092
1093 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1094 rf_RewriteParityThread,
1095 raidPtr,"raid_parity");
1096 return (retcode);
1097
1098
1099 case RAIDFRAME_ADD_HOT_SPARE:
1100 sparePtr = (RF_SingleComponent_t *) data;
1101 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1102 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1103 return(retcode);
1104
1105 case RAIDFRAME_REMOVE_HOT_SPARE:
1106 return(retcode);
1107
1108 case RAIDFRAME_DELETE_COMPONENT:
1109 componentPtr = (RF_SingleComponent_t *)data;
1110 memcpy( &component, componentPtr,
1111 sizeof(RF_SingleComponent_t));
1112 retcode = rf_delete_component(raidPtr, &component);
1113 return(retcode);
1114
1115 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1116 componentPtr = (RF_SingleComponent_t *)data;
1117 memcpy( &component, componentPtr,
1118 sizeof(RF_SingleComponent_t));
1119 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1120 return(retcode);
1121
1122 case RAIDFRAME_REBUILD_IN_PLACE:
1123
1124 if (raidPtr->Layout.map->faultsTolerated == 0) {
1125 /* Can't do this on a RAID 0!! */
1126 return(EINVAL);
1127 }
1128
1129 if (raidPtr->recon_in_progress == 1) {
1130 /* a reconstruct is already in progress! */
1131 return(EINVAL);
1132 }
1133
1134 componentPtr = (RF_SingleComponent_t *) data;
1135 memcpy( &component, componentPtr,
1136 sizeof(RF_SingleComponent_t));
1137 row = component.row;
1138 column = component.column;
1139 printf("Rebuild: %d %d\n",row, column);
1140 if ((row < 0) || (row >= raidPtr->numRow) ||
1141 (column < 0) || (column >= raidPtr->numCol)) {
1142 return(EINVAL);
1143 }
1144
1145 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1146 if (rrcopy == NULL)
1147 return(ENOMEM);
1148
1149 rrcopy->raidPtr = (void *) raidPtr;
1150 rrcopy->row = row;
1151 rrcopy->col = column;
1152
1153 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1154 rf_ReconstructInPlaceThread,
1155 rrcopy,"raid_reconip");
1156 return(retcode);
1157
1158 case RAIDFRAME_GET_INFO:
1159 if (!raidPtr->valid)
1160 return (ENODEV);
1161 ucfgp = (RF_DeviceConfig_t **) data;
1162 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1163 (RF_DeviceConfig_t *));
1164 if (d_cfg == NULL)
1165 return (ENOMEM);
1166 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1167 d_cfg->rows = raidPtr->numRow;
1168 d_cfg->cols = raidPtr->numCol;
1169 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1170 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1171 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1172 return (ENOMEM);
1173 }
1174 d_cfg->nspares = raidPtr->numSpare;
1175 if (d_cfg->nspares >= RF_MAX_DISKS) {
1176 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1177 return (ENOMEM);
1178 }
1179 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1180 d = 0;
1181 for (i = 0; i < d_cfg->rows; i++) {
1182 for (j = 0; j < d_cfg->cols; j++) {
1183 d_cfg->devs[d] = raidPtr->Disks[i][j];
1184 d++;
1185 }
1186 }
1187 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1188 d_cfg->spares[i] = raidPtr->Disks[0][j];
1189 }
1190 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1191 sizeof(RF_DeviceConfig_t));
1192 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1193
1194 return (retcode);
1195
1196 case RAIDFRAME_CHECK_PARITY:
1197 *(int *) data = raidPtr->parity_good;
1198 return (0);
1199
1200 case RAIDFRAME_RESET_ACCTOTALS:
1201 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1202 return (0);
1203
1204 case RAIDFRAME_GET_ACCTOTALS:
1205 totals = (RF_AccTotals_t *) data;
1206 *totals = raidPtr->acc_totals;
1207 return (0);
1208
1209 case RAIDFRAME_KEEP_ACCTOTALS:
1210 raidPtr->keep_acc_totals = *(int *)data;
1211 return (0);
1212
1213 case RAIDFRAME_GET_SIZE:
1214 *(int *) data = raidPtr->totalSectors;
1215 return (0);
1216
1217 /* fail a disk & optionally start reconstruction */
1218 case RAIDFRAME_FAIL_DISK:
1219
1220 if (raidPtr->Layout.map->faultsTolerated == 0) {
1221 /* Can't do this on a RAID 0!! */
1222 return(EINVAL);
1223 }
1224
1225 rr = (struct rf_recon_req *) data;
1226
1227 if (rr->row < 0 || rr->row >= raidPtr->numRow
1228 || rr->col < 0 || rr->col >= raidPtr->numCol)
1229 return (EINVAL);
1230
1231 printf("raid%d: Failing the disk: row: %d col: %d\n",
1232 unit, rr->row, rr->col);
1233
1234 /* make a copy of the recon request so that we don't rely on
1235 * the user's buffer */
1236 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1237 if (rrcopy == NULL)
1238 return(ENOMEM);
1239 bcopy(rr, rrcopy, sizeof(*rr));
1240 rrcopy->raidPtr = (void *) raidPtr;
1241
1242 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1243 rf_ReconThread,
1244 rrcopy,"raid_recon");
1245 return (0);
1246
1247 /* invoke a copyback operation after recon on whatever disk
1248 * needs it, if any */
1249 case RAIDFRAME_COPYBACK:
1250
1251 if (raidPtr->Layout.map->faultsTolerated == 0) {
1252 /* This makes no sense on a RAID 0!! */
1253 return(EINVAL);
1254 }
1255
1256 if (raidPtr->copyback_in_progress == 1) {
1257 /* Copyback is already in progress! */
1258 return(EINVAL);
1259 }
1260
1261 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1262 rf_CopybackThread,
1263 raidPtr,"raid_copyback");
1264 return (retcode);
1265
1266 /* return the percentage completion of reconstruction */
1267 case RAIDFRAME_CHECK_RECON_STATUS:
1268 if (raidPtr->Layout.map->faultsTolerated == 0) {
1269 /* This makes no sense on a RAID 0, so tell the
1270 user it's done. */
1271 *(int *) data = 100;
1272 return(0);
1273 }
1274 row = 0; /* XXX we only consider a single row... */
1275 if (raidPtr->status[row] != rf_rs_reconstructing)
1276 *(int *) data = 100;
1277 else
1278 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1279 return (0);
1280
1281 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1282 if (raidPtr->Layout.map->faultsTolerated == 0) {
1283 /* This makes no sense on a RAID 0 */
1284 return(EINVAL);
1285 }
1286 if (raidPtr->parity_rewrite_in_progress == 1) {
1287 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1288 } else {
1289 *(int *) data = 100;
1290 }
1291 return (0);
1292
1293 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1294 if (raidPtr->Layout.map->faultsTolerated == 0) {
1295 /* This makes no sense on a RAID 0 */
1296 return(EINVAL);
1297 }
1298 if (raidPtr->copyback_in_progress == 1) {
1299 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1300 raidPtr->Layout.numStripe;
1301 } else {
1302 *(int *) data = 100;
1303 }
1304 return (0);
1305
1306
1307 /* the sparetable daemon calls this to wait for the kernel to
1308 * need a spare table. this ioctl does not return until a
1309 * spare table is needed. XXX -- calling mpsleep here in the
1310 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1311 * -- I should either compute the spare table in the kernel,
1312 * or have a different -- XXX XXX -- interface (a different
1313 * character device) for delivering the table -- XXX */
1314 #if 0
1315 case RAIDFRAME_SPARET_WAIT:
1316 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1317 while (!rf_sparet_wait_queue)
1318 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1319 waitreq = rf_sparet_wait_queue;
1320 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1321 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1322
1323 /* structure assignment */
1324 *((RF_SparetWait_t *) data) = *waitreq;
1325
1326 RF_Free(waitreq, sizeof(*waitreq));
1327 return (0);
1328
1329 /* wakes up a process waiting on SPARET_WAIT and puts an error
1330 * code in it that will cause the dameon to exit */
1331 case RAIDFRAME_ABORT_SPARET_WAIT:
1332 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1333 waitreq->fcol = -1;
1334 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1335 waitreq->next = rf_sparet_wait_queue;
1336 rf_sparet_wait_queue = waitreq;
1337 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1338 wakeup(&rf_sparet_wait_queue);
1339 return (0);
1340
1341 /* used by the spare table daemon to deliver a spare table
1342 * into the kernel */
1343 case RAIDFRAME_SEND_SPARET:
1344
1345 /* install the spare table */
1346 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1347
1348 /* respond to the requestor. the return status of the spare
1349 * table installation is passed in the "fcol" field */
1350 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1351 waitreq->fcol = retcode;
1352 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1353 waitreq->next = rf_sparet_resp_queue;
1354 rf_sparet_resp_queue = waitreq;
1355 wakeup(&rf_sparet_resp_queue);
1356 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1357
1358 return (retcode);
1359 #endif
1360
1361 default:
1362 break; /* fall through to the os-specific code below */
1363
1364 }
1365
1366 if (!raidPtr->valid)
1367 return (EINVAL);
1368
1369 /*
1370 * Add support for "regular" device ioctls here.
1371 */
1372
1373 switch (cmd) {
1374 case DIOCGDINFO:
1375 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1376 break;
1377
1378 case DIOCGPART:
1379 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1380 ((struct partinfo *) data)->part =
1381 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1382 break;
1383
1384 case DIOCWDINFO:
1385 case DIOCSDINFO:
1386 if ((error = raidlock(rs)) != 0)
1387 return (error);
1388
1389 rs->sc_flags |= RAIDF_LABELLING;
1390
1391 error = setdisklabel(rs->sc_dkdev.dk_label,
1392 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1393 if (error == 0) {
1394 if (cmd == DIOCWDINFO)
1395 error = writedisklabel(RAIDLABELDEV(dev),
1396 raidstrategy, rs->sc_dkdev.dk_label,
1397 rs->sc_dkdev.dk_cpulabel);
1398 }
1399 rs->sc_flags &= ~RAIDF_LABELLING;
1400
1401 raidunlock(rs);
1402
1403 if (error)
1404 return (error);
1405 break;
1406
1407 case DIOCWLABEL:
1408 if (*(int *) data != 0)
1409 rs->sc_flags |= RAIDF_WLABEL;
1410 else
1411 rs->sc_flags &= ~RAIDF_WLABEL;
1412 break;
1413
1414 case DIOCGDEFLABEL:
1415 raidgetdefaultlabel(raidPtr, rs,
1416 (struct disklabel *) data);
1417 break;
1418
1419 default:
1420 retcode = ENOTTY;
1421 }
1422 return (retcode);
1423
1424 }
1425
1426
1427 /* raidinit -- complete the rest of the initialization for the
1428 RAIDframe device. */
1429
1430
1431 static void
1432 raidinit(raidPtr)
1433 RF_Raid_t *raidPtr;
1434 {
1435 struct raid_softc *rs;
1436 int unit;
1437
1438 unit = raidPtr->raidid;
1439
1440 rs = &raid_softc[unit];
1441 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1442 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1443
1444
1445 /* XXX should check return code first... */
1446 rs->sc_flags |= RAIDF_INITED;
1447
1448 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1449
1450 rs->sc_dkdev.dk_name = rs->sc_xname;
1451
1452 /* disk_attach actually creates space for the CPU disklabel, among
1453 * other things, so it's critical to call this *BEFORE* we try putzing
1454 * with disklabels. */
1455
1456 disk_attach(&rs->sc_dkdev);
1457
1458 /* XXX There may be a weird interaction here between this, and
1459 * protectedSectors, as used in RAIDframe. */
1460
1461 rs->sc_size = raidPtr->totalSectors;
1462
1463 }
1464
1465 /* wake up the daemon & tell it to get us a spare table
1466 * XXX
1467 * the entries in the queues should be tagged with the raidPtr
1468 * so that in the extremely rare case that two recons happen at once,
1469 * we know for which device were requesting a spare table
1470 * XXX
1471 *
1472 * XXX This code is not currently used. GO
1473 */
1474 int
1475 rf_GetSpareTableFromDaemon(req)
1476 RF_SparetWait_t *req;
1477 {
1478 int retcode;
1479
1480 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1481 req->next = rf_sparet_wait_queue;
1482 rf_sparet_wait_queue = req;
1483 wakeup(&rf_sparet_wait_queue);
1484
1485 /* mpsleep unlocks the mutex */
1486 while (!rf_sparet_resp_queue) {
1487 tsleep(&rf_sparet_resp_queue, PRIBIO,
1488 "raidframe getsparetable", 0);
1489 }
1490 req = rf_sparet_resp_queue;
1491 rf_sparet_resp_queue = req->next;
1492 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1493
1494 retcode = req->fcol;
1495 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1496 * alloc'd */
1497 return (retcode);
1498 }
1499
1500 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1501 * bp & passes it down.
1502 * any calls originating in the kernel must use non-blocking I/O
1503 * do some extra sanity checking to return "appropriate" error values for
1504 * certain conditions (to make some standard utilities work)
1505 *
1506 * Formerly known as: rf_DoAccessKernel
1507 */
1508 void
1509 raidstart(raidPtr)
1510 RF_Raid_t *raidPtr;
1511 {
1512 RF_SectorCount_t num_blocks, pb, sum;
1513 RF_RaidAddr_t raid_addr;
1514 int retcode;
1515 struct partition *pp;
1516 daddr_t blocknum;
1517 int unit;
1518 struct raid_softc *rs;
1519 int do_async;
1520 struct buf *bp;
1521
1522 unit = raidPtr->raidid;
1523 rs = &raid_softc[unit];
1524
1525 /* quick check to see if anything has died recently */
1526 RF_LOCK_MUTEX(raidPtr->mutex);
1527 if (raidPtr->numNewFailures > 0) {
1528 rf_update_component_labels(raidPtr);
1529 raidPtr->numNewFailures--;
1530 }
1531 RF_UNLOCK_MUTEX(raidPtr->mutex);
1532
1533 /* Check to see if we're at the limit... */
1534 RF_LOCK_MUTEX(raidPtr->mutex);
1535 while (raidPtr->openings > 0) {
1536 RF_UNLOCK_MUTEX(raidPtr->mutex);
1537
1538 /* get the next item, if any, from the queue */
1539 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1540 /* nothing more to do */
1541 return;
1542 }
1543 BUFQ_REMOVE(&rs->buf_queue, bp);
1544
1545 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1546 * partition.. Need to make it absolute to the underlying
1547 * device.. */
1548
1549 blocknum = bp->b_blkno;
1550 if (DISKPART(bp->b_dev) != RAW_PART) {
1551 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1552 blocknum += pp->p_offset;
1553 }
1554
1555 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1556 (int) blocknum));
1557
1558 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1559 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1560
1561 /* *THIS* is where we adjust what block we're going to...
1562 * but DO NOT TOUCH bp->b_blkno!!! */
1563 raid_addr = blocknum;
1564
1565 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1566 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1567 sum = raid_addr + num_blocks + pb;
1568 if (1 || rf_debugKernelAccess) {
1569 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1570 (int) raid_addr, (int) sum, (int) num_blocks,
1571 (int) pb, (int) bp->b_resid));
1572 }
1573 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1574 || (sum < num_blocks) || (sum < pb)) {
1575 bp->b_error = ENOSPC;
1576 bp->b_flags |= B_ERROR;
1577 bp->b_resid = bp->b_bcount;
1578 biodone(bp);
1579 RF_LOCK_MUTEX(raidPtr->mutex);
1580 continue;
1581 }
1582 /*
1583 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1584 */
1585
1586 if (bp->b_bcount & raidPtr->sectorMask) {
1587 bp->b_error = EINVAL;
1588 bp->b_flags |= B_ERROR;
1589 bp->b_resid = bp->b_bcount;
1590 biodone(bp);
1591 RF_LOCK_MUTEX(raidPtr->mutex);
1592 continue;
1593
1594 }
1595 db1_printf(("Calling DoAccess..\n"));
1596
1597
1598 RF_LOCK_MUTEX(raidPtr->mutex);
1599 raidPtr->openings--;
1600 RF_UNLOCK_MUTEX(raidPtr->mutex);
1601
1602 /*
1603 * Everything is async.
1604 */
1605 do_async = 1;
1606
1607 /* don't ever condition on bp->b_flags & B_WRITE.
1608 * always condition on B_READ instead */
1609
1610 /* XXX we're still at splbio() here... do we *really*
1611 need to be? */
1612
1613
1614 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1615 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1616 do_async, raid_addr, num_blocks,
1617 bp->b_un.b_addr, bp, NULL, NULL,
1618 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1619
1620
1621 RF_LOCK_MUTEX(raidPtr->mutex);
1622 }
1623 RF_UNLOCK_MUTEX(raidPtr->mutex);
1624 }
1625
1626
1627
1628
1629 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1630
1631 int
1632 rf_DispatchKernelIO(queue, req)
1633 RF_DiskQueue_t *queue;
1634 RF_DiskQueueData_t *req;
1635 {
1636 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1637 struct buf *bp;
1638 struct raidbuf *raidbp = NULL;
1639 struct raid_softc *rs;
1640 int unit;
1641 int s;
1642
1643 s=0;
1644 /* s = splbio();*/ /* want to test this */
1645 /* XXX along with the vnode, we also need the softc associated with
1646 * this device.. */
1647
1648 req->queue = queue;
1649
1650 unit = queue->raidPtr->raidid;
1651
1652 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1653
1654 if (unit >= numraid) {
1655 printf("Invalid unit number: %d %d\n", unit, numraid);
1656 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1657 }
1658 rs = &raid_softc[unit];
1659
1660 /* XXX is this the right place? */
1661 disk_busy(&rs->sc_dkdev);
1662
1663 bp = req->bp;
1664 #if 1
1665 /* XXX when there is a physical disk failure, someone is passing us a
1666 * buffer that contains old stuff!! Attempt to deal with this problem
1667 * without taking a performance hit... (not sure where the real bug
1668 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1669
1670 if (bp->b_flags & B_ERROR) {
1671 bp->b_flags &= ~B_ERROR;
1672 }
1673 if (bp->b_error != 0) {
1674 bp->b_error = 0;
1675 }
1676 #endif
1677 raidbp = RAIDGETBUF(rs);
1678
1679 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1680
1681 /*
1682 * context for raidiodone
1683 */
1684 raidbp->rf_obp = bp;
1685 raidbp->req = req;
1686
1687 LIST_INIT(&raidbp->rf_buf.b_dep);
1688
1689 switch (req->type) {
1690 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1691 /* XXX need to do something extra here.. */
1692 /* I'm leaving this in, as I've never actually seen it used,
1693 * and I'd like folks to report it... GO */
1694 printf(("WAKEUP CALLED\n"));
1695 queue->numOutstanding++;
1696
1697 /* XXX need to glue the original buffer into this?? */
1698
1699 KernelWakeupFunc(&raidbp->rf_buf);
1700 break;
1701
1702 case RF_IO_TYPE_READ:
1703 case RF_IO_TYPE_WRITE:
1704
1705 if (req->tracerec) {
1706 RF_ETIMER_START(req->tracerec->timer);
1707 }
1708 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1709 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1710 req->sectorOffset, req->numSector,
1711 req->buf, KernelWakeupFunc, (void *) req,
1712 queue->raidPtr->logBytesPerSector, req->b_proc);
1713
1714 if (rf_debugKernelAccess) {
1715 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1716 (long) bp->b_blkno));
1717 }
1718 queue->numOutstanding++;
1719 queue->last_deq_sector = req->sectorOffset;
1720 /* acc wouldn't have been let in if there were any pending
1721 * reqs at any other priority */
1722 queue->curPriority = req->priority;
1723
1724 db1_printf(("Going for %c to unit %d row %d col %d\n",
1725 req->type, unit, queue->row, queue->col));
1726 db1_printf(("sector %d count %d (%d bytes) %d\n",
1727 (int) req->sectorOffset, (int) req->numSector,
1728 (int) (req->numSector <<
1729 queue->raidPtr->logBytesPerSector),
1730 (int) queue->raidPtr->logBytesPerSector));
1731 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1732 raidbp->rf_buf.b_vp->v_numoutput++;
1733 }
1734 VOP_STRATEGY(&raidbp->rf_buf);
1735
1736 break;
1737
1738 default:
1739 panic("bad req->type in rf_DispatchKernelIO");
1740 }
1741 db1_printf(("Exiting from DispatchKernelIO\n"));
1742 /* splx(s); */ /* want to test this */
1743 return (0);
1744 }
1745 /* this is the callback function associated with a I/O invoked from
1746 kernel code.
1747 */
1748 static void
1749 KernelWakeupFunc(vbp)
1750 struct buf *vbp;
1751 {
1752 RF_DiskQueueData_t *req = NULL;
1753 RF_DiskQueue_t *queue;
1754 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1755 struct buf *bp;
1756 struct raid_softc *rs;
1757 int unit;
1758 int s;
1759
1760 s = splbio();
1761 db1_printf(("recovering the request queue:\n"));
1762 req = raidbp->req;
1763
1764 bp = raidbp->rf_obp;
1765
1766 queue = (RF_DiskQueue_t *) req->queue;
1767
1768 if (raidbp->rf_buf.b_flags & B_ERROR) {
1769 bp->b_flags |= B_ERROR;
1770 bp->b_error = raidbp->rf_buf.b_error ?
1771 raidbp->rf_buf.b_error : EIO;
1772 }
1773
1774 /* XXX methinks this could be wrong... */
1775 #if 1
1776 bp->b_resid = raidbp->rf_buf.b_resid;
1777 #endif
1778
1779 if (req->tracerec) {
1780 RF_ETIMER_STOP(req->tracerec->timer);
1781 RF_ETIMER_EVAL(req->tracerec->timer);
1782 RF_LOCK_MUTEX(rf_tracing_mutex);
1783 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1784 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1785 req->tracerec->num_phys_ios++;
1786 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1787 }
1788 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1789
1790 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1791
1792
1793 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1794 * ballistic, and mark the component as hosed... */
1795
1796 if (bp->b_flags & B_ERROR) {
1797 /* Mark the disk as dead */
1798 /* but only mark it once... */
1799 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1800 rf_ds_optimal) {
1801 printf("raid%d: IO Error. Marking %s as failed.\n",
1802 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1803 queue->raidPtr->Disks[queue->row][queue->col].status =
1804 rf_ds_failed;
1805 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1806 queue->raidPtr->numFailures++;
1807 queue->raidPtr->numNewFailures++;
1808 /* XXX here we should bump the version number for each component, and write that data out */
1809 } else { /* Disk is already dead... */
1810 /* printf("Disk already marked as dead!\n"); */
1811 }
1812
1813 }
1814
1815 rs = &raid_softc[unit];
1816 RAIDPUTBUF(rs, raidbp);
1817
1818
1819 if (bp->b_resid == 0) {
1820 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1821 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1822 }
1823
1824 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1825 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1826
1827 splx(s);
1828 }
1829
1830
1831
1832 /*
1833 * initialize a buf structure for doing an I/O in the kernel.
1834 */
1835 static void
1836 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1837 logBytesPerSector, b_proc)
1838 struct buf *bp;
1839 struct vnode *b_vp;
1840 unsigned rw_flag;
1841 dev_t dev;
1842 RF_SectorNum_t startSect;
1843 RF_SectorCount_t numSect;
1844 caddr_t buf;
1845 void (*cbFunc) (struct buf *);
1846 void *cbArg;
1847 int logBytesPerSector;
1848 struct proc *b_proc;
1849 {
1850 /* bp->b_flags = B_PHYS | rw_flag; */
1851 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1852 bp->b_bcount = numSect << logBytesPerSector;
1853 bp->b_bufsize = bp->b_bcount;
1854 bp->b_error = 0;
1855 bp->b_dev = dev;
1856 bp->b_un.b_addr = buf;
1857 bp->b_blkno = startSect;
1858 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1859 if (bp->b_bcount == 0) {
1860 panic("bp->b_bcount is zero in InitBP!!\n");
1861 }
1862 bp->b_proc = b_proc;
1863 bp->b_iodone = cbFunc;
1864 bp->b_vp = b_vp;
1865
1866 }
1867
1868 static void
1869 raidgetdefaultlabel(raidPtr, rs, lp)
1870 RF_Raid_t *raidPtr;
1871 struct raid_softc *rs;
1872 struct disklabel *lp;
1873 {
1874 db1_printf(("Building a default label...\n"));
1875 bzero(lp, sizeof(*lp));
1876
1877 /* fabricate a label... */
1878 lp->d_secperunit = raidPtr->totalSectors;
1879 lp->d_secsize = raidPtr->bytesPerSector;
1880 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1881 lp->d_ntracks = 1;
1882 lp->d_ncylinders = raidPtr->totalSectors /
1883 (lp->d_nsectors * lp->d_ntracks);
1884 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1885
1886 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1887 lp->d_type = DTYPE_RAID;
1888 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1889 lp->d_rpm = 3600;
1890 lp->d_interleave = 1;
1891 lp->d_flags = 0;
1892
1893 lp->d_partitions[RAW_PART].p_offset = 0;
1894 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1895 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1896 lp->d_npartitions = RAW_PART + 1;
1897
1898 lp->d_magic = DISKMAGIC;
1899 lp->d_magic2 = DISKMAGIC;
1900 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1901
1902 }
1903 /*
1904 * Read the disklabel from the raid device. If one is not present, fake one
1905 * up.
1906 */
1907 static void
1908 raidgetdisklabel(dev)
1909 dev_t dev;
1910 {
1911 int unit = raidunit(dev);
1912 struct raid_softc *rs = &raid_softc[unit];
1913 char *errstring;
1914 struct disklabel *lp = rs->sc_dkdev.dk_label;
1915 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1916 RF_Raid_t *raidPtr;
1917
1918 db1_printf(("Getting the disklabel...\n"));
1919
1920 bzero(clp, sizeof(*clp));
1921
1922 raidPtr = raidPtrs[unit];
1923
1924 raidgetdefaultlabel(raidPtr, rs, lp);
1925
1926 /*
1927 * Call the generic disklabel extraction routine.
1928 */
1929 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1930 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1931 if (errstring)
1932 raidmakedisklabel(rs);
1933 else {
1934 int i;
1935 struct partition *pp;
1936
1937 /*
1938 * Sanity check whether the found disklabel is valid.
1939 *
1940 * This is necessary since total size of the raid device
1941 * may vary when an interleave is changed even though exactly
1942 * same componets are used, and old disklabel may used
1943 * if that is found.
1944 */
1945 if (lp->d_secperunit != rs->sc_size)
1946 printf("WARNING: %s: "
1947 "total sector size in disklabel (%d) != "
1948 "the size of raid (%ld)\n", rs->sc_xname,
1949 lp->d_secperunit, (long) rs->sc_size);
1950 for (i = 0; i < lp->d_npartitions; i++) {
1951 pp = &lp->d_partitions[i];
1952 if (pp->p_offset + pp->p_size > rs->sc_size)
1953 printf("WARNING: %s: end of partition `%c' "
1954 "exceeds the size of raid (%ld)\n",
1955 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1956 }
1957 }
1958
1959 }
1960 /*
1961 * Take care of things one might want to take care of in the event
1962 * that a disklabel isn't present.
1963 */
1964 static void
1965 raidmakedisklabel(rs)
1966 struct raid_softc *rs;
1967 {
1968 struct disklabel *lp = rs->sc_dkdev.dk_label;
1969 db1_printf(("Making a label..\n"));
1970
1971 /*
1972 * For historical reasons, if there's no disklabel present
1973 * the raw partition must be marked FS_BSDFFS.
1974 */
1975
1976 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1977
1978 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1979
1980 lp->d_checksum = dkcksum(lp);
1981 }
1982 /*
1983 * Lookup the provided name in the filesystem. If the file exists,
1984 * is a valid block device, and isn't being used by anyone else,
1985 * set *vpp to the file's vnode.
1986 * You'll find the original of this in ccd.c
1987 */
1988 int
1989 raidlookup(path, p, vpp)
1990 char *path;
1991 struct proc *p;
1992 struct vnode **vpp; /* result */
1993 {
1994 struct nameidata nd;
1995 struct vnode *vp;
1996 struct vattr va;
1997 int error;
1998
1999 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2000 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2001 #ifdef DEBUG
2002 printf("RAIDframe: vn_open returned %d\n", error);
2003 #endif
2004 return (error);
2005 }
2006 vp = nd.ni_vp;
2007 if (vp->v_usecount > 1) {
2008 VOP_UNLOCK(vp, 0);
2009 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2010 return (EBUSY);
2011 }
2012 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2013 VOP_UNLOCK(vp, 0);
2014 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2015 return (error);
2016 }
2017 /* XXX: eventually we should handle VREG, too. */
2018 if (va.va_type != VBLK) {
2019 VOP_UNLOCK(vp, 0);
2020 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2021 return (ENOTBLK);
2022 }
2023 VOP_UNLOCK(vp, 0);
2024 *vpp = vp;
2025 return (0);
2026 }
2027 /*
2028 * Wait interruptibly for an exclusive lock.
2029 *
2030 * XXX
2031 * Several drivers do this; it should be abstracted and made MP-safe.
2032 * (Hmm... where have we seen this warning before :-> GO )
2033 */
2034 static int
2035 raidlock(rs)
2036 struct raid_softc *rs;
2037 {
2038 int error;
2039
2040 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2041 rs->sc_flags |= RAIDF_WANTED;
2042 if ((error =
2043 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2044 return (error);
2045 }
2046 rs->sc_flags |= RAIDF_LOCKED;
2047 return (0);
2048 }
2049 /*
2050 * Unlock and wake up any waiters.
2051 */
2052 static void
2053 raidunlock(rs)
2054 struct raid_softc *rs;
2055 {
2056
2057 rs->sc_flags &= ~RAIDF_LOCKED;
2058 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2059 rs->sc_flags &= ~RAIDF_WANTED;
2060 wakeup(rs);
2061 }
2062 }
2063
2064
/*
 * Placement of the RAIDframe component label on a component device, as
 * used by raidread_component_label() and raidwrite_component_label():
 * the label I/O starts at block RF_COMPONENT_INFO_OFFSET / DEV_BSIZE
 * and transfers RF_COMPONENT_INFO_SIZE bytes.
 */
2065 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2066 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2067
2068 int
2069 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2070 {
2071 RF_ComponentLabel_t clabel;
2072 raidread_component_label(dev, b_vp, &clabel);
2073 clabel.mod_counter = mod_counter;
2074 clabel.clean = RF_RAID_CLEAN;
2075 raidwrite_component_label(dev, b_vp, &clabel);
2076 return(0);
2077 }
2078
2079
2080 int
2081 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2082 {
2083 RF_ComponentLabel_t clabel;
2084 raidread_component_label(dev, b_vp, &clabel);
2085 clabel.mod_counter = mod_counter;
2086 clabel.clean = RF_RAID_DIRTY;
2087 raidwrite_component_label(dev, b_vp, &clabel);
2088 return(0);
2089 }
2090
2091 /* ARGSUSED */
2092 int
2093 raidread_component_label(dev, b_vp, clabel)
2094 dev_t dev;
2095 struct vnode *b_vp;
2096 RF_ComponentLabel_t *clabel;
2097 {
2098 struct buf *bp;
2099 int error;
2100
2101 /* XXX should probably ensure that we don't try to do this if
2102 someone has changed rf_protected_sectors. */
2103
2104 /* get a block of the appropriate size... */
2105 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2106 bp->b_dev = dev;
2107
2108 /* get our ducks in a row for the read */
2109 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2110 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2111 bp->b_flags = B_BUSY | B_READ;
2112 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2113
2114 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2115
2116 error = biowait(bp);
2117
2118 if (!error) {
2119 memcpy(clabel, bp->b_un.b_addr,
2120 sizeof(RF_ComponentLabel_t));
2121 #if 0
2122 rf_print_component_label( clabel );
2123 #endif
2124 } else {
2125 #if 0
2126 printf("Failed to read RAID component label!\n");
2127 #endif
2128 }
2129
2130 bp->b_flags = B_INVAL | B_AGE;
2131 brelse(bp);
2132 return(error);
2133 }
2134 /* ARGSUSED */
2135 int
2136 raidwrite_component_label(dev, b_vp, clabel)
2137 dev_t dev;
2138 struct vnode *b_vp;
2139 RF_ComponentLabel_t *clabel;
2140 {
2141 struct buf *bp;
2142 int error;
2143
2144 /* get a block of the appropriate size... */
2145 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2146 bp->b_dev = dev;
2147
2148 /* get our ducks in a row for the write */
2149 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2150 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2151 bp->b_flags = B_BUSY | B_WRITE;
2152 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2153
2154 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2155
2156 memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
2157
2158 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2159 error = biowait(bp);
2160 bp->b_flags = B_INVAL | B_AGE;
2161 brelse(bp);
2162 if (error) {
2163 #if 1
2164 printf("Failed to write RAID component info!\n");
2165 #endif
2166 }
2167
2168 return(error);
2169 }
2170
2171 void
2172 rf_markalldirty(raidPtr)
2173 RF_Raid_t *raidPtr;
2174 {
2175 RF_ComponentLabel_t clabel;
2176 int r,c;
2177
2178 raidPtr->mod_counter++;
2179 for (r = 0; r < raidPtr->numRow; r++) {
2180 for (c = 0; c < raidPtr->numCol; c++) {
2181 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2182 raidread_component_label(
2183 raidPtr->Disks[r][c].dev,
2184 raidPtr->raid_cinfo[r][c].ci_vp,
2185 &clabel);
2186 if (clabel.status == rf_ds_spared) {
2187 /* XXX do something special...
2188 but whatever you do, don't
2189 try to access it!! */
2190 } else {
2191 #if 0
2192 clabel.status =
2193 raidPtr->Disks[r][c].status;
2194 raidwrite_component_label(
2195 raidPtr->Disks[r][c].dev,
2196 raidPtr->raid_cinfo[r][c].ci_vp,
2197 &clabel);
2198 #endif
2199 raidmarkdirty(
2200 raidPtr->Disks[r][c].dev,
2201 raidPtr->raid_cinfo[r][c].ci_vp,
2202 raidPtr->mod_counter);
2203 }
2204 }
2205 }
2206 }
2207 /* printf("Component labels marked dirty.\n"); */
2208 #if 0
2209 for( c = 0; c < raidPtr->numSpare ; c++) {
2210 sparecol = raidPtr->numCol + c;
2211 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2212 /*
2213
2214 XXX this is where we get fancy and map this spare
2215 into it's correct spot in the array.
2216
2217 */
2218 /*
2219
2220 we claim this disk is "optimal" if it's
2221 rf_ds_used_spare, as that means it should be
2222 directly substitutable for the disk it replaced.
2223 We note that too...
2224
2225 */
2226
2227 for(i=0;i<raidPtr->numRow;i++) {
2228 for(j=0;j<raidPtr->numCol;j++) {
2229 if ((raidPtr->Disks[i][j].spareRow ==
2230 r) &&
2231 (raidPtr->Disks[i][j].spareCol ==
2232 sparecol)) {
2233 srow = r;
2234 scol = sparecol;
2235 break;
2236 }
2237 }
2238 }
2239
2240 raidread_component_label(
2241 raidPtr->Disks[r][sparecol].dev,
2242 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2243 &clabel);
2244 /* make sure status is noted */
2245 clabel.version = RF_COMPONENT_LABEL_VERSION;
2246 clabel.mod_counter = raidPtr->mod_counter;
2247 clabel.serial_number = raidPtr->serial_number;
2248 clabel.row = srow;
2249 clabel.column = scol;
2250 clabel.num_rows = raidPtr->numRow;
2251 clabel.num_columns = raidPtr->numCol;
2252 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2253 clabel.status = rf_ds_optimal;
2254 raidwrite_component_label(
2255 raidPtr->Disks[r][sparecol].dev,
2256 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2257 &clabel);
2258 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2259 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2260 }
2261 }
2262
2263 #endif
2264 }
2265
2266
2267 void
2268 rf_update_component_labels(raidPtr)
2269 RF_Raid_t *raidPtr;
2270 {
2271 RF_ComponentLabel_t clabel;
2272 int sparecol;
2273 int r,c;
2274 int i,j;
2275 int srow, scol;
2276
2277 srow = -1;
2278 scol = -1;
2279
2280 /* XXX should do extra checks to make sure things really are clean,
2281 rather than blindly setting the clean bit... */
2282
2283 raidPtr->mod_counter++;
2284
2285 for (r = 0; r < raidPtr->numRow; r++) {
2286 for (c = 0; c < raidPtr->numCol; c++) {
2287 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2288 raidread_component_label(
2289 raidPtr->Disks[r][c].dev,
2290 raidPtr->raid_cinfo[r][c].ci_vp,
2291 &clabel);
2292 /* make sure status is noted */
2293 clabel.status = rf_ds_optimal;
2294 /* bump the counter */
2295 clabel.mod_counter = raidPtr->mod_counter;
2296
2297 raidwrite_component_label(
2298 raidPtr->Disks[r][c].dev,
2299 raidPtr->raid_cinfo[r][c].ci_vp,
2300 &clabel);
2301 }
2302 /* else we don't touch it.. */
2303 }
2304 }
2305
2306 for( c = 0; c < raidPtr->numSpare ; c++) {
2307 sparecol = raidPtr->numCol + c;
2308 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2309 /*
2310
2311 we claim this disk is "optimal" if it's
2312 rf_ds_used_spare, as that means it should be
2313 directly substitutable for the disk it replaced.
2314 We note that too...
2315
2316 */
2317
2318 for(i=0;i<raidPtr->numRow;i++) {
2319 for(j=0;j<raidPtr->numCol;j++) {
2320 if ((raidPtr->Disks[i][j].spareRow ==
2321 0) &&
2322 (raidPtr->Disks[i][j].spareCol ==
2323 sparecol)) {
2324 srow = i;
2325 scol = j;
2326 break;
2327 }
2328 }
2329 }
2330
2331 /* XXX shouldn't *really* need this... */
2332 raidread_component_label(
2333 raidPtr->Disks[0][sparecol].dev,
2334 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2335 &clabel);
2336 /* make sure status is noted */
2337
2338 raid_init_component_label(raidPtr, &clabel);
2339
2340 clabel.mod_counter = raidPtr->mod_counter;
2341 clabel.row = srow;
2342 clabel.column = scol;
2343 clabel.status = rf_ds_optimal;
2344
2345 raidwrite_component_label(
2346 raidPtr->Disks[0][sparecol].dev,
2347 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2348 &clabel);
2349 }
2350 }
2351 /* printf("Component labels updated\n"); */
2352 }
2353
2354
2355 void
2356 rf_final_update_component_labels(raidPtr)
2357 RF_Raid_t *raidPtr;
2358 {
2359 RF_ComponentLabel_t clabel;
2360 int sparecol;
2361 int r,c;
2362 int i,j;
2363 int srow, scol;
2364
2365 srow = -1;
2366 scol = -1;
2367
2368 /* XXX should do extra checks to make sure things really are clean,
2369 rather than blindly setting the clean bit... */
2370
2371 raidPtr->mod_counter++;
2372
2373 for (r = 0; r < raidPtr->numRow; r++) {
2374 for (c = 0; c < raidPtr->numCol; c++) {
2375 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2376 raidread_component_label(
2377 raidPtr->Disks[r][c].dev,
2378 raidPtr->raid_cinfo[r][c].ci_vp,
2379 &clabel);
2380 /* make sure status is noted */
2381 clabel.status = rf_ds_optimal;
2382 /* bump the counter */
2383 clabel.mod_counter = raidPtr->mod_counter;
2384
2385 raidwrite_component_label(
2386 raidPtr->Disks[r][c].dev,
2387 raidPtr->raid_cinfo[r][c].ci_vp,
2388 &clabel);
2389 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2390 raidmarkclean(
2391 raidPtr->Disks[r][c].dev,
2392 raidPtr->raid_cinfo[r][c].ci_vp,
2393 raidPtr->mod_counter);
2394 }
2395 }
2396 /* else we don't touch it.. */
2397 }
2398 }
2399
2400 for( c = 0; c < raidPtr->numSpare ; c++) {
2401 sparecol = raidPtr->numCol + c;
2402 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2403 /*
2404
2405 we claim this disk is "optimal" if it's
2406 rf_ds_used_spare, as that means it should be
2407 directly substitutable for the disk it replaced.
2408 We note that too...
2409
2410 */
2411
2412 for(i=0;i<raidPtr->numRow;i++) {
2413 for(j=0;j<raidPtr->numCol;j++) {
2414 if ((raidPtr->Disks[i][j].spareRow ==
2415 0) &&
2416 (raidPtr->Disks[i][j].spareCol ==
2417 sparecol)) {
2418 srow = i;
2419 scol = j;
2420 break;
2421 }
2422 }
2423 }
2424
2425 /* XXX shouldn't *really* need this... */
2426 raidread_component_label(
2427 raidPtr->Disks[0][sparecol].dev,
2428 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2429 &clabel);
2430 /* make sure status is noted */
2431
2432 raid_init_component_label(raidPtr, &clabel);
2433
2434 clabel.mod_counter = raidPtr->mod_counter;
2435 clabel.row = srow;
2436 clabel.column = scol;
2437 clabel.status = rf_ds_optimal;
2438
2439 raidwrite_component_label(
2440 raidPtr->Disks[0][sparecol].dev,
2441 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2442 &clabel);
2443 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2444 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2445 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2446 raidPtr->mod_counter);
2447 }
2448 }
2449 }
2450 /* printf("Component labels updated\n"); */
2451 }
2452
2453 void
2454 rf_close_component(raidPtr, vp, auto_configured)
2455 RF_Raid_t *raidPtr;
2456 struct vnode *vp;
2457 int auto_configured;
2458 {
2459 struct proc *p;
2460
2461 p = raidPtr->engine_thread;
2462
2463 if (vp != NULL) {
2464 if (auto_configured == 1) {
2465 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2466 vput(vp);
2467
2468 } else {
2469 VOP_UNLOCK(vp, 0);
2470 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2471 }
2472 } else {
2473 printf("vnode was NULL\n");
2474 }
2475 }
2476
2477
2478 void
2479 rf_UnconfigureVnodes(raidPtr)
2480 RF_Raid_t *raidPtr;
2481 {
2482 int r,c;
2483 struct proc *p;
2484 struct vnode *vp;
2485 int acd;
2486
2487
2488 /* We take this opportunity to close the vnodes like we should.. */
2489
2490 p = raidPtr->engine_thread;
2491
2492 for (r = 0; r < raidPtr->numRow; r++) {
2493 for (c = 0; c < raidPtr->numCol; c++) {
2494 printf("Closing vnode for row: %d col: %d\n", r, c);
2495 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2496 acd = raidPtr->Disks[r][c].auto_configured;
2497 rf_close_component(raidPtr, vp, acd);
2498 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2499 raidPtr->Disks[r][c].auto_configured = 0;
2500 }
2501 }
2502 for (r = 0; r < raidPtr->numSpare; r++) {
2503 printf("Closing vnode for spare: %d\n", r);
2504 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2505 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2506 rf_close_component(raidPtr, vp, acd);
2507 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2508 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2509 }
2510 }
2511
2512
2513 void
2514 rf_ReconThread(req)
2515 struct rf_recon_req *req;
2516 {
2517 int s;
2518 RF_Raid_t *raidPtr;
2519
2520 s = splbio();
2521 raidPtr = (RF_Raid_t *) req->raidPtr;
2522 raidPtr->recon_in_progress = 1;
2523
2524 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2525 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2526
2527 /* XXX get rid of this! we don't need it at all.. */
2528 RF_Free(req, sizeof(*req));
2529
2530 raidPtr->recon_in_progress = 0;
2531 splx(s);
2532
2533 /* That's all... */
2534 kthread_exit(0); /* does not return */
2535 }
2536
2537 void
2538 rf_RewriteParityThread(raidPtr)
2539 RF_Raid_t *raidPtr;
2540 {
2541 int retcode;
2542 int s;
2543
2544 raidPtr->parity_rewrite_in_progress = 1;
2545 s = splbio();
2546 retcode = rf_RewriteParity(raidPtr);
2547 splx(s);
2548 if (retcode) {
2549 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2550 } else {
2551 /* set the clean bit! If we shutdown correctly,
2552 the clean bit on each component label will get
2553 set */
2554 raidPtr->parity_good = RF_RAID_CLEAN;
2555 }
2556 raidPtr->parity_rewrite_in_progress = 0;
2557
2558 /* That's all... */
2559 kthread_exit(0); /* does not return */
2560 }
2561
2562
2563 void
2564 rf_CopybackThread(raidPtr)
2565 RF_Raid_t *raidPtr;
2566 {
2567 int s;
2568
2569 raidPtr->copyback_in_progress = 1;
2570 s = splbio();
2571 rf_CopybackReconstructedData(raidPtr);
2572 splx(s);
2573 raidPtr->copyback_in_progress = 0;
2574
2575 /* That's all... */
2576 kthread_exit(0); /* does not return */
2577 }
2578
2579
2580 void
2581 rf_ReconstructInPlaceThread(req)
2582 struct rf_recon_req *req;
2583 {
2584 int retcode;
2585 int s;
2586 RF_Raid_t *raidPtr;
2587
2588 s = splbio();
2589 raidPtr = req->raidPtr;
2590 raidPtr->recon_in_progress = 1;
2591 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2592 RF_Free(req, sizeof(*req));
2593 raidPtr->recon_in_progress = 0;
2594 splx(s);
2595
2596 /* That's all... */
2597 kthread_exit(0); /* does not return */
2598 }
2599
/*
 * Mount-root hook: intentionally does nothing.
 *
 * dev - unused
 */
/* ARGSUSED */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* nothing to do */
}
2606
2607
2608 RF_AutoConfig_t *
2609 rf_find_raid_components()
2610 {
2611 struct devnametobdevmaj *dtobdm;
2612 struct vnode *vp;
2613 struct disklabel label;
2614 struct device *dv;
2615 char *cd_name;
2616 dev_t dev;
2617 int error;
2618 int i;
2619 int good_one;
2620 RF_ComponentLabel_t *clabel;
2621 RF_AutoConfig_t *ac_list;
2622 RF_AutoConfig_t *ac;
2623
2624
2625 /* initialize the AutoConfig list */
2626 ac_list = NULL;
2627
2628 if (raidautoconfig) {
2629
2630 /* we begin by trolling through *all* the devices on the system */
2631
2632 for (dv = alldevs.tqh_first; dv != NULL;
2633 dv = dv->dv_list.tqe_next) {
2634
2635 /* we are only interested in disks... */
2636 if (dv->dv_class != DV_DISK)
2637 continue;
2638
2639 /* we don't care about floppies... */
2640 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2641 continue;
2642 }
2643
2644 /* need to find the device_name_to_block_device_major stuff */
2645 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2646 dtobdm = dev_name2blk;
2647 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2648 dtobdm++;
2649 }
2650
2651 /* get a vnode for the raw partition of this disk */
2652
2653 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2654 if (bdevvp(dev, &vp))
2655 panic("RAID can't alloc vnode");
2656
2657 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2658
2659 if (error) {
2660 /* "Who cares." Continue looking
2661 for something that exists*/
2662 vput(vp);
2663 continue;
2664 }
2665
2666 /* Ok, the disk exists. Go get the disklabel. */
2667 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2668 FREAD, NOCRED, 0);
2669 if (error) {
2670 /*
2671 * XXX can't happen - open() would
2672 * have errored out (or faked up one)
2673 */
2674 printf("can't get label for dev %s%c (%d)!?!?\n",
2675 dv->dv_xname, 'a' + RAW_PART, error);
2676 }
2677
2678 /* don't need this any more. We'll allocate it again
2679 a little later if we really do... */
2680 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2681 vput(vp);
2682
2683 for (i=0; i < label.d_npartitions; i++) {
2684 /* We only support partitions marked as RAID */
2685 if (label.d_partitions[i].p_fstype != FS_RAID)
2686 continue;
2687
2688 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2689 if (bdevvp(dev, &vp))
2690 panic("RAID can't alloc vnode");
2691
2692 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2693 if (error) {
2694 /* Whatever... */
2695 vput(vp);
2696 continue;
2697 }
2698
2699 good_one = 0;
2700
2701 clabel = (RF_ComponentLabel_t *)
2702 malloc(sizeof(RF_ComponentLabel_t),
2703 M_RAIDFRAME, M_NOWAIT);
2704 if (clabel == NULL) {
2705 /* XXX CLEANUP HERE */
2706 printf("RAID auto config: out of memory!\n");
2707 return(NULL); /* XXX probably should panic? */
2708 }
2709
2710 if (!raidread_component_label(dev, vp, clabel)) {
2711 /* Got the label. Does it look reasonable? */
2712 if (rf_reasonable_label(clabel) &&
2713 (clabel->partitionSize <=
2714 label.d_partitions[i].p_size)) {
2715 #if DEBUG
2716 printf("Component on: %s%c: %d\n",
2717 dv->dv_xname, 'a'+i,
2718 label.d_partitions[i].p_size);
2719 rf_print_component_label(clabel);
2720 #endif
2721 /* if it's reasonable, add it,
2722 else ignore it. */
2723 ac = (RF_AutoConfig_t *)
2724 malloc(sizeof(RF_AutoConfig_t),
2725 M_RAIDFRAME,
2726 M_NOWAIT);
2727 if (ac == NULL) {
2728 /* XXX should panic?? */
2729 return(NULL);
2730 }
2731
2732 sprintf(ac->devname, "%s%c",
2733 dv->dv_xname, 'a'+i);
2734 ac->dev = dev;
2735 ac->vp = vp;
2736 ac->clabel = clabel;
2737 ac->next = ac_list;
2738 ac_list = ac;
2739 good_one = 1;
2740 }
2741 }
2742 if (!good_one) {
2743 /* cleanup */
2744 free(clabel, M_RAIDFRAME);
2745 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2746 vput(vp);
2747 }
2748 }
2749 }
2750 }
2751 return(ac_list);
2752 }
2753
2754 static int
2755 rf_reasonable_label(clabel)
2756 RF_ComponentLabel_t *clabel;
2757 {
2758
2759 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2760 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2761 ((clabel->clean == RF_RAID_CLEAN) ||
2762 (clabel->clean == RF_RAID_DIRTY)) &&
2763 clabel->row >=0 &&
2764 clabel->column >= 0 &&
2765 clabel->num_rows > 0 &&
2766 clabel->num_columns > 0 &&
2767 clabel->row < clabel->num_rows &&
2768 clabel->column < clabel->num_columns &&
2769 clabel->blockSize > 0 &&
2770 clabel->numBlocks > 0) {
2771 /* label looks reasonable enough... */
2772 return(1);
2773 }
2774 return(0);
2775 }
2776
2777
2778 void
2779 rf_print_component_label(clabel)
2780 RF_ComponentLabel_t *clabel;
2781 {
2782 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2783 clabel->row, clabel->column,
2784 clabel->num_rows, clabel->num_columns);
2785 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2786 clabel->version, clabel->serial_number,
2787 clabel->mod_counter);
2788 printf(" Clean: %s Status: %d\n",
2789 clabel->clean ? "Yes" : "No", clabel->status );
2790 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2791 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2792 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2793 (char) clabel->parityConfig, clabel->blockSize,
2794 clabel->numBlocks);
2795 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2796 printf(" Contains root partition: %s\n",
2797 clabel->root_partition ? "Yes" : "No" );
2798 printf(" Last configured as: raid%d\n", clabel->last_unit );
2799 #if 0
2800 printf(" Config order: %d\n", clabel->config_order);
2801 #endif
2802
2803 }
2804
2805 RF_ConfigSet_t *
2806 rf_create_auto_sets(ac_list)
2807 RF_AutoConfig_t *ac_list;
2808 {
2809 RF_AutoConfig_t *ac;
2810 RF_ConfigSet_t *config_sets;
2811 RF_ConfigSet_t *cset;
2812 RF_AutoConfig_t *ac_next;
2813
2814
2815 config_sets = NULL;
2816
2817 /* Go through the AutoConfig list, and figure out which components
2818 belong to what sets. */
2819 ac = ac_list;
2820 while(ac!=NULL) {
2821 /* we're going to putz with ac->next, so save it here
2822 for use at the end of the loop */
2823 ac_next = ac->next;
2824
2825 if (config_sets == NULL) {
2826 /* will need at least this one... */
2827 config_sets = (RF_ConfigSet_t *)
2828 malloc(sizeof(RF_ConfigSet_t),
2829 M_RAIDFRAME, M_NOWAIT);
2830 if (config_sets == NULL) {
2831 panic("rf_create_auto_sets: No memory!\n");
2832 }
2833 /* this one is easy :) */
2834 config_sets->ac = ac;
2835 config_sets->next = NULL;
2836 config_sets->rootable = 0;
2837 ac->next = NULL;
2838 } else {
2839 /* which set does this component fit into? */
2840 cset = config_sets;
2841 while(cset!=NULL) {
2842 if (rf_does_it_fit(cset, ac)) {
2843 /* looks like it matches */
2844 ac->next = cset->ac;
2845 cset->ac = ac;
2846 break;
2847 }
2848 cset = cset->next;
2849 }
2850 if (cset==NULL) {
2851 /* didn't find a match above... new set..*/
2852 cset = (RF_ConfigSet_t *)
2853 malloc(sizeof(RF_ConfigSet_t),
2854 M_RAIDFRAME, M_NOWAIT);
2855 if (cset == NULL) {
2856 panic("rf_create_auto_sets: No memory!\n");
2857 }
2858 cset->ac = ac;
2859 ac->next = NULL;
2860 cset->next = config_sets;
2861 cset->rootable = 0;
2862 config_sets = cset;
2863 }
2864 }
2865 ac = ac_next;
2866 }
2867
2868
2869 return(config_sets);
2870 }
2871
2872 static int
2873 rf_does_it_fit(cset, ac)
2874 RF_ConfigSet_t *cset;
2875 RF_AutoConfig_t *ac;
2876 {
2877 RF_ComponentLabel_t *clabel1, *clabel2;
2878
2879 /* If this one matches the *first* one in the set, that's good
2880 enough, since the other members of the set would have been
2881 through here too... */
2882 /* note that we are not checking partitionSize here..
2883
2884 Note that we are also not checking the mod_counters here.
2885 If everything else matches execpt the mod_counter, that's
2886 good enough for this test. We will deal with the mod_counters
2887 a little later in the autoconfiguration process.
2888
2889 (clabel1->mod_counter == clabel2->mod_counter) &&
2890
2891 */
2892
2893 clabel1 = cset->ac->clabel;
2894 clabel2 = ac->clabel;
2895 if ((clabel1->version == clabel2->version) &&
2896 (clabel1->serial_number == clabel2->serial_number) &&
2897 (clabel1->num_rows == clabel2->num_rows) &&
2898 (clabel1->num_columns == clabel2->num_columns) &&
2899 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2900 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2901 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2902 (clabel1->parityConfig == clabel2->parityConfig) &&
2903 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2904 (clabel1->blockSize == clabel2->blockSize) &&
2905 (clabel1->numBlocks == clabel2->numBlocks) &&
2906 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2907 (clabel1->root_partition == clabel2->root_partition) &&
2908 (clabel1->last_unit == clabel2->last_unit) &&
2909 (clabel1->config_order == clabel2->config_order)) {
2910 /* if it get's here, it almost *has* to be a match */
2911 } else {
2912 /* it's not consistent with somebody in the set..
2913 punt */
2914 return(0);
2915 }
2916 /* all was fine.. it must fit... */
2917 return(1);
2918 }
2919
2920 int
2921 rf_have_enough_components(cset)
2922 RF_ConfigSet_t *cset;
2923 {
2924 RF_AutoConfig_t *ac;
2925 RF_AutoConfig_t *auto_config;
2926 RF_ComponentLabel_t *clabel;
2927 int r,c;
2928 int num_rows;
2929 int num_cols;
2930 int num_missing;
2931
2932 /* check to see that we have enough 'live' components
2933 of this set. If so, we can configure it if necessary */
2934
2935 num_rows = cset->ac->clabel->num_rows;
2936 num_cols = cset->ac->clabel->num_columns;
2937
2938 /* XXX Check for duplicate components!?!?!? */
2939
2940 num_missing = 0;
2941 auto_config = cset->ac;
2942
2943 for(r=0; r<num_rows; r++) {
2944 for(c=0; c<num_cols; c++) {
2945 ac = auto_config;
2946 while(ac!=NULL) {
2947 if (ac->clabel==NULL) {
2948 /* big-time bad news. */
2949 goto fail;
2950 }
2951 if ((ac->clabel->row == r) &&
2952 (ac->clabel->column == c)) {
2953 /* it's this one... */
2954 #if DEBUG
2955 printf("Found: %s at %d,%d\n",
2956 ac->devname,r,c);
2957 #endif
2958 break;
2959 }
2960 ac=ac->next;
2961 }
2962 if (ac==NULL) {
2963 /* Didn't find one here! */
2964 num_missing++;
2965 }
2966 }
2967 }
2968
2969 clabel = cset->ac->clabel;
2970
2971 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
2972 ((clabel->parityConfig == '1') && (num_missing > 1)) ||
2973 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
2974 ((clabel->parityConfig == '5') && (num_missing > 1))) {
2975 /* XXX this needs to be made *much* more general */
2976 /* Too many failures */
2977 return(0);
2978 }
2979 /* otherwise, all is well, and we've got enough to take a kick
2980 at autoconfiguring this set */
2981 return(1);
2982 fail:
2983 return(0);
2984
2985 }
2986
2987 void
2988 rf_create_configuration(ac,config,raidPtr)
2989 RF_AutoConfig_t *ac;
2990 RF_Config_t *config;
2991 RF_Raid_t *raidPtr;
2992 {
2993 RF_ComponentLabel_t *clabel;
2994 int i;
2995
2996 clabel = ac->clabel;
2997
2998 /* 1. Fill in the common stuff */
2999 config->numRow = clabel->num_rows;
3000 config->numCol = clabel->num_columns;
3001 config->numSpare = 0; /* XXX should this be set here? */
3002 config->sectPerSU = clabel->sectPerSU;
3003 config->SUsPerPU = clabel->SUsPerPU;
3004 config->SUsPerRU = clabel->SUsPerRU;
3005 config->parityConfig = clabel->parityConfig;
3006 /* XXX... */
3007 strcpy(config->diskQueueType,"fifo");
3008 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3009 config->layoutSpecificSize = 0; /* XXX ?? */
3010
3011 while(ac!=NULL) {
3012 /* row/col values will be in range due to the checks
3013 in reasonable_label() */
3014 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3015 ac->devname);
3016 ac = ac->next;
3017 }
3018
3019 for(i=0;i<RF_MAXDBGV;i++) {
3020 config->debugVars[i][0] = NULL;
3021 }
3022 }
3023
3024 int
3025 rf_set_autoconfig(raidPtr, new_value)
3026 RF_Raid_t *raidPtr;
3027 int new_value;
3028 {
3029 RF_ComponentLabel_t clabel;
3030 struct vnode *vp;
3031 dev_t dev;
3032 int row, column;
3033
3034 raidPtr->autoconfigure = new_value;
3035 for(row=0; row<raidPtr->numRow; row++) {
3036 for(column=0; column<raidPtr->numCol; column++) {
3037 dev = raidPtr->Disks[row][column].dev;
3038 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3039 raidread_component_label(dev, vp, &clabel);
3040 clabel.autoconfigure = new_value;
3041 raidwrite_component_label(dev, vp, &clabel);
3042 }
3043 }
3044 return(new_value);
3045 }
3046
3047 int
3048 rf_set_rootpartition(raidPtr, new_value)
3049 RF_Raid_t *raidPtr;
3050 int new_value;
3051 {
3052 RF_ComponentLabel_t clabel;
3053 struct vnode *vp;
3054 dev_t dev;
3055 int row, column;
3056
3057 raidPtr->root_partition = new_value;
3058 for(row=0; row<raidPtr->numRow; row++) {
3059 for(column=0; column<raidPtr->numCol; column++) {
3060 dev = raidPtr->Disks[row][column].dev;
3061 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3062 raidread_component_label(dev, vp, &clabel);
3063 clabel.root_partition = new_value;
3064 raidwrite_component_label(dev, vp, &clabel);
3065 }
3066 }
3067 return(new_value);
3068 }
3069
3070 void
3071 rf_release_all_vps(cset)
3072 RF_ConfigSet_t *cset;
3073 {
3074 RF_AutoConfig_t *ac;
3075
3076 ac = cset->ac;
3077 while(ac!=NULL) {
3078 /* Close the vp, and give it back */
3079 if (ac->vp) {
3080 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3081 vput(ac->vp);
3082 }
3083 ac = ac->next;
3084 }
3085 }
3086
3087
3088 void
3089 rf_cleanup_config_set(cset)
3090 RF_ConfigSet_t *cset;
3091 {
3092 RF_AutoConfig_t *ac;
3093 RF_AutoConfig_t *next_ac;
3094
3095 ac = cset->ac;
3096 while(ac!=NULL) {
3097 next_ac = ac->next;
3098 /* nuke the label */
3099 free(ac->clabel, M_RAIDFRAME);
3100 /* cleanup the config structure */
3101 free(ac, M_RAIDFRAME);
3102 /* "next.." */
3103 ac = next_ac;
3104 }
3105 /* and, finally, nuke the config set */
3106 free(cset, M_RAIDFRAME);
3107 }
3108
3109
3110 void
3111 raid_init_component_label(raidPtr, clabel)
3112 RF_Raid_t *raidPtr;
3113 RF_ComponentLabel_t *clabel;
3114 {
3115 /* current version number */
3116 clabel->version = RF_COMPONENT_LABEL_VERSION;
3117 clabel->serial_number = raidPtr->serial_number;
3118 clabel->mod_counter = raidPtr->mod_counter;
3119 clabel->num_rows = raidPtr->numRow;
3120 clabel->num_columns = raidPtr->numCol;
3121 clabel->clean = RF_RAID_DIRTY; /* not clean */
3122 clabel->status = rf_ds_optimal; /* "It's good!" */
3123
3124 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3125 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3126 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3127
3128 clabel->blockSize = raidPtr->bytesPerSector;
3129 clabel->numBlocks = raidPtr->sectorsPerDisk;
3130
3131 /* XXX not portable */
3132 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3133 clabel->maxOutstanding = raidPtr->maxOutstanding;
3134 clabel->autoconfigure = raidPtr->autoconfigure;
3135 clabel->root_partition = raidPtr->root_partition;
3136 clabel->last_unit = raidPtr->raidid;
3137 clabel->config_order = raidPtr->config_order;
3138 }
3139
3140 int
3141 rf_auto_config_set(cset,unit)
3142 RF_ConfigSet_t *cset;
3143 int *unit;
3144 {
3145 RF_Raid_t *raidPtr;
3146 RF_Config_t *config;
3147 int raidID;
3148 int retcode;
3149
3150 printf("RAID autoconfigure\n");
3151
3152 retcode = 0;
3153 *unit = -1;
3154
3155 /* 1. Create a config structure */
3156
3157 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3158 M_RAIDFRAME,
3159 M_NOWAIT);
3160 if (config==NULL) {
3161 printf("Out of mem!?!?\n");
3162 /* XXX do something more intelligent here. */
3163 return(1);
3164 }
3165
3166 memset(config, 0, sizeof(RF_Config_t));
3167
3168 /* XXX raidID needs to be set correctly.. */
3169
3170 /*
3171 2. Figure out what RAID ID this one is supposed to live at
3172 See if we can get the same RAID dev that it was configured
3173 on last time..
3174 */
3175
3176 raidID = cset->ac->clabel->last_unit;
3177 if ((raidID < 0) || (raidID >= numraid)) {
3178 /* let's not wander off into lala land. */
3179 raidID = numraid - 1;
3180 }
3181 if (raidPtrs[raidID]->valid != 0) {
3182
3183 /*
3184 Nope... Go looking for an alternative...
3185 Start high so we don't immediately use raid0 if that's
3186 not taken.
3187 */
3188
3189 for(raidID = numraid; raidID >= 0; raidID--) {
3190 if (raidPtrs[raidID]->valid == 0) {
3191 /* can use this one! */
3192 break;
3193 }
3194 }
3195 }
3196
3197 if (raidID < 0) {
3198 /* punt... */
3199 printf("Unable to auto configure this set!\n");
3200 printf("(Out of RAID devs!)\n");
3201 return(1);
3202 }
3203 printf("Configuring raid%d:\n",raidID);
3204 raidPtr = raidPtrs[raidID];
3205
3206 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3207 raidPtr->raidid = raidID;
3208 raidPtr->openings = RAIDOUTSTANDING;
3209
3210 /* 3. Build the configuration structure */
3211 rf_create_configuration(cset->ac, config, raidPtr);
3212
3213 /* 4. Do the configuration */
3214 retcode = rf_Configure(raidPtr, config, cset->ac);
3215
3216 if (retcode == 0) {
3217
3218 raidinit(raidPtrs[raidID]);
3219
3220 rf_markalldirty(raidPtrs[raidID]);
3221 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3222 if (cset->ac->clabel->root_partition==1) {
3223 /* everything configured just fine. Make a note
3224 that this set is eligible to be root. */
3225 cset->rootable = 1;
3226 /* XXX do this here? */
3227 raidPtrs[raidID]->root_partition = 1;
3228 }
3229 }
3230
3231 /* 5. Cleanup */
3232 free(config, M_RAIDFRAME);
3233
3234 *unit = raidID;
3235 return(retcode);
3236 }
3237