rf_netbsdkintf.c revision 1.73 1 /* $NetBSD: rf_netbsdkintf.c,v 1.73 2000/03/26 22:38:28 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include "raid.h"
139 #include "opt_raid_autoconfig.h"
140 #include "rf_raid.h"
141 #include "rf_raidframe.h"
142 #include "rf_copyback.h"
143 #include "rf_dag.h"
144 #include "rf_dagflags.h"
145 #include "rf_diskqueue.h"
146 #include "rf_acctrace.h"
147 #include "rf_etimer.h"
148 #include "rf_general.h"
149 #include "rf_debugMem.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_debugprint.h"
155 #include "rf_threadstuff.h"
156 #include "rf_configure.h"
157
/* Debug verbosity used by db1_printf(); messages are printed when > 0. */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.
 *
 * The argument list must be wrapped in an extra pair of parentheses, as it
 * is pasted directly after "printf".  Both variants expand to exactly one
 * statement (do { ... } while (0)), so the macro can be used as the
 * unbraced body of an if/else without capturing the else (DEBUG case) or
 * producing a stray empty statement that breaks the else (non-DEBUG case).
 * Without DEBUG the arguments are discarded and never evaluated.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
165
/*
 * Per-unit RAID descriptors.  The array itself is allocated in raidattach()
 * with one slot per requested unit, and each slot is then given its own
 * RF_Raid_t there.
 */
static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/*
 * Initialized in raidattach() together with the two queues below.
 * NOTE(review): presumably this mutex protects those queues -- confirm
 * against the code that uses them.
 */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
174
/* prototypes */

/*
 * File-local helpers; being static, their definitions live elsewhere in
 * this translation unit.
 */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    dev_t dev, RF_SectorNum_t startSect,
    RF_SectorCount_t numSect, caddr_t buf,
    void (*cbFunc) (struct buf *), void *cbArg,
    int logBytesPerSector, struct proc * b_proc);
static void raidinit __P((RF_Raid_t *));

/* Externally visible attach routine and block/character device entry points. */
void    raidattach __P((int));
int     raidsize __P((dev_t));
int     raidopen __P((dev_t, int, int, struct proc *));
int     raidclose __P((dev_t, int, int, struct proc *));
int     raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int     raidwrite __P((dev_t, struct uio *, int));
int     raidread __P((dev_t, struct uio *, int));
void    raidstrategy __P((struct buf *));
int     raiddump __P((dev_t, daddr_t, caddr_t, size_t));
193
/*
 * Pilfered from ccd.c
 *
 * Wrapper around a struct buf that also remembers the original request.
 * rf_buf has to stay the first member (see the member comment).
 * NOTE(review): presumably so a struct buf pointer handed to a completion
 * callback can be cast back to the enclosing struct raidbuf -- confirm
 * against the users of this type.
 */

struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


/*
 * Get/return an object from/to the unit's component buffer pool
 * (rs->sc_cbufpool).  RAIDGETBUF passes PR_NOWAIT to pool_get().
 * NOTE(review): with PR_NOWAIT the result can presumably be NULL --
 * confirm that callers check for it.
 */
#define	RAIDGETBUF(rs)		pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp)	pool_put(&(rs)->sc_cbufpool, cbp)
208
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
*/

/*
 * Per-unit driver state.  One entry per unit is allocated (and zeroed) in
 * raidattach(); entries are indexed by raidunit(dev).
 */
struct raid_softc {
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf_queue buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Unit number encoded in a dev_t (thin alias for DISKUNIT). */
#define	raidunit(x)	DISKUNIT(x)
/*
 * Upper bound the entry points check a unit number against before indexing
 * the per-unit tables; set in raidattach().
 */
int     numraid = 0;
231
/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/*
 * Build the dev_t that names the raw partition (RAW_PART) of the same
 * major number and raid unit as dev.
 */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
255
/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

/* Disklabel helpers (static, so defined elsewhere in this file). */
static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
				     struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

/* Per-unit lock taken around open/close and shutdown. */
static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));

static void rf_markalldirty __P((RF_Raid_t *));
void rf_mountroot_hook __P((struct device *));

/*
 * One fake struct device per unit, filled in by raidattach();
 * rf_buildroothack() may point booted_device at one of them.
 */
struct device *raidrootdev;

void rf_ReconThread __P((struct rf_recon_req *));
/* XXX what I want is: */
/*void rf_ReconThread __P((RF_Raid_t *raidPtr));  */
void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
void rf_CopybackThread __P((RF_Raid_t *raidPtr));
void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
void rf_buildroothack __P((void *));

/* Autoconfiguration support. */
RF_AutoConfig_t *rf_find_raid_components __P((void));
RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
static int rf_reasonable_label __P((RF_ComponentLabel_t *));
void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
				  RF_Raid_t *));
int rf_set_autoconfig __P((RF_Raid_t *, int));
int rf_set_rootpartition __P((RF_Raid_t *, int));
void rf_release_all_vps __P((RF_ConfigSet_t *));
void rf_cleanup_config_set __P((RF_ConfigSet_t *));
int rf_have_enough_components __P((RF_ConfigSet_t *));
int rf_auto_config_set __P((RF_ConfigSet_t *, int *));

static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
				  allow autoconfig to take place.
			          Note that this is overridden by having
			          RAID_AUTOCONFIG as an option in the
			          kernel config file.  */
/* Defined outside this file; assigned by rf_buildroothack(). */
extern struct device *booted_device;
299
300 void
301 raidattach(num)
302 int num;
303 {
304 int raidID;
305 int i, rc;
306 RF_AutoConfig_t *ac_list; /* autoconfig list */
307 RF_ConfigSet_t *config_sets;
308
309 #ifdef DEBUG
310 printf("raidattach: Asked for %d units\n", num);
311 #endif
312
313 if (num <= 0) {
314 #ifdef DIAGNOSTIC
315 panic("raidattach: count <= 0");
316 #endif
317 return;
318 }
319 /* This is where all the initialization stuff gets done. */
320
321 numraid = num;
322
323 /* Make some space for requested number of units... */
324
325 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
326 if (raidPtrs == NULL) {
327 panic("raidPtrs is NULL!!\n");
328 }
329
330 rc = rf_mutex_init(&rf_sparet_wait_mutex);
331 if (rc) {
332 RF_PANIC();
333 }
334
335 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
336
337 for (i = 0; i < num; i++)
338 raidPtrs[i] = NULL;
339 rc = rf_BootRaidframe();
340 if (rc == 0)
341 printf("Kernelized RAIDframe activated\n");
342 else
343 panic("Serious error booting RAID!!\n");
344
345 /* put together some datastructures like the CCD device does.. This
346 * lets us lock the device and what-not when it gets opened. */
347
348 raid_softc = (struct raid_softc *)
349 malloc(num * sizeof(struct raid_softc),
350 M_RAIDFRAME, M_NOWAIT);
351 if (raid_softc == NULL) {
352 printf("WARNING: no memory for RAIDframe driver\n");
353 return;
354 }
355
356 bzero(raid_softc, num * sizeof(struct raid_softc));
357
358 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
359 M_RAIDFRAME, M_NOWAIT);
360 if (raidrootdev == NULL) {
361 panic("No memory for RAIDframe driver!!?!?!\n");
362 }
363
364 for (raidID = 0; raidID < num; raidID++) {
365 BUFQ_INIT(&raid_softc[raidID].buf_queue);
366
367 raidrootdev[raidID].dv_class = DV_DISK;
368 raidrootdev[raidID].dv_cfdata = NULL;
369 raidrootdev[raidID].dv_unit = raidID;
370 raidrootdev[raidID].dv_parent = NULL;
371 raidrootdev[raidID].dv_flags = 0;
372 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
373
374 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
375 (RF_Raid_t *));
376 if (raidPtrs[raidID] == NULL) {
377 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
378 numraid = raidID;
379 return;
380 }
381 }
382
383 #if RAID_AUTOCONFIG
384 raidautoconfig = 1;
385 #endif
386
387 if (raidautoconfig) {
388 /* 1. locate all RAID components on the system */
389
390 #if DEBUG
391 printf("Searching for raid components...\n");
392 #endif
393 ac_list = rf_find_raid_components();
394
395 /* 2. sort them into their respective sets */
396
397 config_sets = rf_create_auto_sets(ac_list);
398
399 /* 3. evaluate each set and configure the valid ones
400 This gets done in rf_buildroothack() */
401
402 /* schedule the creation of the thread to do the
403 "/ on RAID" stuff */
404
405 kthread_create(rf_buildroothack,config_sets);
406
407 #if 0
408 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
409 #endif
410 }
411
412 }
413
414 void
415 rf_buildroothack(arg)
416 void *arg;
417 {
418 RF_ConfigSet_t *config_sets = arg;
419 RF_ConfigSet_t *cset;
420 RF_ConfigSet_t *next_cset;
421 int retcode;
422 int raidID;
423 int rootID;
424 int num_root;
425
426 num_root = 0;
427 cset = config_sets;
428 while(cset != NULL ) {
429 next_cset = cset->next;
430 if (rf_have_enough_components(cset) &&
431 cset->ac->clabel->autoconfigure==1) {
432 retcode = rf_auto_config_set(cset,&raidID);
433 if (!retcode) {
434 if (cset->rootable) {
435 rootID = raidID;
436 num_root++;
437 }
438 } else {
439 /* The autoconfig didn't work :( */
440 #if DEBUG
441 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
442 #endif
443 rf_release_all_vps(cset);
444 }
445 } else {
446 /* we're not autoconfiguring this set...
447 release the associated resources */
448 rf_release_all_vps(cset);
449 }
450 /* cleanup */
451 rf_cleanup_config_set(cset);
452 cset = next_cset;
453 }
454 if (boothowto & RB_ASKNAME) {
455 /* We don't auto-config... */
456 } else {
457 /* They didn't ask, and we found something bootable... */
458
459 if (num_root == 1) {
460 booted_device = &raidrootdev[rootID];
461 } else if (num_root > 1) {
462 /* we can't guess.. require the user to answer... */
463 boothowto |= RB_ASKNAME;
464 }
465 }
466 }
467
468
469 int
470 raidsize(dev)
471 dev_t dev;
472 {
473 struct raid_softc *rs;
474 struct disklabel *lp;
475 int part, unit, omask, size;
476
477 unit = raidunit(dev);
478 if (unit >= numraid)
479 return (-1);
480 rs = &raid_softc[unit];
481
482 if ((rs->sc_flags & RAIDF_INITED) == 0)
483 return (-1);
484
485 part = DISKPART(dev);
486 omask = rs->sc_dkdev.dk_openmask & (1 << part);
487 lp = rs->sc_dkdev.dk_label;
488
489 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
490 return (-1);
491
492 if (lp->d_partitions[part].p_fstype != FS_SWAP)
493 size = -1;
494 else
495 size = lp->d_partitions[part].p_size *
496 (lp->d_secsize / DEV_BSIZE);
497
498 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
499 return (-1);
500
501 return (size);
502
503 }
504
505 int
506 raiddump(dev, blkno, va, size)
507 dev_t dev;
508 daddr_t blkno;
509 caddr_t va;
510 size_t size;
511 {
512 /* Not implemented. */
513 return ENXIO;
514 }
515 /* ARGSUSED */
516 int
517 raidopen(dev, flags, fmt, p)
518 dev_t dev;
519 int flags, fmt;
520 struct proc *p;
521 {
522 int unit = raidunit(dev);
523 struct raid_softc *rs;
524 struct disklabel *lp;
525 int part, pmask;
526 int error = 0;
527
528 if (unit >= numraid)
529 return (ENXIO);
530 rs = &raid_softc[unit];
531
532 if ((error = raidlock(rs)) != 0)
533 return (error);
534 lp = rs->sc_dkdev.dk_label;
535
536 part = DISKPART(dev);
537 pmask = (1 << part);
538
539 db1_printf(("Opening raid device number: %d partition: %d\n",
540 unit, part));
541
542
543 if ((rs->sc_flags & RAIDF_INITED) &&
544 (rs->sc_dkdev.dk_openmask == 0))
545 raidgetdisklabel(dev);
546
547 /* make sure that this partition exists */
548
549 if (part != RAW_PART) {
550 db1_printf(("Not a raw partition..\n"));
551 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
552 ((part >= lp->d_npartitions) ||
553 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
554 error = ENXIO;
555 raidunlock(rs);
556 db1_printf(("Bailing out...\n"));
557 return (error);
558 }
559 }
560 /* Prevent this unit from being unconfigured while open. */
561 switch (fmt) {
562 case S_IFCHR:
563 rs->sc_dkdev.dk_copenmask |= pmask;
564 break;
565
566 case S_IFBLK:
567 rs->sc_dkdev.dk_bopenmask |= pmask;
568 break;
569 }
570
571 if ((rs->sc_dkdev.dk_openmask == 0) &&
572 ((rs->sc_flags & RAIDF_INITED) != 0)) {
573 /* First one... mark things as dirty... Note that we *MUST*
574 have done a configure before this. I DO NOT WANT TO BE
575 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
576 THAT THEY BELONG TOGETHER!!!!! */
577 /* XXX should check to see if we're only open for reading
578 here... If so, we needn't do this, but then need some
579 other way of keeping track of what's happened.. */
580
581 rf_markalldirty( raidPtrs[unit] );
582 }
583
584
585 rs->sc_dkdev.dk_openmask =
586 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
587
588 raidunlock(rs);
589
590 return (error);
591
592
593 }
594 /* ARGSUSED */
595 int
596 raidclose(dev, flags, fmt, p)
597 dev_t dev;
598 int flags, fmt;
599 struct proc *p;
600 {
601 int unit = raidunit(dev);
602 struct raid_softc *rs;
603 int error = 0;
604 int part;
605
606 if (unit >= numraid)
607 return (ENXIO);
608 rs = &raid_softc[unit];
609
610 if ((error = raidlock(rs)) != 0)
611 return (error);
612
613 part = DISKPART(dev);
614
615 /* ...that much closer to allowing unconfiguration... */
616 switch (fmt) {
617 case S_IFCHR:
618 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
619 break;
620
621 case S_IFBLK:
622 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
623 break;
624 }
625 rs->sc_dkdev.dk_openmask =
626 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
627
628 if ((rs->sc_dkdev.dk_openmask == 0) &&
629 ((rs->sc_flags & RAIDF_INITED) != 0)) {
630 /* Last one... device is not unconfigured yet.
631 Device shutdown has taken care of setting the
632 clean bits if RAIDF_INITED is not set
633 mark things as clean... */
634 #if 0
635 printf("Last one on raid%d. Updating status.\n",unit);
636 #endif
637 rf_final_update_component_labels( raidPtrs[unit] );
638 }
639
640 raidunlock(rs);
641 return (0);
642
643 }
644
645 void
646 raidstrategy(bp)
647 register struct buf *bp;
648 {
649 register int s;
650
651 unsigned int raidID = raidunit(bp->b_dev);
652 RF_Raid_t *raidPtr;
653 struct raid_softc *rs = &raid_softc[raidID];
654 struct disklabel *lp;
655 int wlabel;
656
657 if ((rs->sc_flags & RAIDF_INITED) ==0) {
658 bp->b_error = ENXIO;
659 bp->b_flags = B_ERROR;
660 bp->b_resid = bp->b_bcount;
661 biodone(bp);
662 return;
663 }
664 if (raidID >= numraid || !raidPtrs[raidID]) {
665 bp->b_error = ENODEV;
666 bp->b_flags |= B_ERROR;
667 bp->b_resid = bp->b_bcount;
668 biodone(bp);
669 return;
670 }
671 raidPtr = raidPtrs[raidID];
672 if (!raidPtr->valid) {
673 bp->b_error = ENODEV;
674 bp->b_flags |= B_ERROR;
675 bp->b_resid = bp->b_bcount;
676 biodone(bp);
677 return;
678 }
679 if (bp->b_bcount == 0) {
680 db1_printf(("b_bcount is zero..\n"));
681 biodone(bp);
682 return;
683 }
684 lp = rs->sc_dkdev.dk_label;
685
686 /*
687 * Do bounds checking and adjust transfer. If there's an
688 * error, the bounds check will flag that for us.
689 */
690
691 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
692 if (DISKPART(bp->b_dev) != RAW_PART)
693 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
694 db1_printf(("Bounds check failed!!:%d %d\n",
695 (int) bp->b_blkno, (int) wlabel));
696 biodone(bp);
697 return;
698 }
699 s = splbio();
700
701 bp->b_resid = 0;
702
703 /* stuff it onto our queue */
704 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
705
706 raidstart(raidPtrs[raidID]);
707
708 splx(s);
709 }
710 /* ARGSUSED */
711 int
712 raidread(dev, uio, flags)
713 dev_t dev;
714 struct uio *uio;
715 int flags;
716 {
717 int unit = raidunit(dev);
718 struct raid_softc *rs;
719 int part;
720
721 if (unit >= numraid)
722 return (ENXIO);
723 rs = &raid_softc[unit];
724
725 if ((rs->sc_flags & RAIDF_INITED) == 0)
726 return (ENXIO);
727 part = DISKPART(dev);
728
729 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
730
731 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
732
733 }
734 /* ARGSUSED */
735 int
736 raidwrite(dev, uio, flags)
737 dev_t dev;
738 struct uio *uio;
739 int flags;
740 {
741 int unit = raidunit(dev);
742 struct raid_softc *rs;
743
744 if (unit >= numraid)
745 return (ENXIO);
746 rs = &raid_softc[unit];
747
748 if ((rs->sc_flags & RAIDF_INITED) == 0)
749 return (ENXIO);
750 db1_printf(("raidwrite\n"));
751 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
752
753 }
754
755 int
756 raidioctl(dev, cmd, data, flag, p)
757 dev_t dev;
758 u_long cmd;
759 caddr_t data;
760 int flag;
761 struct proc *p;
762 {
763 int unit = raidunit(dev);
764 int error = 0;
765 int part, pmask;
766 struct raid_softc *rs;
767 RF_Config_t *k_cfg, *u_cfg;
768 RF_Raid_t *raidPtr;
769 RF_RaidDisk_t *diskPtr;
770 RF_AccTotals_t *totals;
771 RF_DeviceConfig_t *d_cfg, **ucfgp;
772 u_char *specific_buf;
773 int retcode = 0;
774 int row;
775 int column;
776 struct rf_recon_req *rrcopy, *rr;
777 RF_ComponentLabel_t *clabel;
778 RF_ComponentLabel_t ci_label;
779 RF_ComponentLabel_t **clabel_ptr;
780 RF_SingleComponent_t *sparePtr,*componentPtr;
781 RF_SingleComponent_t hot_spare;
782 RF_SingleComponent_t component;
783 int i, j, d;
784
785 if (unit >= numraid)
786 return (ENXIO);
787 rs = &raid_softc[unit];
788 raidPtr = raidPtrs[unit];
789
790 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
791 (int) DISKPART(dev), (int) unit, (int) cmd));
792
793 /* Must be open for writes for these commands... */
794 switch (cmd) {
795 case DIOCSDINFO:
796 case DIOCWDINFO:
797 case DIOCWLABEL:
798 if ((flag & FWRITE) == 0)
799 return (EBADF);
800 }
801
802 /* Must be initialized for these... */
803 switch (cmd) {
804 case DIOCGDINFO:
805 case DIOCSDINFO:
806 case DIOCWDINFO:
807 case DIOCGPART:
808 case DIOCWLABEL:
809 case DIOCGDEFLABEL:
810 case RAIDFRAME_SHUTDOWN:
811 case RAIDFRAME_REWRITEPARITY:
812 case RAIDFRAME_GET_INFO:
813 case RAIDFRAME_RESET_ACCTOTALS:
814 case RAIDFRAME_GET_ACCTOTALS:
815 case RAIDFRAME_KEEP_ACCTOTALS:
816 case RAIDFRAME_GET_SIZE:
817 case RAIDFRAME_FAIL_DISK:
818 case RAIDFRAME_COPYBACK:
819 case RAIDFRAME_CHECK_RECON_STATUS:
820 case RAIDFRAME_GET_COMPONENT_LABEL:
821 case RAIDFRAME_SET_COMPONENT_LABEL:
822 case RAIDFRAME_ADD_HOT_SPARE:
823 case RAIDFRAME_REMOVE_HOT_SPARE:
824 case RAIDFRAME_INIT_LABELS:
825 case RAIDFRAME_REBUILD_IN_PLACE:
826 case RAIDFRAME_CHECK_PARITY:
827 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
828 case RAIDFRAME_CHECK_COPYBACK_STATUS:
829 case RAIDFRAME_SET_AUTOCONFIG:
830 case RAIDFRAME_SET_ROOT:
831 case RAIDFRAME_DELETE_COMPONENT:
832 case RAIDFRAME_INCORPORATE_HOT_SPARE:
833 if ((rs->sc_flags & RAIDF_INITED) == 0)
834 return (ENXIO);
835 }
836
837 switch (cmd) {
838
839 /* configure the system */
840 case RAIDFRAME_CONFIGURE:
841
842 if (raidPtr->valid) {
843 /* There is a valid RAID set running on this unit! */
844 printf("raid%d: Device already configured!\n",unit);
845 return(EINVAL);
846 }
847
848 /* copy-in the configuration information */
849 /* data points to a pointer to the configuration structure */
850
851 u_cfg = *((RF_Config_t **) data);
852 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
853 if (k_cfg == NULL) {
854 return (ENOMEM);
855 }
856 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
857 sizeof(RF_Config_t));
858 if (retcode) {
859 RF_Free(k_cfg, sizeof(RF_Config_t));
860 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
861 retcode));
862 return (retcode);
863 }
864 /* allocate a buffer for the layout-specific data, and copy it
865 * in */
866 if (k_cfg->layoutSpecificSize) {
867 if (k_cfg->layoutSpecificSize > 10000) {
868 /* sanity check */
869 RF_Free(k_cfg, sizeof(RF_Config_t));
870 return (EINVAL);
871 }
872 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
873 (u_char *));
874 if (specific_buf == NULL) {
875 RF_Free(k_cfg, sizeof(RF_Config_t));
876 return (ENOMEM);
877 }
878 retcode = copyin(k_cfg->layoutSpecific,
879 (caddr_t) specific_buf,
880 k_cfg->layoutSpecificSize);
881 if (retcode) {
882 RF_Free(k_cfg, sizeof(RF_Config_t));
883 RF_Free(specific_buf,
884 k_cfg->layoutSpecificSize);
885 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
886 retcode));
887 return (retcode);
888 }
889 } else
890 specific_buf = NULL;
891 k_cfg->layoutSpecific = specific_buf;
892
893 /* should do some kind of sanity check on the configuration.
894 * Store the sum of all the bytes in the last byte? */
895
896 /* configure the system */
897
898 /*
899 * Clear the entire RAID descriptor, just to make sure
900 * there is no stale data left in the case of a
901 * reconfiguration
902 */
903 bzero((char *) raidPtr, sizeof(RF_Raid_t));
904 raidPtr->raidid = unit;
905
906 retcode = rf_Configure(raidPtr, k_cfg, NULL);
907
908 if (retcode == 0) {
909
910 /* allow this many simultaneous IO's to
911 this RAID device */
912 raidPtr->openings = RAIDOUTSTANDING;
913
914 raidinit(raidPtr);
915 rf_markalldirty(raidPtr);
916 }
917 /* free the buffers. No return code here. */
918 if (k_cfg->layoutSpecificSize) {
919 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
920 }
921 RF_Free(k_cfg, sizeof(RF_Config_t));
922
923 return (retcode);
924
925 /* shutdown the system */
926 case RAIDFRAME_SHUTDOWN:
927
928 if ((error = raidlock(rs)) != 0)
929 return (error);
930
931 /*
932 * If somebody has a partition mounted, we shouldn't
933 * shutdown.
934 */
935
936 part = DISKPART(dev);
937 pmask = (1 << part);
938 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
939 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
940 (rs->sc_dkdev.dk_copenmask & pmask))) {
941 raidunlock(rs);
942 return (EBUSY);
943 }
944
945 retcode = rf_Shutdown(raidPtr);
946
947 pool_destroy(&rs->sc_cbufpool);
948
949 /* It's no longer initialized... */
950 rs->sc_flags &= ~RAIDF_INITED;
951
952 /* Detach the disk. */
953 disk_detach(&rs->sc_dkdev);
954
955 raidunlock(rs);
956
957 return (retcode);
958 case RAIDFRAME_GET_COMPONENT_LABEL:
959 clabel_ptr = (RF_ComponentLabel_t **) data;
960 /* need to read the component label for the disk indicated
961 by row,column in clabel */
962
963 /* For practice, let's get it directly fromdisk, rather
964 than from the in-core copy */
965 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
966 (RF_ComponentLabel_t *));
967 if (clabel == NULL)
968 return (ENOMEM);
969
970 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
971
972 retcode = copyin( *clabel_ptr, clabel,
973 sizeof(RF_ComponentLabel_t));
974
975 if (retcode) {
976 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
977 return(retcode);
978 }
979
980 row = clabel->row;
981 column = clabel->column;
982
983 if ((row < 0) || (row >= raidPtr->numRow) ||
984 (column < 0) || (column >= raidPtr->numCol)) {
985 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
986 return(EINVAL);
987 }
988
989 raidread_component_label(raidPtr->Disks[row][column].dev,
990 raidPtr->raid_cinfo[row][column].ci_vp,
991 clabel );
992
993 retcode = copyout((caddr_t) clabel,
994 (caddr_t) *clabel_ptr,
995 sizeof(RF_ComponentLabel_t));
996 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
997 return (retcode);
998
999 case RAIDFRAME_SET_COMPONENT_LABEL:
1000 clabel = (RF_ComponentLabel_t *) data;
1001
1002 /* XXX check the label for valid stuff... */
1003 /* Note that some things *should not* get modified --
1004 the user should be re-initing the labels instead of
1005 trying to patch things.
1006 */
1007
1008 printf("Got component label:\n");
1009 printf("Version: %d\n",clabel->version);
1010 printf("Serial Number: %d\n",clabel->serial_number);
1011 printf("Mod counter: %d\n",clabel->mod_counter);
1012 printf("Row: %d\n", clabel->row);
1013 printf("Column: %d\n", clabel->column);
1014 printf("Num Rows: %d\n", clabel->num_rows);
1015 printf("Num Columns: %d\n", clabel->num_columns);
1016 printf("Clean: %d\n", clabel->clean);
1017 printf("Status: %d\n", clabel->status);
1018
1019 row = clabel->row;
1020 column = clabel->column;
1021
1022 if ((row < 0) || (row >= raidPtr->numRow) ||
1023 (column < 0) || (column >= raidPtr->numCol)) {
1024 return(EINVAL);
1025 }
1026
1027 /* XXX this isn't allowed to do anything for now :-) */
1028
1029 /* XXX and before it is, we need to fill in the rest
1030 of the fields!?!?!?! */
1031 #if 0
1032 raidwrite_component_label(
1033 raidPtr->Disks[row][column].dev,
1034 raidPtr->raid_cinfo[row][column].ci_vp,
1035 clabel );
1036 #endif
1037 return (0);
1038
1039 case RAIDFRAME_INIT_LABELS:
1040 clabel = (RF_ComponentLabel_t *) data;
1041 /*
1042 we only want the serial number from
1043 the above. We get all the rest of the information
1044 from the config that was used to create this RAID
1045 set.
1046 */
1047
1048 raidPtr->serial_number = clabel->serial_number;
1049
1050 raid_init_component_label(raidPtr, &ci_label);
1051 ci_label.serial_number = clabel->serial_number;
1052
1053 for(row=0;row<raidPtr->numRow;row++) {
1054 ci_label.row = row;
1055 for(column=0;column<raidPtr->numCol;column++) {
1056 diskPtr = &raidPtr->Disks[row][column];
1057 ci_label.partitionSize = diskPtr->partitionSize;
1058 ci_label.column = column;
1059 raidwrite_component_label(
1060 raidPtr->Disks[row][column].dev,
1061 raidPtr->raid_cinfo[row][column].ci_vp,
1062 &ci_label );
1063 }
1064 }
1065
1066 return (retcode);
1067 case RAIDFRAME_SET_AUTOCONFIG:
1068 d = rf_set_autoconfig(raidPtr, *data);
1069 printf("New autoconfig value is: %d\n", d);
1070 *data = d;
1071 return (retcode);
1072
1073 case RAIDFRAME_SET_ROOT:
1074 d = rf_set_rootpartition(raidPtr, *data);
1075 printf("New rootpartition value is: %d\n", d);
1076 *data = d;
1077 return (retcode);
1078
1079 /* initialize all parity */
1080 case RAIDFRAME_REWRITEPARITY:
1081
1082 if (raidPtr->Layout.map->faultsTolerated == 0) {
1083 /* Parity for RAID 0 is trivially correct */
1084 raidPtr->parity_good = RF_RAID_CLEAN;
1085 return(0);
1086 }
1087
1088 if (raidPtr->parity_rewrite_in_progress == 1) {
1089 /* Re-write is already in progress! */
1090 return(EINVAL);
1091 }
1092
1093 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1094 rf_RewriteParityThread,
1095 raidPtr,"raid_parity");
1096 return (retcode);
1097
1098
1099 case RAIDFRAME_ADD_HOT_SPARE:
1100 sparePtr = (RF_SingleComponent_t *) data;
1101 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1102 printf("Adding spare\n");
1103 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1104 return(retcode);
1105
1106 case RAIDFRAME_REMOVE_HOT_SPARE:
1107 return(retcode);
1108
1109 case RAIDFRAME_DELETE_COMPONENT:
1110 componentPtr = (RF_SingleComponent_t *)data;
1111 memcpy( &component, componentPtr,
1112 sizeof(RF_SingleComponent_t));
1113 retcode = rf_delete_component(raidPtr, &component);
1114 return(retcode);
1115
1116 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1117 componentPtr = (RF_SingleComponent_t *)data;
1118 memcpy( &component, componentPtr,
1119 sizeof(RF_SingleComponent_t));
1120 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1121 return(retcode);
1122
1123 case RAIDFRAME_REBUILD_IN_PLACE:
1124
1125 if (raidPtr->Layout.map->faultsTolerated == 0) {
1126 /* Can't do this on a RAID 0!! */
1127 return(EINVAL);
1128 }
1129
1130 if (raidPtr->recon_in_progress == 1) {
1131 /* a reconstruct is already in progress! */
1132 return(EINVAL);
1133 }
1134
1135 componentPtr = (RF_SingleComponent_t *) data;
1136 memcpy( &component, componentPtr,
1137 sizeof(RF_SingleComponent_t));
1138 row = component.row;
1139 column = component.column;
1140 printf("Rebuild: %d %d\n",row, column);
1141 if ((row < 0) || (row >= raidPtr->numRow) ||
1142 (column < 0) || (column >= raidPtr->numCol)) {
1143 return(EINVAL);
1144 }
1145
1146 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1147 if (rrcopy == NULL)
1148 return(ENOMEM);
1149
1150 rrcopy->raidPtr = (void *) raidPtr;
1151 rrcopy->row = row;
1152 rrcopy->col = column;
1153
1154 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1155 rf_ReconstructInPlaceThread,
1156 rrcopy,"raid_reconip");
1157 return(retcode);
1158
1159 case RAIDFRAME_GET_INFO:
1160 if (!raidPtr->valid)
1161 return (ENODEV);
1162 ucfgp = (RF_DeviceConfig_t **) data;
1163 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1164 (RF_DeviceConfig_t *));
1165 if (d_cfg == NULL)
1166 return (ENOMEM);
1167 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1168 d_cfg->rows = raidPtr->numRow;
1169 d_cfg->cols = raidPtr->numCol;
1170 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1171 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1172 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1173 return (ENOMEM);
1174 }
1175 d_cfg->nspares = raidPtr->numSpare;
1176 if (d_cfg->nspares >= RF_MAX_DISKS) {
1177 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1178 return (ENOMEM);
1179 }
1180 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1181 d = 0;
1182 for (i = 0; i < d_cfg->rows; i++) {
1183 for (j = 0; j < d_cfg->cols; j++) {
1184 d_cfg->devs[d] = raidPtr->Disks[i][j];
1185 d++;
1186 }
1187 }
1188 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1189 d_cfg->spares[i] = raidPtr->Disks[0][j];
1190 }
1191 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1192 sizeof(RF_DeviceConfig_t));
1193 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1194
1195 return (retcode);
1196
1197 case RAIDFRAME_CHECK_PARITY:
1198 *(int *) data = raidPtr->parity_good;
1199 return (0);
1200
1201 case RAIDFRAME_RESET_ACCTOTALS:
1202 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1203 return (0);
1204
1205 case RAIDFRAME_GET_ACCTOTALS:
1206 totals = (RF_AccTotals_t *) data;
1207 *totals = raidPtr->acc_totals;
1208 return (0);
1209
1210 case RAIDFRAME_KEEP_ACCTOTALS:
1211 raidPtr->keep_acc_totals = *(int *)data;
1212 return (0);
1213
1214 case RAIDFRAME_GET_SIZE:
1215 *(int *) data = raidPtr->totalSectors;
1216 return (0);
1217
1218 /* fail a disk & optionally start reconstruction */
1219 case RAIDFRAME_FAIL_DISK:
1220
1221 if (raidPtr->Layout.map->faultsTolerated == 0) {
1222 /* Can't do this on a RAID 0!! */
1223 return(EINVAL);
1224 }
1225
1226 rr = (struct rf_recon_req *) data;
1227
1228 if (rr->row < 0 || rr->row >= raidPtr->numRow
1229 || rr->col < 0 || rr->col >= raidPtr->numCol)
1230 return (EINVAL);
1231
1232 printf("raid%d: Failing the disk: row: %d col: %d\n",
1233 unit, rr->row, rr->col);
1234
1235 /* make a copy of the recon request so that we don't rely on
1236 * the user's buffer */
1237 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1238 if (rrcopy == NULL)
1239 return(ENOMEM);
1240 bcopy(rr, rrcopy, sizeof(*rr));
1241 rrcopy->raidPtr = (void *) raidPtr;
1242
1243 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1244 rf_ReconThread,
1245 rrcopy,"raid_recon");
1246 return (0);
1247
1248 /* invoke a copyback operation after recon on whatever disk
1249 * needs it, if any */
1250 case RAIDFRAME_COPYBACK:
1251
1252 if (raidPtr->Layout.map->faultsTolerated == 0) {
1253 /* This makes no sense on a RAID 0!! */
1254 return(EINVAL);
1255 }
1256
1257 if (raidPtr->copyback_in_progress == 1) {
1258 /* Copyback is already in progress! */
1259 return(EINVAL);
1260 }
1261
1262 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1263 rf_CopybackThread,
1264 raidPtr,"raid_copyback");
1265 return (retcode);
1266
1267 /* return the percentage completion of reconstruction */
1268 case RAIDFRAME_CHECK_RECON_STATUS:
1269 if (raidPtr->Layout.map->faultsTolerated == 0) {
1270 /* This makes no sense on a RAID 0, so tell the
1271 user it's done. */
1272 *(int *) data = 100;
1273 return(0);
1274 }
1275 row = 0; /* XXX we only consider a single row... */
1276 if (raidPtr->status[row] != rf_rs_reconstructing)
1277 *(int *) data = 100;
1278 else
1279 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1280 return (0);
1281
1282 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1283 if (raidPtr->Layout.map->faultsTolerated == 0) {
1284 /* This makes no sense on a RAID 0 */
1285 return(EINVAL);
1286 }
1287 if (raidPtr->parity_rewrite_in_progress == 1) {
1288 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1289 } else {
1290 *(int *) data = 100;
1291 }
1292 return (0);
1293
1294 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1295 if (raidPtr->Layout.map->faultsTolerated == 0) {
1296 /* This makes no sense on a RAID 0 */
1297 return(EINVAL);
1298 }
1299 if (raidPtr->copyback_in_progress == 1) {
1300 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1301 raidPtr->Layout.numStripe;
1302 } else {
1303 *(int *) data = 100;
1304 }
1305 return (0);
1306
1307
1308 /* the sparetable daemon calls this to wait for the kernel to
1309 * need a spare table. this ioctl does not return until a
1310 * spare table is needed. XXX -- calling mpsleep here in the
1311 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1312 * -- I should either compute the spare table in the kernel,
1313 * or have a different -- XXX XXX -- interface (a different
1314 * character device) for delivering the table -- XXX */
1315 #if 0
1316 case RAIDFRAME_SPARET_WAIT:
1317 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1318 while (!rf_sparet_wait_queue)
1319 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1320 waitreq = rf_sparet_wait_queue;
1321 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1322 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1323
1324 /* structure assignment */
1325 *((RF_SparetWait_t *) data) = *waitreq;
1326
1327 RF_Free(waitreq, sizeof(*waitreq));
1328 return (0);
1329
1330 /* wakes up a process waiting on SPARET_WAIT and puts an error
1331 * code in it that will cause the dameon to exit */
1332 case RAIDFRAME_ABORT_SPARET_WAIT:
1333 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1334 waitreq->fcol = -1;
1335 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1336 waitreq->next = rf_sparet_wait_queue;
1337 rf_sparet_wait_queue = waitreq;
1338 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1339 wakeup(&rf_sparet_wait_queue);
1340 return (0);
1341
1342 /* used by the spare table daemon to deliver a spare table
1343 * into the kernel */
1344 case RAIDFRAME_SEND_SPARET:
1345
1346 /* install the spare table */
1347 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1348
1349 /* respond to the requestor. the return status of the spare
1350 * table installation is passed in the "fcol" field */
1351 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1352 waitreq->fcol = retcode;
1353 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1354 waitreq->next = rf_sparet_resp_queue;
1355 rf_sparet_resp_queue = waitreq;
1356 wakeup(&rf_sparet_resp_queue);
1357 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1358
1359 return (retcode);
1360 #endif
1361
1362 default:
1363 break; /* fall through to the os-specific code below */
1364
1365 }
1366
1367 if (!raidPtr->valid)
1368 return (EINVAL);
1369
1370 /*
1371 * Add support for "regular" device ioctls here.
1372 */
1373
1374 switch (cmd) {
1375 case DIOCGDINFO:
1376 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1377 break;
1378
1379 case DIOCGPART:
1380 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1381 ((struct partinfo *) data)->part =
1382 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1383 break;
1384
1385 case DIOCWDINFO:
1386 case DIOCSDINFO:
1387 if ((error = raidlock(rs)) != 0)
1388 return (error);
1389
1390 rs->sc_flags |= RAIDF_LABELLING;
1391
1392 error = setdisklabel(rs->sc_dkdev.dk_label,
1393 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1394 if (error == 0) {
1395 if (cmd == DIOCWDINFO)
1396 error = writedisklabel(RAIDLABELDEV(dev),
1397 raidstrategy, rs->sc_dkdev.dk_label,
1398 rs->sc_dkdev.dk_cpulabel);
1399 }
1400 rs->sc_flags &= ~RAIDF_LABELLING;
1401
1402 raidunlock(rs);
1403
1404 if (error)
1405 return (error);
1406 break;
1407
1408 case DIOCWLABEL:
1409 if (*(int *) data != 0)
1410 rs->sc_flags |= RAIDF_WLABEL;
1411 else
1412 rs->sc_flags &= ~RAIDF_WLABEL;
1413 break;
1414
1415 case DIOCGDEFLABEL:
1416 raidgetdefaultlabel(raidPtr, rs,
1417 (struct disklabel *) data);
1418 break;
1419
1420 default:
1421 retcode = ENOTTY;
1422 }
1423 return (retcode);
1424
1425 }
1426
1427
1428 /* raidinit -- complete the rest of the initialization for the
1429 RAIDframe device. */
1430
1431
1432 static void
1433 raidinit(raidPtr)
1434 RF_Raid_t *raidPtr;
1435 {
1436 struct raid_softc *rs;
1437 int unit;
1438
1439 unit = raidPtr->raidid;
1440
1441 rs = &raid_softc[unit];
1442 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1443 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1444
1445
1446 /* XXX should check return code first... */
1447 rs->sc_flags |= RAIDF_INITED;
1448
1449 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1450
1451 rs->sc_dkdev.dk_name = rs->sc_xname;
1452
1453 /* disk_attach actually creates space for the CPU disklabel, among
1454 * other things, so it's critical to call this *BEFORE* we try putzing
1455 * with disklabels. */
1456
1457 disk_attach(&rs->sc_dkdev);
1458
1459 /* XXX There may be a weird interaction here between this, and
1460 * protectedSectors, as used in RAIDframe. */
1461
1462 rs->sc_size = raidPtr->totalSectors;
1463
1464 }
1465
1466 /* wake up the daemon & tell it to get us a spare table
1467 * XXX
1468 * the entries in the queues should be tagged with the raidPtr
1469 * so that in the extremely rare case that two recons happen at once,
1470 * we know for which device were requesting a spare table
1471 * XXX
1472 *
1473 * XXX This code is not currently used. GO
1474 */
1475 int
1476 rf_GetSpareTableFromDaemon(req)
1477 RF_SparetWait_t *req;
1478 {
1479 int retcode;
1480
1481 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1482 req->next = rf_sparet_wait_queue;
1483 rf_sparet_wait_queue = req;
1484 wakeup(&rf_sparet_wait_queue);
1485
1486 /* mpsleep unlocks the mutex */
1487 while (!rf_sparet_resp_queue) {
1488 tsleep(&rf_sparet_resp_queue, PRIBIO,
1489 "raidframe getsparetable", 0);
1490 }
1491 req = rf_sparet_resp_queue;
1492 rf_sparet_resp_queue = req->next;
1493 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1494
1495 retcode = req->fcol;
1496 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1497 * alloc'd */
1498 return (retcode);
1499 }
1500
1501 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1502 * bp & passes it down.
1503 * any calls originating in the kernel must use non-blocking I/O
1504 * do some extra sanity checking to return "appropriate" error values for
1505 * certain conditions (to make some standard utilities work)
1506 *
1507 * Formerly known as: rf_DoAccessKernel
1508 */
1509 void
1510 raidstart(raidPtr)
1511 RF_Raid_t *raidPtr;
1512 {
1513 RF_SectorCount_t num_blocks, pb, sum;
1514 RF_RaidAddr_t raid_addr;
1515 int retcode;
1516 struct partition *pp;
1517 daddr_t blocknum;
1518 int unit;
1519 struct raid_softc *rs;
1520 int do_async;
1521 struct buf *bp;
1522
1523 unit = raidPtr->raidid;
1524 rs = &raid_softc[unit];
1525
1526 /* quick check to see if anything has died recently */
1527 RF_LOCK_MUTEX(raidPtr->mutex);
1528 if (raidPtr->numNewFailures > 0) {
1529 rf_update_component_labels(raidPtr);
1530 raidPtr->numNewFailures--;
1531 }
1532 RF_UNLOCK_MUTEX(raidPtr->mutex);
1533
1534 /* Check to see if we're at the limit... */
1535 RF_LOCK_MUTEX(raidPtr->mutex);
1536 while (raidPtr->openings > 0) {
1537 RF_UNLOCK_MUTEX(raidPtr->mutex);
1538
1539 /* get the next item, if any, from the queue */
1540 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1541 /* nothing more to do */
1542 return;
1543 }
1544 BUFQ_REMOVE(&rs->buf_queue, bp);
1545
1546 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1547 * partition.. Need to make it absolute to the underlying
1548 * device.. */
1549
1550 blocknum = bp->b_blkno;
1551 if (DISKPART(bp->b_dev) != RAW_PART) {
1552 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1553 blocknum += pp->p_offset;
1554 }
1555
1556 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1557 (int) blocknum));
1558
1559 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1560 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1561
1562 /* *THIS* is where we adjust what block we're going to...
1563 * but DO NOT TOUCH bp->b_blkno!!! */
1564 raid_addr = blocknum;
1565
1566 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1567 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1568 sum = raid_addr + num_blocks + pb;
1569 if (1 || rf_debugKernelAccess) {
1570 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1571 (int) raid_addr, (int) sum, (int) num_blocks,
1572 (int) pb, (int) bp->b_resid));
1573 }
1574 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1575 || (sum < num_blocks) || (sum < pb)) {
1576 bp->b_error = ENOSPC;
1577 bp->b_flags |= B_ERROR;
1578 bp->b_resid = bp->b_bcount;
1579 biodone(bp);
1580 RF_LOCK_MUTEX(raidPtr->mutex);
1581 continue;
1582 }
1583 /*
1584 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1585 */
1586
1587 if (bp->b_bcount & raidPtr->sectorMask) {
1588 bp->b_error = EINVAL;
1589 bp->b_flags |= B_ERROR;
1590 bp->b_resid = bp->b_bcount;
1591 biodone(bp);
1592 RF_LOCK_MUTEX(raidPtr->mutex);
1593 continue;
1594
1595 }
1596 db1_printf(("Calling DoAccess..\n"));
1597
1598
1599 RF_LOCK_MUTEX(raidPtr->mutex);
1600 raidPtr->openings--;
1601 RF_UNLOCK_MUTEX(raidPtr->mutex);
1602
1603 /*
1604 * Everything is async.
1605 */
1606 do_async = 1;
1607
1608 /* don't ever condition on bp->b_flags & B_WRITE.
1609 * always condition on B_READ instead */
1610
1611 /* XXX we're still at splbio() here... do we *really*
1612 need to be? */
1613
1614
1615 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1616 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1617 do_async, raid_addr, num_blocks,
1618 bp->b_un.b_addr, bp, NULL, NULL,
1619 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1620
1621
1622 RF_LOCK_MUTEX(raidPtr->mutex);
1623 }
1624 RF_UNLOCK_MUTEX(raidPtr->mutex);
1625 }
1626
1627
1628
1629
1630 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1631
1632 int
1633 rf_DispatchKernelIO(queue, req)
1634 RF_DiskQueue_t *queue;
1635 RF_DiskQueueData_t *req;
1636 {
1637 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1638 struct buf *bp;
1639 struct raidbuf *raidbp = NULL;
1640 struct raid_softc *rs;
1641 int unit;
1642 int s;
1643
1644 s=0;
1645 /* s = splbio();*/ /* want to test this */
1646 /* XXX along with the vnode, we also need the softc associated with
1647 * this device.. */
1648
1649 req->queue = queue;
1650
1651 unit = queue->raidPtr->raidid;
1652
1653 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1654
1655 if (unit >= numraid) {
1656 printf("Invalid unit number: %d %d\n", unit, numraid);
1657 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1658 }
1659 rs = &raid_softc[unit];
1660
1661 /* XXX is this the right place? */
1662 disk_busy(&rs->sc_dkdev);
1663
1664 bp = req->bp;
1665 #if 1
1666 /* XXX when there is a physical disk failure, someone is passing us a
1667 * buffer that contains old stuff!! Attempt to deal with this problem
1668 * without taking a performance hit... (not sure where the real bug
1669 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1670
1671 if (bp->b_flags & B_ERROR) {
1672 bp->b_flags &= ~B_ERROR;
1673 }
1674 if (bp->b_error != 0) {
1675 bp->b_error = 0;
1676 }
1677 #endif
1678 raidbp = RAIDGETBUF(rs);
1679
1680 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1681
1682 /*
1683 * context for raidiodone
1684 */
1685 raidbp->rf_obp = bp;
1686 raidbp->req = req;
1687
1688 LIST_INIT(&raidbp->rf_buf.b_dep);
1689
1690 switch (req->type) {
1691 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1692 /* XXX need to do something extra here.. */
1693 /* I'm leaving this in, as I've never actually seen it used,
1694 * and I'd like folks to report it... GO */
1695 printf(("WAKEUP CALLED\n"));
1696 queue->numOutstanding++;
1697
1698 /* XXX need to glue the original buffer into this?? */
1699
1700 KernelWakeupFunc(&raidbp->rf_buf);
1701 break;
1702
1703 case RF_IO_TYPE_READ:
1704 case RF_IO_TYPE_WRITE:
1705
1706 if (req->tracerec) {
1707 RF_ETIMER_START(req->tracerec->timer);
1708 }
1709 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1710 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1711 req->sectorOffset, req->numSector,
1712 req->buf, KernelWakeupFunc, (void *) req,
1713 queue->raidPtr->logBytesPerSector, req->b_proc);
1714
1715 if (rf_debugKernelAccess) {
1716 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1717 (long) bp->b_blkno));
1718 }
1719 queue->numOutstanding++;
1720 queue->last_deq_sector = req->sectorOffset;
1721 /* acc wouldn't have been let in if there were any pending
1722 * reqs at any other priority */
1723 queue->curPriority = req->priority;
1724
1725 db1_printf(("Going for %c to unit %d row %d col %d\n",
1726 req->type, unit, queue->row, queue->col));
1727 db1_printf(("sector %d count %d (%d bytes) %d\n",
1728 (int) req->sectorOffset, (int) req->numSector,
1729 (int) (req->numSector <<
1730 queue->raidPtr->logBytesPerSector),
1731 (int) queue->raidPtr->logBytesPerSector));
1732 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1733 raidbp->rf_buf.b_vp->v_numoutput++;
1734 }
1735 VOP_STRATEGY(&raidbp->rf_buf);
1736
1737 break;
1738
1739 default:
1740 panic("bad req->type in rf_DispatchKernelIO");
1741 }
1742 db1_printf(("Exiting from DispatchKernelIO\n"));
1743 /* splx(s); */ /* want to test this */
1744 return (0);
1745 }
1746 /* this is the callback function associated with a I/O invoked from
1747 kernel code.
1748 */
1749 static void
1750 KernelWakeupFunc(vbp)
1751 struct buf *vbp;
1752 {
1753 RF_DiskQueueData_t *req = NULL;
1754 RF_DiskQueue_t *queue;
1755 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1756 struct buf *bp;
1757 struct raid_softc *rs;
1758 int unit;
1759 register int s;
1760
1761 s = splbio();
1762 db1_printf(("recovering the request queue:\n"));
1763 req = raidbp->req;
1764
1765 bp = raidbp->rf_obp;
1766
1767 queue = (RF_DiskQueue_t *) req->queue;
1768
1769 if (raidbp->rf_buf.b_flags & B_ERROR) {
1770 bp->b_flags |= B_ERROR;
1771 bp->b_error = raidbp->rf_buf.b_error ?
1772 raidbp->rf_buf.b_error : EIO;
1773 }
1774
1775 /* XXX methinks this could be wrong... */
1776 #if 1
1777 bp->b_resid = raidbp->rf_buf.b_resid;
1778 #endif
1779
1780 if (req->tracerec) {
1781 RF_ETIMER_STOP(req->tracerec->timer);
1782 RF_ETIMER_EVAL(req->tracerec->timer);
1783 RF_LOCK_MUTEX(rf_tracing_mutex);
1784 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1785 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1786 req->tracerec->num_phys_ios++;
1787 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1788 }
1789 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1790
1791 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1792
1793
1794 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1795 * ballistic, and mark the component as hosed... */
1796
1797 if (bp->b_flags & B_ERROR) {
1798 /* Mark the disk as dead */
1799 /* but only mark it once... */
1800 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1801 rf_ds_optimal) {
1802 printf("raid%d: IO Error. Marking %s as failed.\n",
1803 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1804 queue->raidPtr->Disks[queue->row][queue->col].status =
1805 rf_ds_failed;
1806 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1807 queue->raidPtr->numFailures++;
1808 queue->raidPtr->numNewFailures++;
1809 /* XXX here we should bump the version number for each component, and write that data out */
1810 } else { /* Disk is already dead... */
1811 /* printf("Disk already marked as dead!\n"); */
1812 }
1813
1814 }
1815
1816 rs = &raid_softc[unit];
1817 RAIDPUTBUF(rs, raidbp);
1818
1819
1820 if (bp->b_resid == 0) {
1821 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1822 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1823 }
1824
1825 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1826 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1827
1828 splx(s);
1829 }
1830
1831
1832
1833 /*
1834 * initialize a buf structure for doing an I/O in the kernel.
1835 */
1836 static void
1837 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1838 logBytesPerSector, b_proc)
1839 struct buf *bp;
1840 struct vnode *b_vp;
1841 unsigned rw_flag;
1842 dev_t dev;
1843 RF_SectorNum_t startSect;
1844 RF_SectorCount_t numSect;
1845 caddr_t buf;
1846 void (*cbFunc) (struct buf *);
1847 void *cbArg;
1848 int logBytesPerSector;
1849 struct proc *b_proc;
1850 {
1851 /* bp->b_flags = B_PHYS | rw_flag; */
1852 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1853 bp->b_bcount = numSect << logBytesPerSector;
1854 bp->b_bufsize = bp->b_bcount;
1855 bp->b_error = 0;
1856 bp->b_dev = dev;
1857 bp->b_un.b_addr = buf;
1858 bp->b_blkno = startSect;
1859 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1860 if (bp->b_bcount == 0) {
1861 panic("bp->b_bcount is zero in InitBP!!\n");
1862 }
1863 bp->b_proc = b_proc;
1864 bp->b_iodone = cbFunc;
1865 bp->b_vp = b_vp;
1866
1867 }
1868
1869 static void
1870 raidgetdefaultlabel(raidPtr, rs, lp)
1871 RF_Raid_t *raidPtr;
1872 struct raid_softc *rs;
1873 struct disklabel *lp;
1874 {
1875 db1_printf(("Building a default label...\n"));
1876 bzero(lp, sizeof(*lp));
1877
1878 /* fabricate a label... */
1879 lp->d_secperunit = raidPtr->totalSectors;
1880 lp->d_secsize = raidPtr->bytesPerSector;
1881 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1882 lp->d_ntracks = 1;
1883 lp->d_ncylinders = raidPtr->totalSectors /
1884 (lp->d_nsectors * lp->d_ntracks);
1885 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1886
1887 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1888 lp->d_type = DTYPE_RAID;
1889 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1890 lp->d_rpm = 3600;
1891 lp->d_interleave = 1;
1892 lp->d_flags = 0;
1893
1894 lp->d_partitions[RAW_PART].p_offset = 0;
1895 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1896 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1897 lp->d_npartitions = RAW_PART + 1;
1898
1899 lp->d_magic = DISKMAGIC;
1900 lp->d_magic2 = DISKMAGIC;
1901 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1902
1903 }
1904 /*
1905 * Read the disklabel from the raid device. If one is not present, fake one
1906 * up.
1907 */
1908 static void
1909 raidgetdisklabel(dev)
1910 dev_t dev;
1911 {
1912 int unit = raidunit(dev);
1913 struct raid_softc *rs = &raid_softc[unit];
1914 char *errstring;
1915 struct disklabel *lp = rs->sc_dkdev.dk_label;
1916 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1917 RF_Raid_t *raidPtr;
1918
1919 db1_printf(("Getting the disklabel...\n"));
1920
1921 bzero(clp, sizeof(*clp));
1922
1923 raidPtr = raidPtrs[unit];
1924
1925 raidgetdefaultlabel(raidPtr, rs, lp);
1926
1927 /*
1928 * Call the generic disklabel extraction routine.
1929 */
1930 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1931 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1932 if (errstring)
1933 raidmakedisklabel(rs);
1934 else {
1935 int i;
1936 struct partition *pp;
1937
1938 /*
1939 * Sanity check whether the found disklabel is valid.
1940 *
1941 * This is necessary since total size of the raid device
1942 * may vary when an interleave is changed even though exactly
1943 * same componets are used, and old disklabel may used
1944 * if that is found.
1945 */
1946 if (lp->d_secperunit != rs->sc_size)
1947 printf("WARNING: %s: "
1948 "total sector size in disklabel (%d) != "
1949 "the size of raid (%ld)\n", rs->sc_xname,
1950 lp->d_secperunit, (long) rs->sc_size);
1951 for (i = 0; i < lp->d_npartitions; i++) {
1952 pp = &lp->d_partitions[i];
1953 if (pp->p_offset + pp->p_size > rs->sc_size)
1954 printf("WARNING: %s: end of partition `%c' "
1955 "exceeds the size of raid (%ld)\n",
1956 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1957 }
1958 }
1959
1960 }
1961 /*
1962 * Take care of things one might want to take care of in the event
1963 * that a disklabel isn't present.
1964 */
1965 static void
1966 raidmakedisklabel(rs)
1967 struct raid_softc *rs;
1968 {
1969 struct disklabel *lp = rs->sc_dkdev.dk_label;
1970 db1_printf(("Making a label..\n"));
1971
1972 /*
1973 * For historical reasons, if there's no disklabel present
1974 * the raw partition must be marked FS_BSDFFS.
1975 */
1976
1977 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1978
1979 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1980
1981 lp->d_checksum = dkcksum(lp);
1982 }
1983 /*
1984 * Lookup the provided name in the filesystem. If the file exists,
1985 * is a valid block device, and isn't being used by anyone else,
1986 * set *vpp to the file's vnode.
1987 * You'll find the original of this in ccd.c
1988 */
1989 int
1990 raidlookup(path, p, vpp)
1991 char *path;
1992 struct proc *p;
1993 struct vnode **vpp; /* result */
1994 {
1995 struct nameidata nd;
1996 struct vnode *vp;
1997 struct vattr va;
1998 int error;
1999
2000 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2001 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2002 #ifdef DEBUG
2003 printf("RAIDframe: vn_open returned %d\n", error);
2004 #endif
2005 return (error);
2006 }
2007 vp = nd.ni_vp;
2008 if (vp->v_usecount > 1) {
2009 VOP_UNLOCK(vp, 0);
2010 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2011 return (EBUSY);
2012 }
2013 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2014 VOP_UNLOCK(vp, 0);
2015 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2016 return (error);
2017 }
2018 /* XXX: eventually we should handle VREG, too. */
2019 if (va.va_type != VBLK) {
2020 VOP_UNLOCK(vp, 0);
2021 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2022 return (ENOTBLK);
2023 }
2024 VOP_UNLOCK(vp, 0);
2025 *vpp = vp;
2026 return (0);
2027 }
2028 /*
2029 * Wait interruptibly for an exclusive lock.
2030 *
2031 * XXX
2032 * Several drivers do this; it should be abstracted and made MP-safe.
2033 * (Hmm... where have we seen this warning before :-> GO )
2034 */
2035 static int
2036 raidlock(rs)
2037 struct raid_softc *rs;
2038 {
2039 int error;
2040
2041 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2042 rs->sc_flags |= RAIDF_WANTED;
2043 if ((error =
2044 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2045 return (error);
2046 }
2047 rs->sc_flags |= RAIDF_LOCKED;
2048 return (0);
2049 }
2050 /*
2051 * Unlock and wake up any waiters.
2052 */
2053 static void
2054 raidunlock(rs)
2055 struct raid_softc *rs;
2056 {
2057
2058 rs->sc_flags &= ~RAIDF_LOCKED;
2059 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2060 rs->sc_flags &= ~RAIDF_WANTED;
2061 wakeup(rs);
2062 }
2063 }
2064
2065
/*
 * Byte offset and byte length of the component-label area used by
 * raidread_component_label() and raidwrite_component_label().
 */
#define RF_COMPONENT_INFO_OFFSET	(16 * 1024)	/* bytes */
#define RF_COMPONENT_INFO_SIZE		(1 * 1024)	/* bytes */
2068
2069 int
2070 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2071 {
2072 RF_ComponentLabel_t clabel;
2073 raidread_component_label(dev, b_vp, &clabel);
2074 clabel.mod_counter = mod_counter;
2075 clabel.clean = RF_RAID_CLEAN;
2076 raidwrite_component_label(dev, b_vp, &clabel);
2077 return(0);
2078 }
2079
2080
2081 int
2082 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2083 {
2084 RF_ComponentLabel_t clabel;
2085 raidread_component_label(dev, b_vp, &clabel);
2086 clabel.mod_counter = mod_counter;
2087 clabel.clean = RF_RAID_DIRTY;
2088 raidwrite_component_label(dev, b_vp, &clabel);
2089 return(0);
2090 }
2091
2092 /* ARGSUSED */
2093 int
2094 raidread_component_label(dev, b_vp, clabel)
2095 dev_t dev;
2096 struct vnode *b_vp;
2097 RF_ComponentLabel_t *clabel;
2098 {
2099 struct buf *bp;
2100 int error;
2101
2102 /* XXX should probably ensure that we don't try to do this if
2103 someone has changed rf_protected_sectors. */
2104
2105 /* get a block of the appropriate size... */
2106 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2107 bp->b_dev = dev;
2108
2109 /* get our ducks in a row for the read */
2110 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2111 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2112 bp->b_flags = B_BUSY | B_READ;
2113 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2114
2115 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2116
2117 error = biowait(bp);
2118
2119 if (!error) {
2120 memcpy(clabel, bp->b_un.b_addr,
2121 sizeof(RF_ComponentLabel_t));
2122 #if 0
2123 rf_print_component_label( clabel );
2124 #endif
2125 } else {
2126 #if 0
2127 printf("Failed to read RAID component label!\n");
2128 #endif
2129 }
2130
2131 bp->b_flags = B_INVAL | B_AGE;
2132 brelse(bp);
2133 return(error);
2134 }
2135 /* ARGSUSED */
2136 int
2137 raidwrite_component_label(dev, b_vp, clabel)
2138 dev_t dev;
2139 struct vnode *b_vp;
2140 RF_ComponentLabel_t *clabel;
2141 {
2142 struct buf *bp;
2143 int error;
2144
2145 /* get a block of the appropriate size... */
2146 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2147 bp->b_dev = dev;
2148
2149 /* get our ducks in a row for the write */
2150 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2151 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2152 bp->b_flags = B_BUSY | B_WRITE;
2153 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2154
2155 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2156
2157 memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
2158
2159 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2160 error = biowait(bp);
2161 bp->b_flags = B_INVAL | B_AGE;
2162 brelse(bp);
2163 if (error) {
2164 #if 1
2165 printf("Failed to write RAID component info!\n");
2166 #endif
2167 }
2168
2169 return(error);
2170 }
2171
2172 void
2173 rf_markalldirty(raidPtr)
2174 RF_Raid_t *raidPtr;
2175 {
2176 RF_ComponentLabel_t clabel;
2177 int r,c;
2178
2179 raidPtr->mod_counter++;
2180 for (r = 0; r < raidPtr->numRow; r++) {
2181 for (c = 0; c < raidPtr->numCol; c++) {
2182 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2183 raidread_component_label(
2184 raidPtr->Disks[r][c].dev,
2185 raidPtr->raid_cinfo[r][c].ci_vp,
2186 &clabel);
2187 if (clabel.status == rf_ds_spared) {
2188 /* XXX do something special...
2189 but whatever you do, don't
2190 try to access it!! */
2191 } else {
2192 #if 0
2193 clabel.status =
2194 raidPtr->Disks[r][c].status;
2195 raidwrite_component_label(
2196 raidPtr->Disks[r][c].dev,
2197 raidPtr->raid_cinfo[r][c].ci_vp,
2198 &clabel);
2199 #endif
2200 raidmarkdirty(
2201 raidPtr->Disks[r][c].dev,
2202 raidPtr->raid_cinfo[r][c].ci_vp,
2203 raidPtr->mod_counter);
2204 }
2205 }
2206 }
2207 }
2208 /* printf("Component labels marked dirty.\n"); */
2209 #if 0
2210 for( c = 0; c < raidPtr->numSpare ; c++) {
2211 sparecol = raidPtr->numCol + c;
2212 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2213 /*
2214
2215 XXX this is where we get fancy and map this spare
2216 into it's correct spot in the array.
2217
2218 */
2219 /*
2220
2221 we claim this disk is "optimal" if it's
2222 rf_ds_used_spare, as that means it should be
2223 directly substitutable for the disk it replaced.
2224 We note that too...
2225
2226 */
2227
2228 for(i=0;i<raidPtr->numRow;i++) {
2229 for(j=0;j<raidPtr->numCol;j++) {
2230 if ((raidPtr->Disks[i][j].spareRow ==
2231 r) &&
2232 (raidPtr->Disks[i][j].spareCol ==
2233 sparecol)) {
2234 srow = r;
2235 scol = sparecol;
2236 break;
2237 }
2238 }
2239 }
2240
2241 raidread_component_label(
2242 raidPtr->Disks[r][sparecol].dev,
2243 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2244 &clabel);
2245 /* make sure status is noted */
2246 clabel.version = RF_COMPONENT_LABEL_VERSION;
2247 clabel.mod_counter = raidPtr->mod_counter;
2248 clabel.serial_number = raidPtr->serial_number;
2249 clabel.row = srow;
2250 clabel.column = scol;
2251 clabel.num_rows = raidPtr->numRow;
2252 clabel.num_columns = raidPtr->numCol;
2253 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2254 clabel.status = rf_ds_optimal;
2255 raidwrite_component_label(
2256 raidPtr->Disks[r][sparecol].dev,
2257 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2258 &clabel);
2259 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2260 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2261 }
2262 }
2263
2264 #endif
2265 }
2266
2267
2268 void
2269 rf_update_component_labels(raidPtr)
2270 RF_Raid_t *raidPtr;
2271 {
2272 RF_ComponentLabel_t clabel;
2273 int sparecol;
2274 int r,c;
2275 int i,j;
2276 int srow, scol;
2277
2278 srow = -1;
2279 scol = -1;
2280
2281 /* XXX should do extra checks to make sure things really are clean,
2282 rather than blindly setting the clean bit... */
2283
2284 raidPtr->mod_counter++;
2285
2286 for (r = 0; r < raidPtr->numRow; r++) {
2287 for (c = 0; c < raidPtr->numCol; c++) {
2288 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2289 raidread_component_label(
2290 raidPtr->Disks[r][c].dev,
2291 raidPtr->raid_cinfo[r][c].ci_vp,
2292 &clabel);
2293 /* make sure status is noted */
2294 clabel.status = rf_ds_optimal;
2295 /* bump the counter */
2296 clabel.mod_counter = raidPtr->mod_counter;
2297
2298 raidwrite_component_label(
2299 raidPtr->Disks[r][c].dev,
2300 raidPtr->raid_cinfo[r][c].ci_vp,
2301 &clabel);
2302 }
2303 /* else we don't touch it.. */
2304 }
2305 }
2306
2307 for( c = 0; c < raidPtr->numSpare ; c++) {
2308 sparecol = raidPtr->numCol + c;
2309 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2310 /*
2311
2312 we claim this disk is "optimal" if it's
2313 rf_ds_used_spare, as that means it should be
2314 directly substitutable for the disk it replaced.
2315 We note that too...
2316
2317 */
2318
2319 for(i=0;i<raidPtr->numRow;i++) {
2320 for(j=0;j<raidPtr->numCol;j++) {
2321 if ((raidPtr->Disks[i][j].spareRow ==
2322 0) &&
2323 (raidPtr->Disks[i][j].spareCol ==
2324 sparecol)) {
2325 srow = i;
2326 scol = j;
2327 break;
2328 }
2329 }
2330 }
2331
2332 /* XXX shouldn't *really* need this... */
2333 raidread_component_label(
2334 raidPtr->Disks[0][sparecol].dev,
2335 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2336 &clabel);
2337 /* make sure status is noted */
2338
2339 raid_init_component_label(raidPtr, &clabel);
2340
2341 clabel.mod_counter = raidPtr->mod_counter;
2342 clabel.row = srow;
2343 clabel.column = scol;
2344 clabel.status = rf_ds_optimal;
2345
2346 raidwrite_component_label(
2347 raidPtr->Disks[0][sparecol].dev,
2348 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2349 &clabel);
2350 }
2351 }
2352 /* printf("Component labels updated\n"); */
2353 }
2354
2355
2356 void
2357 rf_final_update_component_labels(raidPtr)
2358 RF_Raid_t *raidPtr;
2359 {
2360 RF_ComponentLabel_t clabel;
2361 int sparecol;
2362 int r,c;
2363 int i,j;
2364 int srow, scol;
2365
2366 srow = -1;
2367 scol = -1;
2368
2369 /* XXX should do extra checks to make sure things really are clean,
2370 rather than blindly setting the clean bit... */
2371
2372 raidPtr->mod_counter++;
2373
2374 for (r = 0; r < raidPtr->numRow; r++) {
2375 for (c = 0; c < raidPtr->numCol; c++) {
2376 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2377 raidread_component_label(
2378 raidPtr->Disks[r][c].dev,
2379 raidPtr->raid_cinfo[r][c].ci_vp,
2380 &clabel);
2381 /* make sure status is noted */
2382 clabel.status = rf_ds_optimal;
2383 /* bump the counter */
2384 clabel.mod_counter = raidPtr->mod_counter;
2385
2386 raidwrite_component_label(
2387 raidPtr->Disks[r][c].dev,
2388 raidPtr->raid_cinfo[r][c].ci_vp,
2389 &clabel);
2390 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2391 raidmarkclean(
2392 raidPtr->Disks[r][c].dev,
2393 raidPtr->raid_cinfo[r][c].ci_vp,
2394 raidPtr->mod_counter);
2395 }
2396 }
2397 /* else we don't touch it.. */
2398 }
2399 }
2400
2401 for( c = 0; c < raidPtr->numSpare ; c++) {
2402 sparecol = raidPtr->numCol + c;
2403 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2404 /*
2405
2406 we claim this disk is "optimal" if it's
2407 rf_ds_used_spare, as that means it should be
2408 directly substitutable for the disk it replaced.
2409 We note that too...
2410
2411 */
2412
2413 for(i=0;i<raidPtr->numRow;i++) {
2414 for(j=0;j<raidPtr->numCol;j++) {
2415 if ((raidPtr->Disks[i][j].spareRow ==
2416 0) &&
2417 (raidPtr->Disks[i][j].spareCol ==
2418 sparecol)) {
2419 srow = i;
2420 scol = j;
2421 break;
2422 }
2423 }
2424 }
2425
2426 /* XXX shouldn't *really* need this... */
2427 raidread_component_label(
2428 raidPtr->Disks[0][sparecol].dev,
2429 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2430 &clabel);
2431 /* make sure status is noted */
2432
2433 raid_init_component_label(raidPtr, &clabel);
2434
2435 clabel.mod_counter = raidPtr->mod_counter;
2436 clabel.row = srow;
2437 clabel.column = scol;
2438 clabel.status = rf_ds_optimal;
2439
2440 raidwrite_component_label(
2441 raidPtr->Disks[0][sparecol].dev,
2442 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2443 &clabel);
2444 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2445 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2446 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2447 raidPtr->mod_counter);
2448 }
2449 }
2450 }
2451 /* printf("Component labels updated\n"); */
2452 }
2453
2454 void
2455 rf_close_component(raidPtr, vp, auto_configured)
2456 RF_Raid_t *raidPtr;
2457 struct vnode *vp;
2458 int auto_configured;
2459 {
2460 struct proc *p;
2461
2462 p = raidPtr->engine_thread;
2463
2464 if (vp != NULL) {
2465 if (auto_configured == 1) {
2466 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2467 vput(vp);
2468
2469 } else {
2470 VOP_UNLOCK(vp, 0);
2471 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2472 }
2473 } else {
2474 printf("vnode was NULL\n");
2475 }
2476 }
2477
2478
2479 void
2480 rf_UnconfigureVnodes(raidPtr)
2481 RF_Raid_t *raidPtr;
2482 {
2483 int r,c;
2484 struct proc *p;
2485 struct vnode *vp;
2486 int acd;
2487
2488
2489 /* We take this opportunity to close the vnodes like we should.. */
2490
2491 p = raidPtr->engine_thread;
2492
2493 for (r = 0; r < raidPtr->numRow; r++) {
2494 for (c = 0; c < raidPtr->numCol; c++) {
2495 printf("Closing vnode for row: %d col: %d\n", r, c);
2496 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2497 acd = raidPtr->Disks[r][c].auto_configured;
2498 rf_close_component(raidPtr, vp, acd);
2499 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2500 raidPtr->Disks[r][c].auto_configured = 0;
2501 }
2502 }
2503 for (r = 0; r < raidPtr->numSpare; r++) {
2504 printf("Closing vnode for spare: %d\n", r);
2505 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2506 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2507 rf_close_component(raidPtr, vp, acd);
2508 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2509 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2510 }
2511 }
2512
2513
2514 void
2515 rf_ReconThread(req)
2516 struct rf_recon_req *req;
2517 {
2518 int s;
2519 RF_Raid_t *raidPtr;
2520
2521 s = splbio();
2522 raidPtr = (RF_Raid_t *) req->raidPtr;
2523 raidPtr->recon_in_progress = 1;
2524
2525 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2526 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2527
2528 /* XXX get rid of this! we don't need it at all.. */
2529 RF_Free(req, sizeof(*req));
2530
2531 raidPtr->recon_in_progress = 0;
2532 splx(s);
2533
2534 /* That's all... */
2535 kthread_exit(0); /* does not return */
2536 }
2537
2538 void
2539 rf_RewriteParityThread(raidPtr)
2540 RF_Raid_t *raidPtr;
2541 {
2542 int retcode;
2543 int s;
2544
2545 raidPtr->parity_rewrite_in_progress = 1;
2546 s = splbio();
2547 retcode = rf_RewriteParity(raidPtr);
2548 splx(s);
2549 if (retcode) {
2550 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2551 } else {
2552 /* set the clean bit! If we shutdown correctly,
2553 the clean bit on each component label will get
2554 set */
2555 raidPtr->parity_good = RF_RAID_CLEAN;
2556 }
2557 raidPtr->parity_rewrite_in_progress = 0;
2558
2559 /* That's all... */
2560 kthread_exit(0); /* does not return */
2561 }
2562
2563
2564 void
2565 rf_CopybackThread(raidPtr)
2566 RF_Raid_t *raidPtr;
2567 {
2568 int s;
2569
2570 raidPtr->copyback_in_progress = 1;
2571 s = splbio();
2572 rf_CopybackReconstructedData(raidPtr);
2573 splx(s);
2574 raidPtr->copyback_in_progress = 0;
2575
2576 /* That's all... */
2577 kthread_exit(0); /* does not return */
2578 }
2579
2580
2581 void
2582 rf_ReconstructInPlaceThread(req)
2583 struct rf_recon_req *req;
2584 {
2585 int retcode;
2586 int s;
2587 RF_Raid_t *raidPtr;
2588
2589 s = splbio();
2590 raidPtr = req->raidPtr;
2591 raidPtr->recon_in_progress = 1;
2592 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2593 RF_Free(req, sizeof(*req));
2594 raidPtr->recon_in_progress = 0;
2595 splx(s);
2596
2597 /* That's all... */
2598 kthread_exit(0); /* does not return */
2599 }
2600
/*
 * rf_mountroot_hook: placeholder hook taking a device; it currently
 * does nothing.
 *
 *	dev	the device the hook is invoked for (ignored)
 */
/* ARGSUSED */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* nothing to do */
	return;
}
2607
2608
2609 RF_AutoConfig_t *
2610 rf_find_raid_components()
2611 {
2612 struct devnametobdevmaj *dtobdm;
2613 struct vnode *vp;
2614 struct disklabel label;
2615 struct device *dv;
2616 char *cd_name;
2617 dev_t dev;
2618 int error;
2619 int i;
2620 int good_one;
2621 RF_ComponentLabel_t *clabel;
2622 RF_AutoConfig_t *ac_list;
2623 RF_AutoConfig_t *ac;
2624
2625
2626 /* initialize the AutoConfig list */
2627 ac_list = NULL;
2628
2629 if (raidautoconfig) {
2630
2631 /* we begin by trolling through *all* the devices on the system */
2632
2633 for (dv = alldevs.tqh_first; dv != NULL;
2634 dv = dv->dv_list.tqe_next) {
2635
2636 /* we are only interested in disks... */
2637 if (dv->dv_class != DV_DISK)
2638 continue;
2639
2640 /* we don't care about floppies... */
2641 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2642 continue;
2643 }
2644
2645 /* need to find the device_name_to_block_device_major stuff */
2646 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2647 dtobdm = dev_name2blk;
2648 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2649 dtobdm++;
2650 }
2651
2652 /* get a vnode for the raw partition of this disk */
2653
2654 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2655 if (bdevvp(dev, &vp))
2656 panic("RAID can't alloc vnode");
2657
2658 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2659
2660 if (error) {
2661 /* "Who cares." Continue looking
2662 for something that exists*/
2663 vput(vp);
2664 continue;
2665 }
2666
2667 /* Ok, the disk exists. Go get the disklabel. */
2668 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2669 FREAD, NOCRED, 0);
2670 if (error) {
2671 /*
2672 * XXX can't happen - open() would
2673 * have errored out (or faked up one)
2674 */
2675 printf("can't get label for dev %s%c (%d)!?!?\n",
2676 dv->dv_xname, 'a' + RAW_PART, error);
2677 }
2678
2679 /* don't need this any more. We'll allocate it again
2680 a little later if we really do... */
2681 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2682 vput(vp);
2683
2684 for (i=0; i < label.d_npartitions; i++) {
2685 /* We only support partitions marked as RAID */
2686 if (label.d_partitions[i].p_fstype != FS_RAID)
2687 continue;
2688
2689 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2690 if (bdevvp(dev, &vp))
2691 panic("RAID can't alloc vnode");
2692
2693 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2694 if (error) {
2695 /* Whatever... */
2696 vput(vp);
2697 continue;
2698 }
2699
2700 good_one = 0;
2701
2702 clabel = (RF_ComponentLabel_t *)
2703 malloc(sizeof(RF_ComponentLabel_t),
2704 M_RAIDFRAME, M_NOWAIT);
2705 if (clabel == NULL) {
2706 /* XXX CLEANUP HERE */
2707 printf("RAID auto config: out of memory!\n");
2708 return(NULL); /* XXX probably should panic? */
2709 }
2710
2711 if (!raidread_component_label(dev, vp, clabel)) {
2712 /* Got the label. Does it look reasonable? */
2713 if (rf_reasonable_label(clabel) &&
2714 (clabel->partitionSize <=
2715 label.d_partitions[i].p_size)) {
2716 #if DEBUG
2717 printf("Component on: %s%c: %d\n",
2718 dv->dv_xname, 'a'+i,
2719 label.d_partitions[i].p_size);
2720 rf_print_component_label(clabel);
2721 #endif
2722 /* if it's reasonable, add it,
2723 else ignore it. */
2724 ac = (RF_AutoConfig_t *)
2725 malloc(sizeof(RF_AutoConfig_t),
2726 M_RAIDFRAME,
2727 M_NOWAIT);
2728 if (ac == NULL) {
2729 /* XXX should panic?? */
2730 return(NULL);
2731 }
2732
2733 sprintf(ac->devname, "%s%c",
2734 dv->dv_xname, 'a'+i);
2735 ac->dev = dev;
2736 ac->vp = vp;
2737 ac->clabel = clabel;
2738 ac->next = ac_list;
2739 ac_list = ac;
2740 good_one = 1;
2741 }
2742 }
2743 if (!good_one) {
2744 /* cleanup */
2745 free(clabel, M_RAIDFRAME);
2746 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2747 vput(vp);
2748 }
2749 }
2750 }
2751 }
2752 return(ac_list);
2753 }
2754
2755 static int
2756 rf_reasonable_label(clabel)
2757 RF_ComponentLabel_t *clabel;
2758 {
2759
2760 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2761 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2762 ((clabel->clean == RF_RAID_CLEAN) ||
2763 (clabel->clean == RF_RAID_DIRTY)) &&
2764 clabel->row >=0 &&
2765 clabel->column >= 0 &&
2766 clabel->num_rows > 0 &&
2767 clabel->num_columns > 0 &&
2768 clabel->row < clabel->num_rows &&
2769 clabel->column < clabel->num_columns &&
2770 clabel->blockSize > 0 &&
2771 clabel->numBlocks > 0) {
2772 /* label looks reasonable enough... */
2773 return(1);
2774 }
2775 return(0);
2776 }
2777
2778
2779 void
2780 rf_print_component_label(clabel)
2781 RF_ComponentLabel_t *clabel;
2782 {
2783 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2784 clabel->row, clabel->column,
2785 clabel->num_rows, clabel->num_columns);
2786 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2787 clabel->version, clabel->serial_number,
2788 clabel->mod_counter);
2789 printf(" Clean: %s Status: %d\n",
2790 clabel->clean ? "Yes" : "No", clabel->status );
2791 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2792 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2793 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2794 (char) clabel->parityConfig, clabel->blockSize,
2795 clabel->numBlocks);
2796 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2797 printf(" Last configured as: raid%d\n", clabel->last_unit );
2798 #if 0
2799 printf(" Config order: %d\n", clabel->config_order);
2800 #endif
2801
2802 }
2803
2804 RF_ConfigSet_t *
2805 rf_create_auto_sets(ac_list)
2806 RF_AutoConfig_t *ac_list;
2807 {
2808 RF_AutoConfig_t *ac;
2809 RF_ConfigSet_t *config_sets;
2810 RF_ConfigSet_t *cset;
2811 RF_AutoConfig_t *ac_next;
2812
2813
2814 config_sets = NULL;
2815
2816 /* Go through the AutoConfig list, and figure out which components
2817 belong to what sets. */
2818 ac = ac_list;
2819 while(ac!=NULL) {
2820 /* we're going to putz with ac->next, so save it here
2821 for use at the end of the loop */
2822 ac_next = ac->next;
2823
2824 if (config_sets == NULL) {
2825 /* will need at least this one... */
2826 config_sets = (RF_ConfigSet_t *)
2827 malloc(sizeof(RF_ConfigSet_t),
2828 M_RAIDFRAME, M_NOWAIT);
2829 if (config_sets == NULL) {
2830 panic("rf_create_auto_sets: No memory!\n");
2831 }
2832 /* this one is easy :) */
2833 config_sets->ac = ac;
2834 config_sets->next = NULL;
2835 config_sets->rootable = 0;
2836 ac->next = NULL;
2837 } else {
2838 /* which set does this component fit into? */
2839 cset = config_sets;
2840 while(cset!=NULL) {
2841 if (rf_does_it_fit(cset, ac)) {
2842 /* looks like it matches */
2843 ac->next = cset->ac;
2844 cset->ac = ac;
2845 break;
2846 }
2847 cset = cset->next;
2848 }
2849 if (cset==NULL) {
2850 /* didn't find a match above... new set..*/
2851 cset = (RF_ConfigSet_t *)
2852 malloc(sizeof(RF_ConfigSet_t),
2853 M_RAIDFRAME, M_NOWAIT);
2854 if (cset == NULL) {
2855 panic("rf_create_auto_sets: No memory!\n");
2856 }
2857 cset->ac = ac;
2858 ac->next = NULL;
2859 cset->next = config_sets;
2860 cset->rootable = 0;
2861 config_sets = cset;
2862 }
2863 }
2864 ac = ac_next;
2865 }
2866
2867
2868 return(config_sets);
2869 }
2870
2871 static int
2872 rf_does_it_fit(cset, ac)
2873 RF_ConfigSet_t *cset;
2874 RF_AutoConfig_t *ac;
2875 {
2876 RF_ComponentLabel_t *clabel1, *clabel2;
2877
2878 /* If this one matches the *first* one in the set, that's good
2879 enough, since the other members of the set would have been
2880 through here too... */
2881 /* note that we are not checking partitionSize here..
2882
2883 Note that we are also not checking the mod_counters here.
2884 If everything else matches execpt the mod_counter, that's
2885 good enough for this test. We will deal with the mod_counters
2886 a little later in the autoconfiguration process.
2887
2888 (clabel1->mod_counter == clabel2->mod_counter) &&
2889
2890 */
2891
2892 clabel1 = cset->ac->clabel;
2893 clabel2 = ac->clabel;
2894 if ((clabel1->version == clabel2->version) &&
2895 (clabel1->serial_number == clabel2->serial_number) &&
2896 (clabel1->num_rows == clabel2->num_rows) &&
2897 (clabel1->num_columns == clabel2->num_columns) &&
2898 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2899 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2900 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2901 (clabel1->parityConfig == clabel2->parityConfig) &&
2902 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2903 (clabel1->blockSize == clabel2->blockSize) &&
2904 (clabel1->numBlocks == clabel2->numBlocks) &&
2905 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2906 (clabel1->root_partition == clabel2->root_partition) &&
2907 (clabel1->last_unit == clabel2->last_unit) &&
2908 (clabel1->config_order == clabel2->config_order)) {
2909 /* if it get's here, it almost *has* to be a match */
2910 } else {
2911 /* it's not consistent with somebody in the set..
2912 punt */
2913 return(0);
2914 }
2915 /* all was fine.. it must fit... */
2916 return(1);
2917 }
2918
2919 int
2920 rf_have_enough_components(cset)
2921 RF_ConfigSet_t *cset;
2922 {
2923 RF_AutoConfig_t *ac;
2924 RF_AutoConfig_t *auto_config;
2925 RF_ComponentLabel_t *clabel;
2926 int r,c;
2927 int num_rows;
2928 int num_cols;
2929 int num_missing;
2930
2931 /* check to see that we have enough 'live' components
2932 of this set. If so, we can configure it if necessary */
2933
2934 num_rows = cset->ac->clabel->num_rows;
2935 num_cols = cset->ac->clabel->num_columns;
2936
2937 /* XXX Check for duplicate components!?!?!? */
2938
2939 num_missing = 0;
2940 auto_config = cset->ac;
2941
2942 for(r=0; r<num_rows; r++) {
2943 for(c=0; c<num_cols; c++) {
2944 ac = auto_config;
2945 while(ac!=NULL) {
2946 if (ac->clabel==NULL) {
2947 /* big-time bad news. */
2948 goto fail;
2949 }
2950 if ((ac->clabel->row == r) &&
2951 (ac->clabel->column == c)) {
2952 /* it's this one... */
2953 #if DEBUG
2954 printf("Found: %s at %d,%d\n",
2955 ac->devname,r,c);
2956 #endif
2957 break;
2958 }
2959 ac=ac->next;
2960 }
2961 if (ac==NULL) {
2962 /* Didn't find one here! */
2963 num_missing++;
2964 }
2965 }
2966 }
2967
2968 clabel = cset->ac->clabel;
2969
2970 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
2971 ((clabel->parityConfig == '1') && (num_missing > 1)) ||
2972 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
2973 ((clabel->parityConfig == '5') && (num_missing > 1))) {
2974 /* XXX this needs to be made *much* more general */
2975 /* Too many failures */
2976 return(0);
2977 }
2978 /* otherwise, all is well, and we've got enough to take a kick
2979 at autoconfiguring this set */
2980 return(1);
2981 fail:
2982 return(0);
2983
2984 }
2985
2986 void
2987 rf_create_configuration(ac,config,raidPtr)
2988 RF_AutoConfig_t *ac;
2989 RF_Config_t *config;
2990 RF_Raid_t *raidPtr;
2991 {
2992 RF_ComponentLabel_t *clabel;
2993
2994 clabel = ac->clabel;
2995
2996 /* 1. Fill in the common stuff */
2997 config->numRow = clabel->num_rows;
2998 config->numCol = clabel->num_columns;
2999 config->numSpare = 0; /* XXX should this be set here? */
3000 config->sectPerSU = clabel->sectPerSU;
3001 config->SUsPerPU = clabel->SUsPerPU;
3002 config->SUsPerRU = clabel->SUsPerRU;
3003 config->parityConfig = clabel->parityConfig;
3004 /* XXX... */
3005 strcpy(config->diskQueueType,"fifo");
3006 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3007 config->layoutSpecificSize = 0; /* XXX ?? */
3008
3009 while(ac!=NULL) {
3010 /* row/col values will be in range due to the checks
3011 in reasonable_label() */
3012 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3013 ac->devname);
3014 ac = ac->next;
3015 }
3016
3017 }
3018
3019 int
3020 rf_set_autoconfig(raidPtr, new_value)
3021 RF_Raid_t *raidPtr;
3022 int new_value;
3023 {
3024 RF_ComponentLabel_t clabel;
3025 struct vnode *vp;
3026 dev_t dev;
3027 int row, column;
3028
3029 raidPtr->autoconfigure = new_value;
3030 for(row=0; row<raidPtr->numRow; row++) {
3031 for(column=0; column<raidPtr->numCol; column++) {
3032 dev = raidPtr->Disks[row][column].dev;
3033 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3034 raidread_component_label(dev, vp, &clabel);
3035 clabel.autoconfigure = new_value;
3036 raidwrite_component_label(dev, vp, &clabel);
3037 }
3038 }
3039 return(new_value);
3040 }
3041
3042 int
3043 rf_set_rootpartition(raidPtr, new_value)
3044 RF_Raid_t *raidPtr;
3045 int new_value;
3046 {
3047 RF_ComponentLabel_t clabel;
3048 struct vnode *vp;
3049 dev_t dev;
3050 int row, column;
3051
3052 raidPtr->root_partition = new_value;
3053 for(row=0; row<raidPtr->numRow; row++) {
3054 for(column=0; column<raidPtr->numCol; column++) {
3055 dev = raidPtr->Disks[row][column].dev;
3056 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3057 raidread_component_label(dev, vp, &clabel);
3058 clabel.root_partition = new_value;
3059 raidwrite_component_label(dev, vp, &clabel);
3060 }
3061 }
3062 return(new_value);
3063 }
3064
3065 void
3066 rf_release_all_vps(cset)
3067 RF_ConfigSet_t *cset;
3068 {
3069 RF_AutoConfig_t *ac;
3070
3071 ac = cset->ac;
3072 while(ac!=NULL) {
3073 /* Close the vp, and give it back */
3074 if (ac->vp) {
3075 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3076 vput(ac->vp);
3077 }
3078 ac = ac->next;
3079 }
3080 }
3081
3082
3083 void
3084 rf_cleanup_config_set(cset)
3085 RF_ConfigSet_t *cset;
3086 {
3087 RF_AutoConfig_t *ac;
3088 RF_AutoConfig_t *next_ac;
3089
3090 ac = cset->ac;
3091 while(ac!=NULL) {
3092 next_ac = ac->next;
3093 /* nuke the label */
3094 free(ac->clabel, M_RAIDFRAME);
3095 /* cleanup the config structure */
3096 free(ac, M_RAIDFRAME);
3097 /* "next.." */
3098 ac = next_ac;
3099 }
3100 /* and, finally, nuke the config set */
3101 free(cset, M_RAIDFRAME);
3102 }
3103
3104
3105 void
3106 raid_init_component_label(raidPtr, clabel)
3107 RF_Raid_t *raidPtr;
3108 RF_ComponentLabel_t *clabel;
3109 {
3110 /* current version number */
3111 clabel->version = RF_COMPONENT_LABEL_VERSION;
3112 clabel->serial_number = raidPtr->serial_number;
3113 clabel->mod_counter = raidPtr->mod_counter;
3114 clabel->num_rows = raidPtr->numRow;
3115 clabel->num_columns = raidPtr->numCol;
3116 clabel->clean = RF_RAID_DIRTY; /* not clean */
3117 clabel->status = rf_ds_optimal; /* "It's good!" */
3118
3119 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3120 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3121 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3122
3123 clabel->blockSize = raidPtr->bytesPerSector;
3124 clabel->numBlocks = raidPtr->sectorsPerDisk;
3125
3126 /* XXX not portable */
3127 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3128 clabel->maxOutstanding = raidPtr->maxOutstanding;
3129 clabel->autoconfigure = raidPtr->autoconfigure;
3130 clabel->root_partition = raidPtr->root_partition;
3131 clabel->last_unit = raidPtr->raidid;
3132 clabel->config_order = raidPtr->config_order;
3133 }
3134
3135 int
3136 rf_auto_config_set(cset,unit)
3137 RF_ConfigSet_t *cset;
3138 int *unit;
3139 {
3140 RF_Raid_t *raidPtr;
3141 RF_Config_t *config;
3142 int raidID;
3143 int retcode;
3144
3145 printf("RAID autoconfigure\n");
3146
3147 retcode = 0;
3148 *unit = -1;
3149
3150 /* 1. Create a config structure */
3151
3152 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3153 M_RAIDFRAME,
3154 M_NOWAIT);
3155 if (config==NULL) {
3156 printf("Out of mem!?!?\n");
3157 /* XXX do something more intelligent here. */
3158 return(1);
3159 }
3160 /* XXX raidID needs to be set correctly.. */
3161
3162 /*
3163 2. Figure out what RAID ID this one is supposed to live at
3164 See if we can get the same RAID dev that it was configured
3165 on last time..
3166 */
3167
3168 raidID = cset->ac->clabel->last_unit;
3169 if ((raidID < 0) || (raidID >= numraid)) {
3170 /* let's not wander off into lala land. */
3171 raidID = numraid - 1;
3172 }
3173 if (raidPtrs[raidID]->valid != 0) {
3174
3175 /*
3176 Nope... Go looking for an alternative...
3177 Start high so we don't immediately use raid0 if that's
3178 not taken.
3179 */
3180
3181 for(raidID = numraid; raidID >= 0; raidID--) {
3182 if (raidPtrs[raidID]->valid == 0) {
3183 /* can use this one! */
3184 break;
3185 }
3186 }
3187 }
3188
3189 if (raidID < 0) {
3190 /* punt... */
3191 printf("Unable to auto configure this set!\n");
3192 printf("(Out of RAID devs!)\n");
3193 return(1);
3194 }
3195 printf("Configuring raid%d:\n",raidID);
3196 raidPtr = raidPtrs[raidID];
3197
3198 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3199 raidPtr->raidid = raidID;
3200 raidPtr->openings = RAIDOUTSTANDING;
3201
3202 /* 3. Build the configuration structure */
3203 rf_create_configuration(cset->ac, config, raidPtr);
3204
3205 /* 4. Do the configuration */
3206 retcode = rf_Configure(raidPtr, config, cset->ac);
3207
3208 if (retcode == 0) {
3209
3210 raidinit(raidPtrs[raidID]);
3211
3212 rf_markalldirty(raidPtrs[raidID]);
3213 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3214 if (cset->ac->clabel->root_partition==1) {
3215 /* everything configured just fine. Make a note
3216 that this set is eligible to be root. */
3217 cset->rootable = 1;
3218 /* XXX do this here? */
3219 raidPtrs[raidID]->root_partition = 1;
3220 }
3221 }
3222
3223 /* 5. Cleanup */
3224 free(config, M_RAIDFRAME);
3225
3226 *unit = raidID;
3227 return(retcode);
3228 }
3229