rf_netbsdkintf.c revision 1.80 1 /* $NetBSD: rf_netbsdkintf.c,v 1.80 2000/05/27 18:23:27 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include "raid.h"
139 #include "opt_raid_autoconfig.h"
140 #include "rf_raid.h"
141 #include "rf_raidframe.h"
142 #include "rf_copyback.h"
143 #include "rf_dag.h"
144 #include "rf_dagflags.h"
145 #include "rf_diskqueue.h"
146 #include "rf_acctrace.h"
147 #include "rf_etimer.h"
148 #include "rf_general.h"
149 #include "rf_debugMem.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_debugprint.h"
155 #include "rf_threadstuff.h"
156 #include "rf_configure.h"
157
/* Debug verbosity: db1_printf() output is emitted only when this is > 0. */
int rf_kdebug_level = 0;

/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.
 *
 * The argument is a complete, parenthesized printf() argument list.
 * Both variants expand to a single do { } while (0) statement so that
 * the macro can be used as the body of an unbraced if/else without
 * capturing a following `else' (DEBUG case) or leaving a stray `;'
 * after a compound statement (non-DEBUG case).
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else /* DEBUG */
#define db1_printf(a) do { } while (0)
#endif /* DEBUG */
165
166 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
167
168 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
169
170 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
171 * spare table */
172 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
173 * installation process */
174
175 /* prototypes */
176 static void KernelWakeupFunc(struct buf * bp);
177 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
178 dev_t dev, RF_SectorNum_t startSect,
179 RF_SectorCount_t numSect, caddr_t buf,
180 void (*cbFunc) (struct buf *), void *cbArg,
181 int logBytesPerSector, struct proc * b_proc);
182 static void raidinit __P((RF_Raid_t *));
183
184 void raidattach __P((int));
185 int raidsize __P((dev_t));
186 int raidopen __P((dev_t, int, int, struct proc *));
187 int raidclose __P((dev_t, int, int, struct proc *));
188 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
189 int raidwrite __P((dev_t, struct uio *, int));
190 int raidread __P((dev_t, struct uio *, int));
191 void raidstrategy __P((struct buf *));
192 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
193
194 /*
195 * Pilfered from ccd.c
196 */
197
198 struct raidbuf {
199 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
200 struct buf *rf_obp; /* ptr. to original I/O buf */
201 int rf_flags; /* misc. flags */
202 RF_DiskQueueData_t *req;/* the request that this was part of.. */
203 };
204
205
/* Fetch a buffer from the unit's component buffer pool (PR_NOWAIT). */
#define	RAIDGETBUF(rs)		pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)

/* Give buffer `cbp' back to the unit's component buffer pool. */
#define	RAIDPUTBUF(rs, cbp)	pool_put(&(rs)->sc_cbufpool, cbp)
208
209 /* XXX Not sure if the following should be replacing the raidPtrs above,
210 or if it should be used in conjunction with that...
211 */
212
213 struct raid_softc {
214 int sc_flags; /* flags */
215 int sc_cflags; /* configuration flags */
216 size_t sc_size; /* size of the raid device */
217 char sc_xname[20]; /* XXX external name */
218 struct disk sc_dkdev; /* generic disk device info */
219 struct pool sc_cbufpool; /* component buffer pool */
220 struct buf_queue buf_queue; /* used for the device queue */
221 };
/* Bits stored in raid_softc.sc_flags. */
#define	RAIDF_INITED	0x01	/* unit has been initialized */
#define	RAIDF_WLABEL	0x02	/* label area is writable */
#define	RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define	RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define	RAIDF_LOCKED	0x80	/* unit is locked */

/* Unit number encoded in a dev_t. */
#define	raidunit(x)	DISKUNIT(x)

/* Number of units; set by raidattach(). */
int numraid = 0;
231
232 /*
233 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
234 * Be aware that large numbers can allow the driver to consume a lot of
235 * kernel memory, especially on writes, and in degraded mode reads.
236 *
237 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
238 * a single 64K write will typically require 64K for the old data,
239 * 64K for the old parity, and 64K for the new parity, for a total
240 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
243 *
244 * Now in degraded mode, for example, a 64K read on the above setup may
245 * require data reconstruction, which will require *all* of the 4 remaining
246 * disks to participate -- 4 * 32K/disk == 128K again.
247 */
248
/* Default number of simultaneous I/Os; may be overridden at build time. */
#ifndef RAIDOUTSTANDING
#define	RAIDOUTSTANDING	6
#endif

/* dev_t naming the raw partition of the unit that `dev' belongs to. */
#define	RAIDLABELDEV(dev)						\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
255
256 /* declared here, and made public, for the benefit of KVM stuff.. */
257 struct raid_softc *raid_softc;
258
259 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
260 struct disklabel *));
261 static void raidgetdisklabel __P((dev_t));
262 static void raidmakedisklabel __P((struct raid_softc *));
263
264 static int raidlock __P((struct raid_softc *));
265 static void raidunlock __P((struct raid_softc *));
266
267 static void rf_markalldirty __P((RF_Raid_t *));
268 void rf_mountroot_hook __P((struct device *));
269
270 struct device *raidrootdev;
271
272 void rf_ReconThread __P((struct rf_recon_req *));
273 /* XXX what I want is: */
274 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
275 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
276 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
277 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
278 void rf_buildroothack __P((void *));
279
280 RF_AutoConfig_t *rf_find_raid_components __P((void));
281 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
282 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
283 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
284 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
285 RF_Raid_t *));
286 int rf_set_autoconfig __P((RF_Raid_t *, int));
287 int rf_set_rootpartition __P((RF_Raid_t *, int));
288 void rf_release_all_vps __P((RF_ConfigSet_t *));
289 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
290 int rf_have_enough_components __P((RF_ConfigSet_t *));
291 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
292
293 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
294 allow autoconfig to take place.
295 Note that this is overridden by having
296 RAID_AUTOCONFIG as an option in the
297 kernel config file. */
298 extern struct device *booted_device;
299
300 void
301 raidattach(num)
302 int num;
303 {
304 int raidID;
305 int i, rc;
306 RF_AutoConfig_t *ac_list; /* autoconfig list */
307 RF_ConfigSet_t *config_sets;
308
309 #ifdef DEBUG
310 printf("raidattach: Asked for %d units\n", num);
311 #endif
312
313 if (num <= 0) {
314 #ifdef DIAGNOSTIC
315 panic("raidattach: count <= 0");
316 #endif
317 return;
318 }
319 /* This is where all the initialization stuff gets done. */
320
321 numraid = num;
322
323 /* Make some space for requested number of units... */
324
325 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
326 if (raidPtrs == NULL) {
327 panic("raidPtrs is NULL!!\n");
328 }
329
330 rc = rf_mutex_init(&rf_sparet_wait_mutex);
331 if (rc) {
332 RF_PANIC();
333 }
334
335 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
336
337 for (i = 0; i < num; i++)
338 raidPtrs[i] = NULL;
339 rc = rf_BootRaidframe();
340 if (rc == 0)
341 printf("Kernelized RAIDframe activated\n");
342 else
343 panic("Serious error booting RAID!!\n");
344
345 /* put together some datastructures like the CCD device does.. This
346 * lets us lock the device and what-not when it gets opened. */
347
348 raid_softc = (struct raid_softc *)
349 malloc(num * sizeof(struct raid_softc),
350 M_RAIDFRAME, M_NOWAIT);
351 if (raid_softc == NULL) {
352 printf("WARNING: no memory for RAIDframe driver\n");
353 return;
354 }
355
356 bzero(raid_softc, num * sizeof(struct raid_softc));
357
358 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
359 M_RAIDFRAME, M_NOWAIT);
360 if (raidrootdev == NULL) {
361 panic("No memory for RAIDframe driver!!?!?!\n");
362 }
363
364 for (raidID = 0; raidID < num; raidID++) {
365 BUFQ_INIT(&raid_softc[raidID].buf_queue);
366
367 raidrootdev[raidID].dv_class = DV_DISK;
368 raidrootdev[raidID].dv_cfdata = NULL;
369 raidrootdev[raidID].dv_unit = raidID;
370 raidrootdev[raidID].dv_parent = NULL;
371 raidrootdev[raidID].dv_flags = 0;
372 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
373
374 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
375 (RF_Raid_t *));
376 if (raidPtrs[raidID] == NULL) {
377 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
378 numraid = raidID;
379 return;
380 }
381 }
382
383 #if RAID_AUTOCONFIG
384 raidautoconfig = 1;
385 #endif
386
387 if (raidautoconfig) {
388 /* 1. locate all RAID components on the system */
389
390 #if DEBUG
391 printf("Searching for raid components...\n");
392 #endif
393 ac_list = rf_find_raid_components();
394
395 /* 2. sort them into their respective sets */
396
397 config_sets = rf_create_auto_sets(ac_list);
398
399 /* 3. evaluate each set and configure the valid ones
400 This gets done in rf_buildroothack() */
401
402 /* schedule the creation of the thread to do the
403 "/ on RAID" stuff */
404
405 kthread_create(rf_buildroothack,config_sets);
406
407 #if 0
408 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
409 #endif
410 }
411
412 }
413
414 void
415 rf_buildroothack(arg)
416 void *arg;
417 {
418 RF_ConfigSet_t *config_sets = arg;
419 RF_ConfigSet_t *cset;
420 RF_ConfigSet_t *next_cset;
421 int retcode;
422 int raidID;
423 int rootID;
424 int num_root;
425
426 num_root = 0;
427 cset = config_sets;
428 while(cset != NULL ) {
429 next_cset = cset->next;
430 if (rf_have_enough_components(cset) &&
431 cset->ac->clabel->autoconfigure==1) {
432 retcode = rf_auto_config_set(cset,&raidID);
433 if (!retcode) {
434 if (cset->rootable) {
435 rootID = raidID;
436 num_root++;
437 }
438 } else {
439 /* The autoconfig didn't work :( */
440 #if DEBUG
441 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
442 #endif
443 rf_release_all_vps(cset);
444 }
445 } else {
446 /* we're not autoconfiguring this set...
447 release the associated resources */
448 rf_release_all_vps(cset);
449 }
450 /* cleanup */
451 rf_cleanup_config_set(cset);
452 cset = next_cset;
453 }
454 if (boothowto & RB_ASKNAME) {
455 /* We don't auto-config... */
456 } else {
457 /* They didn't ask, and we found something bootable... */
458
459 if (num_root == 1) {
460 booted_device = &raidrootdev[rootID];
461 } else if (num_root > 1) {
462 /* we can't guess.. require the user to answer... */
463 boothowto |= RB_ASKNAME;
464 }
465 }
466 }
467
468
469 int
470 raidsize(dev)
471 dev_t dev;
472 {
473 struct raid_softc *rs;
474 struct disklabel *lp;
475 int part, unit, omask, size;
476
477 unit = raidunit(dev);
478 if (unit >= numraid)
479 return (-1);
480 rs = &raid_softc[unit];
481
482 if ((rs->sc_flags & RAIDF_INITED) == 0)
483 return (-1);
484
485 part = DISKPART(dev);
486 omask = rs->sc_dkdev.dk_openmask & (1 << part);
487 lp = rs->sc_dkdev.dk_label;
488
489 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
490 return (-1);
491
492 if (lp->d_partitions[part].p_fstype != FS_SWAP)
493 size = -1;
494 else
495 size = lp->d_partitions[part].p_size *
496 (lp->d_secsize / DEV_BSIZE);
497
498 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
499 return (-1);
500
501 return (size);
502
503 }
504
505 int
506 raiddump(dev, blkno, va, size)
507 dev_t dev;
508 daddr_t blkno;
509 caddr_t va;
510 size_t size;
511 {
512 /* Not implemented. */
513 return ENXIO;
514 }
515 /* ARGSUSED */
516 int
517 raidopen(dev, flags, fmt, p)
518 dev_t dev;
519 int flags, fmt;
520 struct proc *p;
521 {
522 int unit = raidunit(dev);
523 struct raid_softc *rs;
524 struct disklabel *lp;
525 int part, pmask;
526 int error = 0;
527
528 if (unit >= numraid)
529 return (ENXIO);
530 rs = &raid_softc[unit];
531
532 if ((error = raidlock(rs)) != 0)
533 return (error);
534 lp = rs->sc_dkdev.dk_label;
535
536 part = DISKPART(dev);
537 pmask = (1 << part);
538
539 db1_printf(("Opening raid device number: %d partition: %d\n",
540 unit, part));
541
542
543 if ((rs->sc_flags & RAIDF_INITED) &&
544 (rs->sc_dkdev.dk_openmask == 0))
545 raidgetdisklabel(dev);
546
547 /* make sure that this partition exists */
548
549 if (part != RAW_PART) {
550 db1_printf(("Not a raw partition..\n"));
551 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
552 ((part >= lp->d_npartitions) ||
553 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
554 error = ENXIO;
555 raidunlock(rs);
556 db1_printf(("Bailing out...\n"));
557 return (error);
558 }
559 }
560 /* Prevent this unit from being unconfigured while open. */
561 switch (fmt) {
562 case S_IFCHR:
563 rs->sc_dkdev.dk_copenmask |= pmask;
564 break;
565
566 case S_IFBLK:
567 rs->sc_dkdev.dk_bopenmask |= pmask;
568 break;
569 }
570
571 if ((rs->sc_dkdev.dk_openmask == 0) &&
572 ((rs->sc_flags & RAIDF_INITED) != 0)) {
573 /* First one... mark things as dirty... Note that we *MUST*
574 have done a configure before this. I DO NOT WANT TO BE
575 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
576 THAT THEY BELONG TOGETHER!!!!! */
577 /* XXX should check to see if we're only open for reading
578 here... If so, we needn't do this, but then need some
579 other way of keeping track of what's happened.. */
580
581 rf_markalldirty( raidPtrs[unit] );
582 }
583
584
585 rs->sc_dkdev.dk_openmask =
586 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
587
588 raidunlock(rs);
589
590 return (error);
591
592
593 }
594 /* ARGSUSED */
595 int
596 raidclose(dev, flags, fmt, p)
597 dev_t dev;
598 int flags, fmt;
599 struct proc *p;
600 {
601 int unit = raidunit(dev);
602 struct raid_softc *rs;
603 int error = 0;
604 int part;
605
606 if (unit >= numraid)
607 return (ENXIO);
608 rs = &raid_softc[unit];
609
610 if ((error = raidlock(rs)) != 0)
611 return (error);
612
613 part = DISKPART(dev);
614
615 /* ...that much closer to allowing unconfiguration... */
616 switch (fmt) {
617 case S_IFCHR:
618 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
619 break;
620
621 case S_IFBLK:
622 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
623 break;
624 }
625 rs->sc_dkdev.dk_openmask =
626 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
627
628 if ((rs->sc_dkdev.dk_openmask == 0) &&
629 ((rs->sc_flags & RAIDF_INITED) != 0)) {
630 /* Last one... device is not unconfigured yet.
631 Device shutdown has taken care of setting the
632 clean bits if RAIDF_INITED is not set
633 mark things as clean... */
634 #if 0
635 printf("Last one on raid%d. Updating status.\n",unit);
636 #endif
637 rf_final_update_component_labels( raidPtrs[unit] );
638 }
639
640 raidunlock(rs);
641 return (0);
642
643 }
644
645 void
646 raidstrategy(bp)
647 struct buf *bp;
648 {
649 int s;
650
651 unsigned int raidID = raidunit(bp->b_dev);
652 RF_Raid_t *raidPtr;
653 struct raid_softc *rs = &raid_softc[raidID];
654 struct disklabel *lp;
655 int wlabel;
656
657 if ((rs->sc_flags & RAIDF_INITED) ==0) {
658 bp->b_error = ENXIO;
659 bp->b_flags = B_ERROR;
660 bp->b_resid = bp->b_bcount;
661 biodone(bp);
662 return;
663 }
664 if (raidID >= numraid || !raidPtrs[raidID]) {
665 bp->b_error = ENODEV;
666 bp->b_flags |= B_ERROR;
667 bp->b_resid = bp->b_bcount;
668 biodone(bp);
669 return;
670 }
671 raidPtr = raidPtrs[raidID];
672 if (!raidPtr->valid) {
673 bp->b_error = ENODEV;
674 bp->b_flags |= B_ERROR;
675 bp->b_resid = bp->b_bcount;
676 biodone(bp);
677 return;
678 }
679 if (bp->b_bcount == 0) {
680 db1_printf(("b_bcount is zero..\n"));
681 biodone(bp);
682 return;
683 }
684 lp = rs->sc_dkdev.dk_label;
685
686 /*
687 * Do bounds checking and adjust transfer. If there's an
688 * error, the bounds check will flag that for us.
689 */
690
691 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
692 if (DISKPART(bp->b_dev) != RAW_PART)
693 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
694 db1_printf(("Bounds check failed!!:%d %d\n",
695 (int) bp->b_blkno, (int) wlabel));
696 biodone(bp);
697 return;
698 }
699 s = splbio();
700
701 bp->b_resid = 0;
702
703 /* stuff it onto our queue */
704 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
705
706 raidstart(raidPtrs[raidID]);
707
708 splx(s);
709 }
710 /* ARGSUSED */
711 int
712 raidread(dev, uio, flags)
713 dev_t dev;
714 struct uio *uio;
715 int flags;
716 {
717 int unit = raidunit(dev);
718 struct raid_softc *rs;
719 int part;
720
721 if (unit >= numraid)
722 return (ENXIO);
723 rs = &raid_softc[unit];
724
725 if ((rs->sc_flags & RAIDF_INITED) == 0)
726 return (ENXIO);
727 part = DISKPART(dev);
728
729 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
730
731 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
732
733 }
734 /* ARGSUSED */
735 int
736 raidwrite(dev, uio, flags)
737 dev_t dev;
738 struct uio *uio;
739 int flags;
740 {
741 int unit = raidunit(dev);
742 struct raid_softc *rs;
743
744 if (unit >= numraid)
745 return (ENXIO);
746 rs = &raid_softc[unit];
747
748 if ((rs->sc_flags & RAIDF_INITED) == 0)
749 return (ENXIO);
750 db1_printf(("raidwrite\n"));
751 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
752
753 }
754
755 int
756 raidioctl(dev, cmd, data, flag, p)
757 dev_t dev;
758 u_long cmd;
759 caddr_t data;
760 int flag;
761 struct proc *p;
762 {
763 int unit = raidunit(dev);
764 int error = 0;
765 int part, pmask;
766 struct raid_softc *rs;
767 RF_Config_t *k_cfg, *u_cfg;
768 RF_Raid_t *raidPtr;
769 RF_RaidDisk_t *diskPtr;
770 RF_AccTotals_t *totals;
771 RF_DeviceConfig_t *d_cfg, **ucfgp;
772 u_char *specific_buf;
773 int retcode = 0;
774 int row;
775 int column;
776 struct rf_recon_req *rrcopy, *rr;
777 RF_ComponentLabel_t *clabel;
778 RF_ComponentLabel_t ci_label;
779 RF_ComponentLabel_t **clabel_ptr;
780 RF_SingleComponent_t *sparePtr,*componentPtr;
781 RF_SingleComponent_t hot_spare;
782 RF_SingleComponent_t component;
783 int i, j, d;
784
785 if (unit >= numraid)
786 return (ENXIO);
787 rs = &raid_softc[unit];
788 raidPtr = raidPtrs[unit];
789
790 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
791 (int) DISKPART(dev), (int) unit, (int) cmd));
792
793 /* Must be open for writes for these commands... */
794 switch (cmd) {
795 case DIOCSDINFO:
796 case DIOCWDINFO:
797 case DIOCWLABEL:
798 if ((flag & FWRITE) == 0)
799 return (EBADF);
800 }
801
802 /* Must be initialized for these... */
803 switch (cmd) {
804 case DIOCGDINFO:
805 case DIOCSDINFO:
806 case DIOCWDINFO:
807 case DIOCGPART:
808 case DIOCWLABEL:
809 case DIOCGDEFLABEL:
810 case RAIDFRAME_SHUTDOWN:
811 case RAIDFRAME_REWRITEPARITY:
812 case RAIDFRAME_GET_INFO:
813 case RAIDFRAME_RESET_ACCTOTALS:
814 case RAIDFRAME_GET_ACCTOTALS:
815 case RAIDFRAME_KEEP_ACCTOTALS:
816 case RAIDFRAME_GET_SIZE:
817 case RAIDFRAME_FAIL_DISK:
818 case RAIDFRAME_COPYBACK:
819 case RAIDFRAME_CHECK_RECON_STATUS:
820 case RAIDFRAME_GET_COMPONENT_LABEL:
821 case RAIDFRAME_SET_COMPONENT_LABEL:
822 case RAIDFRAME_ADD_HOT_SPARE:
823 case RAIDFRAME_REMOVE_HOT_SPARE:
824 case RAIDFRAME_INIT_LABELS:
825 case RAIDFRAME_REBUILD_IN_PLACE:
826 case RAIDFRAME_CHECK_PARITY:
827 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
828 case RAIDFRAME_CHECK_COPYBACK_STATUS:
829 case RAIDFRAME_SET_AUTOCONFIG:
830 case RAIDFRAME_SET_ROOT:
831 case RAIDFRAME_DELETE_COMPONENT:
832 case RAIDFRAME_INCORPORATE_HOT_SPARE:
833 if ((rs->sc_flags & RAIDF_INITED) == 0)
834 return (ENXIO);
835 }
836
837 switch (cmd) {
838
839 /* configure the system */
840 case RAIDFRAME_CONFIGURE:
841
842 if (raidPtr->valid) {
843 /* There is a valid RAID set running on this unit! */
844 printf("raid%d: Device already configured!\n",unit);
845 return(EINVAL);
846 }
847
848 /* copy-in the configuration information */
849 /* data points to a pointer to the configuration structure */
850
851 u_cfg = *((RF_Config_t **) data);
852 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
853 if (k_cfg == NULL) {
854 return (ENOMEM);
855 }
856 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
857 sizeof(RF_Config_t));
858 if (retcode) {
859 RF_Free(k_cfg, sizeof(RF_Config_t));
860 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
861 retcode));
862 return (retcode);
863 }
864 /* allocate a buffer for the layout-specific data, and copy it
865 * in */
866 if (k_cfg->layoutSpecificSize) {
867 if (k_cfg->layoutSpecificSize > 10000) {
868 /* sanity check */
869 RF_Free(k_cfg, sizeof(RF_Config_t));
870 return (EINVAL);
871 }
872 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
873 (u_char *));
874 if (specific_buf == NULL) {
875 RF_Free(k_cfg, sizeof(RF_Config_t));
876 return (ENOMEM);
877 }
878 retcode = copyin(k_cfg->layoutSpecific,
879 (caddr_t) specific_buf,
880 k_cfg->layoutSpecificSize);
881 if (retcode) {
882 RF_Free(k_cfg, sizeof(RF_Config_t));
883 RF_Free(specific_buf,
884 k_cfg->layoutSpecificSize);
885 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
886 retcode));
887 return (retcode);
888 }
889 } else
890 specific_buf = NULL;
891 k_cfg->layoutSpecific = specific_buf;
892
893 /* should do some kind of sanity check on the configuration.
894 * Store the sum of all the bytes in the last byte? */
895
896 /* configure the system */
897
898 /*
899 * Clear the entire RAID descriptor, just to make sure
900 * there is no stale data left in the case of a
901 * reconfiguration
902 */
903 bzero((char *) raidPtr, sizeof(RF_Raid_t));
904 raidPtr->raidid = unit;
905
906 retcode = rf_Configure(raidPtr, k_cfg, NULL);
907
908 if (retcode == 0) {
909
910 /* allow this many simultaneous IO's to
911 this RAID device */
912 raidPtr->openings = RAIDOUTSTANDING;
913
914 raidinit(raidPtr);
915 rf_markalldirty(raidPtr);
916 }
917 /* free the buffers. No return code here. */
918 if (k_cfg->layoutSpecificSize) {
919 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
920 }
921 RF_Free(k_cfg, sizeof(RF_Config_t));
922
923 return (retcode);
924
925 /* shutdown the system */
926 case RAIDFRAME_SHUTDOWN:
927
928 if ((error = raidlock(rs)) != 0)
929 return (error);
930
931 /*
932 * If somebody has a partition mounted, we shouldn't
933 * shutdown.
934 */
935
936 part = DISKPART(dev);
937 pmask = (1 << part);
938 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
939 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
940 (rs->sc_dkdev.dk_copenmask & pmask))) {
941 raidunlock(rs);
942 return (EBUSY);
943 }
944
945 retcode = rf_Shutdown(raidPtr);
946
947 pool_destroy(&rs->sc_cbufpool);
948
949 /* It's no longer initialized... */
950 rs->sc_flags &= ~RAIDF_INITED;
951
952 /* Detach the disk. */
953 disk_detach(&rs->sc_dkdev);
954
955 raidunlock(rs);
956
957 return (retcode);
958 case RAIDFRAME_GET_COMPONENT_LABEL:
959 clabel_ptr = (RF_ComponentLabel_t **) data;
960 /* need to read the component label for the disk indicated
961 by row,column in clabel */
962
963 /* For practice, let's get it directly fromdisk, rather
964 than from the in-core copy */
965 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
966 (RF_ComponentLabel_t *));
967 if (clabel == NULL)
968 return (ENOMEM);
969
970 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
971
972 retcode = copyin( *clabel_ptr, clabel,
973 sizeof(RF_ComponentLabel_t));
974
975 if (retcode) {
976 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
977 return(retcode);
978 }
979
980 row = clabel->row;
981 column = clabel->column;
982
983 if ((row < 0) || (row >= raidPtr->numRow) ||
984 (column < 0) || (column >= raidPtr->numCol)) {
985 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
986 return(EINVAL);
987 }
988
989 raidread_component_label(raidPtr->Disks[row][column].dev,
990 raidPtr->raid_cinfo[row][column].ci_vp,
991 clabel );
992
993 retcode = copyout((caddr_t) clabel,
994 (caddr_t) *clabel_ptr,
995 sizeof(RF_ComponentLabel_t));
996 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
997 return (retcode);
998
999 case RAIDFRAME_SET_COMPONENT_LABEL:
1000 clabel = (RF_ComponentLabel_t *) data;
1001
1002 /* XXX check the label for valid stuff... */
1003 /* Note that some things *should not* get modified --
1004 the user should be re-initing the labels instead of
1005 trying to patch things.
1006 */
1007
1008 printf("Got component label:\n");
1009 printf("Version: %d\n",clabel->version);
1010 printf("Serial Number: %d\n",clabel->serial_number);
1011 printf("Mod counter: %d\n",clabel->mod_counter);
1012 printf("Row: %d\n", clabel->row);
1013 printf("Column: %d\n", clabel->column);
1014 printf("Num Rows: %d\n", clabel->num_rows);
1015 printf("Num Columns: %d\n", clabel->num_columns);
1016 printf("Clean: %d\n", clabel->clean);
1017 printf("Status: %d\n", clabel->status);
1018
1019 row = clabel->row;
1020 column = clabel->column;
1021
1022 if ((row < 0) || (row >= raidPtr->numRow) ||
1023 (column < 0) || (column >= raidPtr->numCol)) {
1024 return(EINVAL);
1025 }
1026
1027 /* XXX this isn't allowed to do anything for now :-) */
1028
1029 /* XXX and before it is, we need to fill in the rest
1030 of the fields!?!?!?! */
1031 #if 0
1032 raidwrite_component_label(
1033 raidPtr->Disks[row][column].dev,
1034 raidPtr->raid_cinfo[row][column].ci_vp,
1035 clabel );
1036 #endif
1037 return (0);
1038
1039 case RAIDFRAME_INIT_LABELS:
1040 clabel = (RF_ComponentLabel_t *) data;
1041 /*
1042 we only want the serial number from
1043 the above. We get all the rest of the information
1044 from the config that was used to create this RAID
1045 set.
1046 */
1047
1048 raidPtr->serial_number = clabel->serial_number;
1049
1050 raid_init_component_label(raidPtr, &ci_label);
1051 ci_label.serial_number = clabel->serial_number;
1052
1053 for(row=0;row<raidPtr->numRow;row++) {
1054 ci_label.row = row;
1055 for(column=0;column<raidPtr->numCol;column++) {
1056 diskPtr = &raidPtr->Disks[row][column];
1057 ci_label.partitionSize = diskPtr->partitionSize;
1058 ci_label.column = column;
1059 raidwrite_component_label(
1060 raidPtr->Disks[row][column].dev,
1061 raidPtr->raid_cinfo[row][column].ci_vp,
1062 &ci_label );
1063 }
1064 }
1065
1066 return (retcode);
1067 case RAIDFRAME_SET_AUTOCONFIG:
1068 d = rf_set_autoconfig(raidPtr, *(int *) data);
1069 printf("New autoconfig value is: %d\n", d);
1070 *(int *) data = d;
1071 return (retcode);
1072
1073 case RAIDFRAME_SET_ROOT:
1074 d = rf_set_rootpartition(raidPtr, *(int *) data);
1075 printf("New rootpartition value is: %d\n", d);
1076 *(int *) data = d;
1077 return (retcode);
1078
1079 /* initialize all parity */
1080 case RAIDFRAME_REWRITEPARITY:
1081
1082 if (raidPtr->Layout.map->faultsTolerated == 0) {
1083 /* Parity for RAID 0 is trivially correct */
1084 raidPtr->parity_good = RF_RAID_CLEAN;
1085 return(0);
1086 }
1087
1088 if (raidPtr->parity_rewrite_in_progress == 1) {
1089 /* Re-write is already in progress! */
1090 return(EINVAL);
1091 }
1092
1093 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1094 rf_RewriteParityThread,
1095 raidPtr,"raid_parity");
1096 return (retcode);
1097
1098
1099 case RAIDFRAME_ADD_HOT_SPARE:
1100 sparePtr = (RF_SingleComponent_t *) data;
1101 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1102 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1103 return(retcode);
1104
1105 case RAIDFRAME_REMOVE_HOT_SPARE:
1106 return(retcode);
1107
1108 case RAIDFRAME_DELETE_COMPONENT:
1109 componentPtr = (RF_SingleComponent_t *)data;
1110 memcpy( &component, componentPtr,
1111 sizeof(RF_SingleComponent_t));
1112 retcode = rf_delete_component(raidPtr, &component);
1113 return(retcode);
1114
1115 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1116 componentPtr = (RF_SingleComponent_t *)data;
1117 memcpy( &component, componentPtr,
1118 sizeof(RF_SingleComponent_t));
1119 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1120 return(retcode);
1121
1122 case RAIDFRAME_REBUILD_IN_PLACE:
1123
1124 if (raidPtr->Layout.map->faultsTolerated == 0) {
1125 /* Can't do this on a RAID 0!! */
1126 return(EINVAL);
1127 }
1128
1129 if (raidPtr->recon_in_progress == 1) {
1130 /* a reconstruct is already in progress! */
1131 return(EINVAL);
1132 }
1133
1134 componentPtr = (RF_SingleComponent_t *) data;
1135 memcpy( &component, componentPtr,
1136 sizeof(RF_SingleComponent_t));
1137 row = component.row;
1138 column = component.column;
1139 printf("Rebuild: %d %d\n",row, column);
1140 if ((row < 0) || (row >= raidPtr->numRow) ||
1141 (column < 0) || (column >= raidPtr->numCol)) {
1142 return(EINVAL);
1143 }
1144
1145 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1146 if (rrcopy == NULL)
1147 return(ENOMEM);
1148
1149 rrcopy->raidPtr = (void *) raidPtr;
1150 rrcopy->row = row;
1151 rrcopy->col = column;
1152
1153 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1154 rf_ReconstructInPlaceThread,
1155 rrcopy,"raid_reconip");
1156 return(retcode);
1157
1158 case RAIDFRAME_GET_INFO:
1159 if (!raidPtr->valid)
1160 return (ENODEV);
1161 ucfgp = (RF_DeviceConfig_t **) data;
1162 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1163 (RF_DeviceConfig_t *));
1164 if (d_cfg == NULL)
1165 return (ENOMEM);
1166 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1167 d_cfg->rows = raidPtr->numRow;
1168 d_cfg->cols = raidPtr->numCol;
1169 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1170 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1171 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1172 return (ENOMEM);
1173 }
1174 d_cfg->nspares = raidPtr->numSpare;
1175 if (d_cfg->nspares >= RF_MAX_DISKS) {
1176 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1177 return (ENOMEM);
1178 }
1179 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1180 d = 0;
1181 for (i = 0; i < d_cfg->rows; i++) {
1182 for (j = 0; j < d_cfg->cols; j++) {
1183 d_cfg->devs[d] = raidPtr->Disks[i][j];
1184 d++;
1185 }
1186 }
1187 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1188 d_cfg->spares[i] = raidPtr->Disks[0][j];
1189 }
1190 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1191 sizeof(RF_DeviceConfig_t));
1192 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1193
1194 return (retcode);
1195
1196 case RAIDFRAME_CHECK_PARITY:
1197 *(int *) data = raidPtr->parity_good;
1198 return (0);
1199
1200 case RAIDFRAME_RESET_ACCTOTALS:
1201 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1202 return (0);
1203
1204 case RAIDFRAME_GET_ACCTOTALS:
1205 totals = (RF_AccTotals_t *) data;
1206 *totals = raidPtr->acc_totals;
1207 return (0);
1208
1209 case RAIDFRAME_KEEP_ACCTOTALS:
1210 raidPtr->keep_acc_totals = *(int *)data;
1211 return (0);
1212
1213 case RAIDFRAME_GET_SIZE:
1214 *(int *) data = raidPtr->totalSectors;
1215 return (0);
1216
1217 /* fail a disk & optionally start reconstruction */
1218 case RAIDFRAME_FAIL_DISK:
1219
1220 if (raidPtr->Layout.map->faultsTolerated == 0) {
1221 /* Can't do this on a RAID 0!! */
1222 return(EINVAL);
1223 }
1224
1225 rr = (struct rf_recon_req *) data;
1226
1227 if (rr->row < 0 || rr->row >= raidPtr->numRow
1228 || rr->col < 0 || rr->col >= raidPtr->numCol)
1229 return (EINVAL);
1230
1231 printf("raid%d: Failing the disk: row: %d col: %d\n",
1232 unit, rr->row, rr->col);
1233
1234 /* make a copy of the recon request so that we don't rely on
1235 * the user's buffer */
1236 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1237 if (rrcopy == NULL)
1238 return(ENOMEM);
1239 bcopy(rr, rrcopy, sizeof(*rr));
1240 rrcopy->raidPtr = (void *) raidPtr;
1241
1242 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1243 rf_ReconThread,
1244 rrcopy,"raid_recon");
1245 return (0);
1246
1247 /* invoke a copyback operation after recon on whatever disk
1248 * needs it, if any */
1249 case RAIDFRAME_COPYBACK:
1250
1251 if (raidPtr->Layout.map->faultsTolerated == 0) {
1252 /* This makes no sense on a RAID 0!! */
1253 return(EINVAL);
1254 }
1255
1256 if (raidPtr->copyback_in_progress == 1) {
1257 /* Copyback is already in progress! */
1258 return(EINVAL);
1259 }
1260
1261 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1262 rf_CopybackThread,
1263 raidPtr,"raid_copyback");
1264 return (retcode);
1265
1266 /* return the percentage completion of reconstruction */
1267 case RAIDFRAME_CHECK_RECON_STATUS:
1268 if (raidPtr->Layout.map->faultsTolerated == 0) {
1269 /* This makes no sense on a RAID 0, so tell the
1270 user it's done. */
1271 *(int *) data = 100;
1272 return(0);
1273 }
1274 row = 0; /* XXX we only consider a single row... */
1275 if (raidPtr->status[row] != rf_rs_reconstructing)
1276 *(int *) data = 100;
1277 else
1278 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1279 return (0);
1280
1281 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1282 if (raidPtr->Layout.map->faultsTolerated == 0) {
1283 /* This makes no sense on a RAID 0, so tell the
1284 user it's done. */
1285 *(int *) data = 100;
1286 return(0);
1287 }
1288 if (raidPtr->parity_rewrite_in_progress == 1) {
1289 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1290 } else {
1291 *(int *) data = 100;
1292 }
1293 return (0);
1294
1295 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1296 if (raidPtr->Layout.map->faultsTolerated == 0) {
1297 /* This makes no sense on a RAID 0 */
1298 return(EINVAL);
1299 }
1300 if (raidPtr->copyback_in_progress == 1) {
1301 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1302 raidPtr->Layout.numStripe;
1303 } else {
1304 *(int *) data = 100;
1305 }
1306 return (0);
1307
1308
1309 /* the sparetable daemon calls this to wait for the kernel to
1310 * need a spare table. this ioctl does not return until a
1311 * spare table is needed. XXX -- calling mpsleep here in the
1312 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1313 * -- I should either compute the spare table in the kernel,
1314 * or have a different -- XXX XXX -- interface (a different
1315 * character device) for delivering the table -- XXX */
1316 #if 0
1317 case RAIDFRAME_SPARET_WAIT:
1318 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1319 while (!rf_sparet_wait_queue)
1320 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1321 waitreq = rf_sparet_wait_queue;
1322 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1323 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1324
1325 /* structure assignment */
1326 *((RF_SparetWait_t *) data) = *waitreq;
1327
1328 RF_Free(waitreq, sizeof(*waitreq));
1329 return (0);
1330
1331 /* wakes up a process waiting on SPARET_WAIT and puts an error
1332 * code in it that will cause the dameon to exit */
1333 case RAIDFRAME_ABORT_SPARET_WAIT:
1334 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1335 waitreq->fcol = -1;
1336 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1337 waitreq->next = rf_sparet_wait_queue;
1338 rf_sparet_wait_queue = waitreq;
1339 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1340 wakeup(&rf_sparet_wait_queue);
1341 return (0);
1342
1343 /* used by the spare table daemon to deliver a spare table
1344 * into the kernel */
1345 case RAIDFRAME_SEND_SPARET:
1346
1347 /* install the spare table */
1348 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1349
1350 /* respond to the requestor. the return status of the spare
1351 * table installation is passed in the "fcol" field */
1352 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1353 waitreq->fcol = retcode;
1354 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1355 waitreq->next = rf_sparet_resp_queue;
1356 rf_sparet_resp_queue = waitreq;
1357 wakeup(&rf_sparet_resp_queue);
1358 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1359
1360 return (retcode);
1361 #endif
1362
1363 default:
1364 break; /* fall through to the os-specific code below */
1365
1366 }
1367
1368 if (!raidPtr->valid)
1369 return (EINVAL);
1370
1371 /*
1372 * Add support for "regular" device ioctls here.
1373 */
1374
1375 switch (cmd) {
1376 case DIOCGDINFO:
1377 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1378 break;
1379
1380 case DIOCGPART:
1381 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1382 ((struct partinfo *) data)->part =
1383 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1384 break;
1385
1386 case DIOCWDINFO:
1387 case DIOCSDINFO:
1388 if ((error = raidlock(rs)) != 0)
1389 return (error);
1390
1391 rs->sc_flags |= RAIDF_LABELLING;
1392
1393 error = setdisklabel(rs->sc_dkdev.dk_label,
1394 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1395 if (error == 0) {
1396 if (cmd == DIOCWDINFO)
1397 error = writedisklabel(RAIDLABELDEV(dev),
1398 raidstrategy, rs->sc_dkdev.dk_label,
1399 rs->sc_dkdev.dk_cpulabel);
1400 }
1401 rs->sc_flags &= ~RAIDF_LABELLING;
1402
1403 raidunlock(rs);
1404
1405 if (error)
1406 return (error);
1407 break;
1408
1409 case DIOCWLABEL:
1410 if (*(int *) data != 0)
1411 rs->sc_flags |= RAIDF_WLABEL;
1412 else
1413 rs->sc_flags &= ~RAIDF_WLABEL;
1414 break;
1415
1416 case DIOCGDEFLABEL:
1417 raidgetdefaultlabel(raidPtr, rs,
1418 (struct disklabel *) data);
1419 break;
1420
1421 default:
1422 retcode = ENOTTY;
1423 }
1424 return (retcode);
1425
1426 }
1427
1428
1429 /* raidinit -- complete the rest of the initialization for the
1430 RAIDframe device. */
1431
1432
1433 static void
1434 raidinit(raidPtr)
1435 RF_Raid_t *raidPtr;
1436 {
1437 struct raid_softc *rs;
1438 int unit;
1439
1440 unit = raidPtr->raidid;
1441
1442 rs = &raid_softc[unit];
1443 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1444 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1445
1446
1447 /* XXX should check return code first... */
1448 rs->sc_flags |= RAIDF_INITED;
1449
1450 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1451
1452 rs->sc_dkdev.dk_name = rs->sc_xname;
1453
1454 /* disk_attach actually creates space for the CPU disklabel, among
1455 * other things, so it's critical to call this *BEFORE* we try putzing
1456 * with disklabels. */
1457
1458 disk_attach(&rs->sc_dkdev);
1459
1460 /* XXX There may be a weird interaction here between this, and
1461 * protectedSectors, as used in RAIDframe. */
1462
1463 rs->sc_size = raidPtr->totalSectors;
1464
1465 }
1466
1467 /* wake up the daemon & tell it to get us a spare table
1468 * XXX
1469 * the entries in the queues should be tagged with the raidPtr
1470 * so that in the extremely rare case that two recons happen at once,
1471 * we know for which device were requesting a spare table
1472 * XXX
1473 *
1474 * XXX This code is not currently used. GO
1475 */
1476 int
1477 rf_GetSpareTableFromDaemon(req)
1478 RF_SparetWait_t *req;
1479 {
1480 int retcode;
1481
1482 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1483 req->next = rf_sparet_wait_queue;
1484 rf_sparet_wait_queue = req;
1485 wakeup(&rf_sparet_wait_queue);
1486
1487 /* mpsleep unlocks the mutex */
1488 while (!rf_sparet_resp_queue) {
1489 tsleep(&rf_sparet_resp_queue, PRIBIO,
1490 "raidframe getsparetable", 0);
1491 }
1492 req = rf_sparet_resp_queue;
1493 rf_sparet_resp_queue = req->next;
1494 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1495
1496 retcode = req->fcol;
1497 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1498 * alloc'd */
1499 return (retcode);
1500 }
1501
1502 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1503 * bp & passes it down.
1504 * any calls originating in the kernel must use non-blocking I/O
1505 * do some extra sanity checking to return "appropriate" error values for
1506 * certain conditions (to make some standard utilities work)
1507 *
1508 * Formerly known as: rf_DoAccessKernel
1509 */
1510 void
1511 raidstart(raidPtr)
1512 RF_Raid_t *raidPtr;
1513 {
1514 RF_SectorCount_t num_blocks, pb, sum;
1515 RF_RaidAddr_t raid_addr;
1516 int retcode;
1517 struct partition *pp;
1518 daddr_t blocknum;
1519 int unit;
1520 struct raid_softc *rs;
1521 int do_async;
1522 struct buf *bp;
1523
1524 unit = raidPtr->raidid;
1525 rs = &raid_softc[unit];
1526
1527 /* quick check to see if anything has died recently */
1528 RF_LOCK_MUTEX(raidPtr->mutex);
1529 if (raidPtr->numNewFailures > 0) {
1530 rf_update_component_labels(raidPtr);
1531 raidPtr->numNewFailures--;
1532 }
1533 RF_UNLOCK_MUTEX(raidPtr->mutex);
1534
1535 /* Check to see if we're at the limit... */
1536 RF_LOCK_MUTEX(raidPtr->mutex);
1537 while (raidPtr->openings > 0) {
1538 RF_UNLOCK_MUTEX(raidPtr->mutex);
1539
1540 /* get the next item, if any, from the queue */
1541 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1542 /* nothing more to do */
1543 return;
1544 }
1545 BUFQ_REMOVE(&rs->buf_queue, bp);
1546
1547 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1548 * partition.. Need to make it absolute to the underlying
1549 * device.. */
1550
1551 blocknum = bp->b_blkno;
1552 if (DISKPART(bp->b_dev) != RAW_PART) {
1553 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1554 blocknum += pp->p_offset;
1555 }
1556
1557 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1558 (int) blocknum));
1559
1560 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1561 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1562
1563 /* *THIS* is where we adjust what block we're going to...
1564 * but DO NOT TOUCH bp->b_blkno!!! */
1565 raid_addr = blocknum;
1566
1567 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1568 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1569 sum = raid_addr + num_blocks + pb;
1570 if (1 || rf_debugKernelAccess) {
1571 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1572 (int) raid_addr, (int) sum, (int) num_blocks,
1573 (int) pb, (int) bp->b_resid));
1574 }
1575 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1576 || (sum < num_blocks) || (sum < pb)) {
1577 bp->b_error = ENOSPC;
1578 bp->b_flags |= B_ERROR;
1579 bp->b_resid = bp->b_bcount;
1580 biodone(bp);
1581 RF_LOCK_MUTEX(raidPtr->mutex);
1582 continue;
1583 }
1584 /*
1585 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1586 */
1587
1588 if (bp->b_bcount & raidPtr->sectorMask) {
1589 bp->b_error = EINVAL;
1590 bp->b_flags |= B_ERROR;
1591 bp->b_resid = bp->b_bcount;
1592 biodone(bp);
1593 RF_LOCK_MUTEX(raidPtr->mutex);
1594 continue;
1595
1596 }
1597 db1_printf(("Calling DoAccess..\n"));
1598
1599
1600 RF_LOCK_MUTEX(raidPtr->mutex);
1601 raidPtr->openings--;
1602 RF_UNLOCK_MUTEX(raidPtr->mutex);
1603
1604 /*
1605 * Everything is async.
1606 */
1607 do_async = 1;
1608
1609 /* don't ever condition on bp->b_flags & B_WRITE.
1610 * always condition on B_READ instead */
1611
1612 /* XXX we're still at splbio() here... do we *really*
1613 need to be? */
1614
1615
1616 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1617 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1618 do_async, raid_addr, num_blocks,
1619 bp->b_data, bp, NULL, NULL,
1620 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1621
1622
1623 RF_LOCK_MUTEX(raidPtr->mutex);
1624 }
1625 RF_UNLOCK_MUTEX(raidPtr->mutex);
1626 }
1627
1628
1629
1630
1631 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1632
1633 int
1634 rf_DispatchKernelIO(queue, req)
1635 RF_DiskQueue_t *queue;
1636 RF_DiskQueueData_t *req;
1637 {
1638 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1639 struct buf *bp;
1640 struct raidbuf *raidbp = NULL;
1641 struct raid_softc *rs;
1642 int unit;
1643 int s;
1644
1645 s=0;
1646 /* s = splbio();*/ /* want to test this */
1647 /* XXX along with the vnode, we also need the softc associated with
1648 * this device.. */
1649
1650 req->queue = queue;
1651
1652 unit = queue->raidPtr->raidid;
1653
1654 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1655
1656 if (unit >= numraid) {
1657 printf("Invalid unit number: %d %d\n", unit, numraid);
1658 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1659 }
1660 rs = &raid_softc[unit];
1661
1662 /* XXX is this the right place? */
1663 disk_busy(&rs->sc_dkdev);
1664
1665 bp = req->bp;
1666 #if 1
1667 /* XXX when there is a physical disk failure, someone is passing us a
1668 * buffer that contains old stuff!! Attempt to deal with this problem
1669 * without taking a performance hit... (not sure where the real bug
1670 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1671
1672 if (bp->b_flags & B_ERROR) {
1673 bp->b_flags &= ~B_ERROR;
1674 }
1675 if (bp->b_error != 0) {
1676 bp->b_error = 0;
1677 }
1678 #endif
1679 raidbp = RAIDGETBUF(rs);
1680
1681 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1682
1683 /*
1684 * context for raidiodone
1685 */
1686 raidbp->rf_obp = bp;
1687 raidbp->req = req;
1688
1689 LIST_INIT(&raidbp->rf_buf.b_dep);
1690
1691 switch (req->type) {
1692 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1693 /* XXX need to do something extra here.. */
1694 /* I'm leaving this in, as I've never actually seen it used,
1695 * and I'd like folks to report it... GO */
1696 printf(("WAKEUP CALLED\n"));
1697 queue->numOutstanding++;
1698
1699 /* XXX need to glue the original buffer into this?? */
1700
1701 KernelWakeupFunc(&raidbp->rf_buf);
1702 break;
1703
1704 case RF_IO_TYPE_READ:
1705 case RF_IO_TYPE_WRITE:
1706
1707 if (req->tracerec) {
1708 RF_ETIMER_START(req->tracerec->timer);
1709 }
1710 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1711 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1712 req->sectorOffset, req->numSector,
1713 req->buf, KernelWakeupFunc, (void *) req,
1714 queue->raidPtr->logBytesPerSector, req->b_proc);
1715
1716 if (rf_debugKernelAccess) {
1717 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1718 (long) bp->b_blkno));
1719 }
1720 queue->numOutstanding++;
1721 queue->last_deq_sector = req->sectorOffset;
1722 /* acc wouldn't have been let in if there were any pending
1723 * reqs at any other priority */
1724 queue->curPriority = req->priority;
1725
1726 db1_printf(("Going for %c to unit %d row %d col %d\n",
1727 req->type, unit, queue->row, queue->col));
1728 db1_printf(("sector %d count %d (%d bytes) %d\n",
1729 (int) req->sectorOffset, (int) req->numSector,
1730 (int) (req->numSector <<
1731 queue->raidPtr->logBytesPerSector),
1732 (int) queue->raidPtr->logBytesPerSector));
1733 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1734 raidbp->rf_buf.b_vp->v_numoutput++;
1735 }
1736 VOP_STRATEGY(&raidbp->rf_buf);
1737
1738 break;
1739
1740 default:
1741 panic("bad req->type in rf_DispatchKernelIO");
1742 }
1743 db1_printf(("Exiting from DispatchKernelIO\n"));
1744 /* splx(s); */ /* want to test this */
1745 return (0);
1746 }
1747 /* this is the callback function associated with a I/O invoked from
1748 kernel code.
1749 */
1750 static void
1751 KernelWakeupFunc(vbp)
1752 struct buf *vbp;
1753 {
1754 RF_DiskQueueData_t *req = NULL;
1755 RF_DiskQueue_t *queue;
1756 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1757 struct buf *bp;
1758 struct raid_softc *rs;
1759 int unit;
1760 int s;
1761
1762 s = splbio();
1763 db1_printf(("recovering the request queue:\n"));
1764 req = raidbp->req;
1765
1766 bp = raidbp->rf_obp;
1767
1768 queue = (RF_DiskQueue_t *) req->queue;
1769
1770 if (raidbp->rf_buf.b_flags & B_ERROR) {
1771 bp->b_flags |= B_ERROR;
1772 bp->b_error = raidbp->rf_buf.b_error ?
1773 raidbp->rf_buf.b_error : EIO;
1774 }
1775
1776 /* XXX methinks this could be wrong... */
1777 #if 1
1778 bp->b_resid = raidbp->rf_buf.b_resid;
1779 #endif
1780
1781 if (req->tracerec) {
1782 RF_ETIMER_STOP(req->tracerec->timer);
1783 RF_ETIMER_EVAL(req->tracerec->timer);
1784 RF_LOCK_MUTEX(rf_tracing_mutex);
1785 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1786 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1787 req->tracerec->num_phys_ios++;
1788 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1789 }
1790 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1791
1792 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1793
1794
1795 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1796 * ballistic, and mark the component as hosed... */
1797
1798 if (bp->b_flags & B_ERROR) {
1799 /* Mark the disk as dead */
1800 /* but only mark it once... */
1801 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1802 rf_ds_optimal) {
1803 printf("raid%d: IO Error. Marking %s as failed.\n",
1804 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1805 queue->raidPtr->Disks[queue->row][queue->col].status =
1806 rf_ds_failed;
1807 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1808 queue->raidPtr->numFailures++;
1809 queue->raidPtr->numNewFailures++;
1810 /* XXX here we should bump the version number for each component, and write that data out */
1811 } else { /* Disk is already dead... */
1812 /* printf("Disk already marked as dead!\n"); */
1813 }
1814
1815 }
1816
1817 rs = &raid_softc[unit];
1818 RAIDPUTBUF(rs, raidbp);
1819
1820
1821 if (bp->b_resid == 0) {
1822 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1823 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1824 }
1825
1826 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1827 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1828
1829 splx(s);
1830 }
1831
1832
1833
1834 /*
1835 * initialize a buf structure for doing an I/O in the kernel.
1836 */
1837 static void
1838 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1839 logBytesPerSector, b_proc)
1840 struct buf *bp;
1841 struct vnode *b_vp;
1842 unsigned rw_flag;
1843 dev_t dev;
1844 RF_SectorNum_t startSect;
1845 RF_SectorCount_t numSect;
1846 caddr_t buf;
1847 void (*cbFunc) (struct buf *);
1848 void *cbArg;
1849 int logBytesPerSector;
1850 struct proc *b_proc;
1851 {
1852 /* bp->b_flags = B_PHYS | rw_flag; */
1853 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1854 bp->b_bcount = numSect << logBytesPerSector;
1855 bp->b_bufsize = bp->b_bcount;
1856 bp->b_error = 0;
1857 bp->b_dev = dev;
1858 bp->b_data = buf;
1859 bp->b_blkno = startSect;
1860 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1861 if (bp->b_bcount == 0) {
1862 panic("bp->b_bcount is zero in InitBP!!\n");
1863 }
1864 bp->b_proc = b_proc;
1865 bp->b_iodone = cbFunc;
1866 bp->b_vp = b_vp;
1867
1868 }
1869
1870 static void
1871 raidgetdefaultlabel(raidPtr, rs, lp)
1872 RF_Raid_t *raidPtr;
1873 struct raid_softc *rs;
1874 struct disklabel *lp;
1875 {
1876 db1_printf(("Building a default label...\n"));
1877 bzero(lp, sizeof(*lp));
1878
1879 /* fabricate a label... */
1880 lp->d_secperunit = raidPtr->totalSectors;
1881 lp->d_secsize = raidPtr->bytesPerSector;
1882 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1883 lp->d_ntracks = 1;
1884 lp->d_ncylinders = raidPtr->totalSectors /
1885 (lp->d_nsectors * lp->d_ntracks);
1886 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1887
1888 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1889 lp->d_type = DTYPE_RAID;
1890 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1891 lp->d_rpm = 3600;
1892 lp->d_interleave = 1;
1893 lp->d_flags = 0;
1894
1895 lp->d_partitions[RAW_PART].p_offset = 0;
1896 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1897 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1898 lp->d_npartitions = RAW_PART + 1;
1899
1900 lp->d_magic = DISKMAGIC;
1901 lp->d_magic2 = DISKMAGIC;
1902 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1903
1904 }
1905 /*
1906 * Read the disklabel from the raid device. If one is not present, fake one
1907 * up.
1908 */
1909 static void
1910 raidgetdisklabel(dev)
1911 dev_t dev;
1912 {
1913 int unit = raidunit(dev);
1914 struct raid_softc *rs = &raid_softc[unit];
1915 char *errstring;
1916 struct disklabel *lp = rs->sc_dkdev.dk_label;
1917 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1918 RF_Raid_t *raidPtr;
1919
1920 db1_printf(("Getting the disklabel...\n"));
1921
1922 bzero(clp, sizeof(*clp));
1923
1924 raidPtr = raidPtrs[unit];
1925
1926 raidgetdefaultlabel(raidPtr, rs, lp);
1927
1928 /*
1929 * Call the generic disklabel extraction routine.
1930 */
1931 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1932 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1933 if (errstring)
1934 raidmakedisklabel(rs);
1935 else {
1936 int i;
1937 struct partition *pp;
1938
1939 /*
1940 * Sanity check whether the found disklabel is valid.
1941 *
1942 * This is necessary since total size of the raid device
1943 * may vary when an interleave is changed even though exactly
1944 * same componets are used, and old disklabel may used
1945 * if that is found.
1946 */
1947 if (lp->d_secperunit != rs->sc_size)
1948 printf("WARNING: %s: "
1949 "total sector size in disklabel (%d) != "
1950 "the size of raid (%ld)\n", rs->sc_xname,
1951 lp->d_secperunit, (long) rs->sc_size);
1952 for (i = 0; i < lp->d_npartitions; i++) {
1953 pp = &lp->d_partitions[i];
1954 if (pp->p_offset + pp->p_size > rs->sc_size)
1955 printf("WARNING: %s: end of partition `%c' "
1956 "exceeds the size of raid (%ld)\n",
1957 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1958 }
1959 }
1960
1961 }
1962 /*
1963 * Take care of things one might want to take care of in the event
1964 * that a disklabel isn't present.
1965 */
1966 static void
1967 raidmakedisklabel(rs)
1968 struct raid_softc *rs;
1969 {
1970 struct disklabel *lp = rs->sc_dkdev.dk_label;
1971 db1_printf(("Making a label..\n"));
1972
1973 /*
1974 * For historical reasons, if there's no disklabel present
1975 * the raw partition must be marked FS_BSDFFS.
1976 */
1977
1978 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1979
1980 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1981
1982 lp->d_checksum = dkcksum(lp);
1983 }
1984 /*
1985 * Lookup the provided name in the filesystem. If the file exists,
1986 * is a valid block device, and isn't being used by anyone else,
1987 * set *vpp to the file's vnode.
1988 * You'll find the original of this in ccd.c
1989 */
1990 int
1991 raidlookup(path, p, vpp)
1992 char *path;
1993 struct proc *p;
1994 struct vnode **vpp; /* result */
1995 {
1996 struct nameidata nd;
1997 struct vnode *vp;
1998 struct vattr va;
1999 int error;
2000
2001 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2002 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2003 #ifdef DEBUG
2004 printf("RAIDframe: vn_open returned %d\n", error);
2005 #endif
2006 return (error);
2007 }
2008 vp = nd.ni_vp;
2009 if (vp->v_usecount > 1) {
2010 VOP_UNLOCK(vp, 0);
2011 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2012 return (EBUSY);
2013 }
2014 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2015 VOP_UNLOCK(vp, 0);
2016 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2017 return (error);
2018 }
2019 /* XXX: eventually we should handle VREG, too. */
2020 if (va.va_type != VBLK) {
2021 VOP_UNLOCK(vp, 0);
2022 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2023 return (ENOTBLK);
2024 }
2025 VOP_UNLOCK(vp, 0);
2026 *vpp = vp;
2027 return (0);
2028 }
2029 /*
2030 * Wait interruptibly for an exclusive lock.
2031 *
2032 * XXX
2033 * Several drivers do this; it should be abstracted and made MP-safe.
2034 * (Hmm... where have we seen this warning before :-> GO )
2035 */
2036 static int
2037 raidlock(rs)
2038 struct raid_softc *rs;
2039 {
2040 int error;
2041
2042 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2043 rs->sc_flags |= RAIDF_WANTED;
2044 if ((error =
2045 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2046 return (error);
2047 }
2048 rs->sc_flags |= RAIDF_LOCKED;
2049 return (0);
2050 }
2051 /*
2052 * Unlock and wake up any waiters.
2053 */
2054 static void
2055 raidunlock(rs)
2056 struct raid_softc *rs;
2057 {
2058
2059 rs->sc_flags &= ~RAIDF_LOCKED;
2060 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2061 rs->sc_flags &= ~RAIDF_WANTED;
2062 wakeup(rs);
2063 }
2064 }
2065
2066
2067 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2068 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2069
2070 int
2071 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2072 {
2073 RF_ComponentLabel_t clabel;
2074 raidread_component_label(dev, b_vp, &clabel);
2075 clabel.mod_counter = mod_counter;
2076 clabel.clean = RF_RAID_CLEAN;
2077 raidwrite_component_label(dev, b_vp, &clabel);
2078 return(0);
2079 }
2080
2081
2082 int
2083 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2084 {
2085 RF_ComponentLabel_t clabel;
2086 raidread_component_label(dev, b_vp, &clabel);
2087 clabel.mod_counter = mod_counter;
2088 clabel.clean = RF_RAID_DIRTY;
2089 raidwrite_component_label(dev, b_vp, &clabel);
2090 return(0);
2091 }
2092
2093 /* ARGSUSED */
2094 int
2095 raidread_component_label(dev, b_vp, clabel)
2096 dev_t dev;
2097 struct vnode *b_vp;
2098 RF_ComponentLabel_t *clabel;
2099 {
2100 struct buf *bp;
2101 int error;
2102
2103 /* XXX should probably ensure that we don't try to do this if
2104 someone has changed rf_protected_sectors. */
2105
2106 /* get a block of the appropriate size... */
2107 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2108 bp->b_dev = dev;
2109
2110 /* get our ducks in a row for the read */
2111 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2112 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2113 bp->b_flags = B_BUSY | B_READ;
2114 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2115
2116 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2117
2118 error = biowait(bp);
2119
2120 if (!error) {
2121 memcpy(clabel, bp->b_data,
2122 sizeof(RF_ComponentLabel_t));
2123 #if 0
2124 rf_print_component_label( clabel );
2125 #endif
2126 } else {
2127 #if 0
2128 printf("Failed to read RAID component label!\n");
2129 #endif
2130 }
2131
2132 bp->b_flags = B_INVAL | B_AGE;
2133 brelse(bp);
2134 return(error);
2135 }
2136 /* ARGSUSED */
2137 int
2138 raidwrite_component_label(dev, b_vp, clabel)
2139 dev_t dev;
2140 struct vnode *b_vp;
2141 RF_ComponentLabel_t *clabel;
2142 {
2143 struct buf *bp;
2144 int error;
2145
2146 /* get a block of the appropriate size... */
2147 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2148 bp->b_dev = dev;
2149
2150 /* get our ducks in a row for the write */
2151 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2152 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2153 bp->b_flags = B_BUSY | B_WRITE;
2154 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2155
2156 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2157
2158 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2159
2160 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2161 error = biowait(bp);
2162 bp->b_flags = B_INVAL | B_AGE;
2163 brelse(bp);
2164 if (error) {
2165 #if 1
2166 printf("Failed to write RAID component info!\n");
2167 #endif
2168 }
2169
2170 return(error);
2171 }
2172
2173 void
2174 rf_markalldirty(raidPtr)
2175 RF_Raid_t *raidPtr;
2176 {
2177 RF_ComponentLabel_t clabel;
2178 int r,c;
2179
2180 raidPtr->mod_counter++;
2181 for (r = 0; r < raidPtr->numRow; r++) {
2182 for (c = 0; c < raidPtr->numCol; c++) {
2183 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2184 raidread_component_label(
2185 raidPtr->Disks[r][c].dev,
2186 raidPtr->raid_cinfo[r][c].ci_vp,
2187 &clabel);
2188 if (clabel.status == rf_ds_spared) {
2189 /* XXX do something special...
2190 but whatever you do, don't
2191 try to access it!! */
2192 } else {
2193 #if 0
2194 clabel.status =
2195 raidPtr->Disks[r][c].status;
2196 raidwrite_component_label(
2197 raidPtr->Disks[r][c].dev,
2198 raidPtr->raid_cinfo[r][c].ci_vp,
2199 &clabel);
2200 #endif
2201 raidmarkdirty(
2202 raidPtr->Disks[r][c].dev,
2203 raidPtr->raid_cinfo[r][c].ci_vp,
2204 raidPtr->mod_counter);
2205 }
2206 }
2207 }
2208 }
2209 /* printf("Component labels marked dirty.\n"); */
2210 #if 0
2211 for( c = 0; c < raidPtr->numSpare ; c++) {
2212 sparecol = raidPtr->numCol + c;
2213 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2214 /*
2215
2216 XXX this is where we get fancy and map this spare
2217 into it's correct spot in the array.
2218
2219 */
2220 /*
2221
2222 we claim this disk is "optimal" if it's
2223 rf_ds_used_spare, as that means it should be
2224 directly substitutable for the disk it replaced.
2225 We note that too...
2226
2227 */
2228
2229 for(i=0;i<raidPtr->numRow;i++) {
2230 for(j=0;j<raidPtr->numCol;j++) {
2231 if ((raidPtr->Disks[i][j].spareRow ==
2232 r) &&
2233 (raidPtr->Disks[i][j].spareCol ==
2234 sparecol)) {
2235 srow = r;
2236 scol = sparecol;
2237 break;
2238 }
2239 }
2240 }
2241
2242 raidread_component_label(
2243 raidPtr->Disks[r][sparecol].dev,
2244 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2245 &clabel);
2246 /* make sure status is noted */
2247 clabel.version = RF_COMPONENT_LABEL_VERSION;
2248 clabel.mod_counter = raidPtr->mod_counter;
2249 clabel.serial_number = raidPtr->serial_number;
2250 clabel.row = srow;
2251 clabel.column = scol;
2252 clabel.num_rows = raidPtr->numRow;
2253 clabel.num_columns = raidPtr->numCol;
2254 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2255 clabel.status = rf_ds_optimal;
2256 raidwrite_component_label(
2257 raidPtr->Disks[r][sparecol].dev,
2258 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2259 &clabel);
2260 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2261 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2262 }
2263 }
2264
2265 #endif
2266 }
2267
2268
2269 void
2270 rf_update_component_labels(raidPtr)
2271 RF_Raid_t *raidPtr;
2272 {
2273 RF_ComponentLabel_t clabel;
2274 int sparecol;
2275 int r,c;
2276 int i,j;
2277 int srow, scol;
2278
2279 srow = -1;
2280 scol = -1;
2281
2282 /* XXX should do extra checks to make sure things really are clean,
2283 rather than blindly setting the clean bit... */
2284
2285 raidPtr->mod_counter++;
2286
2287 for (r = 0; r < raidPtr->numRow; r++) {
2288 for (c = 0; c < raidPtr->numCol; c++) {
2289 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2290 raidread_component_label(
2291 raidPtr->Disks[r][c].dev,
2292 raidPtr->raid_cinfo[r][c].ci_vp,
2293 &clabel);
2294 /* make sure status is noted */
2295 clabel.status = rf_ds_optimal;
2296 /* bump the counter */
2297 clabel.mod_counter = raidPtr->mod_counter;
2298
2299 raidwrite_component_label(
2300 raidPtr->Disks[r][c].dev,
2301 raidPtr->raid_cinfo[r][c].ci_vp,
2302 &clabel);
2303 }
2304 /* else we don't touch it.. */
2305 }
2306 }
2307
2308 for( c = 0; c < raidPtr->numSpare ; c++) {
2309 sparecol = raidPtr->numCol + c;
2310 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2311 /*
2312
2313 we claim this disk is "optimal" if it's
2314 rf_ds_used_spare, as that means it should be
2315 directly substitutable for the disk it replaced.
2316 We note that too...
2317
2318 */
2319
2320 for(i=0;i<raidPtr->numRow;i++) {
2321 for(j=0;j<raidPtr->numCol;j++) {
2322 if ((raidPtr->Disks[i][j].spareRow ==
2323 0) &&
2324 (raidPtr->Disks[i][j].spareCol ==
2325 sparecol)) {
2326 srow = i;
2327 scol = j;
2328 break;
2329 }
2330 }
2331 }
2332
2333 /* XXX shouldn't *really* need this... */
2334 raidread_component_label(
2335 raidPtr->Disks[0][sparecol].dev,
2336 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2337 &clabel);
2338 /* make sure status is noted */
2339
2340 raid_init_component_label(raidPtr, &clabel);
2341
2342 clabel.mod_counter = raidPtr->mod_counter;
2343 clabel.row = srow;
2344 clabel.column = scol;
2345 clabel.status = rf_ds_optimal;
2346
2347 raidwrite_component_label(
2348 raidPtr->Disks[0][sparecol].dev,
2349 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2350 &clabel);
2351 }
2352 }
2353 /* printf("Component labels updated\n"); */
2354 }
2355
2356
2357 void
2358 rf_final_update_component_labels(raidPtr)
2359 RF_Raid_t *raidPtr;
2360 {
2361 RF_ComponentLabel_t clabel;
2362 int sparecol;
2363 int r,c;
2364 int i,j;
2365 int srow, scol;
2366
2367 srow = -1;
2368 scol = -1;
2369
2370 /* XXX should do extra checks to make sure things really are clean,
2371 rather than blindly setting the clean bit... */
2372
2373 raidPtr->mod_counter++;
2374
2375 for (r = 0; r < raidPtr->numRow; r++) {
2376 for (c = 0; c < raidPtr->numCol; c++) {
2377 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2378 raidread_component_label(
2379 raidPtr->Disks[r][c].dev,
2380 raidPtr->raid_cinfo[r][c].ci_vp,
2381 &clabel);
2382 /* make sure status is noted */
2383 clabel.status = rf_ds_optimal;
2384 /* bump the counter */
2385 clabel.mod_counter = raidPtr->mod_counter;
2386
2387 raidwrite_component_label(
2388 raidPtr->Disks[r][c].dev,
2389 raidPtr->raid_cinfo[r][c].ci_vp,
2390 &clabel);
2391 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2392 raidmarkclean(
2393 raidPtr->Disks[r][c].dev,
2394 raidPtr->raid_cinfo[r][c].ci_vp,
2395 raidPtr->mod_counter);
2396 }
2397 }
2398 /* else we don't touch it.. */
2399 }
2400 }
2401
2402 for( c = 0; c < raidPtr->numSpare ; c++) {
2403 sparecol = raidPtr->numCol + c;
2404 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2405 /*
2406
2407 we claim this disk is "optimal" if it's
2408 rf_ds_used_spare, as that means it should be
2409 directly substitutable for the disk it replaced.
2410 We note that too...
2411
2412 */
2413
2414 for(i=0;i<raidPtr->numRow;i++) {
2415 for(j=0;j<raidPtr->numCol;j++) {
2416 if ((raidPtr->Disks[i][j].spareRow ==
2417 0) &&
2418 (raidPtr->Disks[i][j].spareCol ==
2419 sparecol)) {
2420 srow = i;
2421 scol = j;
2422 break;
2423 }
2424 }
2425 }
2426
2427 /* XXX shouldn't *really* need this... */
2428 raidread_component_label(
2429 raidPtr->Disks[0][sparecol].dev,
2430 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2431 &clabel);
2432 /* make sure status is noted */
2433
2434 raid_init_component_label(raidPtr, &clabel);
2435
2436 clabel.mod_counter = raidPtr->mod_counter;
2437 clabel.row = srow;
2438 clabel.column = scol;
2439 clabel.status = rf_ds_optimal;
2440
2441 raidwrite_component_label(
2442 raidPtr->Disks[0][sparecol].dev,
2443 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2444 &clabel);
2445 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2446 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2447 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2448 raidPtr->mod_counter);
2449 }
2450 }
2451 }
2452 /* printf("Component labels updated\n"); */
2453 }
2454
2455 void
2456 rf_close_component(raidPtr, vp, auto_configured)
2457 RF_Raid_t *raidPtr;
2458 struct vnode *vp;
2459 int auto_configured;
2460 {
2461 struct proc *p;
2462
2463 p = raidPtr->engine_thread;
2464
2465 if (vp != NULL) {
2466 if (auto_configured == 1) {
2467 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2468 vput(vp);
2469
2470 } else {
2471 VOP_UNLOCK(vp, 0);
2472 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2473 }
2474 } else {
2475 printf("vnode was NULL\n");
2476 }
2477 }
2478
2479
2480 void
2481 rf_UnconfigureVnodes(raidPtr)
2482 RF_Raid_t *raidPtr;
2483 {
2484 int r,c;
2485 struct proc *p;
2486 struct vnode *vp;
2487 int acd;
2488
2489
2490 /* We take this opportunity to close the vnodes like we should.. */
2491
2492 p = raidPtr->engine_thread;
2493
2494 for (r = 0; r < raidPtr->numRow; r++) {
2495 for (c = 0; c < raidPtr->numCol; c++) {
2496 printf("Closing vnode for row: %d col: %d\n", r, c);
2497 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2498 acd = raidPtr->Disks[r][c].auto_configured;
2499 rf_close_component(raidPtr, vp, acd);
2500 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2501 raidPtr->Disks[r][c].auto_configured = 0;
2502 }
2503 }
2504 for (r = 0; r < raidPtr->numSpare; r++) {
2505 printf("Closing vnode for spare: %d\n", r);
2506 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2507 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2508 rf_close_component(raidPtr, vp, acd);
2509 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2510 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2511 }
2512 }
2513
2514
2515 void
2516 rf_ReconThread(req)
2517 struct rf_recon_req *req;
2518 {
2519 int s;
2520 RF_Raid_t *raidPtr;
2521
2522 s = splbio();
2523 raidPtr = (RF_Raid_t *) req->raidPtr;
2524 raidPtr->recon_in_progress = 1;
2525
2526 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2527 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2528
2529 /* XXX get rid of this! we don't need it at all.. */
2530 RF_Free(req, sizeof(*req));
2531
2532 raidPtr->recon_in_progress = 0;
2533 splx(s);
2534
2535 /* That's all... */
2536 kthread_exit(0); /* does not return */
2537 }
2538
2539 void
2540 rf_RewriteParityThread(raidPtr)
2541 RF_Raid_t *raidPtr;
2542 {
2543 int retcode;
2544 int s;
2545
2546 raidPtr->parity_rewrite_in_progress = 1;
2547 s = splbio();
2548 retcode = rf_RewriteParity(raidPtr);
2549 splx(s);
2550 if (retcode) {
2551 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2552 } else {
2553 /* set the clean bit! If we shutdown correctly,
2554 the clean bit on each component label will get
2555 set */
2556 raidPtr->parity_good = RF_RAID_CLEAN;
2557 }
2558 raidPtr->parity_rewrite_in_progress = 0;
2559
2560 /* That's all... */
2561 kthread_exit(0); /* does not return */
2562 }
2563
2564
2565 void
2566 rf_CopybackThread(raidPtr)
2567 RF_Raid_t *raidPtr;
2568 {
2569 int s;
2570
2571 raidPtr->copyback_in_progress = 1;
2572 s = splbio();
2573 rf_CopybackReconstructedData(raidPtr);
2574 splx(s);
2575 raidPtr->copyback_in_progress = 0;
2576
2577 /* That's all... */
2578 kthread_exit(0); /* does not return */
2579 }
2580
2581
2582 void
2583 rf_ReconstructInPlaceThread(req)
2584 struct rf_recon_req *req;
2585 {
2586 int retcode;
2587 int s;
2588 RF_Raid_t *raidPtr;
2589
2590 s = splbio();
2591 raidPtr = req->raidPtr;
2592 raidPtr->recon_in_progress = 1;
2593 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2594 RF_Free(req, sizeof(*req));
2595 raidPtr->recon_in_progress = 0;
2596 splx(s);
2597
2598 /* That's all... */
2599 kthread_exit(0); /* does not return */
2600 }
2601
/*
 * Placeholder: the body is empty, so calling this has no effect and
 * `dev' is ignored.
 * NOTE(review): judging by the name this is meant to be registered as a
 * mount-root hook -- confirm at the registration site.
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* nothing to do */
}
2608
2609
2610 RF_AutoConfig_t *
2611 rf_find_raid_components()
2612 {
2613 struct devnametobdevmaj *dtobdm;
2614 struct vnode *vp;
2615 struct disklabel label;
2616 struct device *dv;
2617 char *cd_name;
2618 dev_t dev;
2619 int error;
2620 int i;
2621 int good_one;
2622 RF_ComponentLabel_t *clabel;
2623 RF_AutoConfig_t *ac_list;
2624 RF_AutoConfig_t *ac;
2625
2626
2627 /* initialize the AutoConfig list */
2628 ac_list = NULL;
2629
2630 if (raidautoconfig) {
2631
2632 /* we begin by trolling through *all* the devices on the system */
2633
2634 for (dv = alldevs.tqh_first; dv != NULL;
2635 dv = dv->dv_list.tqe_next) {
2636
2637 /* we are only interested in disks... */
2638 if (dv->dv_class != DV_DISK)
2639 continue;
2640
2641 /* we don't care about floppies... */
2642 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2643 continue;
2644 }
2645
2646 /* need to find the device_name_to_block_device_major stuff */
2647 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2648 dtobdm = dev_name2blk;
2649 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2650 dtobdm++;
2651 }
2652
2653 /* get a vnode for the raw partition of this disk */
2654
2655 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2656 if (bdevvp(dev, &vp))
2657 panic("RAID can't alloc vnode");
2658
2659 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2660
2661 if (error) {
2662 /* "Who cares." Continue looking
2663 for something that exists*/
2664 vput(vp);
2665 continue;
2666 }
2667
2668 /* Ok, the disk exists. Go get the disklabel. */
2669 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2670 FREAD, NOCRED, 0);
2671 if (error) {
2672 /*
2673 * XXX can't happen - open() would
2674 * have errored out (or faked up one)
2675 */
2676 printf("can't get label for dev %s%c (%d)!?!?\n",
2677 dv->dv_xname, 'a' + RAW_PART, error);
2678 }
2679
2680 /* don't need this any more. We'll allocate it again
2681 a little later if we really do... */
2682 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2683 vput(vp);
2684
2685 for (i=0; i < label.d_npartitions; i++) {
2686 /* We only support partitions marked as RAID */
2687 if (label.d_partitions[i].p_fstype != FS_RAID)
2688 continue;
2689
2690 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2691 if (bdevvp(dev, &vp))
2692 panic("RAID can't alloc vnode");
2693
2694 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2695 if (error) {
2696 /* Whatever... */
2697 vput(vp);
2698 continue;
2699 }
2700
2701 good_one = 0;
2702
2703 clabel = (RF_ComponentLabel_t *)
2704 malloc(sizeof(RF_ComponentLabel_t),
2705 M_RAIDFRAME, M_NOWAIT);
2706 if (clabel == NULL) {
2707 /* XXX CLEANUP HERE */
2708 printf("RAID auto config: out of memory!\n");
2709 return(NULL); /* XXX probably should panic? */
2710 }
2711
2712 if (!raidread_component_label(dev, vp, clabel)) {
2713 /* Got the label. Does it look reasonable? */
2714 if (rf_reasonable_label(clabel) &&
2715 (clabel->partitionSize <=
2716 label.d_partitions[i].p_size)) {
2717 #if DEBUG
2718 printf("Component on: %s%c: %d\n",
2719 dv->dv_xname, 'a'+i,
2720 label.d_partitions[i].p_size);
2721 rf_print_component_label(clabel);
2722 #endif
2723 /* if it's reasonable, add it,
2724 else ignore it. */
2725 ac = (RF_AutoConfig_t *)
2726 malloc(sizeof(RF_AutoConfig_t),
2727 M_RAIDFRAME,
2728 M_NOWAIT);
2729 if (ac == NULL) {
2730 /* XXX should panic?? */
2731 return(NULL);
2732 }
2733
2734 sprintf(ac->devname, "%s%c",
2735 dv->dv_xname, 'a'+i);
2736 ac->dev = dev;
2737 ac->vp = vp;
2738 ac->clabel = clabel;
2739 ac->next = ac_list;
2740 ac_list = ac;
2741 good_one = 1;
2742 }
2743 }
2744 if (!good_one) {
2745 /* cleanup */
2746 free(clabel, M_RAIDFRAME);
2747 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2748 vput(vp);
2749 }
2750 }
2751 }
2752 }
2753 return(ac_list);
2754 }
2755
2756 static int
2757 rf_reasonable_label(clabel)
2758 RF_ComponentLabel_t *clabel;
2759 {
2760
2761 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2762 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2763 ((clabel->clean == RF_RAID_CLEAN) ||
2764 (clabel->clean == RF_RAID_DIRTY)) &&
2765 clabel->row >=0 &&
2766 clabel->column >= 0 &&
2767 clabel->num_rows > 0 &&
2768 clabel->num_columns > 0 &&
2769 clabel->row < clabel->num_rows &&
2770 clabel->column < clabel->num_columns &&
2771 clabel->blockSize > 0 &&
2772 clabel->numBlocks > 0) {
2773 /* label looks reasonable enough... */
2774 return(1);
2775 }
2776 return(0);
2777 }
2778
2779
2780 void
2781 rf_print_component_label(clabel)
2782 RF_ComponentLabel_t *clabel;
2783 {
2784 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2785 clabel->row, clabel->column,
2786 clabel->num_rows, clabel->num_columns);
2787 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2788 clabel->version, clabel->serial_number,
2789 clabel->mod_counter);
2790 printf(" Clean: %s Status: %d\n",
2791 clabel->clean ? "Yes" : "No", clabel->status );
2792 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2793 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2794 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2795 (char) clabel->parityConfig, clabel->blockSize,
2796 clabel->numBlocks);
2797 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2798 printf(" Contains root partition: %s\n",
2799 clabel->root_partition ? "Yes" : "No" );
2800 printf(" Last configured as: raid%d\n", clabel->last_unit );
2801 #if 0
2802 printf(" Config order: %d\n", clabel->config_order);
2803 #endif
2804
2805 }
2806
2807 RF_ConfigSet_t *
2808 rf_create_auto_sets(ac_list)
2809 RF_AutoConfig_t *ac_list;
2810 {
2811 RF_AutoConfig_t *ac;
2812 RF_ConfigSet_t *config_sets;
2813 RF_ConfigSet_t *cset;
2814 RF_AutoConfig_t *ac_next;
2815
2816
2817 config_sets = NULL;
2818
2819 /* Go through the AutoConfig list, and figure out which components
2820 belong to what sets. */
2821 ac = ac_list;
2822 while(ac!=NULL) {
2823 /* we're going to putz with ac->next, so save it here
2824 for use at the end of the loop */
2825 ac_next = ac->next;
2826
2827 if (config_sets == NULL) {
2828 /* will need at least this one... */
2829 config_sets = (RF_ConfigSet_t *)
2830 malloc(sizeof(RF_ConfigSet_t),
2831 M_RAIDFRAME, M_NOWAIT);
2832 if (config_sets == NULL) {
2833 panic("rf_create_auto_sets: No memory!\n");
2834 }
2835 /* this one is easy :) */
2836 config_sets->ac = ac;
2837 config_sets->next = NULL;
2838 config_sets->rootable = 0;
2839 ac->next = NULL;
2840 } else {
2841 /* which set does this component fit into? */
2842 cset = config_sets;
2843 while(cset!=NULL) {
2844 if (rf_does_it_fit(cset, ac)) {
2845 /* looks like it matches */
2846 ac->next = cset->ac;
2847 cset->ac = ac;
2848 break;
2849 }
2850 cset = cset->next;
2851 }
2852 if (cset==NULL) {
2853 /* didn't find a match above... new set..*/
2854 cset = (RF_ConfigSet_t *)
2855 malloc(sizeof(RF_ConfigSet_t),
2856 M_RAIDFRAME, M_NOWAIT);
2857 if (cset == NULL) {
2858 panic("rf_create_auto_sets: No memory!\n");
2859 }
2860 cset->ac = ac;
2861 ac->next = NULL;
2862 cset->next = config_sets;
2863 cset->rootable = 0;
2864 config_sets = cset;
2865 }
2866 }
2867 ac = ac_next;
2868 }
2869
2870
2871 return(config_sets);
2872 }
2873
2874 static int
2875 rf_does_it_fit(cset, ac)
2876 RF_ConfigSet_t *cset;
2877 RF_AutoConfig_t *ac;
2878 {
2879 RF_ComponentLabel_t *clabel1, *clabel2;
2880
2881 /* If this one matches the *first* one in the set, that's good
2882 enough, since the other members of the set would have been
2883 through here too... */
2884 /* note that we are not checking partitionSize here..
2885
2886 Note that we are also not checking the mod_counters here.
2887 If everything else matches execpt the mod_counter, that's
2888 good enough for this test. We will deal with the mod_counters
2889 a little later in the autoconfiguration process.
2890
2891 (clabel1->mod_counter == clabel2->mod_counter) &&
2892
2893 */
2894
2895 clabel1 = cset->ac->clabel;
2896 clabel2 = ac->clabel;
2897 if ((clabel1->version == clabel2->version) &&
2898 (clabel1->serial_number == clabel2->serial_number) &&
2899 (clabel1->num_rows == clabel2->num_rows) &&
2900 (clabel1->num_columns == clabel2->num_columns) &&
2901 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2902 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2903 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2904 (clabel1->parityConfig == clabel2->parityConfig) &&
2905 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2906 (clabel1->blockSize == clabel2->blockSize) &&
2907 (clabel1->numBlocks == clabel2->numBlocks) &&
2908 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2909 (clabel1->root_partition == clabel2->root_partition) &&
2910 (clabel1->last_unit == clabel2->last_unit) &&
2911 (clabel1->config_order == clabel2->config_order)) {
2912 /* if it get's here, it almost *has* to be a match */
2913 } else {
2914 /* it's not consistent with somebody in the set..
2915 punt */
2916 return(0);
2917 }
2918 /* all was fine.. it must fit... */
2919 return(1);
2920 }
2921
2922 int
2923 rf_have_enough_components(cset)
2924 RF_ConfigSet_t *cset;
2925 {
2926 RF_AutoConfig_t *ac;
2927 RF_AutoConfig_t *auto_config;
2928 RF_ComponentLabel_t *clabel;
2929 int r,c;
2930 int num_rows;
2931 int num_cols;
2932 int num_missing;
2933
2934 /* check to see that we have enough 'live' components
2935 of this set. If so, we can configure it if necessary */
2936
2937 num_rows = cset->ac->clabel->num_rows;
2938 num_cols = cset->ac->clabel->num_columns;
2939
2940 /* XXX Check for duplicate components!?!?!? */
2941
2942 num_missing = 0;
2943 auto_config = cset->ac;
2944
2945 for(r=0; r<num_rows; r++) {
2946 for(c=0; c<num_cols; c++) {
2947 ac = auto_config;
2948 while(ac!=NULL) {
2949 if (ac->clabel==NULL) {
2950 /* big-time bad news. */
2951 goto fail;
2952 }
2953 if ((ac->clabel->row == r) &&
2954 (ac->clabel->column == c)) {
2955 /* it's this one... */
2956 #if DEBUG
2957 printf("Found: %s at %d,%d\n",
2958 ac->devname,r,c);
2959 #endif
2960 break;
2961 }
2962 ac=ac->next;
2963 }
2964 if (ac==NULL) {
2965 /* Didn't find one here! */
2966 num_missing++;
2967 }
2968 }
2969 }
2970
2971 clabel = cset->ac->clabel;
2972
2973 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
2974 ((clabel->parityConfig == '1') && (num_missing > 1)) ||
2975 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
2976 ((clabel->parityConfig == '5') && (num_missing > 1))) {
2977 /* XXX this needs to be made *much* more general */
2978 /* Too many failures */
2979 return(0);
2980 }
2981 /* otherwise, all is well, and we've got enough to take a kick
2982 at autoconfiguring this set */
2983 return(1);
2984 fail:
2985 return(0);
2986
2987 }
2988
2989 void
2990 rf_create_configuration(ac,config,raidPtr)
2991 RF_AutoConfig_t *ac;
2992 RF_Config_t *config;
2993 RF_Raid_t *raidPtr;
2994 {
2995 RF_ComponentLabel_t *clabel;
2996 int i;
2997
2998 clabel = ac->clabel;
2999
3000 /* 1. Fill in the common stuff */
3001 config->numRow = clabel->num_rows;
3002 config->numCol = clabel->num_columns;
3003 config->numSpare = 0; /* XXX should this be set here? */
3004 config->sectPerSU = clabel->sectPerSU;
3005 config->SUsPerPU = clabel->SUsPerPU;
3006 config->SUsPerRU = clabel->SUsPerRU;
3007 config->parityConfig = clabel->parityConfig;
3008 /* XXX... */
3009 strcpy(config->diskQueueType,"fifo");
3010 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3011 config->layoutSpecificSize = 0; /* XXX ?? */
3012
3013 while(ac!=NULL) {
3014 /* row/col values will be in range due to the checks
3015 in reasonable_label() */
3016 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3017 ac->devname);
3018 ac = ac->next;
3019 }
3020
3021 for(i=0;i<RF_MAXDBGV;i++) {
3022 config->debugVars[i][0] = NULL;
3023 }
3024 }
3025
3026 int
3027 rf_set_autoconfig(raidPtr, new_value)
3028 RF_Raid_t *raidPtr;
3029 int new_value;
3030 {
3031 RF_ComponentLabel_t clabel;
3032 struct vnode *vp;
3033 dev_t dev;
3034 int row, column;
3035
3036 raidPtr->autoconfigure = new_value;
3037 for(row=0; row<raidPtr->numRow; row++) {
3038 for(column=0; column<raidPtr->numCol; column++) {
3039 dev = raidPtr->Disks[row][column].dev;
3040 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3041 raidread_component_label(dev, vp, &clabel);
3042 clabel.autoconfigure = new_value;
3043 raidwrite_component_label(dev, vp, &clabel);
3044 }
3045 }
3046 return(new_value);
3047 }
3048
3049 int
3050 rf_set_rootpartition(raidPtr, new_value)
3051 RF_Raid_t *raidPtr;
3052 int new_value;
3053 {
3054 RF_ComponentLabel_t clabel;
3055 struct vnode *vp;
3056 dev_t dev;
3057 int row, column;
3058
3059 raidPtr->root_partition = new_value;
3060 for(row=0; row<raidPtr->numRow; row++) {
3061 for(column=0; column<raidPtr->numCol; column++) {
3062 dev = raidPtr->Disks[row][column].dev;
3063 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3064 raidread_component_label(dev, vp, &clabel);
3065 clabel.root_partition = new_value;
3066 raidwrite_component_label(dev, vp, &clabel);
3067 }
3068 }
3069 return(new_value);
3070 }
3071
3072 void
3073 rf_release_all_vps(cset)
3074 RF_ConfigSet_t *cset;
3075 {
3076 RF_AutoConfig_t *ac;
3077
3078 ac = cset->ac;
3079 while(ac!=NULL) {
3080 /* Close the vp, and give it back */
3081 if (ac->vp) {
3082 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3083 vput(ac->vp);
3084 }
3085 ac = ac->next;
3086 }
3087 }
3088
3089
3090 void
3091 rf_cleanup_config_set(cset)
3092 RF_ConfigSet_t *cset;
3093 {
3094 RF_AutoConfig_t *ac;
3095 RF_AutoConfig_t *next_ac;
3096
3097 ac = cset->ac;
3098 while(ac!=NULL) {
3099 next_ac = ac->next;
3100 /* nuke the label */
3101 free(ac->clabel, M_RAIDFRAME);
3102 /* cleanup the config structure */
3103 free(ac, M_RAIDFRAME);
3104 /* "next.." */
3105 ac = next_ac;
3106 }
3107 /* and, finally, nuke the config set */
3108 free(cset, M_RAIDFRAME);
3109 }
3110
3111
3112 void
3113 raid_init_component_label(raidPtr, clabel)
3114 RF_Raid_t *raidPtr;
3115 RF_ComponentLabel_t *clabel;
3116 {
3117 /* current version number */
3118 clabel->version = RF_COMPONENT_LABEL_VERSION;
3119 clabel->serial_number = raidPtr->serial_number;
3120 clabel->mod_counter = raidPtr->mod_counter;
3121 clabel->num_rows = raidPtr->numRow;
3122 clabel->num_columns = raidPtr->numCol;
3123 clabel->clean = RF_RAID_DIRTY; /* not clean */
3124 clabel->status = rf_ds_optimal; /* "It's good!" */
3125
3126 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3127 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3128 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3129
3130 clabel->blockSize = raidPtr->bytesPerSector;
3131 clabel->numBlocks = raidPtr->sectorsPerDisk;
3132
3133 /* XXX not portable */
3134 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3135 clabel->maxOutstanding = raidPtr->maxOutstanding;
3136 clabel->autoconfigure = raidPtr->autoconfigure;
3137 clabel->root_partition = raidPtr->root_partition;
3138 clabel->last_unit = raidPtr->raidid;
3139 clabel->config_order = raidPtr->config_order;
3140 }
3141
3142 int
3143 rf_auto_config_set(cset,unit)
3144 RF_ConfigSet_t *cset;
3145 int *unit;
3146 {
3147 RF_Raid_t *raidPtr;
3148 RF_Config_t *config;
3149 int raidID;
3150 int retcode;
3151
3152 printf("RAID autoconfigure\n");
3153
3154 retcode = 0;
3155 *unit = -1;
3156
3157 /* 1. Create a config structure */
3158
3159 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3160 M_RAIDFRAME,
3161 M_NOWAIT);
3162 if (config==NULL) {
3163 printf("Out of mem!?!?\n");
3164 /* XXX do something more intelligent here. */
3165 return(1);
3166 }
3167
3168 memset(config, 0, sizeof(RF_Config_t));
3169
3170 /* XXX raidID needs to be set correctly.. */
3171
3172 /*
3173 2. Figure out what RAID ID this one is supposed to live at
3174 See if we can get the same RAID dev that it was configured
3175 on last time..
3176 */
3177
3178 raidID = cset->ac->clabel->last_unit;
3179 if ((raidID < 0) || (raidID >= numraid)) {
3180 /* let's not wander off into lala land. */
3181 raidID = numraid - 1;
3182 }
3183 if (raidPtrs[raidID]->valid != 0) {
3184
3185 /*
3186 Nope... Go looking for an alternative...
3187 Start high so we don't immediately use raid0 if that's
3188 not taken.
3189 */
3190
3191 for(raidID = numraid; raidID >= 0; raidID--) {
3192 if (raidPtrs[raidID]->valid == 0) {
3193 /* can use this one! */
3194 break;
3195 }
3196 }
3197 }
3198
3199 if (raidID < 0) {
3200 /* punt... */
3201 printf("Unable to auto configure this set!\n");
3202 printf("(Out of RAID devs!)\n");
3203 return(1);
3204 }
3205 printf("Configuring raid%d:\n",raidID);
3206 raidPtr = raidPtrs[raidID];
3207
3208 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3209 raidPtr->raidid = raidID;
3210 raidPtr->openings = RAIDOUTSTANDING;
3211
3212 /* 3. Build the configuration structure */
3213 rf_create_configuration(cset->ac, config, raidPtr);
3214
3215 /* 4. Do the configuration */
3216 retcode = rf_Configure(raidPtr, config, cset->ac);
3217
3218 if (retcode == 0) {
3219
3220 raidinit(raidPtrs[raidID]);
3221
3222 rf_markalldirty(raidPtrs[raidID]);
3223 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3224 if (cset->ac->clabel->root_partition==1) {
3225 /* everything configured just fine. Make a note
3226 that this set is eligible to be root. */
3227 cset->rootable = 1;
3228 /* XXX do this here? */
3229 raidPtrs[raidID]->root_partition = 1;
3230 }
3231 }
3232
3233 /* 5. Cleanup */
3234 free(config, M_RAIDFRAME);
3235
3236 *unit = raidID;
3237 return(retcode);
3238 }
3239