rf_netbsdkintf.c revision 1.66 1 /* $NetBSD: rf_netbsdkintf.c,v 1.66 2000/03/04 06:03:21 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include "raid.h"
139 #include "opt_raid_autoconfig.h"
140 #include "rf_raid.h"
141 #include "rf_raidframe.h"
142 #include "rf_copyback.h"
143 #include "rf_dag.h"
144 #include "rf_dagflags.h"
145 #include "rf_diskqueue.h"
146 #include "rf_acctrace.h"
147 #include "rf_etimer.h"
148 #include "rf_general.h"
149 #include "rf_debugMem.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_debugprint.h"
155 #include "rf_threadstuff.h"
156 #include "rf_configure.h"
157
/* Verbosity threshold for db1_printf(); values above 0 enable the output. */
int rf_kdebug_level = 0;

/*
 * db1_printf((fmt, ...)) -- level-1 debug trace.
 *
 * The argument is a complete, parenthesised printf() argument list.
 * Both variants expand to a single `do { } while (0)' statement so that
 * the macro can be followed by `;' anywhere a statement is allowed,
 * including the unbraced body of an if/else, without capturing a
 * following `else' or producing a stray empty statement.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
165
166 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
167
168 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
169
170 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
171 * spare table */
172 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
173 * installation process */
174
175 /* prototypes */
176 static void KernelWakeupFunc(struct buf * bp);
177 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
178 dev_t dev, RF_SectorNum_t startSect,
179 RF_SectorCount_t numSect, caddr_t buf,
180 void (*cbFunc) (struct buf *), void *cbArg,
181 int logBytesPerSector, struct proc * b_proc);
182 static void raidinit __P((RF_Raid_t *));
183
184 void raidattach __P((int));
185 int raidsize __P((dev_t));
186 int raidopen __P((dev_t, int, int, struct proc *));
187 int raidclose __P((dev_t, int, int, struct proc *));
188 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
189 int raidwrite __P((dev_t, struct uio *, int));
190 int raidread __P((dev_t, struct uio *, int));
191 void raidstrategy __P((struct buf *));
192 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
193
194 /*
195 * Pilfered from ccd.c
196 */
197
198 struct raidbuf {
199 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
200 struct buf *rf_obp; /* ptr. to original I/O buf */
201 int rf_flags; /* misc. flags */
202 RF_DiskQueueData_t *req;/* the request that this was part of.. */
203 };
204
205
206 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
207 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
208
209 /* XXX Not sure if the following should be replacing the raidPtrs above,
210 or if it should be used in conjunction with that...
211 */
212
213 struct raid_softc {
214 int sc_flags; /* flags */
215 int sc_cflags; /* configuration flags */
216 size_t sc_size; /* size of the raid device */
217 char sc_xname[20]; /* XXX external name */
218 struct disk sc_dkdev; /* generic disk device info */
219 struct pool sc_cbufpool; /* component buffer pool */
220 struct buf_queue buf_queue; /* used for the device queue */
221 };
222 /* sc_flags */
223 #define RAIDF_INITED 0x01 /* unit has been initialized */
224 #define RAIDF_WLABEL 0x02 /* label area is writable */
225 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
226 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
227 #define RAIDF_LOCKED 0x80 /* unit is locked */
228
229 #define raidunit(x) DISKUNIT(x)
230 int numraid = 0;
231
232 /*
233 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
234 * Be aware that large numbers can allow the driver to consume a lot of
235 * kernel memory, especially on writes, and in degraded mode reads.
236 *
237 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
238 * a single 64K write will typically require 64K for the old data,
239 * 64K for the old parity, and 64K for the new parity, for a total
240 * of 192K (if the parity buffer is not re-used immediately).
241 * Even if it is used immediately, that's still 128K, which when multiplied
242 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
243 *
244 * Now in degraded mode, for example, a 64K read on the above setup may
245 * require data reconstruction, which will require *all* of the 4 remaining
246 * disks to participate -- 4 * 32K/disk == 128K again.
247 */
248
249 #ifndef RAIDOUTSTANDING
250 #define RAIDOUTSTANDING 6
251 #endif
252
253 #define RAIDLABELDEV(dev) \
254 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
255
256 /* declared here, and made public, for the benefit of KVM stuff.. */
257 struct raid_softc *raid_softc;
258
259 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
260 struct disklabel *));
261 static void raidgetdisklabel __P((dev_t));
262 static void raidmakedisklabel __P((struct raid_softc *));
263
264 static int raidlock __P((struct raid_softc *));
265 static void raidunlock __P((struct raid_softc *));
266
267 static void rf_markalldirty __P((RF_Raid_t *));
268 void rf_mountroot_hook __P((struct device *));
269
270 struct device *raidrootdev;
271
272 void rf_ReconThread __P((struct rf_recon_req *));
273 /* XXX what I want is: */
274 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
275 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
276 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
277 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
278 void rf_buildroothack __P((void *));
279
280 RF_AutoConfig_t *rf_find_raid_components __P((void));
281 void print_component_label __P((RF_ComponentLabel_t *));
282 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
283 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
284 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
285 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
286 RF_Raid_t *));
287 int rf_set_autoconfig __P((RF_Raid_t *, int));
288 int rf_set_rootpartition __P((RF_Raid_t *, int));
289 void rf_release_all_vps __P((RF_ConfigSet_t *));
290 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
291 int rf_have_enough_components __P((RF_ConfigSet_t *));
292 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
293
294 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
295 allow autoconfig to take place.
296 Note that this is overridden by having
297 RAID_AUTOCONFIG as an option in the
298 kernel config file. */
299 extern struct device *booted_device;
300
301 void
302 raidattach(num)
303 int num;
304 {
305 int raidID;
306 int i, rc;
307 RF_AutoConfig_t *ac_list; /* autoconfig list */
308 RF_ConfigSet_t *config_sets;
309
310 #ifdef DEBUG
311 printf("raidattach: Asked for %d units\n", num);
312 #endif
313
314 if (num <= 0) {
315 #ifdef DIAGNOSTIC
316 panic("raidattach: count <= 0");
317 #endif
318 return;
319 }
320 /* This is where all the initialization stuff gets done. */
321
322 numraid = num;
323
324 /* Make some space for requested number of units... */
325
326 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
327 if (raidPtrs == NULL) {
328 panic("raidPtrs is NULL!!\n");
329 }
330
331 rc = rf_mutex_init(&rf_sparet_wait_mutex);
332 if (rc) {
333 RF_PANIC();
334 }
335
336 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
337
338 for (i = 0; i < num; i++)
339 raidPtrs[i] = NULL;
340 rc = rf_BootRaidframe();
341 if (rc == 0)
342 printf("Kernelized RAIDframe activated\n");
343 else
344 panic("Serious error booting RAID!!\n");
345
346 /* put together some datastructures like the CCD device does.. This
347 * lets us lock the device and what-not when it gets opened. */
348
349 raid_softc = (struct raid_softc *)
350 malloc(num * sizeof(struct raid_softc),
351 M_RAIDFRAME, M_NOWAIT);
352 if (raid_softc == NULL) {
353 printf("WARNING: no memory for RAIDframe driver\n");
354 return;
355 }
356
357 bzero(raid_softc, num * sizeof(struct raid_softc));
358
359 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
360 M_RAIDFRAME, M_NOWAIT);
361 if (raidrootdev == NULL) {
362 panic("No memory for RAIDframe driver!!?!?!\n");
363 }
364
365 for (raidID = 0; raidID < num; raidID++) {
366 BUFQ_INIT(&raid_softc[raidID].buf_queue);
367
368 raidrootdev[raidID].dv_class = DV_DISK;
369 raidrootdev[raidID].dv_cfdata = NULL;
370 raidrootdev[raidID].dv_unit = raidID;
371 raidrootdev[raidID].dv_parent = NULL;
372 raidrootdev[raidID].dv_flags = 0;
373 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
374
375 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
376 (RF_Raid_t *));
377 if (raidPtrs[raidID] == NULL) {
378 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
379 numraid = raidID;
380 return;
381 }
382 }
383
384 #if RAID_AUTOCONFIG
385 raidautoconfig = 1;
386 #endif
387
388 if (raidautoconfig) {
389 /* 1. locate all RAID components on the system */
390
391 #if DEBUG
392 printf("Searching for raid components...\n");
393 #endif
394 ac_list = rf_find_raid_components();
395
396 /* 2. sort them into their respective sets */
397
398 config_sets = rf_create_auto_sets(ac_list);
399
400 /* 3. evaluate each set and configure the valid ones
401 This gets done in rf_buildroothack() */
402
403 /* schedule the creation of the thread to do the
404 "/ on RAID" stuff */
405
406 kthread_create(rf_buildroothack,config_sets);
407
408 #if 0
409 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
410 #endif
411 }
412
413 }
414
415 void
416 rf_buildroothack(arg)
417 void *arg;
418 {
419 RF_ConfigSet_t *config_sets = arg;
420 RF_ConfigSet_t *cset;
421 RF_ConfigSet_t *next_cset;
422 int retcode;
423 int raidID;
424 int rootID;
425 int num_root;
426
427 num_root = 0;
428 cset = config_sets;
429 while(cset != NULL ) {
430 next_cset = cset->next;
431 if (rf_have_enough_components(cset) &&
432 cset->ac->clabel->autoconfigure==1) {
433 retcode = rf_auto_config_set(cset,&raidID);
434 if (!retcode) {
435 if (cset->rootable) {
436 rootID = raidID;
437 num_root++;
438 }
439 } else {
440 /* The autoconfig didn't work :( */
441 #if DEBUG
442 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
443 #endif
444 rf_release_all_vps(cset);
445 }
446 } else {
447 /* we're not autoconfiguring this set...
448 release the associated resources */
449 rf_release_all_vps(cset);
450 }
451 /* cleanup */
452 rf_cleanup_config_set(cset);
453 cset = next_cset;
454 }
455 if (boothowto & RB_ASKNAME) {
456 /* We don't auto-config... */
457 } else {
458 /* They didn't ask, and we found something bootable... */
459
460 if (num_root == 1) {
461 booted_device = &raidrootdev[rootID];
462 } else if (num_root > 1) {
463 /* we can't guess.. require the user to answer... */
464 boothowto |= RB_ASKNAME;
465 }
466 }
467 }
468
469
470 int
471 raidsize(dev)
472 dev_t dev;
473 {
474 struct raid_softc *rs;
475 struct disklabel *lp;
476 int part, unit, omask, size;
477
478 unit = raidunit(dev);
479 if (unit >= numraid)
480 return (-1);
481 rs = &raid_softc[unit];
482
483 if ((rs->sc_flags & RAIDF_INITED) == 0)
484 return (-1);
485
486 part = DISKPART(dev);
487 omask = rs->sc_dkdev.dk_openmask & (1 << part);
488 lp = rs->sc_dkdev.dk_label;
489
490 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
491 return (-1);
492
493 if (lp->d_partitions[part].p_fstype != FS_SWAP)
494 size = -1;
495 else
496 size = lp->d_partitions[part].p_size *
497 (lp->d_secsize / DEV_BSIZE);
498
499 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
500 return (-1);
501
502 return (size);
503
504 }
505
506 int
507 raiddump(dev, blkno, va, size)
508 dev_t dev;
509 daddr_t blkno;
510 caddr_t va;
511 size_t size;
512 {
513 /* Not implemented. */
514 return ENXIO;
515 }
516 /* ARGSUSED */
517 int
518 raidopen(dev, flags, fmt, p)
519 dev_t dev;
520 int flags, fmt;
521 struct proc *p;
522 {
523 int unit = raidunit(dev);
524 struct raid_softc *rs;
525 struct disklabel *lp;
526 int part, pmask;
527 int error = 0;
528
529 if (unit >= numraid)
530 return (ENXIO);
531 rs = &raid_softc[unit];
532
533 if ((error = raidlock(rs)) != 0)
534 return (error);
535 lp = rs->sc_dkdev.dk_label;
536
537 part = DISKPART(dev);
538 pmask = (1 << part);
539
540 db1_printf(("Opening raid device number: %d partition: %d\n",
541 unit, part));
542
543
544 if ((rs->sc_flags & RAIDF_INITED) &&
545 (rs->sc_dkdev.dk_openmask == 0))
546 raidgetdisklabel(dev);
547
548 /* make sure that this partition exists */
549
550 if (part != RAW_PART) {
551 db1_printf(("Not a raw partition..\n"));
552 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
553 ((part >= lp->d_npartitions) ||
554 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
555 error = ENXIO;
556 raidunlock(rs);
557 db1_printf(("Bailing out...\n"));
558 return (error);
559 }
560 }
561 /* Prevent this unit from being unconfigured while open. */
562 switch (fmt) {
563 case S_IFCHR:
564 rs->sc_dkdev.dk_copenmask |= pmask;
565 break;
566
567 case S_IFBLK:
568 rs->sc_dkdev.dk_bopenmask |= pmask;
569 break;
570 }
571
572 if ((rs->sc_dkdev.dk_openmask == 0) &&
573 ((rs->sc_flags & RAIDF_INITED) != 0)) {
574 /* First one... mark things as dirty... Note that we *MUST*
575 have done a configure before this. I DO NOT WANT TO BE
576 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
577 THAT THEY BELONG TOGETHER!!!!! */
578 /* XXX should check to see if we're only open for reading
579 here... If so, we needn't do this, but then need some
580 other way of keeping track of what's happened.. */
581
582 rf_markalldirty( raidPtrs[unit] );
583 }
584
585
586 rs->sc_dkdev.dk_openmask =
587 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
588
589 raidunlock(rs);
590
591 return (error);
592
593
594 }
595 /* ARGSUSED */
596 int
597 raidclose(dev, flags, fmt, p)
598 dev_t dev;
599 int flags, fmt;
600 struct proc *p;
601 {
602 int unit = raidunit(dev);
603 struct raid_softc *rs;
604 int error = 0;
605 int part;
606
607 if (unit >= numraid)
608 return (ENXIO);
609 rs = &raid_softc[unit];
610
611 if ((error = raidlock(rs)) != 0)
612 return (error);
613
614 part = DISKPART(dev);
615
616 /* ...that much closer to allowing unconfiguration... */
617 switch (fmt) {
618 case S_IFCHR:
619 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
620 break;
621
622 case S_IFBLK:
623 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
624 break;
625 }
626 rs->sc_dkdev.dk_openmask =
627 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
628
629 if ((rs->sc_dkdev.dk_openmask == 0) &&
630 ((rs->sc_flags & RAIDF_INITED) != 0)) {
631 /* Last one... device is not unconfigured yet.
632 Device shutdown has taken care of setting the
633 clean bits if RAIDF_INITED is not set
634 mark things as clean... */
635 #if 0
636 printf("Last one on raid%d. Updating status.\n",unit);
637 #endif
638 rf_final_update_component_labels( raidPtrs[unit] );
639 }
640
641 raidunlock(rs);
642 return (0);
643
644 }
645
646 void
647 raidstrategy(bp)
648 register struct buf *bp;
649 {
650 register int s;
651
652 unsigned int raidID = raidunit(bp->b_dev);
653 RF_Raid_t *raidPtr;
654 struct raid_softc *rs = &raid_softc[raidID];
655 struct disklabel *lp;
656 int wlabel;
657
658 if ((rs->sc_flags & RAIDF_INITED) ==0) {
659 bp->b_error = ENXIO;
660 bp->b_flags = B_ERROR;
661 bp->b_resid = bp->b_bcount;
662 biodone(bp);
663 return;
664 }
665 if (raidID >= numraid || !raidPtrs[raidID]) {
666 bp->b_error = ENODEV;
667 bp->b_flags |= B_ERROR;
668 bp->b_resid = bp->b_bcount;
669 biodone(bp);
670 return;
671 }
672 raidPtr = raidPtrs[raidID];
673 if (!raidPtr->valid) {
674 bp->b_error = ENODEV;
675 bp->b_flags |= B_ERROR;
676 bp->b_resid = bp->b_bcount;
677 biodone(bp);
678 return;
679 }
680 if (bp->b_bcount == 0) {
681 db1_printf(("b_bcount is zero..\n"));
682 biodone(bp);
683 return;
684 }
685 lp = rs->sc_dkdev.dk_label;
686
687 /*
688 * Do bounds checking and adjust transfer. If there's an
689 * error, the bounds check will flag that for us.
690 */
691
692 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
693 if (DISKPART(bp->b_dev) != RAW_PART)
694 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
695 db1_printf(("Bounds check failed!!:%d %d\n",
696 (int) bp->b_blkno, (int) wlabel));
697 biodone(bp);
698 return;
699 }
700 s = splbio();
701
702 bp->b_resid = 0;
703
704 /* stuff it onto our queue */
705 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
706
707 raidstart(raidPtrs[raidID]);
708
709 splx(s);
710 }
711 /* ARGSUSED */
712 int
713 raidread(dev, uio, flags)
714 dev_t dev;
715 struct uio *uio;
716 int flags;
717 {
718 int unit = raidunit(dev);
719 struct raid_softc *rs;
720 int part;
721
722 if (unit >= numraid)
723 return (ENXIO);
724 rs = &raid_softc[unit];
725
726 if ((rs->sc_flags & RAIDF_INITED) == 0)
727 return (ENXIO);
728 part = DISKPART(dev);
729
730 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
731
732 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
733
734 }
735 /* ARGSUSED */
736 int
737 raidwrite(dev, uio, flags)
738 dev_t dev;
739 struct uio *uio;
740 int flags;
741 {
742 int unit = raidunit(dev);
743 struct raid_softc *rs;
744
745 if (unit >= numraid)
746 return (ENXIO);
747 rs = &raid_softc[unit];
748
749 if ((rs->sc_flags & RAIDF_INITED) == 0)
750 return (ENXIO);
751 db1_printf(("raidwrite\n"));
752 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
753
754 }
755
756 int
757 raidioctl(dev, cmd, data, flag, p)
758 dev_t dev;
759 u_long cmd;
760 caddr_t data;
761 int flag;
762 struct proc *p;
763 {
764 int unit = raidunit(dev);
765 int error = 0;
766 int part, pmask;
767 struct raid_softc *rs;
768 RF_Config_t *k_cfg, *u_cfg;
769 RF_Raid_t *raidPtr;
770 RF_RaidDisk_t *diskPtr;
771 RF_AccTotals_t *totals;
772 RF_DeviceConfig_t *d_cfg, **ucfgp;
773 u_char *specific_buf;
774 int retcode = 0;
775 int row;
776 int column;
777 struct rf_recon_req *rrcopy, *rr;
778 RF_ComponentLabel_t *clabel;
779 RF_ComponentLabel_t ci_label;
780 RF_ComponentLabel_t **clabel_ptr;
781 RF_SingleComponent_t *sparePtr,*componentPtr;
782 RF_SingleComponent_t hot_spare;
783 RF_SingleComponent_t component;
784 int i, j, d;
785
786 if (unit >= numraid)
787 return (ENXIO);
788 rs = &raid_softc[unit];
789 raidPtr = raidPtrs[unit];
790
791 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
792 (int) DISKPART(dev), (int) unit, (int) cmd));
793
794 /* Must be open for writes for these commands... */
795 switch (cmd) {
796 case DIOCSDINFO:
797 case DIOCWDINFO:
798 case DIOCWLABEL:
799 if ((flag & FWRITE) == 0)
800 return (EBADF);
801 }
802
803 /* Must be initialized for these... */
804 switch (cmd) {
805 case DIOCGDINFO:
806 case DIOCSDINFO:
807 case DIOCWDINFO:
808 case DIOCGPART:
809 case DIOCWLABEL:
810 case DIOCGDEFLABEL:
811 case RAIDFRAME_SHUTDOWN:
812 case RAIDFRAME_REWRITEPARITY:
813 case RAIDFRAME_GET_INFO:
814 case RAIDFRAME_RESET_ACCTOTALS:
815 case RAIDFRAME_GET_ACCTOTALS:
816 case RAIDFRAME_KEEP_ACCTOTALS:
817 case RAIDFRAME_GET_SIZE:
818 case RAIDFRAME_FAIL_DISK:
819 case RAIDFRAME_COPYBACK:
820 case RAIDFRAME_CHECK_RECON_STATUS:
821 case RAIDFRAME_GET_COMPONENT_LABEL:
822 case RAIDFRAME_SET_COMPONENT_LABEL:
823 case RAIDFRAME_ADD_HOT_SPARE:
824 case RAIDFRAME_REMOVE_HOT_SPARE:
825 case RAIDFRAME_INIT_LABELS:
826 case RAIDFRAME_REBUILD_IN_PLACE:
827 case RAIDFRAME_CHECK_PARITY:
828 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
829 case RAIDFRAME_CHECK_COPYBACK_STATUS:
830 case RAIDFRAME_SET_AUTOCONFIG:
831 case RAIDFRAME_SET_ROOT:
832 if ((rs->sc_flags & RAIDF_INITED) == 0)
833 return (ENXIO);
834 }
835
836 switch (cmd) {
837
838 /* configure the system */
839 case RAIDFRAME_CONFIGURE:
840
841 if (raidPtr->valid) {
842 /* There is a valid RAID set running on this unit! */
843 printf("raid%d: Device already configured!\n",unit);
844 return(EINVAL);
845 }
846
847 /* copy-in the configuration information */
848 /* data points to a pointer to the configuration structure */
849
850 u_cfg = *((RF_Config_t **) data);
851 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
852 if (k_cfg == NULL) {
853 return (ENOMEM);
854 }
855 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
856 sizeof(RF_Config_t));
857 if (retcode) {
858 RF_Free(k_cfg, sizeof(RF_Config_t));
859 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
860 retcode));
861 return (retcode);
862 }
863 /* allocate a buffer for the layout-specific data, and copy it
864 * in */
865 if (k_cfg->layoutSpecificSize) {
866 if (k_cfg->layoutSpecificSize > 10000) {
867 /* sanity check */
868 RF_Free(k_cfg, sizeof(RF_Config_t));
869 return (EINVAL);
870 }
871 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
872 (u_char *));
873 if (specific_buf == NULL) {
874 RF_Free(k_cfg, sizeof(RF_Config_t));
875 return (ENOMEM);
876 }
877 retcode = copyin(k_cfg->layoutSpecific,
878 (caddr_t) specific_buf,
879 k_cfg->layoutSpecificSize);
880 if (retcode) {
881 RF_Free(k_cfg, sizeof(RF_Config_t));
882 RF_Free(specific_buf,
883 k_cfg->layoutSpecificSize);
884 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
885 retcode));
886 return (retcode);
887 }
888 } else
889 specific_buf = NULL;
890 k_cfg->layoutSpecific = specific_buf;
891
892 /* should do some kind of sanity check on the configuration.
893 * Store the sum of all the bytes in the last byte? */
894
895 /* configure the system */
896
897 /*
898 * Clear the entire RAID descriptor, just to make sure
899 * there is no stale data left in the case of a
900 * reconfiguration
901 */
902 bzero((char *) raidPtr, sizeof(RF_Raid_t));
903 raidPtr->raidid = unit;
904
905 retcode = rf_Configure(raidPtr, k_cfg, NULL);
906
907 if (retcode == 0) {
908
909 /* allow this many simultaneous IO's to
910 this RAID device */
911 raidPtr->openings = RAIDOUTSTANDING;
912
913 raidinit(raidPtr);
914 rf_markalldirty(raidPtr);
915 }
916 /* free the buffers. No return code here. */
917 if (k_cfg->layoutSpecificSize) {
918 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
919 }
920 RF_Free(k_cfg, sizeof(RF_Config_t));
921
922 return (retcode);
923
924 /* shutdown the system */
925 case RAIDFRAME_SHUTDOWN:
926
927 if ((error = raidlock(rs)) != 0)
928 return (error);
929
930 /*
931 * If somebody has a partition mounted, we shouldn't
932 * shutdown.
933 */
934
935 part = DISKPART(dev);
936 pmask = (1 << part);
937 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
938 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
939 (rs->sc_dkdev.dk_copenmask & pmask))) {
940 raidunlock(rs);
941 return (EBUSY);
942 }
943
944 retcode = rf_Shutdown(raidPtr);
945
946 pool_destroy(&rs->sc_cbufpool);
947
948 /* It's no longer initialized... */
949 rs->sc_flags &= ~RAIDF_INITED;
950
951 /* Detach the disk. */
952 disk_detach(&rs->sc_dkdev);
953
954 raidunlock(rs);
955
956 return (retcode);
957 case RAIDFRAME_GET_COMPONENT_LABEL:
958 clabel_ptr = (RF_ComponentLabel_t **) data;
959 /* need to read the component label for the disk indicated
960 by row,column in clabel */
961
962 /* For practice, let's get it directly fromdisk, rather
963 than from the in-core copy */
964 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
965 (RF_ComponentLabel_t *));
966 if (clabel == NULL)
967 return (ENOMEM);
968
969 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
970
971 retcode = copyin( *clabel_ptr, clabel,
972 sizeof(RF_ComponentLabel_t));
973
974 if (retcode) {
975 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
976 return(retcode);
977 }
978
979 row = clabel->row;
980 column = clabel->column;
981
982 if ((row < 0) || (row >= raidPtr->numRow) ||
983 (column < 0) || (column >= raidPtr->numCol)) {
984 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
985 return(EINVAL);
986 }
987
988 raidread_component_label(raidPtr->Disks[row][column].dev,
989 raidPtr->raid_cinfo[row][column].ci_vp,
990 clabel );
991
992 retcode = copyout((caddr_t) clabel,
993 (caddr_t) *clabel_ptr,
994 sizeof(RF_ComponentLabel_t));
995 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
996 return (retcode);
997
998 case RAIDFRAME_SET_COMPONENT_LABEL:
999 clabel = (RF_ComponentLabel_t *) data;
1000
1001 /* XXX check the label for valid stuff... */
1002 /* Note that some things *should not* get modified --
1003 the user should be re-initing the labels instead of
1004 trying to patch things.
1005 */
1006
1007 printf("Got component label:\n");
1008 printf("Version: %d\n",clabel->version);
1009 printf("Serial Number: %d\n",clabel->serial_number);
1010 printf("Mod counter: %d\n",clabel->mod_counter);
1011 printf("Row: %d\n", clabel->row);
1012 printf("Column: %d\n", clabel->column);
1013 printf("Num Rows: %d\n", clabel->num_rows);
1014 printf("Num Columns: %d\n", clabel->num_columns);
1015 printf("Clean: %d\n", clabel->clean);
1016 printf("Status: %d\n", clabel->status);
1017
1018 row = clabel->row;
1019 column = clabel->column;
1020
1021 if ((row < 0) || (row >= raidPtr->numRow) ||
1022 (column < 0) || (column >= raidPtr->numCol)) {
1023 return(EINVAL);
1024 }
1025
1026 /* XXX this isn't allowed to do anything for now :-) */
1027
1028 /* XXX and before it is, we need to fill in the rest
1029 of the fields!?!?!?! */
1030 #if 0
1031 raidwrite_component_label(
1032 raidPtr->Disks[row][column].dev,
1033 raidPtr->raid_cinfo[row][column].ci_vp,
1034 clabel );
1035 #endif
1036 return (0);
1037
1038 case RAIDFRAME_INIT_LABELS:
1039 clabel = (RF_ComponentLabel_t *) data;
1040 /*
1041 we only want the serial number from
1042 the above. We get all the rest of the information
1043 from the config that was used to create this RAID
1044 set.
1045 */
1046
1047 raidPtr->serial_number = clabel->serial_number;
1048
1049 raid_init_component_label(raidPtr, &ci_label);
1050 ci_label.serial_number = clabel->serial_number;
1051
1052 for(row=0;row<raidPtr->numRow;row++) {
1053 ci_label.row = row;
1054 for(column=0;column<raidPtr->numCol;column++) {
1055 diskPtr = &raidPtr->Disks[row][column];
1056 ci_label.partitionSize = diskPtr->partitionSize;
1057 ci_label.column = column;
1058 raidwrite_component_label(
1059 raidPtr->Disks[row][column].dev,
1060 raidPtr->raid_cinfo[row][column].ci_vp,
1061 &ci_label );
1062 }
1063 }
1064
1065 return (retcode);
1066 case RAIDFRAME_SET_AUTOCONFIG:
1067 d = rf_set_autoconfig(raidPtr, *data);
1068 printf("New autoconfig value is: %d\n", d);
1069 *data = d;
1070 return (retcode);
1071
1072 case RAIDFRAME_SET_ROOT:
1073 d = rf_set_rootpartition(raidPtr, *data);
1074 printf("New rootpartition value is: %d\n", d);
1075 *data = d;
1076 return (retcode);
1077
1078 /* initialize all parity */
1079 case RAIDFRAME_REWRITEPARITY:
1080
1081 if (raidPtr->Layout.map->faultsTolerated == 0) {
1082 /* Parity for RAID 0 is trivially correct */
1083 raidPtr->parity_good = RF_RAID_CLEAN;
1084 return(0);
1085 }
1086
1087 if (raidPtr->parity_rewrite_in_progress == 1) {
1088 /* Re-write is already in progress! */
1089 return(EINVAL);
1090 }
1091
1092 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1093 rf_RewriteParityThread,
1094 raidPtr,"raid_parity");
1095 return (retcode);
1096
1097
1098 case RAIDFRAME_ADD_HOT_SPARE:
1099 sparePtr = (RF_SingleComponent_t *) data;
1100 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1101 printf("Adding spare\n");
1102 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1103 return(retcode);
1104
1105 case RAIDFRAME_REMOVE_HOT_SPARE:
1106 return(retcode);
1107
1108 case RAIDFRAME_REBUILD_IN_PLACE:
1109
1110 if (raidPtr->Layout.map->faultsTolerated == 0) {
1111 /* Can't do this on a RAID 0!! */
1112 return(EINVAL);
1113 }
1114
1115 if (raidPtr->recon_in_progress == 1) {
1116 /* a reconstruct is already in progress! */
1117 return(EINVAL);
1118 }
1119
1120 componentPtr = (RF_SingleComponent_t *) data;
1121 memcpy( &component, componentPtr,
1122 sizeof(RF_SingleComponent_t));
1123 row = component.row;
1124 column = component.column;
1125 printf("Rebuild: %d %d\n",row, column);
1126 if ((row < 0) || (row >= raidPtr->numRow) ||
1127 (column < 0) || (column >= raidPtr->numCol)) {
1128 return(EINVAL);
1129 }
1130
1131 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1132 if (rrcopy == NULL)
1133 return(ENOMEM);
1134
1135 rrcopy->raidPtr = (void *) raidPtr;
1136 rrcopy->row = row;
1137 rrcopy->col = column;
1138
1139 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1140 rf_ReconstructInPlaceThread,
1141 rrcopy,"raid_reconip");
1142 return(retcode);
1143
1144 case RAIDFRAME_GET_INFO:
1145 if (!raidPtr->valid)
1146 return (ENODEV);
1147 ucfgp = (RF_DeviceConfig_t **) data;
1148 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1149 (RF_DeviceConfig_t *));
1150 if (d_cfg == NULL)
1151 return (ENOMEM);
1152 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1153 d_cfg->rows = raidPtr->numRow;
1154 d_cfg->cols = raidPtr->numCol;
1155 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1156 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1157 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1158 return (ENOMEM);
1159 }
1160 d_cfg->nspares = raidPtr->numSpare;
1161 if (d_cfg->nspares >= RF_MAX_DISKS) {
1162 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1163 return (ENOMEM);
1164 }
1165 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1166 d = 0;
1167 for (i = 0; i < d_cfg->rows; i++) {
1168 for (j = 0; j < d_cfg->cols; j++) {
1169 d_cfg->devs[d] = raidPtr->Disks[i][j];
1170 d++;
1171 }
1172 }
1173 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1174 d_cfg->spares[i] = raidPtr->Disks[0][j];
1175 }
1176 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1177 sizeof(RF_DeviceConfig_t));
1178 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1179
1180 return (retcode);
1181
1182 case RAIDFRAME_CHECK_PARITY:
1183 *(int *) data = raidPtr->parity_good;
1184 return (0);
1185
1186 case RAIDFRAME_RESET_ACCTOTALS:
1187 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1188 return (0);
1189
1190 case RAIDFRAME_GET_ACCTOTALS:
1191 totals = (RF_AccTotals_t *) data;
1192 *totals = raidPtr->acc_totals;
1193 return (0);
1194
1195 case RAIDFRAME_KEEP_ACCTOTALS:
1196 raidPtr->keep_acc_totals = *(int *)data;
1197 return (0);
1198
1199 case RAIDFRAME_GET_SIZE:
1200 *(int *) data = raidPtr->totalSectors;
1201 return (0);
1202
1203 /* fail a disk & optionally start reconstruction */
1204 case RAIDFRAME_FAIL_DISK:
1205
1206 if (raidPtr->Layout.map->faultsTolerated == 0) {
1207 /* Can't do this on a RAID 0!! */
1208 return(EINVAL);
1209 }
1210
1211 rr = (struct rf_recon_req *) data;
1212
1213 if (rr->row < 0 || rr->row >= raidPtr->numRow
1214 || rr->col < 0 || rr->col >= raidPtr->numCol)
1215 return (EINVAL);
1216
1217 printf("raid%d: Failing the disk: row: %d col: %d\n",
1218 unit, rr->row, rr->col);
1219
1220 /* make a copy of the recon request so that we don't rely on
1221 * the user's buffer */
1222 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1223 if (rrcopy == NULL)
1224 return(ENOMEM);
1225 bcopy(rr, rrcopy, sizeof(*rr));
1226 rrcopy->raidPtr = (void *) raidPtr;
1227
1228 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1229 rf_ReconThread,
1230 rrcopy,"raid_recon");
1231 return (0);
1232
1233 /* invoke a copyback operation after recon on whatever disk
1234 * needs it, if any */
1235 case RAIDFRAME_COPYBACK:
1236
1237 if (raidPtr->Layout.map->faultsTolerated == 0) {
1238 /* This makes no sense on a RAID 0!! */
1239 return(EINVAL);
1240 }
1241
1242 if (raidPtr->copyback_in_progress == 1) {
1243 /* Copyback is already in progress! */
1244 return(EINVAL);
1245 }
1246
1247 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1248 rf_CopybackThread,
1249 raidPtr,"raid_copyback");
1250 return (retcode);
1251
1252 /* return the percentage completion of reconstruction */
1253 case RAIDFRAME_CHECK_RECON_STATUS:
1254 if (raidPtr->Layout.map->faultsTolerated == 0) {
1255 /* This makes no sense on a RAID 0 */
1256 return(EINVAL);
1257 }
1258 row = 0; /* XXX we only consider a single row... */
1259 if (raidPtr->status[row] != rf_rs_reconstructing)
1260 *(int *) data = 100;
1261 else
1262 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1263 return (0);
1264
1265 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1266 if (raidPtr->Layout.map->faultsTolerated == 0) {
1267 /* This makes no sense on a RAID 0 */
1268 return(EINVAL);
1269 }
1270 if (raidPtr->parity_rewrite_in_progress == 1) {
1271 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1272 } else {
1273 *(int *) data = 100;
1274 }
1275 return (0);
1276
1277 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1278 if (raidPtr->Layout.map->faultsTolerated == 0) {
1279 /* This makes no sense on a RAID 0 */
1280 return(EINVAL);
1281 }
1282 if (raidPtr->copyback_in_progress == 1) {
1283 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1284 raidPtr->Layout.numStripe;
1285 } else {
1286 *(int *) data = 100;
1287 }
1288 return (0);
1289
1290
1291 /* the sparetable daemon calls this to wait for the kernel to
1292 * need a spare table. this ioctl does not return until a
1293 * spare table is needed. XXX -- calling mpsleep here in the
1294 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1295 * -- I should either compute the spare table in the kernel,
1296 * or have a different -- XXX XXX -- interface (a different
1297 * character device) for delivering the table -- XXX */
1298 #if 0
1299 case RAIDFRAME_SPARET_WAIT:
1300 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1301 while (!rf_sparet_wait_queue)
1302 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1303 waitreq = rf_sparet_wait_queue;
1304 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1305 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1306
1307 /* structure assignment */
1308 *((RF_SparetWait_t *) data) = *waitreq;
1309
1310 RF_Free(waitreq, sizeof(*waitreq));
1311 return (0);
1312
1313 /* wakes up a process waiting on SPARET_WAIT and puts an error
1314 * code in it that will cause the dameon to exit */
1315 case RAIDFRAME_ABORT_SPARET_WAIT:
1316 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1317 waitreq->fcol = -1;
1318 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1319 waitreq->next = rf_sparet_wait_queue;
1320 rf_sparet_wait_queue = waitreq;
1321 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1322 wakeup(&rf_sparet_wait_queue);
1323 return (0);
1324
1325 /* used by the spare table daemon to deliver a spare table
1326 * into the kernel */
1327 case RAIDFRAME_SEND_SPARET:
1328
1329 /* install the spare table */
1330 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1331
1332 /* respond to the requestor. the return status of the spare
1333 * table installation is passed in the "fcol" field */
1334 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1335 waitreq->fcol = retcode;
1336 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1337 waitreq->next = rf_sparet_resp_queue;
1338 rf_sparet_resp_queue = waitreq;
1339 wakeup(&rf_sparet_resp_queue);
1340 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1341
1342 return (retcode);
1343 #endif
1344
1345 default:
1346 break; /* fall through to the os-specific code below */
1347
1348 }
1349
1350 if (!raidPtr->valid)
1351 return (EINVAL);
1352
1353 /*
1354 * Add support for "regular" device ioctls here.
1355 */
1356
1357 switch (cmd) {
1358 case DIOCGDINFO:
1359 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1360 break;
1361
1362 case DIOCGPART:
1363 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1364 ((struct partinfo *) data)->part =
1365 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1366 break;
1367
1368 case DIOCWDINFO:
1369 case DIOCSDINFO:
1370 if ((error = raidlock(rs)) != 0)
1371 return (error);
1372
1373 rs->sc_flags |= RAIDF_LABELLING;
1374
1375 error = setdisklabel(rs->sc_dkdev.dk_label,
1376 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1377 if (error == 0) {
1378 if (cmd == DIOCWDINFO)
1379 error = writedisklabel(RAIDLABELDEV(dev),
1380 raidstrategy, rs->sc_dkdev.dk_label,
1381 rs->sc_dkdev.dk_cpulabel);
1382 }
1383 rs->sc_flags &= ~RAIDF_LABELLING;
1384
1385 raidunlock(rs);
1386
1387 if (error)
1388 return (error);
1389 break;
1390
1391 case DIOCWLABEL:
1392 if (*(int *) data != 0)
1393 rs->sc_flags |= RAIDF_WLABEL;
1394 else
1395 rs->sc_flags &= ~RAIDF_WLABEL;
1396 break;
1397
1398 case DIOCGDEFLABEL:
1399 raidgetdefaultlabel(raidPtr, rs,
1400 (struct disklabel *) data);
1401 break;
1402
1403 default:
1404 retcode = ENOTTY;
1405 }
1406 return (retcode);
1407
1408 }
1409
1410
1411 /* raidinit -- complete the rest of the initialization for the
1412 RAIDframe device. */
1413
1414
1415 static void
1416 raidinit(raidPtr)
1417 RF_Raid_t *raidPtr;
1418 {
1419 struct raid_softc *rs;
1420 int unit;
1421
1422 unit = raidPtr->raidid;
1423
1424 rs = &raid_softc[unit];
1425 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1426 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1427
1428
1429 /* XXX should check return code first... */
1430 rs->sc_flags |= RAIDF_INITED;
1431
1432 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1433
1434 rs->sc_dkdev.dk_name = rs->sc_xname;
1435
1436 /* disk_attach actually creates space for the CPU disklabel, among
1437 * other things, so it's critical to call this *BEFORE* we try putzing
1438 * with disklabels. */
1439
1440 disk_attach(&rs->sc_dkdev);
1441
1442 /* XXX There may be a weird interaction here between this, and
1443 * protectedSectors, as used in RAIDframe. */
1444
1445 rs->sc_size = raidPtr->totalSectors;
1446
1447 }
1448
1449 /* wake up the daemon & tell it to get us a spare table
1450 * XXX
1451 * the entries in the queues should be tagged with the raidPtr
1452 * so that in the extremely rare case that two recons happen at once,
1453 * we know for which device were requesting a spare table
1454 * XXX
1455 *
1456 * XXX This code is not currently used. GO
1457 */
1458 int
1459 rf_GetSpareTableFromDaemon(req)
1460 RF_SparetWait_t *req;
1461 {
1462 int retcode;
1463
1464 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1465 req->next = rf_sparet_wait_queue;
1466 rf_sparet_wait_queue = req;
1467 wakeup(&rf_sparet_wait_queue);
1468
1469 /* mpsleep unlocks the mutex */
1470 while (!rf_sparet_resp_queue) {
1471 tsleep(&rf_sparet_resp_queue, PRIBIO,
1472 "raidframe getsparetable", 0);
1473 }
1474 req = rf_sparet_resp_queue;
1475 rf_sparet_resp_queue = req->next;
1476 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1477
1478 retcode = req->fcol;
1479 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1480 * alloc'd */
1481 return (retcode);
1482 }
1483
1484 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1485 * bp & passes it down.
1486 * any calls originating in the kernel must use non-blocking I/O
1487 * do some extra sanity checking to return "appropriate" error values for
1488 * certain conditions (to make some standard utilities work)
1489 *
1490 * Formerly known as: rf_DoAccessKernel
1491 */
1492 void
1493 raidstart(raidPtr)
1494 RF_Raid_t *raidPtr;
1495 {
1496 RF_SectorCount_t num_blocks, pb, sum;
1497 RF_RaidAddr_t raid_addr;
1498 int retcode;
1499 struct partition *pp;
1500 daddr_t blocknum;
1501 int unit;
1502 struct raid_softc *rs;
1503 int do_async;
1504 struct buf *bp;
1505
1506 unit = raidPtr->raidid;
1507 rs = &raid_softc[unit];
1508
1509 /* quick check to see if anything has died recently */
1510 RF_LOCK_MUTEX(raidPtr->mutex);
1511 if (raidPtr->numNewFailures > 0) {
1512 rf_update_component_labels(raidPtr);
1513 raidPtr->numNewFailures--;
1514 }
1515 RF_UNLOCK_MUTEX(raidPtr->mutex);
1516
1517 /* Check to see if we're at the limit... */
1518 RF_LOCK_MUTEX(raidPtr->mutex);
1519 while (raidPtr->openings > 0) {
1520 RF_UNLOCK_MUTEX(raidPtr->mutex);
1521
1522 /* get the next item, if any, from the queue */
1523 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1524 /* nothing more to do */
1525 return;
1526 }
1527 BUFQ_REMOVE(&rs->buf_queue, bp);
1528
1529 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1530 * partition.. Need to make it absolute to the underlying
1531 * device.. */
1532
1533 blocknum = bp->b_blkno;
1534 if (DISKPART(bp->b_dev) != RAW_PART) {
1535 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1536 blocknum += pp->p_offset;
1537 }
1538
1539 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1540 (int) blocknum));
1541
1542 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1543 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1544
1545 /* *THIS* is where we adjust what block we're going to...
1546 * but DO NOT TOUCH bp->b_blkno!!! */
1547 raid_addr = blocknum;
1548
1549 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1550 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1551 sum = raid_addr + num_blocks + pb;
1552 if (1 || rf_debugKernelAccess) {
1553 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1554 (int) raid_addr, (int) sum, (int) num_blocks,
1555 (int) pb, (int) bp->b_resid));
1556 }
1557 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1558 || (sum < num_blocks) || (sum < pb)) {
1559 bp->b_error = ENOSPC;
1560 bp->b_flags |= B_ERROR;
1561 bp->b_resid = bp->b_bcount;
1562 biodone(bp);
1563 RF_LOCK_MUTEX(raidPtr->mutex);
1564 continue;
1565 }
1566 /*
1567 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1568 */
1569
1570 if (bp->b_bcount & raidPtr->sectorMask) {
1571 bp->b_error = EINVAL;
1572 bp->b_flags |= B_ERROR;
1573 bp->b_resid = bp->b_bcount;
1574 biodone(bp);
1575 RF_LOCK_MUTEX(raidPtr->mutex);
1576 continue;
1577
1578 }
1579 db1_printf(("Calling DoAccess..\n"));
1580
1581
1582 RF_LOCK_MUTEX(raidPtr->mutex);
1583 raidPtr->openings--;
1584 RF_UNLOCK_MUTEX(raidPtr->mutex);
1585
1586 /*
1587 * Everything is async.
1588 */
1589 do_async = 1;
1590
1591 /* don't ever condition on bp->b_flags & B_WRITE.
1592 * always condition on B_READ instead */
1593
1594 /* XXX we're still at splbio() here... do we *really*
1595 need to be? */
1596
1597
1598 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1599 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1600 do_async, raid_addr, num_blocks,
1601 bp->b_un.b_addr, bp, NULL, NULL,
1602 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1603
1604
1605 RF_LOCK_MUTEX(raidPtr->mutex);
1606 }
1607 RF_UNLOCK_MUTEX(raidPtr->mutex);
1608 }
1609
1610
1611
1612
1613 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1614
1615 int
1616 rf_DispatchKernelIO(queue, req)
1617 RF_DiskQueue_t *queue;
1618 RF_DiskQueueData_t *req;
1619 {
1620 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1621 struct buf *bp;
1622 struct raidbuf *raidbp = NULL;
1623 struct raid_softc *rs;
1624 int unit;
1625 int s;
1626
1627 s=0;
1628 /* s = splbio();*/ /* want to test this */
1629 /* XXX along with the vnode, we also need the softc associated with
1630 * this device.. */
1631
1632 req->queue = queue;
1633
1634 unit = queue->raidPtr->raidid;
1635
1636 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1637
1638 if (unit >= numraid) {
1639 printf("Invalid unit number: %d %d\n", unit, numraid);
1640 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1641 }
1642 rs = &raid_softc[unit];
1643
1644 /* XXX is this the right place? */
1645 disk_busy(&rs->sc_dkdev);
1646
1647 bp = req->bp;
1648 #if 1
1649 /* XXX when there is a physical disk failure, someone is passing us a
1650 * buffer that contains old stuff!! Attempt to deal with this problem
1651 * without taking a performance hit... (not sure where the real bug
1652 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1653
1654 if (bp->b_flags & B_ERROR) {
1655 bp->b_flags &= ~B_ERROR;
1656 }
1657 if (bp->b_error != 0) {
1658 bp->b_error = 0;
1659 }
1660 #endif
1661 raidbp = RAIDGETBUF(rs);
1662
1663 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1664
1665 /*
1666 * context for raidiodone
1667 */
1668 raidbp->rf_obp = bp;
1669 raidbp->req = req;
1670
1671 LIST_INIT(&raidbp->rf_buf.b_dep);
1672
1673 switch (req->type) {
1674 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1675 /* XXX need to do something extra here.. */
1676 /* I'm leaving this in, as I've never actually seen it used,
1677 * and I'd like folks to report it... GO */
1678 printf(("WAKEUP CALLED\n"));
1679 queue->numOutstanding++;
1680
1681 /* XXX need to glue the original buffer into this?? */
1682
1683 KernelWakeupFunc(&raidbp->rf_buf);
1684 break;
1685
1686 case RF_IO_TYPE_READ:
1687 case RF_IO_TYPE_WRITE:
1688
1689 if (req->tracerec) {
1690 RF_ETIMER_START(req->tracerec->timer);
1691 }
1692 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1693 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1694 req->sectorOffset, req->numSector,
1695 req->buf, KernelWakeupFunc, (void *) req,
1696 queue->raidPtr->logBytesPerSector, req->b_proc);
1697
1698 if (rf_debugKernelAccess) {
1699 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1700 (long) bp->b_blkno));
1701 }
1702 queue->numOutstanding++;
1703 queue->last_deq_sector = req->sectorOffset;
1704 /* acc wouldn't have been let in if there were any pending
1705 * reqs at any other priority */
1706 queue->curPriority = req->priority;
1707
1708 db1_printf(("Going for %c to unit %d row %d col %d\n",
1709 req->type, unit, queue->row, queue->col));
1710 db1_printf(("sector %d count %d (%d bytes) %d\n",
1711 (int) req->sectorOffset, (int) req->numSector,
1712 (int) (req->numSector <<
1713 queue->raidPtr->logBytesPerSector),
1714 (int) queue->raidPtr->logBytesPerSector));
1715 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1716 raidbp->rf_buf.b_vp->v_numoutput++;
1717 }
1718 VOP_STRATEGY(&raidbp->rf_buf);
1719
1720 break;
1721
1722 default:
1723 panic("bad req->type in rf_DispatchKernelIO");
1724 }
1725 db1_printf(("Exiting from DispatchKernelIO\n"));
1726 /* splx(s); */ /* want to test this */
1727 return (0);
1728 }
1729 /* this is the callback function associated with a I/O invoked from
1730 kernel code.
1731 */
1732 static void
1733 KernelWakeupFunc(vbp)
1734 struct buf *vbp;
1735 {
1736 RF_DiskQueueData_t *req = NULL;
1737 RF_DiskQueue_t *queue;
1738 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1739 struct buf *bp;
1740 struct raid_softc *rs;
1741 int unit;
1742 register int s;
1743
1744 s = splbio();
1745 db1_printf(("recovering the request queue:\n"));
1746 req = raidbp->req;
1747
1748 bp = raidbp->rf_obp;
1749
1750 queue = (RF_DiskQueue_t *) req->queue;
1751
1752 if (raidbp->rf_buf.b_flags & B_ERROR) {
1753 bp->b_flags |= B_ERROR;
1754 bp->b_error = raidbp->rf_buf.b_error ?
1755 raidbp->rf_buf.b_error : EIO;
1756 }
1757
1758 /* XXX methinks this could be wrong... */
1759 #if 1
1760 bp->b_resid = raidbp->rf_buf.b_resid;
1761 #endif
1762
1763 if (req->tracerec) {
1764 RF_ETIMER_STOP(req->tracerec->timer);
1765 RF_ETIMER_EVAL(req->tracerec->timer);
1766 RF_LOCK_MUTEX(rf_tracing_mutex);
1767 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1768 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1769 req->tracerec->num_phys_ios++;
1770 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1771 }
1772 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1773
1774 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1775
1776
1777 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1778 * ballistic, and mark the component as hosed... */
1779
1780 if (bp->b_flags & B_ERROR) {
1781 /* Mark the disk as dead */
1782 /* but only mark it once... */
1783 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1784 rf_ds_optimal) {
1785 printf("raid%d: IO Error. Marking %s as failed.\n",
1786 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1787 queue->raidPtr->Disks[queue->row][queue->col].status =
1788 rf_ds_failed;
1789 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1790 queue->raidPtr->numFailures++;
1791 queue->raidPtr->numNewFailures++;
1792 /* XXX here we should bump the version number for each component, and write that data out */
1793 } else { /* Disk is already dead... */
1794 /* printf("Disk already marked as dead!\n"); */
1795 }
1796
1797 }
1798
1799 rs = &raid_softc[unit];
1800 RAIDPUTBUF(rs, raidbp);
1801
1802
1803 if (bp->b_resid == 0) {
1804 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1805 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1806 }
1807
1808 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1809 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1810
1811 splx(s);
1812 }
1813
1814
1815
1816 /*
1817 * initialize a buf structure for doing an I/O in the kernel.
1818 */
1819 static void
1820 InitBP(
1821 struct buf * bp,
1822 struct vnode * b_vp,
1823 unsigned rw_flag,
1824 dev_t dev,
1825 RF_SectorNum_t startSect,
1826 RF_SectorCount_t numSect,
1827 caddr_t buf,
1828 void (*cbFunc) (struct buf *),
1829 void *cbArg,
1830 int logBytesPerSector,
1831 struct proc * b_proc)
1832 {
1833 /* bp->b_flags = B_PHYS | rw_flag; */
1834 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1835 bp->b_bcount = numSect << logBytesPerSector;
1836 bp->b_bufsize = bp->b_bcount;
1837 bp->b_error = 0;
1838 bp->b_dev = dev;
1839 bp->b_un.b_addr = buf;
1840 bp->b_blkno = startSect;
1841 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1842 if (bp->b_bcount == 0) {
1843 panic("bp->b_bcount is zero in InitBP!!\n");
1844 }
1845 bp->b_proc = b_proc;
1846 bp->b_iodone = cbFunc;
1847 bp->b_vp = b_vp;
1848
1849 }
1850
1851 static void
1852 raidgetdefaultlabel(raidPtr, rs, lp)
1853 RF_Raid_t *raidPtr;
1854 struct raid_softc *rs;
1855 struct disklabel *lp;
1856 {
1857 db1_printf(("Building a default label...\n"));
1858 bzero(lp, sizeof(*lp));
1859
1860 /* fabricate a label... */
1861 lp->d_secperunit = raidPtr->totalSectors;
1862 lp->d_secsize = raidPtr->bytesPerSector;
1863 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1864 lp->d_ntracks = 1;
1865 lp->d_ncylinders = raidPtr->totalSectors /
1866 (lp->d_nsectors * lp->d_ntracks);
1867 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1868
1869 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1870 lp->d_type = DTYPE_RAID;
1871 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1872 lp->d_rpm = 3600;
1873 lp->d_interleave = 1;
1874 lp->d_flags = 0;
1875
1876 lp->d_partitions[RAW_PART].p_offset = 0;
1877 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1878 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1879 lp->d_npartitions = RAW_PART + 1;
1880
1881 lp->d_magic = DISKMAGIC;
1882 lp->d_magic2 = DISKMAGIC;
1883 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1884
1885 }
1886 /*
1887 * Read the disklabel from the raid device. If one is not present, fake one
1888 * up.
1889 */
1890 static void
1891 raidgetdisklabel(dev)
1892 dev_t dev;
1893 {
1894 int unit = raidunit(dev);
1895 struct raid_softc *rs = &raid_softc[unit];
1896 char *errstring;
1897 struct disklabel *lp = rs->sc_dkdev.dk_label;
1898 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1899 RF_Raid_t *raidPtr;
1900
1901 db1_printf(("Getting the disklabel...\n"));
1902
1903 bzero(clp, sizeof(*clp));
1904
1905 raidPtr = raidPtrs[unit];
1906
1907 raidgetdefaultlabel(raidPtr, rs, lp);
1908
1909 /*
1910 * Call the generic disklabel extraction routine.
1911 */
1912 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1913 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1914 if (errstring)
1915 raidmakedisklabel(rs);
1916 else {
1917 int i;
1918 struct partition *pp;
1919
1920 /*
1921 * Sanity check whether the found disklabel is valid.
1922 *
1923 * This is necessary since total size of the raid device
1924 * may vary when an interleave is changed even though exactly
1925 * same componets are used, and old disklabel may used
1926 * if that is found.
1927 */
1928 if (lp->d_secperunit != rs->sc_size)
1929 printf("WARNING: %s: "
1930 "total sector size in disklabel (%d) != "
1931 "the size of raid (%ld)\n", rs->sc_xname,
1932 lp->d_secperunit, (long) rs->sc_size);
1933 for (i = 0; i < lp->d_npartitions; i++) {
1934 pp = &lp->d_partitions[i];
1935 if (pp->p_offset + pp->p_size > rs->sc_size)
1936 printf("WARNING: %s: end of partition `%c' "
1937 "exceeds the size of raid (%ld)\n",
1938 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1939 }
1940 }
1941
1942 }
1943 /*
1944 * Take care of things one might want to take care of in the event
1945 * that a disklabel isn't present.
1946 */
1947 static void
1948 raidmakedisklabel(rs)
1949 struct raid_softc *rs;
1950 {
1951 struct disklabel *lp = rs->sc_dkdev.dk_label;
1952 db1_printf(("Making a label..\n"));
1953
1954 /*
1955 * For historical reasons, if there's no disklabel present
1956 * the raw partition must be marked FS_BSDFFS.
1957 */
1958
1959 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1960
1961 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1962
1963 lp->d_checksum = dkcksum(lp);
1964 }
1965 /*
1966 * Lookup the provided name in the filesystem. If the file exists,
1967 * is a valid block device, and isn't being used by anyone else,
1968 * set *vpp to the file's vnode.
1969 * You'll find the original of this in ccd.c
1970 */
1971 int
1972 raidlookup(path, p, vpp)
1973 char *path;
1974 struct proc *p;
1975 struct vnode **vpp; /* result */
1976 {
1977 struct nameidata nd;
1978 struct vnode *vp;
1979 struct vattr va;
1980 int error;
1981
1982 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1983 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1984 #ifdef DEBUG
1985 printf("RAIDframe: vn_open returned %d\n", error);
1986 #endif
1987 return (error);
1988 }
1989 vp = nd.ni_vp;
1990 if (vp->v_usecount > 1) {
1991 VOP_UNLOCK(vp, 0);
1992 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1993 return (EBUSY);
1994 }
1995 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1996 VOP_UNLOCK(vp, 0);
1997 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1998 return (error);
1999 }
2000 /* XXX: eventually we should handle VREG, too. */
2001 if (va.va_type != VBLK) {
2002 VOP_UNLOCK(vp, 0);
2003 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2004 return (ENOTBLK);
2005 }
2006 VOP_UNLOCK(vp, 0);
2007 *vpp = vp;
2008 return (0);
2009 }
2010 /*
2011 * Wait interruptibly for an exclusive lock.
2012 *
2013 * XXX
2014 * Several drivers do this; it should be abstracted and made MP-safe.
2015 * (Hmm... where have we seen this warning before :-> GO )
2016 */
2017 static int
2018 raidlock(rs)
2019 struct raid_softc *rs;
2020 {
2021 int error;
2022
2023 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2024 rs->sc_flags |= RAIDF_WANTED;
2025 if ((error =
2026 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2027 return (error);
2028 }
2029 rs->sc_flags |= RAIDF_LOCKED;
2030 return (0);
2031 }
2032 /*
2033 * Unlock and wake up any waiters.
2034 */
2035 static void
2036 raidunlock(rs)
2037 struct raid_softc *rs;
2038 {
2039
2040 rs->sc_flags &= ~RAIDF_LOCKED;
2041 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2042 rs->sc_flags &= ~RAIDF_WANTED;
2043 wakeup(rs);
2044 }
2045 }
2046
2047
/*
 * Placement of the RAIDframe component label on a component, as used by
 * raidread_component_label() and raidwrite_component_label(): the label
 * area starts RF_COMPONENT_INFO_OFFSET bytes in and is
 * RF_COMPONENT_INFO_SIZE bytes long.
 */
#define RF_COMPONENT_INFO_OFFSET	(16 * 1024)	/* bytes */
#define RF_COMPONENT_INFO_SIZE		1024		/* bytes */
2050
2051 int
2052 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2053 {
2054 RF_ComponentLabel_t clabel;
2055 raidread_component_label(dev, b_vp, &clabel);
2056 clabel.mod_counter = mod_counter;
2057 clabel.clean = RF_RAID_CLEAN;
2058 raidwrite_component_label(dev, b_vp, &clabel);
2059 return(0);
2060 }
2061
2062
2063 int
2064 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2065 {
2066 RF_ComponentLabel_t clabel;
2067 raidread_component_label(dev, b_vp, &clabel);
2068 clabel.mod_counter = mod_counter;
2069 clabel.clean = RF_RAID_DIRTY;
2070 raidwrite_component_label(dev, b_vp, &clabel);
2071 return(0);
2072 }
2073
2074 /* ARGSUSED */
2075 int
2076 raidread_component_label(dev, b_vp, clabel)
2077 dev_t dev;
2078 struct vnode *b_vp;
2079 RF_ComponentLabel_t *clabel;
2080 {
2081 struct buf *bp;
2082 int error;
2083
2084 /* XXX should probably ensure that we don't try to do this if
2085 someone has changed rf_protected_sectors. */
2086
2087 /* get a block of the appropriate size... */
2088 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2089 bp->b_dev = dev;
2090
2091 /* get our ducks in a row for the read */
2092 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2093 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2094 bp->b_flags = B_BUSY | B_READ;
2095 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2096
2097 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2098
2099 error = biowait(bp);
2100
2101 if (!error) {
2102 memcpy(clabel, bp->b_un.b_addr,
2103 sizeof(RF_ComponentLabel_t));
2104 #if 0
2105 print_component_label( clabel );
2106 #endif
2107 } else {
2108 #if 0
2109 printf("Failed to read RAID component label!\n");
2110 #endif
2111 }
2112
2113 bp->b_flags = B_INVAL | B_AGE;
2114 brelse(bp);
2115 return(error);
2116 }
2117 /* ARGSUSED */
2118 int
2119 raidwrite_component_label(dev, b_vp, clabel)
2120 dev_t dev;
2121 struct vnode *b_vp;
2122 RF_ComponentLabel_t *clabel;
2123 {
2124 struct buf *bp;
2125 int error;
2126
2127 /* get a block of the appropriate size... */
2128 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2129 bp->b_dev = dev;
2130
2131 /* get our ducks in a row for the write */
2132 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2133 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2134 bp->b_flags = B_BUSY | B_WRITE;
2135 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2136
2137 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2138
2139 memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
2140
2141 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2142 error = biowait(bp);
2143 bp->b_flags = B_INVAL | B_AGE;
2144 brelse(bp);
2145 if (error) {
2146 #if 1
2147 printf("Failed to write RAID component info!\n");
2148 #endif
2149 }
2150
2151 return(error);
2152 }
2153
2154 void
2155 rf_markalldirty( raidPtr )
2156 RF_Raid_t *raidPtr;
2157 {
2158 RF_ComponentLabel_t clabel;
2159 int r,c;
2160
2161 raidPtr->mod_counter++;
2162 for (r = 0; r < raidPtr->numRow; r++) {
2163 for (c = 0; c < raidPtr->numCol; c++) {
2164 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2165 raidread_component_label(
2166 raidPtr->Disks[r][c].dev,
2167 raidPtr->raid_cinfo[r][c].ci_vp,
2168 &clabel);
2169 if (clabel.status == rf_ds_spared) {
2170 /* XXX do something special...
2171 but whatever you do, don't
2172 try to access it!! */
2173 } else {
2174 #if 0
2175 clabel.status =
2176 raidPtr->Disks[r][c].status;
2177 raidwrite_component_label(
2178 raidPtr->Disks[r][c].dev,
2179 raidPtr->raid_cinfo[r][c].ci_vp,
2180 &clabel);
2181 #endif
2182 raidmarkdirty(
2183 raidPtr->Disks[r][c].dev,
2184 raidPtr->raid_cinfo[r][c].ci_vp,
2185 raidPtr->mod_counter);
2186 }
2187 }
2188 }
2189 }
2190 /* printf("Component labels marked dirty.\n"); */
2191 #if 0
2192 for( c = 0; c < raidPtr->numSpare ; c++) {
2193 sparecol = raidPtr->numCol + c;
2194 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2195 /*
2196
2197 XXX this is where we get fancy and map this spare
2198 into it's correct spot in the array.
2199
2200 */
2201 /*
2202
2203 we claim this disk is "optimal" if it's
2204 rf_ds_used_spare, as that means it should be
2205 directly substitutable for the disk it replaced.
2206 We note that too...
2207
2208 */
2209
2210 for(i=0;i<raidPtr->numRow;i++) {
2211 for(j=0;j<raidPtr->numCol;j++) {
2212 if ((raidPtr->Disks[i][j].spareRow ==
2213 r) &&
2214 (raidPtr->Disks[i][j].spareCol ==
2215 sparecol)) {
2216 srow = r;
2217 scol = sparecol;
2218 break;
2219 }
2220 }
2221 }
2222
2223 raidread_component_label(
2224 raidPtr->Disks[r][sparecol].dev,
2225 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2226 &clabel);
2227 /* make sure status is noted */
2228 clabel.version = RF_COMPONENT_LABEL_VERSION;
2229 clabel.mod_counter = raidPtr->mod_counter;
2230 clabel.serial_number = raidPtr->serial_number;
2231 clabel.row = srow;
2232 clabel.column = scol;
2233 clabel.num_rows = raidPtr->numRow;
2234 clabel.num_columns = raidPtr->numCol;
2235 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2236 clabel.status = rf_ds_optimal;
2237 raidwrite_component_label(
2238 raidPtr->Disks[r][sparecol].dev,
2239 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2240 &clabel);
2241 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2242 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2243 }
2244 }
2245
2246 #endif
2247 }
2248
2249
2250 void
2251 rf_update_component_labels( raidPtr )
2252 RF_Raid_t *raidPtr;
2253 {
2254 RF_ComponentLabel_t clabel;
2255 int sparecol;
2256 int r,c;
2257 int i,j;
2258 int srow, scol;
2259
2260 srow = -1;
2261 scol = -1;
2262
2263 /* XXX should do extra checks to make sure things really are clean,
2264 rather than blindly setting the clean bit... */
2265
2266 raidPtr->mod_counter++;
2267
2268 for (r = 0; r < raidPtr->numRow; r++) {
2269 for (c = 0; c < raidPtr->numCol; c++) {
2270 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2271 raidread_component_label(
2272 raidPtr->Disks[r][c].dev,
2273 raidPtr->raid_cinfo[r][c].ci_vp,
2274 &clabel);
2275 /* make sure status is noted */
2276 clabel.status = rf_ds_optimal;
2277 /* bump the counter */
2278 clabel.mod_counter = raidPtr->mod_counter;
2279
2280 raidwrite_component_label(
2281 raidPtr->Disks[r][c].dev,
2282 raidPtr->raid_cinfo[r][c].ci_vp,
2283 &clabel);
2284 }
2285 /* else we don't touch it.. */
2286 }
2287 }
2288
2289 for( c = 0; c < raidPtr->numSpare ; c++) {
2290 sparecol = raidPtr->numCol + c;
2291 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2292 /*
2293
2294 we claim this disk is "optimal" if it's
2295 rf_ds_used_spare, as that means it should be
2296 directly substitutable for the disk it replaced.
2297 We note that too...
2298
2299 */
2300
2301 for(i=0;i<raidPtr->numRow;i++) {
2302 for(j=0;j<raidPtr->numCol;j++) {
2303 if ((raidPtr->Disks[i][j].spareRow ==
2304 0) &&
2305 (raidPtr->Disks[i][j].spareCol ==
2306 sparecol)) {
2307 srow = i;
2308 scol = j;
2309 break;
2310 }
2311 }
2312 }
2313
2314 /* XXX shouldn't *really* need this... */
2315 raidread_component_label(
2316 raidPtr->Disks[0][sparecol].dev,
2317 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2318 &clabel);
2319 /* make sure status is noted */
2320
2321 raid_init_component_label(raidPtr, &clabel);
2322
2323 clabel.mod_counter = raidPtr->mod_counter;
2324 clabel.row = srow;
2325 clabel.column = scol;
2326 clabel.status = rf_ds_optimal;
2327
2328 raidwrite_component_label(
2329 raidPtr->Disks[0][sparecol].dev,
2330 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2331 &clabel);
2332 }
2333 }
2334 /* printf("Component labels updated\n"); */
2335 }
2336
2337
2338 void
2339 rf_final_update_component_labels( raidPtr )
2340 RF_Raid_t *raidPtr;
2341 {
2342 RF_ComponentLabel_t clabel;
2343 int sparecol;
2344 int r,c;
2345 int i,j;
2346 int srow, scol;
2347
2348 srow = -1;
2349 scol = -1;
2350
2351 /* XXX should do extra checks to make sure things really are clean,
2352 rather than blindly setting the clean bit... */
2353
2354 raidPtr->mod_counter++;
2355
2356 for (r = 0; r < raidPtr->numRow; r++) {
2357 for (c = 0; c < raidPtr->numCol; c++) {
2358 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2359 raidread_component_label(
2360 raidPtr->Disks[r][c].dev,
2361 raidPtr->raid_cinfo[r][c].ci_vp,
2362 &clabel);
2363 /* make sure status is noted */
2364 clabel.status = rf_ds_optimal;
2365 /* bump the counter */
2366 clabel.mod_counter = raidPtr->mod_counter;
2367
2368 raidwrite_component_label(
2369 raidPtr->Disks[r][c].dev,
2370 raidPtr->raid_cinfo[r][c].ci_vp,
2371 &clabel);
2372 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2373 raidmarkclean(
2374 raidPtr->Disks[r][c].dev,
2375 raidPtr->raid_cinfo[r][c].ci_vp,
2376 raidPtr->mod_counter);
2377 }
2378 }
2379 /* else we don't touch it.. */
2380 }
2381 }
2382
2383 for( c = 0; c < raidPtr->numSpare ; c++) {
2384 sparecol = raidPtr->numCol + c;
2385 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2386 /*
2387
2388 we claim this disk is "optimal" if it's
2389 rf_ds_used_spare, as that means it should be
2390 directly substitutable for the disk it replaced.
2391 We note that too...
2392
2393 */
2394
2395 for(i=0;i<raidPtr->numRow;i++) {
2396 for(j=0;j<raidPtr->numCol;j++) {
2397 if ((raidPtr->Disks[i][j].spareRow ==
2398 0) &&
2399 (raidPtr->Disks[i][j].spareCol ==
2400 sparecol)) {
2401 srow = i;
2402 scol = j;
2403 break;
2404 }
2405 }
2406 }
2407
2408 /* XXX shouldn't *really* need this... */
2409 raidread_component_label(
2410 raidPtr->Disks[0][sparecol].dev,
2411 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2412 &clabel);
2413 /* make sure status is noted */
2414
2415 raid_init_component_label(raidPtr, &clabel);
2416
2417 clabel.mod_counter = raidPtr->mod_counter;
2418 clabel.row = srow;
2419 clabel.column = scol;
2420 clabel.status = rf_ds_optimal;
2421
2422 raidwrite_component_label(
2423 raidPtr->Disks[0][sparecol].dev,
2424 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2425 &clabel);
2426 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2427 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2428 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2429 raidPtr->mod_counter);
2430 }
2431 }
2432 }
2433 /* printf("Component labels updated\n"); */
2434 }
2435
2436
2437 void
2438 rf_ReconThread(req)
2439 struct rf_recon_req *req;
2440 {
2441 int s;
2442 RF_Raid_t *raidPtr;
2443
2444 s = splbio();
2445 raidPtr = (RF_Raid_t *) req->raidPtr;
2446 raidPtr->recon_in_progress = 1;
2447
2448 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2449 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2450
2451 /* XXX get rid of this! we don't need it at all.. */
2452 RF_Free(req, sizeof(*req));
2453
2454 raidPtr->recon_in_progress = 0;
2455 splx(s);
2456
2457 /* That's all... */
2458 kthread_exit(0); /* does not return */
2459 }
2460
2461 void
2462 rf_RewriteParityThread(raidPtr)
2463 RF_Raid_t *raidPtr;
2464 {
2465 int retcode;
2466 int s;
2467
2468 raidPtr->parity_rewrite_in_progress = 1;
2469 s = splbio();
2470 retcode = rf_RewriteParity(raidPtr);
2471 splx(s);
2472 if (retcode) {
2473 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2474 } else {
2475 /* set the clean bit! If we shutdown correctly,
2476 the clean bit on each component label will get
2477 set */
2478 raidPtr->parity_good = RF_RAID_CLEAN;
2479 }
2480 raidPtr->parity_rewrite_in_progress = 0;
2481
2482 /* That's all... */
2483 kthread_exit(0); /* does not return */
2484 }
2485
2486
2487 void
2488 rf_CopybackThread(raidPtr)
2489 RF_Raid_t *raidPtr;
2490 {
2491 int s;
2492
2493 raidPtr->copyback_in_progress = 1;
2494 s = splbio();
2495 rf_CopybackReconstructedData(raidPtr);
2496 splx(s);
2497 raidPtr->copyback_in_progress = 0;
2498
2499 /* That's all... */
2500 kthread_exit(0); /* does not return */
2501 }
2502
2503
2504 void
2505 rf_ReconstructInPlaceThread(req)
2506 struct rf_recon_req *req;
2507 {
2508 int retcode;
2509 int s;
2510 RF_Raid_t *raidPtr;
2511
2512 s = splbio();
2513 raidPtr = req->raidPtr;
2514 raidPtr->recon_in_progress = 1;
2515 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2516 RF_Free(req, sizeof(*req));
2517 raidPtr->recon_in_progress = 0;
2518 splx(s);
2519
2520 /* That's all... */
2521 kthread_exit(0); /* does not return */
2522 }
2523
/*
 * Mount-root hook for RAID devices.  Currently a placeholder: it takes
 * the device and does nothing with it.
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* nothing to do */
}
2530
2531
2532 RF_AutoConfig_t *
2533 rf_find_raid_components()
2534 {
2535 struct devnametobdevmaj *dtobdm;
2536 struct vnode *vp;
2537 struct disklabel label;
2538 struct device *dv;
2539 char *cd_name;
2540 dev_t dev;
2541 int error;
2542 int i;
2543 int good_one;
2544 RF_ComponentLabel_t *clabel;
2545 RF_AutoConfig_t *ac_list;
2546 RF_AutoConfig_t *ac;
2547
2548
2549 /* initialize the AutoConfig list */
2550 ac_list = NULL;
2551
2552 if (raidautoconfig) {
2553
2554 /* we begin by trolling through *all* the devices on the system */
2555
2556 for (dv = alldevs.tqh_first; dv != NULL;
2557 dv = dv->dv_list.tqe_next) {
2558
2559 /* we are only interested in disks... */
2560 if (dv->dv_class != DV_DISK)
2561 continue;
2562
2563 /* we don't care about floppies... */
2564 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2565 continue;
2566 }
2567
2568 /* need to find the device_name_to_block_device_major stuff */
2569 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2570 dtobdm = dev_name2blk;
2571 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2572 dtobdm++;
2573 }
2574
2575 /* get a vnode for the raw partition of this disk */
2576
2577 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2578 if (bdevvp(dev, &vp))
2579 panic("RAID can't alloc vnode");
2580
2581 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2582
2583 if (error) {
2584 /* "Who cares." Continue looking
2585 for something that exists*/
2586 vput(vp);
2587 continue;
2588 }
2589
2590 /* Ok, the disk exists. Go get the disklabel. */
2591 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2592 FREAD, NOCRED, 0);
2593 if (error) {
2594 /*
2595 * XXX can't happen - open() would
2596 * have errored out (or faked up one)
2597 */
2598 printf("can't get label for dev %s%c (%d)!?!?\n",
2599 dv->dv_xname, 'a' + RAW_PART, error);
2600 }
2601
2602 /* don't need this any more. We'll allocate it again
2603 a little later if we really do... */
2604 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2605 vput(vp);
2606
2607 for (i=0; i < label.d_npartitions; i++) {
2608 /* We only support partitions marked as RAID */
2609 if (label.d_partitions[i].p_fstype != FS_RAID)
2610 continue;
2611
2612 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2613 if (bdevvp(dev, &vp))
2614 panic("RAID can't alloc vnode");
2615
2616 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2617 if (error) {
2618 /* Whatever... */
2619 vput(vp);
2620 continue;
2621 }
2622
2623 good_one = 0;
2624
2625 clabel = (RF_ComponentLabel_t *)
2626 malloc(sizeof(RF_ComponentLabel_t),
2627 M_RAIDFRAME, M_NOWAIT);
2628 if (clabel == NULL) {
2629 /* XXX CLEANUP HERE */
2630 printf("RAID auto config: out of memory!\n");
2631 return(NULL); /* XXX probably should panic? */
2632 }
2633
2634 if (!raidread_component_label(dev, vp, clabel)) {
2635 /* Got the label. Does it look reasonable? */
2636 if (rf_reasonable_label(clabel) &&
2637 (clabel->partitionSize <=
2638 label.d_partitions[i].p_size)) {
2639 #if DEBUG
2640 printf("Component on: %s%c: %d\n",
2641 dv->dv_xname, 'a'+i,
2642 label.d_partitions[i].p_size);
2643 print_component_label(clabel);
2644 #endif
2645 /* if it's reasonable, add it,
2646 else ignore it. */
2647 ac = (RF_AutoConfig_t *)
2648 malloc(sizeof(RF_AutoConfig_t),
2649 M_RAIDFRAME,
2650 M_NOWAIT);
2651 if (ac == NULL) {
2652 /* XXX should panic?? */
2653 return(NULL);
2654 }
2655
2656 sprintf(ac->devname, "%s%c",
2657 dv->dv_xname, 'a'+i);
2658 ac->dev = dev;
2659 ac->vp = vp;
2660 ac->clabel = clabel;
2661 ac->next = ac_list;
2662 ac_list = ac;
2663 good_one = 1;
2664 }
2665 }
2666 if (!good_one) {
2667 /* cleanup */
2668 free(clabel, M_RAIDFRAME);
2669 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2670 vput(vp);
2671 }
2672 }
2673 }
2674 }
2675 return(ac_list);
2676 }
2677
2678 static int
2679 rf_reasonable_label(clabel)
2680 RF_ComponentLabel_t *clabel;
2681 {
2682
2683 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2684 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2685 ((clabel->clean == RF_RAID_CLEAN) ||
2686 (clabel->clean == RF_RAID_DIRTY)) &&
2687 clabel->row >=0 &&
2688 clabel->column >= 0 &&
2689 clabel->num_rows > 0 &&
2690 clabel->num_columns > 0 &&
2691 clabel->row < clabel->num_rows &&
2692 clabel->column < clabel->num_columns &&
2693 clabel->blockSize > 0 &&
2694 clabel->numBlocks > 0) {
2695 /* label looks reasonable enough... */
2696 return(1);
2697 }
2698 return(0);
2699 }
2700
2701
2702 void
2703 print_component_label(clabel)
2704 RF_ComponentLabel_t *clabel;
2705 {
2706 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2707 clabel->row, clabel->column,
2708 clabel->num_rows, clabel->num_columns);
2709 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2710 clabel->version, clabel->serial_number,
2711 clabel->mod_counter);
2712 printf(" Clean: %s Status: %d\n",
2713 clabel->clean ? "Yes" : "No", clabel->status );
2714 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2715 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2716 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2717 (char) clabel->parityConfig, clabel->blockSize,
2718 clabel->numBlocks);
2719 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2720 printf(" Last configured as: raid%d\n", clabel->last_unit );
2721 #if 0
2722 printf(" Config order: %d\n", clabel->config_order);
2723 #endif
2724
2725 }
2726
2727 RF_ConfigSet_t *
2728 rf_create_auto_sets(ac_list)
2729 RF_AutoConfig_t *ac_list;
2730 {
2731 RF_AutoConfig_t *ac;
2732 RF_ConfigSet_t *config_sets;
2733 RF_ConfigSet_t *cset;
2734 RF_AutoConfig_t *ac_next;
2735
2736
2737 config_sets = NULL;
2738
2739 /* Go through the AutoConfig list, and figure out which components
2740 belong to what sets. */
2741 ac = ac_list;
2742 while(ac!=NULL) {
2743 /* we're going to putz with ac->next, so save it here
2744 for use at the end of the loop */
2745 ac_next = ac->next;
2746
2747 if (config_sets == NULL) {
2748 /* will need at least this one... */
2749 config_sets = (RF_ConfigSet_t *)
2750 malloc(sizeof(RF_ConfigSet_t),
2751 M_RAIDFRAME, M_NOWAIT);
2752 if (config_sets == NULL) {
2753 panic("rf_create_auto_sets: No memory!\n");
2754 }
2755 /* this one is easy :) */
2756 config_sets->ac = ac;
2757 config_sets->next = NULL;
2758 config_sets->rootable = 0;
2759 ac->next = NULL;
2760 } else {
2761 /* which set does this component fit into? */
2762 cset = config_sets;
2763 while(cset!=NULL) {
2764 if (rf_does_it_fit(cset, ac)) {
2765 /* looks like it matches */
2766 ac->next = cset->ac;
2767 cset->ac = ac;
2768 break;
2769 }
2770 cset = cset->next;
2771 }
2772 if (cset==NULL) {
2773 /* didn't find a match above... new set..*/
2774 cset = (RF_ConfigSet_t *)
2775 malloc(sizeof(RF_ConfigSet_t),
2776 M_RAIDFRAME, M_NOWAIT);
2777 if (cset == NULL) {
2778 panic("rf_create_auto_sets: No memory!\n");
2779 }
2780 cset->ac = ac;
2781 ac->next = NULL;
2782 cset->next = config_sets;
2783 cset->rootable = 0;
2784 config_sets = cset;
2785 }
2786 }
2787 ac = ac_next;
2788 }
2789
2790
2791 return(config_sets);
2792 }
2793
2794 static int
2795 rf_does_it_fit(cset, ac)
2796 RF_ConfigSet_t *cset;
2797 RF_AutoConfig_t *ac;
2798 {
2799 RF_ComponentLabel_t *clabel1, *clabel2;
2800
2801 /* If this one matches the *first* one in the set, that's good
2802 enough, since the other members of the set would have been
2803 through here too... */
2804 /* note that we are not checking partitionSize here..
2805
2806 Note that we are also not checking the mod_counters here.
2807 If everything else matches execpt the mod_counter, that's
2808 good enough for this test. We will deal with the mod_counters
2809 a little later in the autoconfiguration process.
2810
2811 (clabel1->mod_counter == clabel2->mod_counter) &&
2812
2813 */
2814
2815 clabel1 = cset->ac->clabel;
2816 clabel2 = ac->clabel;
2817 if ((clabel1->version == clabel2->version) &&
2818 (clabel1->serial_number == clabel2->serial_number) &&
2819 (clabel1->num_rows == clabel2->num_rows) &&
2820 (clabel1->num_columns == clabel2->num_columns) &&
2821 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2822 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2823 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2824 (clabel1->parityConfig == clabel2->parityConfig) &&
2825 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2826 (clabel1->blockSize == clabel2->blockSize) &&
2827 (clabel1->numBlocks == clabel2->numBlocks) &&
2828 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2829 (clabel1->root_partition == clabel2->root_partition) &&
2830 (clabel1->last_unit == clabel2->last_unit) &&
2831 (clabel1->config_order == clabel2->config_order)) {
2832 /* if it get's here, it almost *has* to be a match */
2833 } else {
2834 /* it's not consistent with somebody in the set..
2835 punt */
2836 return(0);
2837 }
2838 /* all was fine.. it must fit... */
2839 return(1);
2840 }
2841
2842 int
2843 rf_have_enough_components(cset)
2844 RF_ConfigSet_t *cset;
2845 {
2846 RF_AutoConfig_t *ac;
2847 RF_AutoConfig_t *auto_config;
2848 RF_ComponentLabel_t *clabel;
2849 int r,c;
2850 int num_rows;
2851 int num_cols;
2852 int num_missing;
2853
2854 /* check to see that we have enough 'live' components
2855 of this set. If so, we can configure it if necessary */
2856
2857 num_rows = cset->ac->clabel->num_rows;
2858 num_cols = cset->ac->clabel->num_columns;
2859
2860 /* XXX Check for duplicate components!?!?!? */
2861
2862 num_missing = 0;
2863 auto_config = cset->ac;
2864
2865 for(r=0; r<num_rows; r++) {
2866 for(c=0; c<num_cols; c++) {
2867 ac = auto_config;
2868 while(ac!=NULL) {
2869 if (ac->clabel==NULL) {
2870 /* big-time bad news. */
2871 goto fail;
2872 }
2873 if ((ac->clabel->row == r) &&
2874 (ac->clabel->column == c)) {
2875 /* it's this one... */
2876 #if DEBUG
2877 printf("Found: %s at %d,%d\n",
2878 ac->devname,r,c);
2879 #endif
2880 break;
2881 }
2882 ac=ac->next;
2883 }
2884 if (ac==NULL) {
2885 /* Didn't find one here! */
2886 num_missing++;
2887 }
2888 }
2889 }
2890
2891 clabel = cset->ac->clabel;
2892
2893 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
2894 ((clabel->parityConfig == '1') && (num_missing > 1)) ||
2895 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
2896 ((clabel->parityConfig == '5') && (num_missing > 1))) {
2897 /* XXX this needs to be made *much* more general */
2898 /* Too many failures */
2899 return(0);
2900 }
2901 /* otherwise, all is well, and we've got enough to take a kick
2902 at autoconfiguring this set */
2903 return(1);
2904 fail:
2905 return(0);
2906
2907 }
2908
2909 void
2910 rf_create_configuration(ac,config,raidPtr)
2911 RF_AutoConfig_t *ac;
2912 RF_Config_t *config;
2913 RF_Raid_t *raidPtr;
2914 {
2915 RF_ComponentLabel_t *clabel;
2916
2917 clabel = ac->clabel;
2918
2919 /* 1. Fill in the common stuff */
2920 config->numRow = clabel->num_rows;
2921 config->numCol = clabel->num_columns;
2922 config->numSpare = 0; /* XXX should this be set here? */
2923 config->sectPerSU = clabel->sectPerSU;
2924 config->SUsPerPU = clabel->SUsPerPU;
2925 config->SUsPerRU = clabel->SUsPerRU;
2926 config->parityConfig = clabel->parityConfig;
2927 /* XXX... */
2928 strcpy(config->diskQueueType,"fifo");
2929 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
2930 config->layoutSpecificSize = 0; /* XXX ?? */
2931
2932 while(ac!=NULL) {
2933 /* row/col values will be in range due to the checks
2934 in reasonable_label() */
2935 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
2936 ac->devname);
2937 ac = ac->next;
2938 }
2939
2940 }
2941
2942 int
2943 rf_set_autoconfig(raidPtr, new_value)
2944 RF_Raid_t *raidPtr;
2945 int new_value;
2946 {
2947 RF_ComponentLabel_t clabel;
2948 struct vnode *vp;
2949 dev_t dev;
2950 int row, column;
2951
2952 raidPtr->autoconfigure = new_value;
2953 for(row=0; row<raidPtr->numRow; row++) {
2954 for(column=0; column<raidPtr->numCol; column++) {
2955 dev = raidPtr->Disks[row][column].dev;
2956 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2957 raidread_component_label(dev, vp, &clabel);
2958 clabel.autoconfigure = new_value;
2959 raidwrite_component_label(dev, vp, &clabel);
2960 }
2961 }
2962 return(new_value);
2963 }
2964
2965 int
2966 rf_set_rootpartition(raidPtr, new_value)
2967 RF_Raid_t *raidPtr;
2968 int new_value;
2969 {
2970 RF_ComponentLabel_t clabel;
2971 struct vnode *vp;
2972 dev_t dev;
2973 int row, column;
2974
2975 raidPtr->root_partition = new_value;
2976 for(row=0; row<raidPtr->numRow; row++) {
2977 for(column=0; column<raidPtr->numCol; column++) {
2978 dev = raidPtr->Disks[row][column].dev;
2979 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2980 raidread_component_label(dev, vp, &clabel);
2981 clabel.root_partition = new_value;
2982 raidwrite_component_label(dev, vp, &clabel);
2983 }
2984 }
2985 return(new_value);
2986 }
2987
2988 void
2989 rf_release_all_vps(cset)
2990 RF_ConfigSet_t *cset;
2991 {
2992 RF_AutoConfig_t *ac;
2993
2994 ac = cset->ac;
2995 while(ac!=NULL) {
2996 /* Close the vp, and give it back */
2997 if (ac->vp) {
2998 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
2999 vput(ac->vp);
3000 }
3001 ac = ac->next;
3002 }
3003 }
3004
3005
3006 void
3007 rf_cleanup_config_set(cset)
3008 RF_ConfigSet_t *cset;
3009 {
3010 RF_AutoConfig_t *ac;
3011 RF_AutoConfig_t *next_ac;
3012
3013 ac = cset->ac;
3014 while(ac!=NULL) {
3015 next_ac = ac->next;
3016 /* nuke the label */
3017 free(ac->clabel, M_RAIDFRAME);
3018 /* cleanup the config structure */
3019 free(ac, M_RAIDFRAME);
3020 /* "next.." */
3021 ac = next_ac;
3022 }
3023 /* and, finally, nuke the config set */
3024 free(cset, M_RAIDFRAME);
3025 }
3026
3027
3028 void
3029 raid_init_component_label(raidPtr, clabel)
3030 RF_Raid_t *raidPtr;
3031 RF_ComponentLabel_t *clabel;
3032 {
3033 /* current version number */
3034 clabel->version = RF_COMPONENT_LABEL_VERSION;
3035 clabel->serial_number = raidPtr->serial_number;
3036 clabel->mod_counter = raidPtr->mod_counter;
3037 clabel->num_rows = raidPtr->numRow;
3038 clabel->num_columns = raidPtr->numCol;
3039 clabel->clean = RF_RAID_DIRTY; /* not clean */
3040 clabel->status = rf_ds_optimal; /* "It's good!" */
3041
3042 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3043 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3044 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3045
3046 clabel->blockSize = raidPtr->bytesPerSector;
3047 clabel->numBlocks = raidPtr->sectorsPerDisk;
3048
3049 /* XXX not portable */
3050 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3051 clabel->maxOutstanding = raidPtr->maxOutstanding;
3052 clabel->autoconfigure = raidPtr->autoconfigure;
3053 clabel->root_partition = raidPtr->root_partition;
3054 clabel->last_unit = raidPtr->raidid;
3055 clabel->config_order = raidPtr->config_order;
3056 }
3057
3058 int
3059 rf_auto_config_set(cset,unit)
3060 RF_ConfigSet_t *cset;
3061 int *unit;
3062 {
3063 RF_Raid_t *raidPtr;
3064 RF_Config_t *config;
3065 int raidID;
3066 int retcode;
3067
3068 printf("Starting autoconfigure on raid%d\n",raidID);
3069
3070 retcode = 0;
3071 *unit = -1;
3072
3073 /* 1. Create a config structure */
3074
3075 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3076 M_RAIDFRAME,
3077 M_NOWAIT);
3078 if (config==NULL) {
3079 printf("Out of mem!?!?\n");
3080 /* XXX do something more intelligent here. */
3081 return(1);
3082 }
3083 /* XXX raidID needs to be set correctly.. */
3084
3085 /*
3086 2. Figure out what RAID ID this one is supposed to live at
3087 See if we can get the same RAID dev that it was configured
3088 on last time..
3089 */
3090
3091 raidID = cset->ac->clabel->last_unit;
3092 if ((raidID < 0) || (raidID >= numraid)) {
3093 /* let's not wander off into lala land. */
3094 raidID = numraid - 1;
3095 }
3096 if (raidPtrs[raidID]->valid != 0) {
3097
3098 /*
3099 Nope... Go looking for an alternative...
3100 Start high so we don't immediately use raid0 if that's
3101 not taken.
3102 */
3103
3104 for(raidID = numraid; raidID >= 0; raidID--) {
3105 if (raidPtrs[raidID]->valid == 0) {
3106 /* can use this one! */
3107 break;
3108 }
3109 }
3110 }
3111
3112 if (raidID < 0) {
3113 /* punt... */
3114 printf("Unable to auto configure this set!\n");
3115 printf("(Out of RAID devs!)\n");
3116 return(1);
3117 }
3118
3119 raidPtr = raidPtrs[raidID];
3120
3121 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3122 raidPtr->raidid = raidID;
3123 raidPtr->openings = RAIDOUTSTANDING;
3124
3125 /* 3. Build the configuration structure */
3126 rf_create_configuration(cset->ac, config, raidPtr);
3127
3128 /* 4. Do the configuration */
3129 retcode = rf_Configure(raidPtr, config, cset->ac);
3130
3131 if (retcode == 0) {
3132
3133 raidinit(raidPtrs[raidID]);
3134
3135 rf_markalldirty(raidPtrs[raidID]);
3136 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3137 if (cset->ac->clabel->root_partition==1) {
3138 /* everything configured just fine. Make a note
3139 that this set is eligible to be root. */
3140 cset->rootable = 1;
3141 /* XXX do this here? */
3142 raidPtrs[raidID]->root_partition = 1;
3143 }
3144 }
3145
3146 /* 5. Cleanup */
3147 free(config, M_RAIDFRAME);
3148
3149 *unit = raidID;
3150 return(retcode);
3151 }
3152