1 /*	$NetBSD: rf_netbsdkintf.c,v 1.48 2000/02/13 04:53:57 oster Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_copyback.h"
141 #include "rf_dag.h"
142 #include "rf_dagflags.h"
143 #include "rf_diskqueue.h"
144 #include "rf_acctrace.h"
145 #include "rf_etimer.h"
146 #include "rf_general.h"
147 #include "rf_debugMem.h"
148 #include "rf_kintf.h"
149 #include "rf_options.h"
150 #include "rf_driver.h"
151 #include "rf_parityscan.h"
152 #include "rf_debugprint.h"
153 #include "rf_threadstuff.h"
154
155 int rf_kdebug_level = 0;
156
157 #ifdef DEBUG
158 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
159 #else /* DEBUG */
160 #define db1_printf(a) { }
161 #endif /* DEBUG */
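
/*
 * Usage note: since the macro body is "printf a", the argument list must
 * be wrapped in an extra set of parentheses, as is done throughout this
 * file, e.g.:
 *
 *	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
 *
 * With DEBUG undefined the call expands to an empty block.
 */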
162
163 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
164
165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
166
167 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
168 * spare table */
169 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
170 * installation process */
171
172 /* prototypes */
173 static void KernelWakeupFunc(struct buf * bp);
174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
175 dev_t dev, RF_SectorNum_t startSect,
176 RF_SectorCount_t numSect, caddr_t buf,
177 void (*cbFunc) (struct buf *), void *cbArg,
178 int logBytesPerSector, struct proc * b_proc);
179 static int raidinit __P((dev_t, RF_Raid_t *, int));
180
181 void raidattach __P((int));
182 int raidsize __P((dev_t));
183 int raidopen __P((dev_t, int, int, struct proc *));
184 int raidclose __P((dev_t, int, int, struct proc *));
185 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
186 int raidwrite __P((dev_t, struct uio *, int));
187 int raidread __P((dev_t, struct uio *, int));
188 void raidstrategy __P((struct buf *));
189 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
190
191 /*
192 * Pilfered from ccd.c
193 */
194
195 struct raidbuf {
196 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
197 struct buf *rf_obp; /* ptr. to original I/O buf */
198 int rf_flags; /* misc. flags */
199 RF_DiskQueueData_t *req;/* the request that this was part of.. */
200 };
201
202
203 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
204 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
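
/*
 * How these fit together (illustrative; see rf_DispatchKernelIO() and
 * KernelWakeupFunc() below): because rf_buf is the first member of
 * struct raidbuf, the buf handed to the component driver can be cast
 * straight back to its containing raidbuf in the completion path:
 *
 *	raidbp = RAIDGETBUF(rs);
 *	raidbp->rf_obp = bp;			original I/O buf
 *	raidbp->req = req;			RAIDframe request
 *	VOP_STRATEGY(&raidbp->rf_buf);
 *	...
 *	raidbp = (struct raidbuf *) vbp;	in the b_iodone callback
 *	RAIDPUTBUF(rs, raidbp);
 */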
205
206 /* XXX Not sure if the following should be replacing the raidPtrs above,
207 or if it should be used in conjunction with that... */
208
209 struct raid_softc {
210 int sc_flags; /* flags */
211 int sc_cflags; /* configuration flags */
212 size_t sc_size; /* size of the raid device */
213 dev_t sc_dev; /* our device.. */
214 char sc_xname[20]; /* XXX external name */
215 struct disk sc_dkdev; /* generic disk device info */
216 struct pool sc_cbufpool; /* component buffer pool */
217 struct buf_queue buf_queue; /* used for the device queue */
218 };
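
/*
 * buf_queue is the hand-off point between the two halves of the I/O
 * path: raidstrategy() below queues each buf with BUFQ_INSERT_TAIL()
 * and calls raidstart(), which drains the queue with BUFQ_FIRST()/
 * BUFQ_REMOVE() for as long as raidPtr->openings permits.
 */
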
219 /* sc_flags */
220 #define RAIDF_INITED 0x01 /* unit has been initialized */
221 #define RAIDF_WLABEL 0x02 /* label area is writable */
222 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
223 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
224 #define RAIDF_LOCKED 0x80 /* unit is locked */
225
226 #define raidunit(x) DISKUNIT(x)
227 int numraid = 0;
228
229 /*
230 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
231 * Be aware that large numbers can allow the driver to consume a lot of
232 * kernel memory, especially on writes, and in degraded mode reads.
233 *
234 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
235 * a single 64K write will typically require 64K for the old data,
236 * 64K for the old parity, and 64K for the new parity, for a total
237 * of 192K (if the parity buffer is not re-used immediately).
238  * Even if it is used immediately, that's still 128K, which when multiplied
239 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
240 *
241 * Now in degraded mode, for example, a 64K read on the above setup may
242 * require data reconstruction, which will require *all* of the 4 remaining
243 * disks to participate -- 4 * 32K/disk == 128K again.
244 */
245
246 #ifndef RAIDOUTSTANDING
247 #define RAIDOUTSTANDING 6
248 #endif
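
/*
 * With the default of 6, the example above works out to roughly
 * 6 * 128K = 768K of old-data/parity buffers (parity buffer re-used)
 * plus 6 * 64K = 384K of incoming data, i.e. a little over 1MB of
 * transient kernel memory for a full complement of 64K writes.
 */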
249
250 #define RAIDLABELDEV(dev) \
251 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
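
/*
 * i.e. RAIDLABELDEV(dev) maps any partition of a raid unit onto that
 * unit's raw partition; it is what readdisklabel()/writedisklabel()
 * are handed further down.
 */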
252
253 /* declared here, and made public, for the benefit of KVM stuff.. */
254 struct raid_softc *raid_softc;
255
256 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
257 struct disklabel *));
258 static void raidgetdisklabel __P((dev_t));
259 static void raidmakedisklabel __P((struct raid_softc *));
260
261 static int raidlock __P((struct raid_softc *));
262 static void raidunlock __P((struct raid_softc *));
263
264 static void rf_markalldirty __P((RF_Raid_t *));
265 void rf_mountroot_hook __P((struct device *));
266
267 struct device *raidrootdev;
268 struct cfdata cf_raidrootdev;
269 struct cfdriver cfdrv;
270 /* XXX these should be moved up */
271 #include "rf_configure.h"
272 #include <sys/reboot.h>
273
274 void rf_ReconThread __P((struct rf_recon_req *));
275 /* XXX what I want is: */
276 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
277 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
278 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
279 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
280 void rf_buildroothack __P((void *));
281
282 RF_AutoConfig_t *rf_find_raid_components __P((void));
283 void print_component_label __P((RF_ComponentLabel_t *));
284 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
285 static int does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
286 static int reasonable_label __P((RF_ComponentLabel_t *));
287 void create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,RF_Raid_t *));
288 int rf_set_autoconfig __P((RF_Raid_t *, int));
289 int rf_set_rootpartition __P((RF_Raid_t *, int));
290 void release_all_vps __P((RF_ConfigSet_t *));
291 void cleanup_config_set __P((RF_ConfigSet_t *));
292
293 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
294 allow autoconfig to take place */
295 /* XXX ugly hack. */
296 const char *raid_rooty = "raid0";
297 extern struct device *booted_device;
298
299 void
300 raidattach(num)
301 int num;
302 {
303 int raidID;
304 int i, rc;
305 RF_AutoConfig_t *ac_list; /* autoconfig list */
306 RF_ConfigSet_t *config_sets;
307
308 #ifdef DEBUG
309 printf("raidattach: Asked for %d units\n", num);
310 #endif
311
312 if (num <= 0) {
313 #ifdef DIAGNOSTIC
314 panic("raidattach: count <= 0");
315 #endif
316 return;
317 }
318 /* This is where all the initialization stuff gets done. */
319
320 /* Make some space for requested number of units... */
321
322 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
323 if (raidPtrs == NULL) {
324 panic("raidPtrs is NULL!!\n");
325 }
326
327 rc = rf_mutex_init(&rf_sparet_wait_mutex);
328 if (rc) {
329 RF_PANIC();
330 }
331
332 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
333
334 	for (i = 0; i < num; i++)
335 raidPtrs[i] = NULL;
336 rc = rf_BootRaidframe();
337 if (rc == 0)
338 printf("Kernelized RAIDframe activated\n");
339 else
340 panic("Serious error booting RAID!!\n");
341
342 	/* put together some data structures like the CCD device does.. This
343 * lets us lock the device and what-not when it gets opened. */
344
345 raid_softc = (struct raid_softc *)
346 malloc(num * sizeof(struct raid_softc),
347 M_RAIDFRAME, M_NOWAIT);
348 if (raid_softc == NULL) {
349 printf("WARNING: no memory for RAIDframe driver\n");
350 return;
351 }
352 numraid = num;
353 bzero(raid_softc, num * sizeof(struct raid_softc));
354
355 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
356 M_RAIDFRAME, M_NOWAIT);
357 if (raidrootdev == NULL) {
358 panic("No memory for RAIDframe driver!!?!?!\n");
359 }
360
361 for (raidID = 0; raidID < num; raidID++) {
362 BUFQ_INIT(&raid_softc[raidID].buf_queue);
363
364 raidrootdev[raidID].dv_class = DV_DISK;
365 raidrootdev[raidID].dv_cfdata = NULL;
366 raidrootdev[raidID].dv_unit = raidID;
367 raidrootdev[raidID].dv_parent = NULL;
368 raidrootdev[raidID].dv_flags = 0;
369 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
370
371 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
372 (RF_Raid_t *));
373 if (raidPtrs[raidID] == NULL) {
374 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
375 numraid = raidID;
376 return;
377 }
378 }
379
380 if (raidautoconfig) {
381 /* 1. locate all RAID components on the system */
382
383 #if DEBUG
384 printf("Searching for raid components...\n");
385 #endif
386 ac_list = rf_find_raid_components();
387
388 /* 2. sort them into their respective sets */
389
390 config_sets = rf_create_auto_sets(ac_list);
391
392 /* 3. evaluate each set and configure the valid ones
393 This gets done in rf_buildroothack() */
394
395 /* schedule the creation of the thread to do the
396 "/ on RAID" stuff */
397
398 kthread_create(rf_buildroothack,config_sets);
399
400 /* 4. make sure we get our mud.. I mean root.. hooks in.. */
401 /* XXXX pick raid0 for now... and this should be only done
402 if we find something that's bootable!!! */
403 #if 0
404 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
405 #endif
406 if (boothowto & RB_ASKNAME) {
407 /* We don't auto-config... */
408 } else {
409 /* They didn't ask, and we found something bootable... */
410 /* XXX pretend for now.. */
411 #if 0
412 booted_device = &raidrootdev[0];
413 #endif
414 }
415 }
416
417 }
418
419 void
420 rf_buildroothack(arg)
421 void *arg;
422 {
423 RF_ConfigSet_t *config_sets = arg;
424 RF_ConfigSet_t *cset;
425 RF_ConfigSet_t *next_cset;
426 RF_Raid_t *raidPtr;
427 RF_Config_t *config;
428 int raidID;
429 int retcode;
430
431 raidID=0;
432 cset = config_sets;
433 while(cset != NULL ) {
434 next_cset = cset->next;
435 if (cset->ac->clabel->autoconfigure==1) {
436 printf("Starting autoconfigure on raid%d\n",raidID);
437 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
438 M_RAIDFRAME,
439 M_NOWAIT);
440 if (config==NULL) {
441 printf("Out of mem!?!?\n");
442 /* XXX do something more intelligent here. */
443 return;
444 }
445 /* XXX raidID needs to be set correctly.. */
446 raidPtr = raidPtrs[raidID];
447 /* XXX all this stuff should be done SOMEWHERE ELSE! */
448 raidPtr->raidid = raidID;
449 raidPtr->openings = RAIDOUTSTANDING;
450 create_configuration(cset->ac, config, raidPtr);
451 retcode = rf_Configure( raidPtr, config, cset->ac );
452
453 if (retcode == 0) {
454 #if DEBUG
455 printf("Calling raidinit()\n");
456 #endif
457 /* XXX the 0's below are bogus! */
458 retcode = raidinit(0, raidPtrs[raidID], 0);
459 if (retcode) {
460 printf("init returned: %d\n",retcode);
461 }
462 rf_markalldirty( raidPtrs[raidID] );
463 }
464 raidID++; /* XXX for now.. */
465 free(config, M_RAIDFRAME);
466 } else {
467 /* we're not autoconfiguring this set...
468 release the associated resources */
469 #if DEBUG
470 printf("Releasing vp's\n");
471 #endif
472 release_all_vps(cset);
473 #if DEBUG
474 printf("Done.\n");
475 #endif
476 }
477 /* cleanup */
478 #if DEBUG
479 printf("Cleaning up config set\n");
480 #endif
481 cleanup_config_set(cset);
482 #if DEBUG
483 printf("Done cleanup\n");
484 #endif
485 cset = next_cset;
486 }
487 }
488
489
490 int
491 raidsize(dev)
492 dev_t dev;
493 {
494 struct raid_softc *rs;
495 struct disklabel *lp;
496 int part, unit, omask, size;
497
498 unit = raidunit(dev);
499 if (unit >= numraid)
500 return (-1);
501 rs = &raid_softc[unit];
502
503 if ((rs->sc_flags & RAIDF_INITED) == 0)
504 return (-1);
505
506 part = DISKPART(dev);
507 omask = rs->sc_dkdev.dk_openmask & (1 << part);
508 lp = rs->sc_dkdev.dk_label;
509
510 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
511 return (-1);
512
513 if (lp->d_partitions[part].p_fstype != FS_SWAP)
514 size = -1;
515 else
516 size = lp->d_partitions[part].p_size *
517 (lp->d_secsize / DEV_BSIZE);
518
519 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
520 return (-1);
521
522 return (size);
523
524 }
525
526 int
527 raiddump(dev, blkno, va, size)
528 dev_t dev;
529 daddr_t blkno;
530 caddr_t va;
531 size_t size;
532 {
533 /* Not implemented. */
534 return ENXIO;
535 }
536 /* ARGSUSED */
537 int
538 raidopen(dev, flags, fmt, p)
539 dev_t dev;
540 int flags, fmt;
541 struct proc *p;
542 {
543 int unit = raidunit(dev);
544 struct raid_softc *rs;
545 struct disklabel *lp;
546 int part, pmask;
547 int error = 0;
548
549 if (unit >= numraid)
550 return (ENXIO);
551 rs = &raid_softc[unit];
552
553 if ((error = raidlock(rs)) != 0)
554 return (error);
555 lp = rs->sc_dkdev.dk_label;
556
557 part = DISKPART(dev);
558 pmask = (1 << part);
559
560 db1_printf(("Opening raid device number: %d partition: %d\n",
561 unit, part));
562
563
564 if ((rs->sc_flags & RAIDF_INITED) &&
565 (rs->sc_dkdev.dk_openmask == 0))
566 raidgetdisklabel(dev);
567
568 /* make sure that this partition exists */
569
570 if (part != RAW_PART) {
571 db1_printf(("Not a raw partition..\n"));
572 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
573 ((part >= lp->d_npartitions) ||
574 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
575 error = ENXIO;
576 raidunlock(rs);
577 db1_printf(("Bailing out...\n"));
578 return (error);
579 }
580 }
581 /* Prevent this unit from being unconfigured while open. */
582 switch (fmt) {
583 case S_IFCHR:
584 rs->sc_dkdev.dk_copenmask |= pmask;
585 break;
586
587 case S_IFBLK:
588 rs->sc_dkdev.dk_bopenmask |= pmask;
589 break;
590 }
591
592 if ((rs->sc_dkdev.dk_openmask == 0) &&
593 ((rs->sc_flags & RAIDF_INITED) != 0)) {
594 /* First one... mark things as dirty... Note that we *MUST*
595 have done a configure before this. I DO NOT WANT TO BE
596 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
597 THAT THEY BELONG TOGETHER!!!!! */
598 /* XXX should check to see if we're only open for reading
599 here... If so, we needn't do this, but then need some
600 other way of keeping track of what's happened.. */
601
602 rf_markalldirty( raidPtrs[unit] );
603 }
604
605
606 rs->sc_dkdev.dk_openmask =
607 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
608
609 raidunlock(rs);
610
611 return (error);
612
613
614 }
615 /* ARGSUSED */
616 int
617 raidclose(dev, flags, fmt, p)
618 dev_t dev;
619 int flags, fmt;
620 struct proc *p;
621 {
622 int unit = raidunit(dev);
623 struct raid_softc *rs;
624 int error = 0;
625 int part;
626
627 if (unit >= numraid)
628 return (ENXIO);
629 rs = &raid_softc[unit];
630
631 if ((error = raidlock(rs)) != 0)
632 return (error);
633
634 part = DISKPART(dev);
635
636 /* ...that much closer to allowing unconfiguration... */
637 switch (fmt) {
638 case S_IFCHR:
639 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
640 break;
641
642 case S_IFBLK:
643 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
644 break;
645 }
646 rs->sc_dkdev.dk_openmask =
647 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
648
649 if ((rs->sc_dkdev.dk_openmask == 0) &&
650 ((rs->sc_flags & RAIDF_INITED) != 0)) {
651 		/* Last one... device is not unconfigured yet.
652 		   Device shutdown has taken care of setting the
653 		   clean bits if RAIDF_INITED is not set.
654 		   Mark things as clean... */
655 rf_update_component_labels( raidPtrs[unit] );
656 }
657
658 raidunlock(rs);
659 return (0);
660
661 }
662
663 void
664 raidstrategy(bp)
665 register struct buf *bp;
666 {
667 register int s;
668
669 unsigned int raidID = raidunit(bp->b_dev);
670 RF_Raid_t *raidPtr;
671 struct raid_softc *rs = &raid_softc[raidID];
672 struct disklabel *lp;
673 int wlabel;
674
675 	if ((rs->sc_flags & RAIDF_INITED) == 0) {
676 		bp->b_error = ENXIO;
677 		bp->b_flags |= B_ERROR;
678 bp->b_resid = bp->b_bcount;
679 biodone(bp);
680 return;
681 }
682 if (raidID >= numraid || !raidPtrs[raidID]) {
683 bp->b_error = ENODEV;
684 bp->b_flags |= B_ERROR;
685 bp->b_resid = bp->b_bcount;
686 biodone(bp);
687 return;
688 }
689 raidPtr = raidPtrs[raidID];
690 if (!raidPtr->valid) {
691 bp->b_error = ENODEV;
692 bp->b_flags |= B_ERROR;
693 bp->b_resid = bp->b_bcount;
694 biodone(bp);
695 return;
696 }
697 if (bp->b_bcount == 0) {
698 db1_printf(("b_bcount is zero..\n"));
699 biodone(bp);
700 return;
701 }
702 lp = rs->sc_dkdev.dk_label;
703
704 /*
705 * Do bounds checking and adjust transfer. If there's an
706 * error, the bounds check will flag that for us.
707 */
708
709 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
710 if (DISKPART(bp->b_dev) != RAW_PART)
711 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
712 db1_printf(("Bounds check failed!!:%d %d\n",
713 (int) bp->b_blkno, (int) wlabel));
714 biodone(bp);
715 return;
716 }
717 s = splbio();
718
719 bp->b_resid = 0;
720
721 /* stuff it onto our queue */
722 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
723
724 raidstart(raidPtrs[raidID]);
725
726 splx(s);
727 }
728 /* ARGSUSED */
729 int
730 raidread(dev, uio, flags)
731 dev_t dev;
732 struct uio *uio;
733 int flags;
734 {
735 int unit = raidunit(dev);
736 struct raid_softc *rs;
737 int part;
738
739 if (unit >= numraid)
740 return (ENXIO);
741 rs = &raid_softc[unit];
742
743 if ((rs->sc_flags & RAIDF_INITED) == 0)
744 return (ENXIO);
745 part = DISKPART(dev);
746
747 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
748
749 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
750
751 }
752 /* ARGSUSED */
753 int
754 raidwrite(dev, uio, flags)
755 dev_t dev;
756 struct uio *uio;
757 int flags;
758 {
759 int unit = raidunit(dev);
760 struct raid_softc *rs;
761
762 if (unit >= numraid)
763 return (ENXIO);
764 rs = &raid_softc[unit];
765
766 if ((rs->sc_flags & RAIDF_INITED) == 0)
767 return (ENXIO);
768 db1_printf(("raidwrite\n"));
769 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
770
771 }
772
773 int
774 raidioctl(dev, cmd, data, flag, p)
775 dev_t dev;
776 u_long cmd;
777 caddr_t data;
778 int flag;
779 struct proc *p;
780 {
781 int unit = raidunit(dev);
782 int error = 0;
783 int part, pmask;
784 struct raid_softc *rs;
785 RF_Config_t *k_cfg, *u_cfg;
786 RF_Raid_t *raidPtr;
787 RF_RaidDisk_t *diskPtr;
788 RF_AccTotals_t *totals;
789 RF_DeviceConfig_t *d_cfg, **ucfgp;
790 u_char *specific_buf;
791 int retcode = 0;
792 int row;
793 int column;
794 struct rf_recon_req *rrcopy, *rr;
795 RF_ComponentLabel_t *clabel;
796 RF_ComponentLabel_t ci_label;
797 RF_ComponentLabel_t **clabel_ptr;
798 RF_SingleComponent_t *sparePtr,*componentPtr;
799 RF_SingleComponent_t hot_spare;
800 RF_SingleComponent_t component;
801 int i, j, d;
802
803 if (unit >= numraid)
804 return (ENXIO);
805 rs = &raid_softc[unit];
806 raidPtr = raidPtrs[unit];
807
808 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
809 (int) DISKPART(dev), (int) unit, (int) cmd));
810
811 /* Must be open for writes for these commands... */
812 switch (cmd) {
813 case DIOCSDINFO:
814 case DIOCWDINFO:
815 case DIOCWLABEL:
816 if ((flag & FWRITE) == 0)
817 return (EBADF);
818 }
819
820 /* Must be initialized for these... */
821 switch (cmd) {
822 case DIOCGDINFO:
823 case DIOCSDINFO:
824 case DIOCWDINFO:
825 case DIOCGPART:
826 case DIOCWLABEL:
827 case DIOCGDEFLABEL:
828 case RAIDFRAME_SHUTDOWN:
829 case RAIDFRAME_REWRITEPARITY:
830 case RAIDFRAME_GET_INFO:
831 case RAIDFRAME_RESET_ACCTOTALS:
832 case RAIDFRAME_GET_ACCTOTALS:
833 case RAIDFRAME_KEEP_ACCTOTALS:
834 case RAIDFRAME_GET_SIZE:
835 case RAIDFRAME_FAIL_DISK:
836 case RAIDFRAME_COPYBACK:
837 case RAIDFRAME_CHECK_RECON_STATUS:
838 case RAIDFRAME_GET_COMPONENT_LABEL:
839 case RAIDFRAME_SET_COMPONENT_LABEL:
840 case RAIDFRAME_ADD_HOT_SPARE:
841 case RAIDFRAME_REMOVE_HOT_SPARE:
842 case RAIDFRAME_INIT_LABELS:
843 case RAIDFRAME_REBUILD_IN_PLACE:
844 case RAIDFRAME_CHECK_PARITY:
845 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
846 case RAIDFRAME_CHECK_COPYBACK_STATUS:
847 case RAIDFRAME_SET_AUTOCONFIG:
848 case RAIDFRAME_SET_ROOT:
849 if ((rs->sc_flags & RAIDF_INITED) == 0)
850 return (ENXIO);
851 }
852
853 switch (cmd) {
854
855 /* configure the system */
856 case RAIDFRAME_CONFIGURE:
857
858 if (raidPtr->valid) {
859 /* There is a valid RAID set running on this unit! */
860 printf("raid%d: Device already configured!\n",unit);
861 }
862
863 /* copy-in the configuration information */
864 /* data points to a pointer to the configuration structure */
865
866 u_cfg = *((RF_Config_t **) data);
867 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
868 if (k_cfg == NULL) {
869 return (ENOMEM);
870 }
871 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
872 sizeof(RF_Config_t));
873 if (retcode) {
874 RF_Free(k_cfg, sizeof(RF_Config_t));
875 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
876 retcode));
877 return (retcode);
878 }
879 /* allocate a buffer for the layout-specific data, and copy it
880 * in */
881 if (k_cfg->layoutSpecificSize) {
882 if (k_cfg->layoutSpecificSize > 10000) {
883 /* sanity check */
884 RF_Free(k_cfg, sizeof(RF_Config_t));
885 return (EINVAL);
886 }
887 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
888 (u_char *));
889 if (specific_buf == NULL) {
890 RF_Free(k_cfg, sizeof(RF_Config_t));
891 return (ENOMEM);
892 }
893 retcode = copyin(k_cfg->layoutSpecific,
894 (caddr_t) specific_buf,
895 k_cfg->layoutSpecificSize);
896 if (retcode) {
897 RF_Free(k_cfg, sizeof(RF_Config_t));
898 RF_Free(specific_buf,
899 k_cfg->layoutSpecificSize);
900 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
901 retcode));
902 return (retcode);
903 }
904 } else
905 specific_buf = NULL;
906 k_cfg->layoutSpecific = specific_buf;
907
908 /* should do some kind of sanity check on the configuration.
909 * Store the sum of all the bytes in the last byte? */
910
911 /* configure the system */
912
913 /*
914 * Clear the entire RAID descriptor, just to make sure
915 * there is no stale data left in the case of a
916 * reconfiguration
917 */
918 bzero((char *) raidPtr, sizeof(RF_Raid_t));
919 raidPtr->raidid = unit;
920
921 retcode = rf_Configure(raidPtr, k_cfg, NULL);
922
923 if (retcode == 0) {
924
925 /* allow this many simultaneous IO's to
926 this RAID device */
927 raidPtr->openings = RAIDOUTSTANDING;
928
929 retcode = raidinit(dev, raidPtr, unit);
930 rf_markalldirty( raidPtr );
931 }
932 /* free the buffers. No return code here. */
933 if (k_cfg->layoutSpecificSize) {
934 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
935 }
936 RF_Free(k_cfg, sizeof(RF_Config_t));
937
938 return (retcode);
939
940 /* shutdown the system */
941 case RAIDFRAME_SHUTDOWN:
942
943 if ((error = raidlock(rs)) != 0)
944 return (error);
945
946 /*
947 * If somebody has a partition mounted, we shouldn't
948 * shutdown.
949 */
950
951 part = DISKPART(dev);
952 pmask = (1 << part);
953 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
954 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
955 (rs->sc_dkdev.dk_copenmask & pmask))) {
956 raidunlock(rs);
957 return (EBUSY);
958 }
959
960 retcode = rf_Shutdown(raidPtr);
961
962 pool_destroy(&rs->sc_cbufpool);
963
964 /* It's no longer initialized... */
965 rs->sc_flags &= ~RAIDF_INITED;
966
967 /* Detach the disk. */
968 disk_detach(&rs->sc_dkdev);
969
970 raidunlock(rs);
971
972 return (retcode);
973 case RAIDFRAME_GET_COMPONENT_LABEL:
974 clabel_ptr = (RF_ComponentLabel_t **) data;
975 /* need to read the component label for the disk indicated
976 by row,column in clabel */
977
978 		/* For practice, let's get it directly from disk, rather
979 than from the in-core copy */
980 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
981 (RF_ComponentLabel_t *));
982 if (clabel == NULL)
983 return (ENOMEM);
984
985 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
986
987 retcode = copyin( *clabel_ptr, clabel,
988 sizeof(RF_ComponentLabel_t));
989
990 if (retcode) {
991 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
992 return(retcode);
993 }
994
995 row = clabel->row;
996 column = clabel->column;
997
998 if ((row < 0) || (row >= raidPtr->numRow) ||
999 (column < 0) || (column >= raidPtr->numCol)) {
1000 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1001 return(EINVAL);
1002 }
1003
1004 raidread_component_label(raidPtr->Disks[row][column].dev,
1005 raidPtr->raid_cinfo[row][column].ci_vp,
1006 clabel );
1007
1008 retcode = copyout((caddr_t) clabel,
1009 (caddr_t) *clabel_ptr,
1010 sizeof(RF_ComponentLabel_t));
1011 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1012 return (retcode);
1013
1014 case RAIDFRAME_SET_COMPONENT_LABEL:
1015 clabel = (RF_ComponentLabel_t *) data;
1016
1017 /* XXX check the label for valid stuff... */
1018 /* Note that some things *should not* get modified --
1019 the user should be re-initing the labels instead of
1020 trying to patch things.
1021 */
1022
1023 printf("Got component label:\n");
1024 printf("Version: %d\n",clabel->version);
1025 printf("Serial Number: %d\n",clabel->serial_number);
1026 printf("Mod counter: %d\n",clabel->mod_counter);
1027 printf("Row: %d\n", clabel->row);
1028 printf("Column: %d\n", clabel->column);
1029 printf("Num Rows: %d\n", clabel->num_rows);
1030 printf("Num Columns: %d\n", clabel->num_columns);
1031 printf("Clean: %d\n", clabel->clean);
1032 printf("Status: %d\n", clabel->status);
1033
1034 row = clabel->row;
1035 column = clabel->column;
1036
1037 if ((row < 0) || (row >= raidPtr->numRow) ||
1038 (column < 0) || (column >= raidPtr->numCol)) {
1039 return(EINVAL);
1040 }
1041
1042 /* XXX this isn't allowed to do anything for now :-) */
1043
1044 /* XXX and before it is, we need to fill in the rest
1045 of the fields!?!?!?! */
1046 #if 0
1047 raidwrite_component_label(
1048 raidPtr->Disks[row][column].dev,
1049 raidPtr->raid_cinfo[row][column].ci_vp,
1050 clabel );
1051 #endif
1052 return (0);
1053
1054 case RAIDFRAME_INIT_LABELS:
1055 clabel = (RF_ComponentLabel_t *) data;
1056 /*
1057 we only want the serial number from
1058 the above. We get all the rest of the information
1059 from the config that was used to create this RAID
1060 set.
1061 */
1062
1063 raidPtr->serial_number = clabel->serial_number;
1064
1065 raid_init_component_label(raidPtr, clabel);
1066
1067 for(row=0;row<raidPtr->numRow;row++) {
1068 ci_label.row = row;
1069 for(column=0;column<raidPtr->numCol;column++) {
1070 diskPtr = &raidPtr->Disks[row][column];
1071 ci_label.blockSize = diskPtr->blockSize;
1072 ci_label.numBlocks = diskPtr->numBlocks;
1073 ci_label.partitionSize = diskPtr->partitionSize;
1074 ci_label.column = column;
1075 raidwrite_component_label(
1076 raidPtr->Disks[row][column].dev,
1077 raidPtr->raid_cinfo[row][column].ci_vp,
1078 &ci_label );
1079 }
1080 }
1081
1082 return (retcode);
1083 case RAIDFRAME_SET_AUTOCONFIG:
1084 		d = rf_set_autoconfig(raidPtr, *(int *) data);
1085 		printf("New autoconfig value is: %d\n", d);
1086 		*(int *) data = d;
1087 return (retcode);
1088
1089 case RAIDFRAME_SET_ROOT:
1090 		d = rf_set_rootpartition(raidPtr, *(int *) data);
1091 		printf("New rootpartition value is: %d\n", d);
1092 		*(int *) data = d;
1093 return (retcode);
1094
1095 /* initialize all parity */
1096 case RAIDFRAME_REWRITEPARITY:
1097
1098 if (raidPtr->Layout.map->faultsTolerated == 0) {
1099 /* Parity for RAID 0 is trivially correct */
1100 raidPtr->parity_good = RF_RAID_CLEAN;
1101 return(0);
1102 }
1103
1104 if (raidPtr->parity_rewrite_in_progress == 1) {
1105 /* Re-write is already in progress! */
1106 return(EINVAL);
1107 }
1108
1109 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1110 rf_RewriteParityThread,
1111 raidPtr,"raid_parity");
1112 return (retcode);
1113
1114
1115 case RAIDFRAME_ADD_HOT_SPARE:
1116 sparePtr = (RF_SingleComponent_t *) data;
1117 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1118 printf("Adding spare\n");
1119 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1120 return(retcode);
1121
1122 case RAIDFRAME_REMOVE_HOT_SPARE:
1123 return(retcode);
1124
1125 case RAIDFRAME_REBUILD_IN_PLACE:
1126
1127 if (raidPtr->Layout.map->faultsTolerated == 0) {
1128 /* Can't do this on a RAID 0!! */
1129 return(EINVAL);
1130 }
1131
1132 if (raidPtr->recon_in_progress == 1) {
1133 /* a reconstruct is already in progress! */
1134 return(EINVAL);
1135 }
1136
1137 componentPtr = (RF_SingleComponent_t *) data;
1138 memcpy( &component, componentPtr,
1139 sizeof(RF_SingleComponent_t));
1140 row = component.row;
1141 column = component.column;
1142 printf("Rebuild: %d %d\n",row, column);
1143 if ((row < 0) || (row >= raidPtr->numRow) ||
1144 (column < 0) || (column >= raidPtr->numCol)) {
1145 return(EINVAL);
1146 }
1147
1148 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1149 if (rrcopy == NULL)
1150 return(ENOMEM);
1151
1152 rrcopy->raidPtr = (void *) raidPtr;
1153 rrcopy->row = row;
1154 rrcopy->col = column;
1155
1156 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1157 rf_ReconstructInPlaceThread,
1158 rrcopy,"raid_reconip");
1159 return(retcode);
1160
1161 case RAIDFRAME_GET_INFO:
1162 if (!raidPtr->valid)
1163 return (ENODEV);
1164 ucfgp = (RF_DeviceConfig_t **) data;
1165 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1166 (RF_DeviceConfig_t *));
1167 if (d_cfg == NULL)
1168 return (ENOMEM);
1169 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1170 d_cfg->rows = raidPtr->numRow;
1171 d_cfg->cols = raidPtr->numCol;
1172 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1173 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1174 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1175 return (ENOMEM);
1176 }
1177 d_cfg->nspares = raidPtr->numSpare;
1178 if (d_cfg->nspares >= RF_MAX_DISKS) {
1179 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1180 return (ENOMEM);
1181 }
1182 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1183 d = 0;
1184 for (i = 0; i < d_cfg->rows; i++) {
1185 for (j = 0; j < d_cfg->cols; j++) {
1186 d_cfg->devs[d] = raidPtr->Disks[i][j];
1187 d++;
1188 }
1189 }
1190 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1191 d_cfg->spares[i] = raidPtr->Disks[0][j];
1192 }
1193 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1194 sizeof(RF_DeviceConfig_t));
1195 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1196
1197 return (retcode);
1198
1199 case RAIDFRAME_CHECK_PARITY:
1200 *(int *) data = raidPtr->parity_good;
1201 return (0);
1202
1203 case RAIDFRAME_RESET_ACCTOTALS:
1204 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1205 return (0);
1206
1207 case RAIDFRAME_GET_ACCTOTALS:
1208 totals = (RF_AccTotals_t *) data;
1209 *totals = raidPtr->acc_totals;
1210 return (0);
1211
1212 case RAIDFRAME_KEEP_ACCTOTALS:
1213 raidPtr->keep_acc_totals = *(int *)data;
1214 return (0);
1215
1216 case RAIDFRAME_GET_SIZE:
1217 *(int *) data = raidPtr->totalSectors;
1218 return (0);
1219
1220 /* fail a disk & optionally start reconstruction */
1221 case RAIDFRAME_FAIL_DISK:
1222
1223 if (raidPtr->Layout.map->faultsTolerated == 0) {
1224 /* Can't do this on a RAID 0!! */
1225 return(EINVAL);
1226 }
1227
1228 rr = (struct rf_recon_req *) data;
1229
1230 if (rr->row < 0 || rr->row >= raidPtr->numRow
1231 || rr->col < 0 || rr->col >= raidPtr->numCol)
1232 return (EINVAL);
1233
1234 printf("raid%d: Failing the disk: row: %d col: %d\n",
1235 unit, rr->row, rr->col);
1236
1237 /* make a copy of the recon request so that we don't rely on
1238 * the user's buffer */
1239 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1240 if (rrcopy == NULL)
1241 return(ENOMEM);
1242 bcopy(rr, rrcopy, sizeof(*rr));
1243 rrcopy->raidPtr = (void *) raidPtr;
1244
1245 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1246 rf_ReconThread,
1247 rrcopy,"raid_recon");
1248 return (0);
1249
1250 /* invoke a copyback operation after recon on whatever disk
1251 * needs it, if any */
1252 case RAIDFRAME_COPYBACK:
1253
1254 if (raidPtr->Layout.map->faultsTolerated == 0) {
1255 /* This makes no sense on a RAID 0!! */
1256 return(EINVAL);
1257 }
1258
1259 if (raidPtr->copyback_in_progress == 1) {
1260 /* Copyback is already in progress! */
1261 return(EINVAL);
1262 }
1263
1264 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1265 rf_CopybackThread,
1266 raidPtr,"raid_copyback");
1267 return (retcode);
1268
1269 /* return the percentage completion of reconstruction */
1270 case RAIDFRAME_CHECK_RECON_STATUS:
1271 if (raidPtr->Layout.map->faultsTolerated == 0) {
1272 /* This makes no sense on a RAID 0 */
1273 return(EINVAL);
1274 }
1275 row = 0; /* XXX we only consider a single row... */
1276 if (raidPtr->status[row] != rf_rs_reconstructing)
1277 *(int *) data = 100;
1278 else
1279 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1280 return (0);
1281
1282 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1283 if (raidPtr->Layout.map->faultsTolerated == 0) {
1284 /* This makes no sense on a RAID 0 */
1285 return(EINVAL);
1286 }
1287 if (raidPtr->parity_rewrite_in_progress == 1) {
1288 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1289 } else {
1290 *(int *) data = 100;
1291 }
1292 return (0);
1293
1294 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1295 if (raidPtr->Layout.map->faultsTolerated == 0) {
1296 /* This makes no sense on a RAID 0 */
1297 return(EINVAL);
1298 }
1299 if (raidPtr->copyback_in_progress == 1) {
1300 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1301 raidPtr->Layout.numStripe;
1302 } else {
1303 *(int *) data = 100;
1304 }
1305 return (0);
1306
1307
1308 /* the sparetable daemon calls this to wait for the kernel to
1309 * need a spare table. this ioctl does not return until a
1310 * spare table is needed. XXX -- calling mpsleep here in the
1311 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1312 * -- I should either compute the spare table in the kernel,
1313 * or have a different -- XXX XXX -- interface (a different
1314 * character device) for delivering the table -- XXX */
1315 #if 0
1316 case RAIDFRAME_SPARET_WAIT:
1317 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1318 while (!rf_sparet_wait_queue)
1319 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1320 waitreq = rf_sparet_wait_queue;
1321 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1322 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1323
1324 /* structure assignment */
1325 *((RF_SparetWait_t *) data) = *waitreq;
1326
1327 RF_Free(waitreq, sizeof(*waitreq));
1328 return (0);
1329
1330 /* wakes up a process waiting on SPARET_WAIT and puts an error
1331 	 * code in it that will cause the daemon to exit */
1332 case RAIDFRAME_ABORT_SPARET_WAIT:
1333 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1334 waitreq->fcol = -1;
1335 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1336 waitreq->next = rf_sparet_wait_queue;
1337 rf_sparet_wait_queue = waitreq;
1338 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1339 wakeup(&rf_sparet_wait_queue);
1340 return (0);
1341
1342 /* used by the spare table daemon to deliver a spare table
1343 * into the kernel */
1344 case RAIDFRAME_SEND_SPARET:
1345
1346 /* install the spare table */
1347 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1348
1349 /* respond to the requestor. the return status of the spare
1350 * table installation is passed in the "fcol" field */
1351 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1352 waitreq->fcol = retcode;
1353 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1354 waitreq->next = rf_sparet_resp_queue;
1355 rf_sparet_resp_queue = waitreq;
1356 wakeup(&rf_sparet_resp_queue);
1357 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1358
1359 return (retcode);
1360 #endif
1361
1362 default:
1363 break; /* fall through to the os-specific code below */
1364
1365 }
1366
1367 if (!raidPtr->valid)
1368 return (EINVAL);
1369
1370 /*
1371 * Add support for "regular" device ioctls here.
1372 */
1373
1374 switch (cmd) {
1375 case DIOCGDINFO:
1376 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1377 break;
1378
1379 case DIOCGPART:
1380 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1381 ((struct partinfo *) data)->part =
1382 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1383 break;
1384
1385 case DIOCWDINFO:
1386 case DIOCSDINFO:
1387 if ((error = raidlock(rs)) != 0)
1388 return (error);
1389
1390 rs->sc_flags |= RAIDF_LABELLING;
1391
1392 error = setdisklabel(rs->sc_dkdev.dk_label,
1393 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1394 if (error == 0) {
1395 if (cmd == DIOCWDINFO)
1396 error = writedisklabel(RAIDLABELDEV(dev),
1397 raidstrategy, rs->sc_dkdev.dk_label,
1398 rs->sc_dkdev.dk_cpulabel);
1399 }
1400 rs->sc_flags &= ~RAIDF_LABELLING;
1401
1402 raidunlock(rs);
1403
1404 if (error)
1405 return (error);
1406 break;
1407
1408 case DIOCWLABEL:
1409 if (*(int *) data != 0)
1410 rs->sc_flags |= RAIDF_WLABEL;
1411 else
1412 rs->sc_flags &= ~RAIDF_WLABEL;
1413 break;
1414
1415 case DIOCGDEFLABEL:
1416 raidgetdefaultlabel(raidPtr, rs,
1417 (struct disklabel *) data);
1418 break;
1419
1420 default:
1421 retcode = ENOTTY;
1422 }
1423 return (retcode);
1424
1425 }
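
/*
 * Illustrative userland sketch of the RAIDFRAME_CONFIGURE calling
 * convention implied by the copyin() above: the ioctl argument is the
 * address of a pointer to the RF_Config_t, not the structure itself.
 * (Most fields and all error handling omitted; raidctl(8) is the real
 * consumer of this interface, and the device path is hypothetical.)
 *
 *	RF_Config_t cfg;
 *	void *cfg_ptr = &cfg;
 *	int fd = open("/dev/rraid0d", O_RDWR);
 *
 *	memset(&cfg, 0, sizeof(cfg));
 *	... fill in numRow, numCol, devnames, parityConfig, ...
 *	ioctl(fd, RAIDFRAME_CONFIGURE, &cfg_ptr);
 */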
1426
1427
1428 /* raidinit -- complete the rest of the initialization for the
1429 RAIDframe device. */
1430
1431
1432 static int
1433 raidinit(dev, raidPtr, unit)
1434 dev_t dev;
1435 RF_Raid_t *raidPtr;
1436 int unit;
1437 {
1438 int retcode;
1439 struct raid_softc *rs;
1440
1441 retcode = 0;
1442
1443 rs = &raid_softc[unit];
1444 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1445 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1446
1447
1448 /* XXX should check return code first... */
1449 rs->sc_flags |= RAIDF_INITED;
1450
1451 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1452
1453 rs->sc_dkdev.dk_name = rs->sc_xname;
1454
1455 /* disk_attach actually creates space for the CPU disklabel, among
1456 * other things, so it's critical to call this *BEFORE* we try putzing
1457 * with disklabels. */
1458
1459 disk_attach(&rs->sc_dkdev);
1460
1461 /* XXX There may be a weird interaction here between this, and
1462 * protectedSectors, as used in RAIDframe. */
1463
1464 rs->sc_size = raidPtr->totalSectors;
1465 rs->sc_dev = dev;
1466
1467 return (retcode);
1468 }
1469
1470 /* wake up the daemon & tell it to get us a spare table
1471 * XXX
1472 * the entries in the queues should be tagged with the raidPtr
1473 * so that in the extremely rare case that two recons happen at once,
1474  * we know for which device we're requesting a spare table
1475 * XXX
1476 *
1477 * XXX This code is not currently used. GO
1478 */
1479 int
1480 rf_GetSpareTableFromDaemon(req)
1481 RF_SparetWait_t *req;
1482 {
1483 int retcode;
1484
1485 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1486 req->next = rf_sparet_wait_queue;
1487 rf_sparet_wait_queue = req;
1488 wakeup(&rf_sparet_wait_queue);
1489
1490 /* mpsleep unlocks the mutex */
1491 while (!rf_sparet_resp_queue) {
1492 tsleep(&rf_sparet_resp_queue, PRIBIO,
1493 "raidframe getsparetable", 0);
1494 }
1495 req = rf_sparet_resp_queue;
1496 rf_sparet_resp_queue = req->next;
1497 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1498
1499 retcode = req->fcol;
1500 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1501 * alloc'd */
1502 return (retcode);
1503 }
1504
1505 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1506 * bp & passes it down.
1507 * any calls originating in the kernel must use non-blocking I/O
1508 * do some extra sanity checking to return "appropriate" error values for
1509 * certain conditions (to make some standard utilities work)
1510 *
1511 * Formerly known as: rf_DoAccessKernel
1512 */
1513 void
1514 raidstart(raidPtr)
1515 RF_Raid_t *raidPtr;
1516 {
1517 RF_SectorCount_t num_blocks, pb, sum;
1518 RF_RaidAddr_t raid_addr;
1519 int retcode;
1520 struct partition *pp;
1521 daddr_t blocknum;
1522 int unit;
1523 struct raid_softc *rs;
1524 int do_async;
1525 struct buf *bp;
1526
1527 unit = raidPtr->raidid;
1528 rs = &raid_softc[unit];
1529
1530 /* Check to see if we're at the limit... */
1531 RF_LOCK_MUTEX(raidPtr->mutex);
1532 while (raidPtr->openings > 0) {
1533 RF_UNLOCK_MUTEX(raidPtr->mutex);
1534
1535 /* get the next item, if any, from the queue */
1536 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1537 /* nothing more to do */
1538 return;
1539 }
1540 BUFQ_REMOVE(&rs->buf_queue, bp);
1541
1542 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1543 * partition.. Need to make it absolute to the underlying
1544 * device.. */
1545
1546 blocknum = bp->b_blkno;
1547 if (DISKPART(bp->b_dev) != RAW_PART) {
1548 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1549 blocknum += pp->p_offset;
1550 }
1551
1552 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1553 (int) blocknum));
1554
1555 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1556 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1557
1558 /* *THIS* is where we adjust what block we're going to...
1559 * but DO NOT TOUCH bp->b_blkno!!! */
1560 raid_addr = blocknum;
1561
1562 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1563 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1564 sum = raid_addr + num_blocks + pb;
1565 if (1 || rf_debugKernelAccess) {
1566 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1567 (int) raid_addr, (int) sum, (int) num_blocks,
1568 (int) pb, (int) bp->b_resid));
1569 }
1570 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1571 || (sum < num_blocks) || (sum < pb)) {
1572 bp->b_error = ENOSPC;
1573 bp->b_flags |= B_ERROR;
1574 bp->b_resid = bp->b_bcount;
1575 biodone(bp);
1576 RF_LOCK_MUTEX(raidPtr->mutex);
1577 continue;
1578 }
1579 /*
1580 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1581 */
1582
1583 if (bp->b_bcount & raidPtr->sectorMask) {
1584 bp->b_error = EINVAL;
1585 bp->b_flags |= B_ERROR;
1586 bp->b_resid = bp->b_bcount;
1587 biodone(bp);
1588 RF_LOCK_MUTEX(raidPtr->mutex);
1589 continue;
1590
1591 }
1592 db1_printf(("Calling DoAccess..\n"));
1593
1594
1595 RF_LOCK_MUTEX(raidPtr->mutex);
1596 raidPtr->openings--;
1597 RF_UNLOCK_MUTEX(raidPtr->mutex);
1598
1599 /*
1600 * Everything is async.
1601 */
1602 do_async = 1;
1603
1604 /* don't ever condition on bp->b_flags & B_WRITE.
1605 * always condition on B_READ instead */
1606
1607 /* XXX we're still at splbio() here... do we *really*
1608 need to be? */
1609
1610
1611 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1612 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1613 do_async, raid_addr, num_blocks,
1614 bp->b_un.b_addr, bp, NULL, NULL,
1615 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1616
1617
1618 RF_LOCK_MUTEX(raidPtr->mutex);
1619 }
1620 RF_UNLOCK_MUTEX(raidPtr->mutex);
1621 }
1622
1623
1624
1625
1626 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1627
1628 int
1629 rf_DispatchKernelIO(queue, req)
1630 RF_DiskQueue_t *queue;
1631 RF_DiskQueueData_t *req;
1632 {
1633 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1634 struct buf *bp;
1635 struct raidbuf *raidbp = NULL;
1636 struct raid_softc *rs;
1637 int unit;
1638 int s;
1639
1640 s=0;
1641 /* s = splbio();*/ /* want to test this */
1642 /* XXX along with the vnode, we also need the softc associated with
1643 * this device.. */
1644
1645 req->queue = queue;
1646
1647 unit = queue->raidPtr->raidid;
1648
1649 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1650
1651 if (unit >= numraid) {
1652 printf("Invalid unit number: %d %d\n", unit, numraid);
1653 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1654 }
1655 rs = &raid_softc[unit];
1656
1657 /* XXX is this the right place? */
1658 disk_busy(&rs->sc_dkdev);
1659
1660 bp = req->bp;
1661 #if 1
1662 /* XXX when there is a physical disk failure, someone is passing us a
1663 * buffer that contains old stuff!! Attempt to deal with this problem
1664 * without taking a performance hit... (not sure where the real bug
1665 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1666
1667 if (bp->b_flags & B_ERROR) {
1668 bp->b_flags &= ~B_ERROR;
1669 }
1670 if (bp->b_error != 0) {
1671 bp->b_error = 0;
1672 }
1673 #endif
1674 raidbp = RAIDGETBUF(rs);
1675
1676 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1677
1678 /*
1679 * context for raidiodone
1680 */
1681 raidbp->rf_obp = bp;
1682 raidbp->req = req;
1683
1684 LIST_INIT(&raidbp->rf_buf.b_dep);
1685
1686 switch (req->type) {
1687 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1688 /* XXX need to do something extra here.. */
1689 /* I'm leaving this in, as I've never actually seen it used,
1690 * and I'd like folks to report it... GO */
1691 		printf("WAKEUP CALLED\n");
1692 queue->numOutstanding++;
1693
1694 /* XXX need to glue the original buffer into this?? */
1695
1696 KernelWakeupFunc(&raidbp->rf_buf);
1697 break;
1698
1699 case RF_IO_TYPE_READ:
1700 case RF_IO_TYPE_WRITE:
1701
1702 if (req->tracerec) {
1703 RF_ETIMER_START(req->tracerec->timer);
1704 }
1705 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1706 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1707 req->sectorOffset, req->numSector,
1708 req->buf, KernelWakeupFunc, (void *) req,
1709 queue->raidPtr->logBytesPerSector, req->b_proc);
1710
1711 if (rf_debugKernelAccess) {
1712 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1713 (long) bp->b_blkno));
1714 }
1715 queue->numOutstanding++;
1716 queue->last_deq_sector = req->sectorOffset;
1717 /* acc wouldn't have been let in if there were any pending
1718 * reqs at any other priority */
1719 queue->curPriority = req->priority;
1720
1721 db1_printf(("Going for %c to unit %d row %d col %d\n",
1722 req->type, unit, queue->row, queue->col));
1723 db1_printf(("sector %d count %d (%d bytes) %d\n",
1724 (int) req->sectorOffset, (int) req->numSector,
1725 (int) (req->numSector <<
1726 queue->raidPtr->logBytesPerSector),
1727 (int) queue->raidPtr->logBytesPerSector));
1728 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1729 raidbp->rf_buf.b_vp->v_numoutput++;
1730 }
1731 VOP_STRATEGY(&raidbp->rf_buf);
1732
1733 break;
1734
1735 default:
1736 panic("bad req->type in rf_DispatchKernelIO");
1737 }
1738 db1_printf(("Exiting from DispatchKernelIO\n"));
1739 /* splx(s); */ /* want to test this */
1740 return (0);
1741 }
1742 /* this is the callback function associated with an I/O invoked from
1743 kernel code.
1744 */
1745 static void
1746 KernelWakeupFunc(vbp)
1747 struct buf *vbp;
1748 {
1749 RF_DiskQueueData_t *req = NULL;
1750 RF_DiskQueue_t *queue;
1751 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1752 struct buf *bp;
1753 struct raid_softc *rs;
1754 int unit;
1755 register int s;
1756
1757 s = splbio();
1758 db1_printf(("recovering the request queue:\n"));
1759 req = raidbp->req;
1760
1761 bp = raidbp->rf_obp;
1762
1763 queue = (RF_DiskQueue_t *) req->queue;
1764
1765 if (raidbp->rf_buf.b_flags & B_ERROR) {
1766 bp->b_flags |= B_ERROR;
1767 bp->b_error = raidbp->rf_buf.b_error ?
1768 raidbp->rf_buf.b_error : EIO;
1769 }
1770
1771 /* XXX methinks this could be wrong... */
1772 #if 1
1773 bp->b_resid = raidbp->rf_buf.b_resid;
1774 #endif
1775
1776 if (req->tracerec) {
1777 RF_ETIMER_STOP(req->tracerec->timer);
1778 RF_ETIMER_EVAL(req->tracerec->timer);
1779 RF_LOCK_MUTEX(rf_tracing_mutex);
1780 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1781 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1782 req->tracerec->num_phys_ios++;
1783 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1784 }
1785 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1786
1787 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1788
1789
1790 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1791 * ballistic, and mark the component as hosed... */
1792
1793 if (bp->b_flags & B_ERROR) {
1794 /* Mark the disk as dead */
1795 /* but only mark it once... */
1796 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1797 rf_ds_optimal) {
1798 printf("raid%d: IO Error. Marking %s as failed.\n",
1799 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1800 queue->raidPtr->Disks[queue->row][queue->col].status =
1801 rf_ds_failed;
1802 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1803 queue->raidPtr->numFailures++;
1804 /* XXX here we should bump the version number for each component, and write that data out */
1805 } else { /* Disk is already dead... */
1806 /* printf("Disk already marked as dead!\n"); */
1807 }
1808
1809 }
1810
1811 rs = &raid_softc[unit];
1812 RAIDPUTBUF(rs, raidbp);
1813
1814
1815 if (bp->b_resid == 0) {
1816 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1817 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1818 }
1819
1820 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1821 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1822
1823 splx(s);
1824 }
1825
1826
1827
1828 /*
1829 * initialize a buf structure for doing an I/O in the kernel.
1830 */
1831 static void
1832 InitBP(
1833 struct buf * bp,
1834 struct vnode * b_vp,
1835 unsigned rw_flag,
1836 dev_t dev,
1837 RF_SectorNum_t startSect,
1838 RF_SectorCount_t numSect,
1839 caddr_t buf,
1840 void (*cbFunc) (struct buf *),
1841 void *cbArg,
1842 int logBytesPerSector,
1843 struct proc * b_proc)
1844 {
1845 /* bp->b_flags = B_PHYS | rw_flag; */
1846 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1847 bp->b_bcount = numSect << logBytesPerSector;
1848 bp->b_bufsize = bp->b_bcount;
1849 bp->b_error = 0;
1850 bp->b_dev = dev;
1851 bp->b_un.b_addr = buf;
1852 bp->b_blkno = startSect;
1853 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1854 if (bp->b_bcount == 0) {
1855 panic("bp->b_bcount is zero in InitBP!!\n");
1856 }
1857 bp->b_proc = b_proc;
1858 bp->b_iodone = cbFunc;
1859 bp->b_vp = b_vp;
1860
1861 }
1862
1863 static void
1864 raidgetdefaultlabel(raidPtr, rs, lp)
1865 RF_Raid_t *raidPtr;
1866 struct raid_softc *rs;
1867 struct disklabel *lp;
1868 {
1869 db1_printf(("Building a default label...\n"));
1870 bzero(lp, sizeof(*lp));
1871
1872 /* fabricate a label... */
1873 lp->d_secperunit = raidPtr->totalSectors;
1874 lp->d_secsize = raidPtr->bytesPerSector;
1875 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1876 lp->d_ntracks = 1;
1877 lp->d_ncylinders = raidPtr->totalSectors /
1878 (lp->d_nsectors * lp->d_ntracks);
1879 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1880
1881 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1882 lp->d_type = DTYPE_RAID;
1883 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1884 lp->d_rpm = 3600;
1885 lp->d_interleave = 1;
1886 lp->d_flags = 0;
1887
1888 lp->d_partitions[RAW_PART].p_offset = 0;
1889 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1890 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1891 lp->d_npartitions = RAW_PART + 1;
1892
1893 lp->d_magic = DISKMAGIC;
1894 lp->d_magic2 = DISKMAGIC;
1895 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1896
1897 }
1898 /*
1899 * Read the disklabel from the raid device. If one is not present, fake one
1900 * up.
1901 */
1902 static void
1903 raidgetdisklabel(dev)
1904 dev_t dev;
1905 {
1906 int unit = raidunit(dev);
1907 struct raid_softc *rs = &raid_softc[unit];
1908 char *errstring;
1909 struct disklabel *lp = rs->sc_dkdev.dk_label;
1910 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1911 RF_Raid_t *raidPtr;
1912
1913 db1_printf(("Getting the disklabel...\n"));
1914
1915 bzero(clp, sizeof(*clp));
1916
1917 raidPtr = raidPtrs[unit];
1918
1919 raidgetdefaultlabel(raidPtr, rs, lp);
1920
1921 /*
1922 * Call the generic disklabel extraction routine.
1923 */
1924 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1925 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1926 if (errstring)
1927 raidmakedisklabel(rs);
1928 else {
1929 int i;
1930 struct partition *pp;
1931
1932 /*
1933 * Sanity check whether the found disklabel is valid.
1934 *
1935 		 * This is necessary since the total size of the raid
1936 		 * device may vary when the interleave is changed even
1937 		 * though exactly the same components are used, and an
1938 		 * old disklabel may be used if one is found.
1939 */
1940 if (lp->d_secperunit != rs->sc_size)
1941 printf("WARNING: %s: "
1942 "total sector size in disklabel (%d) != "
1943 "the size of raid (%ld)\n", rs->sc_xname,
1944 lp->d_secperunit, (long) rs->sc_size);
1945 for (i = 0; i < lp->d_npartitions; i++) {
1946 pp = &lp->d_partitions[i];
1947 if (pp->p_offset + pp->p_size > rs->sc_size)
1948 printf("WARNING: %s: end of partition `%c' "
1949 "exceeds the size of raid (%ld)\n",
1950 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1951 }
1952 }
1953
1954 }
1955 /*
1956 * Take care of things one might want to take care of in the event
1957 * that a disklabel isn't present.
1958 */
1959 static void
1960 raidmakedisklabel(rs)
1961 struct raid_softc *rs;
1962 {
1963 struct disklabel *lp = rs->sc_dkdev.dk_label;
1964 db1_printf(("Making a label..\n"));
1965
1966 /*
1967 * For historical reasons, if there's no disklabel present
1968 * the raw partition must be marked FS_BSDFFS.
1969 */
1970
1971 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1972
1973 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1974
1975 lp->d_checksum = dkcksum(lp);
1976 }
1977 /*
1978 * Lookup the provided name in the filesystem. If the file exists,
1979 * is a valid block device, and isn't being used by anyone else,
1980 * set *vpp to the file's vnode.
1981 * You'll find the original of this in ccd.c
1982 */
1983 int
1984 raidlookup(path, p, vpp)
1985 char *path;
1986 struct proc *p;
1987 struct vnode **vpp; /* result */
1988 {
1989 struct nameidata nd;
1990 struct vnode *vp;
1991 struct vattr va;
1992 int error;
1993
1994 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1995 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1996 #ifdef DEBUG
1997 printf("RAIDframe: vn_open returned %d\n", error);
1998 #endif
1999 return (error);
2000 }
2001 vp = nd.ni_vp;
2002 if (vp->v_usecount > 1) {
2003 VOP_UNLOCK(vp, 0);
2004 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2005 return (EBUSY);
2006 }
2007 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2008 VOP_UNLOCK(vp, 0);
2009 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2010 return (error);
2011 }
2012 /* XXX: eventually we should handle VREG, too. */
2013 if (va.va_type != VBLK) {
2014 VOP_UNLOCK(vp, 0);
2015 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2016 return (ENOTBLK);
2017 }
2018 VOP_UNLOCK(vp, 0);
2019 *vpp = vp;
2020 return (0);
2021 }
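/*
 * Editor's illustrative sketch (not in the original source): a configuration
 * path would use raidlookup() roughly like this, where "component_name" is a
 * hypothetical /dev path handed in from userland:
 *
 *	struct vnode *vp;
 *	int error;
 *
 *	if ((error = raidlookup(component_name, p, &vp)) != 0)
 *		return (error);
 *	... use vp for component I/O ...
 *	(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
 *
 * raidlookup() returns the vnode unlocked but still open, so the caller owns
 * the eventual vn_close().
 */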
2022 /*
2023 * Wait interruptibly for an exclusive lock.
2024 *
2025 * XXX
2026 * Several drivers do this; it should be abstracted and made MP-safe.
2027 * (Hmm... where have we seen this warning before :-> GO )
2028 */
2029 static int
2030 raidlock(rs)
2031 struct raid_softc *rs;
2032 {
2033 int error;
2034
2035 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2036 rs->sc_flags |= RAIDF_WANTED;
2037 if ((error =
2038 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2039 return (error);
2040 }
2041 rs->sc_flags |= RAIDF_LOCKED;
2042 return (0);
2043 }
2044 /*
2045 * Unlock and wake up any waiters.
2046 */
2047 static void
2048 raidunlock(rs)
2049 struct raid_softc *rs;
2050 {
2051
2052 rs->sc_flags &= ~RAIDF_LOCKED;
2053 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2054 rs->sc_flags &= ~RAIDF_WANTED;
2055 wakeup(rs);
2056 }
2057 }
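/*
 * Editor's illustrative sketch (not in the original source): the pair above
 * is meant to bracket changes to the per-unit softc, e.g.:
 *
 *	if ((error = raidlock(rs)) != 0)
 *		return (error);
 *	... manipulate rs->sc_flags, unit configuration, etc. ...
 *	raidunlock(rs);
 *
 * raidlock() fails only if the tsleep() is interrupted (PCATCH), so callers
 * must be prepared for an errno return.
 */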
2058
2059
2060 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2061 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
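/*
 * Editor's note (illustrative arithmetic): with the usual DEV_BSIZE of 512
 * bytes the component label lives at block 16384 / 512 = 32 of the
 * component and occupies 1024 / 512 = 2 blocks.
 */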
2062
2063 int
2064 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2065 {
2066 RF_ComponentLabel_t clabel;
2067 raidread_component_label(dev, b_vp, &clabel);
2068 clabel.mod_counter = mod_counter;
2069 clabel.clean = RF_RAID_CLEAN;
2070 raidwrite_component_label(dev, b_vp, &clabel);
2071 return(0);
2072 }
2073
2074
2075 int
2076 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2077 {
2078 RF_ComponentLabel_t clabel;
2079 raidread_component_label(dev, b_vp, &clabel);
2080 clabel.mod_counter = mod_counter;
2081 clabel.clean = RF_RAID_DIRTY;
2082 raidwrite_component_label(dev, b_vp, &clabel);
2083 return(0);
2084 }
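/*
 * Editor's note (illustrative, not from the original source): the two
 * helpers above are read-modify-write updates of the on-disk component
 * label.  A shutdown or parity-rewrite path would do something like
 *
 *	raidmarkclean(dev, vp, raidPtr->mod_counter);
 *
 * per component, while rf_markalldirty() below walks the whole array and
 * marks every live component RF_RAID_DIRTY again.
 */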
2085
2086 /* ARGSUSED */
2087 int
2088 raidread_component_label(dev, b_vp, clabel)
2089 dev_t dev;
2090 struct vnode *b_vp;
2091 RF_ComponentLabel_t *clabel;
2092 {
2093 struct buf *bp;
2094 int error;
2095
2096 /* XXX should probably ensure that we don't try to do this if
2097 someone has changed rf_protected_sectors. */
2098
2099 /* get a block of the appropriate size... */
2100 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2101 bp->b_dev = dev;
2102
2103 /* get our ducks in a row for the read */
2104 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2105 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2106 bp->b_flags = B_BUSY | B_READ;
2107 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2108
2109 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2110
2111 error = biowait(bp);
2112
2113 if (!error) {
2114 memcpy(clabel, bp->b_un.b_addr,
2115 sizeof(RF_ComponentLabel_t));
2116 #if 0
2117 print_component_label( clabel );
2118 #endif
2119 } else {
2120 #if 0
2121 printf("Failed to read RAID component label!\n");
2122 #endif
2123 }
2124
2125 bp->b_flags = B_INVAL | B_AGE;
2126 brelse(bp);
2127 return(error);
2128 }
2129 /* ARGSUSED */
2130 int
2131 raidwrite_component_label(dev, b_vp, clabel)
2132 dev_t dev;
2133 struct vnode *b_vp;
2134 RF_ComponentLabel_t *clabel;
2135 {
2136 struct buf *bp;
2137 int error;
2138
2139 /* get a block of the appropriate size... */
2140 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2141 bp->b_dev = dev;
2142
2143 /* get our ducks in a row for the write */
2144 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2145 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2146 bp->b_flags = B_BUSY | B_WRITE;
2147 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2148
2149 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2150
2151 memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
2152
2153 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2154 error = biowait(bp);
2155 bp->b_flags = B_INVAL | B_AGE;
2156 brelse(bp);
2157 if (error) {
2158 #if 1
2159 printf("Failed to write RAID component info!\n");
2160 #endif
2161 }
2162
2163 return(error);
2164 }
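/*
 * Editor's note: both label routines above borrow a buffer with geteblk(),
 * push it straight through the component's bdevsw strategy routine, and
 * release it with B_INVAL | B_AGE set so that a stale label image is never
 * re-used from the buffer cache.
 */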
2165
2166 void
2167 rf_markalldirty( raidPtr )
2168 RF_Raid_t *raidPtr;
2169 {
2170 RF_ComponentLabel_t clabel;
2171 int r,c;
2172
2173 raidPtr->mod_counter++;
2174 for (r = 0; r < raidPtr->numRow; r++) {
2175 for (c = 0; c < raidPtr->numCol; c++) {
2176 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2177 raidread_component_label(
2178 raidPtr->Disks[r][c].dev,
2179 raidPtr->raid_cinfo[r][c].ci_vp,
2180 &clabel);
2181 if (clabel.status == rf_ds_spared) {
2182 /* XXX do something special...
2183 but whatever you do, don't
2184 try to access it!! */
2185 } else {
2186 #if 0
2187 clabel.status =
2188 raidPtr->Disks[r][c].status;
2189 raidwrite_component_label(
2190 raidPtr->Disks[r][c].dev,
2191 raidPtr->raid_cinfo[r][c].ci_vp,
2192 &clabel);
2193 #endif
2194 raidmarkdirty(
2195 raidPtr->Disks[r][c].dev,
2196 raidPtr->raid_cinfo[r][c].ci_vp,
2197 raidPtr->mod_counter);
2198 }
2199 }
2200 }
2201 }
2202 /* printf("Component labels marked dirty.\n"); */
2203 #if 0
2204 for( c = 0; c < raidPtr->numSpare ; c++) {
2205 sparecol = raidPtr->numCol + c;
2206 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2207 /*
2208
2209 XXX this is where we get fancy and map this spare
2210 		   into its correct spot in the array.
2211
2212 */
2213 /*
2214
2215 we claim this disk is "optimal" if it's
2216 rf_ds_used_spare, as that means it should be
2217 directly substitutable for the disk it replaced.
2218 We note that too...
2219
2220 */
2221
2222 for(i=0;i<raidPtr->numRow;i++) {
2223 for(j=0;j<raidPtr->numCol;j++) {
2224 if ((raidPtr->Disks[i][j].spareRow ==
2225 r) &&
2226 (raidPtr->Disks[i][j].spareCol ==
2227 sparecol)) {
2228 srow = r;
2229 scol = sparecol;
2230 break;
2231 }
2232 }
2233 }
2234
2235 raidread_component_label(
2236 raidPtr->Disks[r][sparecol].dev,
2237 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2238 &clabel);
2239 /* make sure status is noted */
2240 clabel.version = RF_COMPONENT_LABEL_VERSION;
2241 clabel.mod_counter = raidPtr->mod_counter;
2242 clabel.serial_number = raidPtr->serial_number;
2243 clabel.row = srow;
2244 clabel.column = scol;
2245 clabel.num_rows = raidPtr->numRow;
2246 clabel.num_columns = raidPtr->numCol;
2247 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2248 clabel.status = rf_ds_optimal;
2249 raidwrite_component_label(
2250 raidPtr->Disks[r][sparecol].dev,
2251 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2252 &clabel);
2253 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2254 			    raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2255 }
2256 }
2257
2258 #endif
2259 }
2260
2261
2262 void
2263 rf_update_component_labels( raidPtr )
2264 RF_Raid_t *raidPtr;
2265 {
2266 RF_ComponentLabel_t clabel;
2267 int sparecol;
2268 int r,c;
2269 int i,j;
2270 int srow, scol;
2271
2272 srow = -1;
2273 scol = -1;
2274
2275 /* XXX should do extra checks to make sure things really are clean,
2276 rather than blindly setting the clean bit... */
2277
2278 raidPtr->mod_counter++;
2279
2280 for (r = 0; r < raidPtr->numRow; r++) {
2281 for (c = 0; c < raidPtr->numCol; c++) {
2282 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2283 raidread_component_label(
2284 raidPtr->Disks[r][c].dev,
2285 raidPtr->raid_cinfo[r][c].ci_vp,
2286 &clabel);
2287 /* make sure status is noted */
2288 clabel.status = rf_ds_optimal;
2289 raidwrite_component_label(
2290 raidPtr->Disks[r][c].dev,
2291 raidPtr->raid_cinfo[r][c].ci_vp,
2292 &clabel);
2293 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2294 raidmarkclean(
2295 raidPtr->Disks[r][c].dev,
2296 raidPtr->raid_cinfo[r][c].ci_vp,
2297 raidPtr->mod_counter);
2298 }
2299 }
2300 /* else we don't touch it.. */
2301 #if 0
2302 else if (raidPtr->Disks[r][c].status !=
2303 rf_ds_failed) {
2304 raidread_component_label(
2305 raidPtr->Disks[r][c].dev,
2306 raidPtr->raid_cinfo[r][c].ci_vp,
2307 &clabel);
2308 /* make sure status is noted */
2309 clabel.status =
2310 raidPtr->Disks[r][c].status;
2311 raidwrite_component_label(
2312 raidPtr->Disks[r][c].dev,
2313 raidPtr->raid_cinfo[r][c].ci_vp,
2314 &clabel);
2315 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2316 raidmarkclean(
2317 raidPtr->Disks[r][c].dev,
2318 raidPtr->raid_cinfo[r][c].ci_vp,
2319 raidPtr->mod_counter);
2320 }
2321 }
2322 #endif
2323 }
2324 }
2325
2326 for( c = 0; c < raidPtr->numSpare ; c++) {
2327 sparecol = raidPtr->numCol + c;
2328 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2329 /*
2330
2331 we claim this disk is "optimal" if it's
2332 rf_ds_used_spare, as that means it should be
2333 directly substitutable for the disk it replaced.
2334 We note that too...
2335
2336 */
2337
2338 for(i=0;i<raidPtr->numRow;i++) {
2339 for(j=0;j<raidPtr->numCol;j++) {
2340 if ((raidPtr->Disks[i][j].spareRow ==
2341 0) &&
2342 (raidPtr->Disks[i][j].spareCol ==
2343 sparecol)) {
2344 srow = i;
2345 scol = j;
2346 break;
2347 }
2348 }
2349 }
2350
2351 raidread_component_label(
2352 raidPtr->Disks[0][sparecol].dev,
2353 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2354 &clabel);
2355 /* make sure status is noted */
2356 clabel.version = RF_COMPONENT_LABEL_VERSION;
2357 clabel.mod_counter = raidPtr->mod_counter;
2358 clabel.serial_number = raidPtr->serial_number;
2359 clabel.row = srow;
2360 clabel.column = scol;
2361 clabel.num_rows = raidPtr->numRow;
2362 clabel.num_columns = raidPtr->numCol;
2363 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2364 clabel.status = rf_ds_optimal;
2365 raidwrite_component_label(
2366 raidPtr->Disks[0][sparecol].dev,
2367 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2368 &clabel);
2369 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2370 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2371 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2372 raidPtr->mod_counter);
2373 }
2374 }
2375 }
2376 /* printf("Component labels updated\n"); */
2377 }
2378
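/*
 * Editor's note: the four functions below are kernel-thread entry points,
 * started from the ioctl paths elsewhere in this driver.  Each one is handed
 * a request (or just the RF_Raid_t), does its work at splbio(), and finishes
 * with kthread_exit() rather than returning.
 */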
2379 void
2380 rf_ReconThread(req)
2381 struct rf_recon_req *req;
2382 {
2383 int s;
2384 RF_Raid_t *raidPtr;
2385
2386 s = splbio();
2387 raidPtr = (RF_Raid_t *) req->raidPtr;
2388 raidPtr->recon_in_progress = 1;
2389
2390 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2391 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2392
2393 /* XXX get rid of this! we don't need it at all.. */
2394 RF_Free(req, sizeof(*req));
2395
2396 raidPtr->recon_in_progress = 0;
2397 splx(s);
2398
2399 /* That's all... */
2400 kthread_exit(0); /* does not return */
2401 }
2402
2403 void
2404 rf_RewriteParityThread(raidPtr)
2405 RF_Raid_t *raidPtr;
2406 {
2407 int retcode;
2408 int s;
2409
2410 raidPtr->parity_rewrite_in_progress = 1;
2411 s = splbio();
2412 retcode = rf_RewriteParity(raidPtr);
2413 splx(s);
2414 if (retcode) {
2415 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2416 } else {
2417 		/* set the clean bit! If we shut down correctly,
2418 the clean bit on each component label will get
2419 set */
2420 raidPtr->parity_good = RF_RAID_CLEAN;
2421 }
2422 raidPtr->parity_rewrite_in_progress = 0;
2423
2424 /* That's all... */
2425 kthread_exit(0); /* does not return */
2426 }
2427
2428
2429 void
2430 rf_CopybackThread(raidPtr)
2431 RF_Raid_t *raidPtr;
2432 {
2433 int s;
2434
2435 raidPtr->copyback_in_progress = 1;
2436 s = splbio();
2437 rf_CopybackReconstructedData(raidPtr);
2438 splx(s);
2439 raidPtr->copyback_in_progress = 0;
2440
2441 /* That's all... */
2442 kthread_exit(0); /* does not return */
2443 }
2444
2445
2446 void
2447 rf_ReconstructInPlaceThread(req)
2448 struct rf_recon_req *req;
2449 {
2450 int retcode;
2451 int s;
2452 RF_Raid_t *raidPtr;
2453
2454 s = splbio();
2455 raidPtr = req->raidPtr;
2456 raidPtr->recon_in_progress = 1;
2457 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2458 RF_Free(req, sizeof(*req));
2459 raidPtr->recon_in_progress = 0;
2460 splx(s);
2461
2462 /* That's all... */
2463 kthread_exit(0); /* does not return */
2464 }
2465
2466 void
2467 rf_mountroot_hook(dev)
2468 struct device *dev;
2469 {
2470 #if 1
2471 printf("rf_mountroot_hook called for %s\n",dev->dv_xname);
2472 #endif
2473 if (boothowto & RB_ASKNAME) {
2474 /* We don't auto-config... */
2475 } else {
2476 /* They didn't ask, and we found something bootable... */
2477 /* XXX pretend for now.. */
2478 if (raidautoconfig) {
2479 rootspec = raid_rooty;
2480 }
2481 }
2482 }
2483
2484
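/*
 * Editor's summary (descriptive, added for clarity): rf_find_raid_components()
 * walks every device on the alldevs list, skips anything that is not a disk
 * (and floppies), opens the raw partition just long enough to fetch the
 * disklabel, and then, for each partition of type FS_RAID, reads the
 * RAIDframe component label.  Labels that pass reasonable_label() and whose
 * recorded partitionSize matches the partition are collected on the
 * RF_AutoConfig_t list that is returned.
 */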
2485 RF_AutoConfig_t *
2486 rf_find_raid_components()
2487 {
2488 struct devnametobdevmaj *dtobdm;
2489 struct vnode *vp;
2490 struct disklabel label;
2491 struct device *dv;
2492 char *cd_name;
2493 dev_t dev;
2494 int error;
2495 int i;
2496 int good_one;
2497 RF_ComponentLabel_t *clabel;
2498 RF_AutoConfig_t *ac_list;
2499 RF_AutoConfig_t *ac;
2500
2501
2502 /* initialize the AutoConfig list */
2503 ac_list = NULL;
2504
2505 if (raidautoconfig) {
2506
2507 /* we begin by trolling through *all* the devices on the system */
2508
2509 for (dv = alldevs.tqh_first; dv != NULL;
2510 dv = dv->dv_list.tqe_next) {
2511
2512 /* we are only interested in disks... */
2513 if (dv->dv_class != DV_DISK)
2514 continue;
2515
2516 /* we don't care about floppies... */
2517 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2518 continue;
2519 }
2520
2521 /* need to find the device_name_to_block_device_major stuff */
2522 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2523 dtobdm = dev_name2blk;
2524 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2525 dtobdm++;
2526 }
2527
2528 /* get a vnode for the raw partition of this disk */
2529
2530 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2531 if (bdevvp(dev, &vp))
2532 panic("RAID can't alloc vnode");
2533
2534 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2535
2536 if (error) {
2537 /* "Who cares." Continue looking
2538 for something that exists*/
2539 vput(vp);
2540 continue;
2541 }
2542
2543 /* Ok, the disk exists. Go get the disklabel. */
2544 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2545 FREAD, NOCRED, 0);
2546 if (error) {
2547 /*
2548 * XXX can't happen - open() would
2549 * have errored out (or faked up one)
2550 */
2551 printf("can't get label for dev %s%c (%d)!?!?\n",
2552 dv->dv_xname, 'a' + RAW_PART, error);
2553 }
2554
2555 /* don't need this any more. We'll allocate it again
2556 a little later if we really do... */
2557 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2558 vput(vp);
2559
2560 for (i=0; i < label.d_npartitions; i++) {
2561 /* We only support partitions marked as RAID */
2562 if (label.d_partitions[i].p_fstype != FS_RAID)
2563 continue;
2564
2565 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2566 if (bdevvp(dev, &vp))
2567 panic("RAID can't alloc vnode");
2568
2569 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2570 if (error) {
2571 /* Whatever... */
2572 vput(vp);
2573 continue;
2574 }
2575
2576 good_one = 0;
2577
2578 clabel = (RF_ComponentLabel_t *)
2579 malloc(sizeof(RF_ComponentLabel_t),
2580 M_RAIDFRAME, M_NOWAIT);
2581 if (clabel == NULL) {
2582 /* XXX CLEANUP HERE */
2583 printf("RAID auto config: out of memory!\n");
2584 return(NULL); /* XXX probably should panic? */
2585 }
2586
2587 if (!raidread_component_label(dev, vp, clabel)) {
2588 /* Got the label. Does it look reasonable? */
2589 if (reasonable_label(clabel) &&
2590 (clabel->partitionSize ==
2591 label.d_partitions[i].p_size)) {
2592 #ifdef DEBUG
2593 printf("Component on: %s%c: %d\n",
2594 dv->dv_xname, 'a'+i,
2595 label.d_partitions[i].p_size);
2596 print_component_label(clabel);
2597 #endif
2598 /* if it's reasonable, add it,
2599 else ignore it. */
2600 ac = (RF_AutoConfig_t *)
2601 malloc(sizeof(RF_AutoConfig_t),
2602 M_RAIDFRAME,
2603 M_NOWAIT);
2604 if (ac == NULL) {
2605 					/* XXX should panic?? */
					free(clabel, M_RAIDFRAME);
2606 					return(NULL);
2607 }
2608
2609 sprintf(ac->devname, "%s%c",
2610 dv->dv_xname, 'a'+i);
2611 ac->dev = dev;
2612 ac->vp = vp;
2613 ac->clabel = clabel;
2614 ac->next = ac_list;
2615 ac_list = ac;
2616 good_one = 1;
2617 }
2618 }
2619 if (!good_one) {
2620 /* cleanup */
2621 free(clabel, M_RAIDFRAME);
2622 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2623 vput(vp);
2624 }
2625 }
2626 }
2627 }
2628 return(ac_list);
2629 }
2630
2631 static int
2632 reasonable_label(clabel)
2633 RF_ComponentLabel_t *clabel;
2634 {
2635
2636 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2637 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2638 ((clabel->clean == RF_RAID_CLEAN) ||
2639 (clabel->clean == RF_RAID_DIRTY)) &&
2640 clabel->row >=0 &&
2641 clabel->column >= 0 &&
2642 clabel->num_rows > 0 &&
2643 clabel->num_columns > 0 &&
2644 clabel->row < clabel->num_rows &&
2645 clabel->column < clabel->num_columns &&
2646 clabel->blockSize > 0 &&
2647 clabel->numBlocks > 0) {
2648 /* label looks reasonable enough... */
2649 return(1);
2650 }
2651 return(0);
2652 }
2653
2654
2655 void
2656 print_component_label(clabel)
2657 RF_ComponentLabel_t *clabel;
2658 {
2659 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2660 clabel->row, clabel->column,
2661 clabel->num_rows, clabel->num_columns);
2662 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2663 clabel->version, clabel->serial_number,
2664 clabel->mod_counter);
2665 printf(" Clean: %s Status: %d\n",
2666 clabel->clean ? "Yes" : "No", clabel->status );
2667 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2668 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2669 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2670 (char) clabel->parityConfig, clabel->blockSize,
2671 clabel->numBlocks);
2672 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2673 printf(" Last configured as: raid%d\n", clabel->last_unit );
2674 printf(" Config order: %d\n", clabel->config_order);
2675
2676 }
2677
2678 RF_ConfigSet_t *
2679 rf_create_auto_sets(ac_list)
2680 RF_AutoConfig_t *ac_list;
2681 {
2682 RF_AutoConfig_t *ac;
2683 RF_ConfigSet_t *config_sets;
2684 RF_ConfigSet_t *cset;
2685 RF_AutoConfig_t *ac_next;
2686
2687
2688 config_sets = NULL;
2689
2690 /* Go through the AutoConfig list, and figure out which components
2691 belong to what sets. */
2692 ac = ac_list;
2693 while(ac!=NULL) {
2694 /* we're going to putz with ac->next, so save it here
2695 for use at the end of the loop */
2696 ac_next = ac->next;
2697
2698 if (config_sets == NULL) {
2699 /* will need at least this one... */
2700 config_sets = (RF_ConfigSet_t *)
2701 malloc(sizeof(RF_ConfigSet_t),
2702 M_RAIDFRAME, M_NOWAIT);
2703 if (config_sets == NULL) {
2704 panic("rf_create_auto_sets: No memory!\n");
2705 }
2706 /* this one is easy :) */
2707 config_sets->ac = ac;
2708 config_sets->next = NULL;
2709 ac->next = NULL;
2710 } else {
2711 /* which set does this component fit into? */
2712 cset = config_sets;
2713 while(cset!=NULL) {
2714 if (does_it_fit(cset, ac)) {
2715 /* looks like it matches */
2716 ac->next = cset->ac;
2717 cset->ac = ac;
2718 break;
2719 }
2720 cset = cset->next;
2721 }
2722 if (cset==NULL) {
2723 /* didn't find a match above... new set..*/
2724 cset = (RF_ConfigSet_t *)
2725 malloc(sizeof(RF_ConfigSet_t),
2726 M_RAIDFRAME, M_NOWAIT);
2727 if (cset == NULL) {
2728 panic("rf_create_auto_sets: No memory!\n");
2729 }
2730 cset->ac = ac;
2731 ac->next = NULL;
2732 cset->next = config_sets;
2733 config_sets = cset;
2734 }
2735 }
2736 ac = ac_next;
2737 }
2738
2739
2740 return(config_sets);
2741 }
2742
2743 static int
2744 does_it_fit(cset, ac)
2745 RF_ConfigSet_t *cset;
2746 RF_AutoConfig_t *ac;
2747 {
2748 RF_ComponentLabel_t *clabel1, *clabel2;
2749
2750 /* If this one matches the *first* one in the set, that's good
2751 enough, since the other members of the set would have been
2752 through here too... */
2753
2754 clabel1 = cset->ac->clabel;
2755 clabel2 = ac->clabel;
2756 if ((clabel1->version == clabel2->version) &&
2757 (clabel1->serial_number == clabel2->serial_number) &&
2758 (clabel1->mod_counter == clabel2->mod_counter) &&
2759 (clabel1->num_rows == clabel2->num_rows) &&
2760 (clabel1->num_columns == clabel2->num_columns) &&
2761 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2762 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2763 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2764 (clabel1->parityConfig == clabel2->parityConfig) &&
2765 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2766 (clabel1->blockSize == clabel2->blockSize) &&
2767 (clabel1->numBlocks == clabel2->numBlocks) &&
2768 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2769 (clabel1->root_partition == clabel2->root_partition) &&
2770 (clabel1->last_unit == clabel2->last_unit) &&
2771 (clabel1->config_order == clabel2->config_order)) {
2772 		/* if it gets here, it almost *has* to be a match */
2773 } else {
2774 /* it's not consistent with somebody in the set..
2775 punt */
2776 return(0);
2777 }
2778 /* all was fine.. it must fit... */
2779 return(1);
2780 }
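/*
 * Editor's note: the test above is deliberately strict; every listed label
 * field, including mod_counter, must match the first member of the set.
 * Components whose labels differ in any of these fields simply land in a
 * separate config set in rf_create_auto_sets() above.
 */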
2781
2782 #if 0
2783 int have_enough();
2784 int
2785 have_enough()
2786 {
2787 /* check to see that we have enough 'live' components
2788 of this set. If so, we can configure it if necessary */
2789
2790 }
2791 #endif
2792
2793 void
2794 create_configuration(ac,config,raidPtr)
2795 RF_AutoConfig_t *ac;
2796 RF_Config_t *config;
2797 RF_Raid_t *raidPtr;
2798 {
2799 RF_ComponentLabel_t *clabel;
2800
2801 clabel = ac->clabel;
2802
2803 /* 1. Fill in the common stuff */
2804 config->numRow = clabel->num_rows;
2805 config->numCol = clabel->num_columns;
2806 config->numSpare = 0; /* XXX should this be set here? */
2807 config->sectPerSU = clabel->sectPerSU;
2808 config->SUsPerPU = clabel->SUsPerPU;
2809 config->SUsPerRU = clabel->SUsPerRU;
2810 config->parityConfig = clabel->parityConfig;
2811 /* XXX... */
2812 strcpy(config->diskQueueType,"fifo");
2813 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
2814 config->layoutSpecificSize = 0; /* XXX ?? */
2815
2816 while(ac!=NULL) {
2817 /* row/col values will be in range due to the checks
2818 in reasonable_label() */
2819 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
2820 ac->devname);
2821 ac = ac->next;
2822 }
2823
2824 }
2825
2826 int
2827 rf_set_autoconfig(raidPtr, new_value)
2828 RF_Raid_t *raidPtr;
2829 int new_value;
2830 {
2831 RF_ComponentLabel_t clabel;
2832 struct vnode *vp;
2833 dev_t dev;
2834 int row, column;
2835
2836 for(row=0; row<raidPtr->numRow; row++) {
2837 for(column=0; column<raidPtr->numCol; column++) {
2838 dev = raidPtr->Disks[row][column].dev;
2839 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2840 raidread_component_label(dev, vp, &clabel);
2841 clabel.autoconfigure = new_value;
2842 raidwrite_component_label(dev, vp, &clabel);
2843 }
2844 }
2845 return(new_value);
2846 }
2847
2848 int
2849 rf_set_rootpartition(raidPtr, new_value)
2850 RF_Raid_t *raidPtr;
2851 int new_value;
2852 {
2853 RF_ComponentLabel_t clabel;
2854 struct vnode *vp;
2855 dev_t dev;
2856 int row, column;
2857
2858 for(row=0; row<raidPtr->numRow; row++) {
2859 for(column=0; column<raidPtr->numCol; column++) {
2860 dev = raidPtr->Disks[row][column].dev;
2861 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2862 raidread_component_label(dev, vp, &clabel);
2863 clabel.root_partition = new_value;
2864 raidwrite_component_label(dev, vp, &clabel);
2865 }
2866 }
2867 return(new_value);
2868 }
2869
2870 void
2871 release_all_vps(cset)
2872 RF_ConfigSet_t *cset;
2873 {
2874 RF_AutoConfig_t *ac;
2875
2876 ac = cset->ac;
2877 while(ac!=NULL) {
2878 /* Close the vp, and give it back */
2879 if (ac->vp) {
2880 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
2881 vput(ac->vp);
2882 }
2883 ac = ac->next;
2884 }
2885 }
2886
2887
2888 void
2889 cleanup_config_set(cset)
2890 RF_ConfigSet_t *cset;
2891 {
2892 RF_AutoConfig_t *ac;
2893 RF_AutoConfig_t *next_ac;
2894
2895 ac = cset->ac;
2896 while(ac!=NULL) {
2897 next_ac = ac->next;
2898 /* nuke the label */
2899 free(ac->clabel, M_RAIDFRAME);
2900 /* cleanup the config structure */
2901 free(ac, M_RAIDFRAME);
2902 /* "next.." */
2903 ac = next_ac;
2904 }
2905 /* and, finally, nuke the config set */
2906 free(cset, M_RAIDFRAME);
2907 }
2908
2909
2910 void
2911 raid_init_component_label(raidPtr, clabel)
2912 RF_Raid_t *raidPtr;
2913 RF_ComponentLabel_t *clabel;
2914 {
2915 /* current version number */
2916 clabel->version = RF_COMPONENT_LABEL_VERSION;
2917 	clabel->serial_number = raidPtr->serial_number;
2918 clabel->mod_counter = raidPtr->mod_counter;
2919 clabel->num_rows = raidPtr->numRow;
2920 clabel->num_columns = raidPtr->numCol;
2921 clabel->clean = RF_RAID_DIRTY; /* not clean */
2922 clabel->status = rf_ds_optimal; /* "It's good!" */
2923
2924 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
2925 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
2926 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
2927 /* XXX not portable */
2928 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
2929 /* XXX THIS SHOULD BE SET RIGHT!! */
2930 clabel->maxOutstanding = 100;
2931 clabel->autoconfigure = 0;
2932 clabel->root_partition = 0;
2933 clabel->last_unit = raidPtr->raidid;
2934 }
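/*
 * Editor's note: only the array-wide fields are filled in here.  Per-component
 * values such as row, column, partitionSize, blockSize and numBlocks are left
 * for the caller to supply before the label is written out with
 * raidwrite_component_label().
 */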
2935