rf_netbsdkintf.c revision 1.51 1 /* $NetBSD: rf_netbsdkintf.c,v 1.51 2000/02/22 03:38:42 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_copyback.h"
141 #include "rf_dag.h"
142 #include "rf_dagflags.h"
143 #include "rf_diskqueue.h"
144 #include "rf_acctrace.h"
145 #include "rf_etimer.h"
146 #include "rf_general.h"
147 #include "rf_debugMem.h"
148 #include "rf_kintf.h"
149 #include "rf_options.h"
150 #include "rf_driver.h"
151 #include "rf_parityscan.h"
152 #include "rf_debugprint.h"
153 #include "rf_threadstuff.h"
154
/* Debug verbosity for this file; db1_printf() only prints when this is > 0. */
int rf_kdebug_level = 0;

/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.
 *
 * The single macro argument is a complete, parenthesized printf argument
 * list, hence the double parentheses at every call site.
 *
 * Both variants expand to exactly one statement (do { ... } while (0)), so
 * the macro can be used in an unbraced if/else without capturing the
 * `else' (DEBUG build) or producing a stray `;' (non-DEBUG build).
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else /* DEBUG */
#define db1_printf(a) do { } while (0)
#endif /* DEBUG */
162
163 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
164
165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
166
167 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
168 * spare table */
169 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
170 * installation process */
171
172 /* prototypes */
173 static void KernelWakeupFunc(struct buf * bp);
174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
175 dev_t dev, RF_SectorNum_t startSect,
176 RF_SectorCount_t numSect, caddr_t buf,
177 void (*cbFunc) (struct buf *), void *cbArg,
178 int logBytesPerSector, struct proc * b_proc);
179 static int raidinit __P((dev_t, RF_Raid_t *, int));
180
181 void raidattach __P((int));
182 int raidsize __P((dev_t));
183 int raidopen __P((dev_t, int, int, struct proc *));
184 int raidclose __P((dev_t, int, int, struct proc *));
185 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
186 int raidwrite __P((dev_t, struct uio *, int));
187 int raidread __P((dev_t, struct uio *, int));
188 void raidstrategy __P((struct buf *));
189 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
190
191 /*
192 * Pilfered from ccd.c
193 */
194
195 struct raidbuf {
196 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
197 struct buf *rf_obp; /* ptr. to original I/O buf */
198 int rf_flags; /* misc. flags */
199 RF_DiskQueueData_t *req;/* the request that this was part of.. */
200 };
201
202
203 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
204 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
205
206 /* XXX Not sure if the following should be replacing the raidPtrs above,
207 or if it should be used in conjunction with that... */
208
209 struct raid_softc {
210 int sc_flags; /* flags */
211 int sc_cflags; /* configuration flags */
212 size_t sc_size; /* size of the raid device */
213 dev_t sc_dev; /* our device.. */
214 char sc_xname[20]; /* XXX external name */
215 struct disk sc_dkdev; /* generic disk device info */
216 struct pool sc_cbufpool; /* component buffer pool */
217 struct buf_queue buf_queue; /* used for the device queue */
218 };
/*
 * Bits for raid_softc.sc_flags.
 */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Map a dev_t to its raid unit number. */
#define raidunit(x) DISKUNIT(x)

/* Number of usable units; set by raidattach(), used as the unit bound. */
int numraid = 0;
228
229 /*
230 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
231 * Be aware that large numbers can allow the driver to consume a lot of
232 * kernel memory, especially on writes, and in degraded mode reads.
233 *
234 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
235 * a single 64K write will typically require 64K for the old data,
236 * 64K for the old parity, and 64K for the new parity, for a total
237 * of 192K (if the parity buffer is not re-used immediately).
238 * Even if it is used immediately, that's still 128K, which when multiplied
239 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
240 *
241 * Now in degraded mode, for example, a 64K read on the above setup may
242 * require data reconstruction, which will require *all* of the 4 remaining
243 * disks to participate -- 4 * 32K/disk == 128K again.
244 */
245
/* Default number of simultaneous I/Os per RAID device; may be overridden. */
#if !defined(RAIDOUTSTANDING)
#define RAIDOUTSTANDING 6
#endif

/* Device number of the raw partition of the unit that `dev' belongs to. */
#define RAIDLABELDEV(dev) \
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
252
253 /* declared here, and made public, for the benefit of KVM stuff.. */
254 struct raid_softc *raid_softc;
255
256 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
257 struct disklabel *));
258 static void raidgetdisklabel __P((dev_t));
259 static void raidmakedisklabel __P((struct raid_softc *));
260
261 static int raidlock __P((struct raid_softc *));
262 static void raidunlock __P((struct raid_softc *));
263
264 static void rf_markalldirty __P((RF_Raid_t *));
265 void rf_mountroot_hook __P((struct device *));
266
267 struct device *raidrootdev;
268 struct cfdata cf_raidrootdev;
269 struct cfdriver cfdrv;
270 /* XXX these should be moved up */
271 #include "rf_configure.h"
272 #include <sys/reboot.h>
273
274 void rf_ReconThread __P((struct rf_recon_req *));
275 /* XXX what I want is: */
276 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
277 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
278 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
279 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
280 void rf_buildroothack __P((void *));
281
282 RF_AutoConfig_t *rf_find_raid_components __P((void));
283 void print_component_label __P((RF_ComponentLabel_t *));
284 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
285 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
286 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
287 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
288 RF_Raid_t *));
289 int rf_set_autoconfig __P((RF_Raid_t *, int));
290 int rf_set_rootpartition __P((RF_Raid_t *, int));
291 void rf_release_all_vps __P((RF_ConfigSet_t *));
292 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
293 int rf_have_enough_components __P((RF_ConfigSet_t *));
294 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
295
296 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
297 allow autoconfig to take place */
298 /* XXX ugly hack. */
299 const char *raid_rooty = "raid0";
300 extern struct device *booted_device;
301
302 void
303 raidattach(num)
304 int num;
305 {
306 int raidID;
307 int i, rc;
308 RF_AutoConfig_t *ac_list; /* autoconfig list */
309 RF_ConfigSet_t *config_sets;
310
311 #ifdef DEBUG
312 printf("raidattach: Asked for %d units\n", num);
313 #endif
314
315 if (num <= 0) {
316 #ifdef DIAGNOSTIC
317 panic("raidattach: count <= 0");
318 #endif
319 return;
320 }
321 /* This is where all the initialization stuff gets done. */
322
323 numraid = num;
324
325 /* Make some space for requested number of units... */
326
327 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
328 if (raidPtrs == NULL) {
329 panic("raidPtrs is NULL!!\n");
330 }
331
332 rc = rf_mutex_init(&rf_sparet_wait_mutex);
333 if (rc) {
334 RF_PANIC();
335 }
336
337 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
338
339 for (i = 0; i < numraid; i++)
340 raidPtrs[i] = NULL;
341 rc = rf_BootRaidframe();
342 if (rc == 0)
343 printf("Kernelized RAIDframe activated\n");
344 else
345 panic("Serious error booting RAID!!\n");
346
347 /* put together some datastructures like the CCD device does.. This
348 * lets us lock the device and what-not when it gets opened. */
349
350 raid_softc = (struct raid_softc *)
351 malloc(num * sizeof(struct raid_softc),
352 M_RAIDFRAME, M_NOWAIT);
353 if (raid_softc == NULL) {
354 printf("WARNING: no memory for RAIDframe driver\n");
355 return;
356 }
357
358 bzero(raid_softc, num * sizeof(struct raid_softc));
359
360 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
361 M_RAIDFRAME, M_NOWAIT);
362 if (raidrootdev == NULL) {
363 panic("No memory for RAIDframe driver!!?!?!\n");
364 }
365
366 for (raidID = 0; raidID < num; raidID++) {
367 BUFQ_INIT(&raid_softc[raidID].buf_queue);
368
369 raidrootdev[raidID].dv_class = DV_DISK;
370 raidrootdev[raidID].dv_cfdata = NULL;
371 raidrootdev[raidID].dv_unit = raidID;
372 raidrootdev[raidID].dv_parent = NULL;
373 raidrootdev[raidID].dv_flags = 0;
374 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
375
376 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
377 (RF_Raid_t *));
378 if (raidPtrs[raidID] == NULL) {
379 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
380 numraid = raidID;
381 return;
382 }
383 }
384
385 if (raidautoconfig) {
386 /* 1. locate all RAID components on the system */
387
388 #if DEBUG
389 printf("Searching for raid components...\n");
390 #endif
391 ac_list = rf_find_raid_components();
392
393 /* 2. sort them into their respective sets */
394
395 config_sets = rf_create_auto_sets(ac_list);
396
397 /* 3. evaluate each set and configure the valid ones
398 This gets done in rf_buildroothack() */
399
400 /* schedule the creation of the thread to do the
401 "/ on RAID" stuff */
402
403 kthread_create(rf_buildroothack,config_sets);
404
405 /* 4. make sure we get our mud.. I mean root.. hooks in.. */
406 /* XXXX pick raid0 for now... and this should be only done
407 if we find something that's bootable!!! */
408 #if 0
409 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
410 #endif
411 if (boothowto & RB_ASKNAME) {
412 /* We don't auto-config... */
413 } else {
414 /* They didn't ask, and we found something bootable... */
415 /* XXX pretend for now.. */
416 #if 0
417 booted_device = &raidrootdev[0];
418 #endif
419 }
420 }
421
422 }
423
424 void
425 rf_buildroothack(arg)
426 void *arg;
427 {
428 RF_ConfigSet_t *config_sets = arg;
429 RF_ConfigSet_t *cset;
430 RF_ConfigSet_t *next_cset;
431 int retcode;
432 int raidID;
433 int rootID;
434 int num_root;
435
436 num_root = 0;
437 cset = config_sets;
438 while(cset != NULL ) {
439 next_cset = cset->next;
440 if (rf_have_enough_components(cset) &&
441 cset->ac->clabel->autoconfigure==1) {
442 retcode = rf_auto_config_set(cset,&raidID);
443 if (!retcode) {
444 if (cset->rootable) {
445 rootID = raidID;
446 num_root++;
447 }
448 } else {
449 /* The autoconfig didn't work :( */
450 #if DEBUG
451 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
452 #endif
453 rf_release_all_vps(cset);
454 #if DEBUG
455 printf("Done cleanup\n");
456 #endif
457 }
458 } else {
459 /* we're not autoconfiguring this set...
460 release the associated resources */
461 #if DEBUG
462 printf("Releasing vp's\n");
463 #endif
464 rf_release_all_vps(cset);
465 #if DEBUG
466 printf("Done.\n");
467 #endif
468 }
469 /* cleanup */
470 #if DEBUG
471 printf("Cleaning up config set\n");
472 #endif
473 rf_cleanup_config_set(cset);
474 #if DEBUG
475 printf("Done cleanup\n");
476 #endif
477 cset = next_cset;
478 }
479 if (boothowto & RB_ASKNAME) {
480 /* We don't auto-config... */
481 } else {
482 /* They didn't ask, and we found something bootable... */
483 /* XXX pretend for now.. */
484 if (num_root == 1) {
485 #if 1
486 booted_device = &raidrootdev[rootID];
487 #endif
488 } else if (num_root > 1) {
489 /* we can't guess.. require the user to answer... */
490 boothowto |= RB_ASKNAME;
491 }
492 }
493 }
494
495
496 int
497 raidsize(dev)
498 dev_t dev;
499 {
500 struct raid_softc *rs;
501 struct disklabel *lp;
502 int part, unit, omask, size;
503
504 unit = raidunit(dev);
505 if (unit >= numraid)
506 return (-1);
507 rs = &raid_softc[unit];
508
509 if ((rs->sc_flags & RAIDF_INITED) == 0)
510 return (-1);
511
512 part = DISKPART(dev);
513 omask = rs->sc_dkdev.dk_openmask & (1 << part);
514 lp = rs->sc_dkdev.dk_label;
515
516 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
517 return (-1);
518
519 if (lp->d_partitions[part].p_fstype != FS_SWAP)
520 size = -1;
521 else
522 size = lp->d_partitions[part].p_size *
523 (lp->d_secsize / DEV_BSIZE);
524
525 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
526 return (-1);
527
528 return (size);
529
530 }
531
532 int
533 raiddump(dev, blkno, va, size)
534 dev_t dev;
535 daddr_t blkno;
536 caddr_t va;
537 size_t size;
538 {
539 /* Not implemented. */
540 return ENXIO;
541 }
542 /* ARGSUSED */
543 int
544 raidopen(dev, flags, fmt, p)
545 dev_t dev;
546 int flags, fmt;
547 struct proc *p;
548 {
549 int unit = raidunit(dev);
550 struct raid_softc *rs;
551 struct disklabel *lp;
552 int part, pmask;
553 int error = 0;
554
555 if (unit >= numraid)
556 return (ENXIO);
557 rs = &raid_softc[unit];
558
559 if ((error = raidlock(rs)) != 0)
560 return (error);
561 lp = rs->sc_dkdev.dk_label;
562
563 part = DISKPART(dev);
564 pmask = (1 << part);
565
566 db1_printf(("Opening raid device number: %d partition: %d\n",
567 unit, part));
568
569
570 if ((rs->sc_flags & RAIDF_INITED) &&
571 (rs->sc_dkdev.dk_openmask == 0))
572 raidgetdisklabel(dev);
573
574 /* make sure that this partition exists */
575
576 if (part != RAW_PART) {
577 db1_printf(("Not a raw partition..\n"));
578 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
579 ((part >= lp->d_npartitions) ||
580 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
581 error = ENXIO;
582 raidunlock(rs);
583 db1_printf(("Bailing out...\n"));
584 return (error);
585 }
586 }
587 /* Prevent this unit from being unconfigured while open. */
588 switch (fmt) {
589 case S_IFCHR:
590 rs->sc_dkdev.dk_copenmask |= pmask;
591 break;
592
593 case S_IFBLK:
594 rs->sc_dkdev.dk_bopenmask |= pmask;
595 break;
596 }
597
598 if ((rs->sc_dkdev.dk_openmask == 0) &&
599 ((rs->sc_flags & RAIDF_INITED) != 0)) {
600 /* First one... mark things as dirty... Note that we *MUST*
601 have done a configure before this. I DO NOT WANT TO BE
602 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
603 THAT THEY BELONG TOGETHER!!!!! */
604 /* XXX should check to see if we're only open for reading
605 here... If so, we needn't do this, but then need some
606 other way of keeping track of what's happened.. */
607
608 rf_markalldirty( raidPtrs[unit] );
609 }
610
611
612 rs->sc_dkdev.dk_openmask =
613 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
614
615 raidunlock(rs);
616
617 return (error);
618
619
620 }
621 /* ARGSUSED */
622 int
623 raidclose(dev, flags, fmt, p)
624 dev_t dev;
625 int flags, fmt;
626 struct proc *p;
627 {
628 int unit = raidunit(dev);
629 struct raid_softc *rs;
630 int error = 0;
631 int part;
632
633 if (unit >= numraid)
634 return (ENXIO);
635 rs = &raid_softc[unit];
636
637 if ((error = raidlock(rs)) != 0)
638 return (error);
639
640 part = DISKPART(dev);
641
642 /* ...that much closer to allowing unconfiguration... */
643 switch (fmt) {
644 case S_IFCHR:
645 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
646 break;
647
648 case S_IFBLK:
649 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
650 break;
651 }
652 rs->sc_dkdev.dk_openmask =
653 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
654
655 if ((rs->sc_dkdev.dk_openmask == 0) &&
656 ((rs->sc_flags & RAIDF_INITED) != 0)) {
657 /* Last one... device is not unconfigured yet.
658 Device shutdown has taken care of setting the
659 clean bits if RAIDF_INITED is not set
660 mark things as clean... */
661 rf_update_component_labels( raidPtrs[unit] );
662 }
663
664 raidunlock(rs);
665 return (0);
666
667 }
668
669 void
670 raidstrategy(bp)
671 register struct buf *bp;
672 {
673 register int s;
674
675 unsigned int raidID = raidunit(bp->b_dev);
676 RF_Raid_t *raidPtr;
677 struct raid_softc *rs = &raid_softc[raidID];
678 struct disklabel *lp;
679 int wlabel;
680
681 if ((rs->sc_flags & RAIDF_INITED) ==0) {
682 bp->b_error = ENXIO;
683 bp->b_flags = B_ERROR;
684 bp->b_resid = bp->b_bcount;
685 biodone(bp);
686 return;
687 }
688 if (raidID >= numraid || !raidPtrs[raidID]) {
689 bp->b_error = ENODEV;
690 bp->b_flags |= B_ERROR;
691 bp->b_resid = bp->b_bcount;
692 biodone(bp);
693 return;
694 }
695 raidPtr = raidPtrs[raidID];
696 if (!raidPtr->valid) {
697 bp->b_error = ENODEV;
698 bp->b_flags |= B_ERROR;
699 bp->b_resid = bp->b_bcount;
700 biodone(bp);
701 return;
702 }
703 if (bp->b_bcount == 0) {
704 db1_printf(("b_bcount is zero..\n"));
705 biodone(bp);
706 return;
707 }
708 lp = rs->sc_dkdev.dk_label;
709
710 /*
711 * Do bounds checking and adjust transfer. If there's an
712 * error, the bounds check will flag that for us.
713 */
714
715 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
716 if (DISKPART(bp->b_dev) != RAW_PART)
717 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
718 db1_printf(("Bounds check failed!!:%d %d\n",
719 (int) bp->b_blkno, (int) wlabel));
720 biodone(bp);
721 return;
722 }
723 s = splbio();
724
725 bp->b_resid = 0;
726
727 /* stuff it onto our queue */
728 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
729
730 raidstart(raidPtrs[raidID]);
731
732 splx(s);
733 }
734 /* ARGSUSED */
735 int
736 raidread(dev, uio, flags)
737 dev_t dev;
738 struct uio *uio;
739 int flags;
740 {
741 int unit = raidunit(dev);
742 struct raid_softc *rs;
743 int part;
744
745 if (unit >= numraid)
746 return (ENXIO);
747 rs = &raid_softc[unit];
748
749 if ((rs->sc_flags & RAIDF_INITED) == 0)
750 return (ENXIO);
751 part = DISKPART(dev);
752
753 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
754
755 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
756
757 }
758 /* ARGSUSED */
759 int
760 raidwrite(dev, uio, flags)
761 dev_t dev;
762 struct uio *uio;
763 int flags;
764 {
765 int unit = raidunit(dev);
766 struct raid_softc *rs;
767
768 if (unit >= numraid)
769 return (ENXIO);
770 rs = &raid_softc[unit];
771
772 if ((rs->sc_flags & RAIDF_INITED) == 0)
773 return (ENXIO);
774 db1_printf(("raidwrite\n"));
775 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
776
777 }
778
779 int
780 raidioctl(dev, cmd, data, flag, p)
781 dev_t dev;
782 u_long cmd;
783 caddr_t data;
784 int flag;
785 struct proc *p;
786 {
787 int unit = raidunit(dev);
788 int error = 0;
789 int part, pmask;
790 struct raid_softc *rs;
791 RF_Config_t *k_cfg, *u_cfg;
792 RF_Raid_t *raidPtr;
793 RF_RaidDisk_t *diskPtr;
794 RF_AccTotals_t *totals;
795 RF_DeviceConfig_t *d_cfg, **ucfgp;
796 u_char *specific_buf;
797 int retcode = 0;
798 int row;
799 int column;
800 struct rf_recon_req *rrcopy, *rr;
801 RF_ComponentLabel_t *clabel;
802 RF_ComponentLabel_t ci_label;
803 RF_ComponentLabel_t **clabel_ptr;
804 RF_SingleComponent_t *sparePtr,*componentPtr;
805 RF_SingleComponent_t hot_spare;
806 RF_SingleComponent_t component;
807 int i, j, d;
808
809 if (unit >= numraid)
810 return (ENXIO);
811 rs = &raid_softc[unit];
812 raidPtr = raidPtrs[unit];
813
814 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
815 (int) DISKPART(dev), (int) unit, (int) cmd));
816
817 /* Must be open for writes for these commands... */
818 switch (cmd) {
819 case DIOCSDINFO:
820 case DIOCWDINFO:
821 case DIOCWLABEL:
822 if ((flag & FWRITE) == 0)
823 return (EBADF);
824 }
825
826 /* Must be initialized for these... */
827 switch (cmd) {
828 case DIOCGDINFO:
829 case DIOCSDINFO:
830 case DIOCWDINFO:
831 case DIOCGPART:
832 case DIOCWLABEL:
833 case DIOCGDEFLABEL:
834 case RAIDFRAME_SHUTDOWN:
835 case RAIDFRAME_REWRITEPARITY:
836 case RAIDFRAME_GET_INFO:
837 case RAIDFRAME_RESET_ACCTOTALS:
838 case RAIDFRAME_GET_ACCTOTALS:
839 case RAIDFRAME_KEEP_ACCTOTALS:
840 case RAIDFRAME_GET_SIZE:
841 case RAIDFRAME_FAIL_DISK:
842 case RAIDFRAME_COPYBACK:
843 case RAIDFRAME_CHECK_RECON_STATUS:
844 case RAIDFRAME_GET_COMPONENT_LABEL:
845 case RAIDFRAME_SET_COMPONENT_LABEL:
846 case RAIDFRAME_ADD_HOT_SPARE:
847 case RAIDFRAME_REMOVE_HOT_SPARE:
848 case RAIDFRAME_INIT_LABELS:
849 case RAIDFRAME_REBUILD_IN_PLACE:
850 case RAIDFRAME_CHECK_PARITY:
851 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
852 case RAIDFRAME_CHECK_COPYBACK_STATUS:
853 case RAIDFRAME_SET_AUTOCONFIG:
854 case RAIDFRAME_SET_ROOT:
855 if ((rs->sc_flags & RAIDF_INITED) == 0)
856 return (ENXIO);
857 }
858
859 switch (cmd) {
860
861 /* configure the system */
862 case RAIDFRAME_CONFIGURE:
863
864 if (raidPtr->valid) {
865 /* There is a valid RAID set running on this unit! */
866 printf("raid%d: Device already configured!\n",unit);
867 }
868
869 /* copy-in the configuration information */
870 /* data points to a pointer to the configuration structure */
871
872 u_cfg = *((RF_Config_t **) data);
873 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
874 if (k_cfg == NULL) {
875 return (ENOMEM);
876 }
877 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
878 sizeof(RF_Config_t));
879 if (retcode) {
880 RF_Free(k_cfg, sizeof(RF_Config_t));
881 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
882 retcode));
883 return (retcode);
884 }
885 /* allocate a buffer for the layout-specific data, and copy it
886 * in */
887 if (k_cfg->layoutSpecificSize) {
888 if (k_cfg->layoutSpecificSize > 10000) {
889 /* sanity check */
890 RF_Free(k_cfg, sizeof(RF_Config_t));
891 return (EINVAL);
892 }
893 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
894 (u_char *));
895 if (specific_buf == NULL) {
896 RF_Free(k_cfg, sizeof(RF_Config_t));
897 return (ENOMEM);
898 }
899 retcode = copyin(k_cfg->layoutSpecific,
900 (caddr_t) specific_buf,
901 k_cfg->layoutSpecificSize);
902 if (retcode) {
903 RF_Free(k_cfg, sizeof(RF_Config_t));
904 RF_Free(specific_buf,
905 k_cfg->layoutSpecificSize);
906 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
907 retcode));
908 return (retcode);
909 }
910 } else
911 specific_buf = NULL;
912 k_cfg->layoutSpecific = specific_buf;
913
914 /* should do some kind of sanity check on the configuration.
915 * Store the sum of all the bytes in the last byte? */
916
917 /* configure the system */
918
919 /*
920 * Clear the entire RAID descriptor, just to make sure
921 * there is no stale data left in the case of a
922 * reconfiguration
923 */
924 bzero((char *) raidPtr, sizeof(RF_Raid_t));
925 raidPtr->raidid = unit;
926
927 retcode = rf_Configure(raidPtr, k_cfg, NULL);
928
929 if (retcode == 0) {
930
931 /* allow this many simultaneous IO's to
932 this RAID device */
933 raidPtr->openings = RAIDOUTSTANDING;
934
935 retcode = raidinit(dev, raidPtr, unit);
936 rf_markalldirty( raidPtr );
937 }
938 /* free the buffers. No return code here. */
939 if (k_cfg->layoutSpecificSize) {
940 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
941 }
942 RF_Free(k_cfg, sizeof(RF_Config_t));
943
944 return (retcode);
945
946 /* shutdown the system */
947 case RAIDFRAME_SHUTDOWN:
948
949 if ((error = raidlock(rs)) != 0)
950 return (error);
951
952 /*
953 * If somebody has a partition mounted, we shouldn't
954 * shutdown.
955 */
956
957 part = DISKPART(dev);
958 pmask = (1 << part);
959 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
960 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
961 (rs->sc_dkdev.dk_copenmask & pmask))) {
962 raidunlock(rs);
963 return (EBUSY);
964 }
965
966 retcode = rf_Shutdown(raidPtr);
967
968 pool_destroy(&rs->sc_cbufpool);
969
970 /* It's no longer initialized... */
971 rs->sc_flags &= ~RAIDF_INITED;
972
973 /* Detach the disk. */
974 disk_detach(&rs->sc_dkdev);
975
976 raidunlock(rs);
977
978 return (retcode);
979 case RAIDFRAME_GET_COMPONENT_LABEL:
980 clabel_ptr = (RF_ComponentLabel_t **) data;
981 /* need to read the component label for the disk indicated
982 by row,column in clabel */
983
984 /* For practice, let's get it directly fromdisk, rather
985 than from the in-core copy */
986 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
987 (RF_ComponentLabel_t *));
988 if (clabel == NULL)
989 return (ENOMEM);
990
991 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
992
993 retcode = copyin( *clabel_ptr, clabel,
994 sizeof(RF_ComponentLabel_t));
995
996 if (retcode) {
997 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
998 return(retcode);
999 }
1000
1001 row = clabel->row;
1002 column = clabel->column;
1003
1004 if ((row < 0) || (row >= raidPtr->numRow) ||
1005 (column < 0) || (column >= raidPtr->numCol)) {
1006 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1007 return(EINVAL);
1008 }
1009
1010 raidread_component_label(raidPtr->Disks[row][column].dev,
1011 raidPtr->raid_cinfo[row][column].ci_vp,
1012 clabel );
1013
1014 retcode = copyout((caddr_t) clabel,
1015 (caddr_t) *clabel_ptr,
1016 sizeof(RF_ComponentLabel_t));
1017 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1018 return (retcode);
1019
1020 case RAIDFRAME_SET_COMPONENT_LABEL:
1021 clabel = (RF_ComponentLabel_t *) data;
1022
1023 /* XXX check the label for valid stuff... */
1024 /* Note that some things *should not* get modified --
1025 the user should be re-initing the labels instead of
1026 trying to patch things.
1027 */
1028
1029 printf("Got component label:\n");
1030 printf("Version: %d\n",clabel->version);
1031 printf("Serial Number: %d\n",clabel->serial_number);
1032 printf("Mod counter: %d\n",clabel->mod_counter);
1033 printf("Row: %d\n", clabel->row);
1034 printf("Column: %d\n", clabel->column);
1035 printf("Num Rows: %d\n", clabel->num_rows);
1036 printf("Num Columns: %d\n", clabel->num_columns);
1037 printf("Clean: %d\n", clabel->clean);
1038 printf("Status: %d\n", clabel->status);
1039
1040 row = clabel->row;
1041 column = clabel->column;
1042
1043 if ((row < 0) || (row >= raidPtr->numRow) ||
1044 (column < 0) || (column >= raidPtr->numCol)) {
1045 return(EINVAL);
1046 }
1047
1048 /* XXX this isn't allowed to do anything for now :-) */
1049
1050 /* XXX and before it is, we need to fill in the rest
1051 of the fields!?!?!?! */
1052 #if 0
1053 raidwrite_component_label(
1054 raidPtr->Disks[row][column].dev,
1055 raidPtr->raid_cinfo[row][column].ci_vp,
1056 clabel );
1057 #endif
1058 return (0);
1059
1060 case RAIDFRAME_INIT_LABELS:
1061 clabel = (RF_ComponentLabel_t *) data;
1062 /*
1063 we only want the serial number from
1064 the above. We get all the rest of the information
1065 from the config that was used to create this RAID
1066 set.
1067 */
1068
1069 raidPtr->serial_number = clabel->serial_number;
1070
1071 raid_init_component_label(raidPtr, &ci_label);
1072 ci_label.serial_number = clabel->serial_number;
1073
1074 for(row=0;row<raidPtr->numRow;row++) {
1075 ci_label.row = row;
1076 for(column=0;column<raidPtr->numCol;column++) {
1077 diskPtr = &raidPtr->Disks[row][column];
1078 ci_label.blockSize = diskPtr->blockSize;
1079 ci_label.numBlocks = diskPtr->numBlocks;
1080 ci_label.partitionSize = diskPtr->partitionSize;
1081 ci_label.column = column;
1082 raidwrite_component_label(
1083 raidPtr->Disks[row][column].dev,
1084 raidPtr->raid_cinfo[row][column].ci_vp,
1085 &ci_label );
1086 }
1087 }
1088
1089 return (retcode);
1090 case RAIDFRAME_SET_AUTOCONFIG:
1091 d = rf_set_autoconfig(raidPtr, *data);
1092 printf("New autoconfig value is: %d\n", d);
1093 *data = d;
1094 return (retcode);
1095
1096 case RAIDFRAME_SET_ROOT:
1097 d = rf_set_rootpartition(raidPtr, *data);
1098 printf("New rootpartition value is: %d\n", d);
1099 *data = d;
1100 return (retcode);
1101
1102 /* initialize all parity */
1103 case RAIDFRAME_REWRITEPARITY:
1104
1105 if (raidPtr->Layout.map->faultsTolerated == 0) {
1106 /* Parity for RAID 0 is trivially correct */
1107 raidPtr->parity_good = RF_RAID_CLEAN;
1108 return(0);
1109 }
1110
1111 if (raidPtr->parity_rewrite_in_progress == 1) {
1112 /* Re-write is already in progress! */
1113 return(EINVAL);
1114 }
1115
1116 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1117 rf_RewriteParityThread,
1118 raidPtr,"raid_parity");
1119 return (retcode);
1120
1121
1122 case RAIDFRAME_ADD_HOT_SPARE:
1123 sparePtr = (RF_SingleComponent_t *) data;
1124 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1125 printf("Adding spare\n");
1126 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1127 return(retcode);
1128
1129 case RAIDFRAME_REMOVE_HOT_SPARE:
1130 return(retcode);
1131
1132 case RAIDFRAME_REBUILD_IN_PLACE:
1133
1134 if (raidPtr->Layout.map->faultsTolerated == 0) {
1135 /* Can't do this on a RAID 0!! */
1136 return(EINVAL);
1137 }
1138
1139 if (raidPtr->recon_in_progress == 1) {
1140 /* a reconstruct is already in progress! */
1141 return(EINVAL);
1142 }
1143
1144 componentPtr = (RF_SingleComponent_t *) data;
1145 memcpy( &component, componentPtr,
1146 sizeof(RF_SingleComponent_t));
1147 row = component.row;
1148 column = component.column;
1149 printf("Rebuild: %d %d\n",row, column);
1150 if ((row < 0) || (row >= raidPtr->numRow) ||
1151 (column < 0) || (column >= raidPtr->numCol)) {
1152 return(EINVAL);
1153 }
1154
1155 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1156 if (rrcopy == NULL)
1157 return(ENOMEM);
1158
1159 rrcopy->raidPtr = (void *) raidPtr;
1160 rrcopy->row = row;
1161 rrcopy->col = column;
1162
1163 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1164 rf_ReconstructInPlaceThread,
1165 rrcopy,"raid_reconip");
1166 return(retcode);
1167
1168 case RAIDFRAME_GET_INFO:
1169 if (!raidPtr->valid)
1170 return (ENODEV);
1171 ucfgp = (RF_DeviceConfig_t **) data;
1172 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1173 (RF_DeviceConfig_t *));
1174 if (d_cfg == NULL)
1175 return (ENOMEM);
1176 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1177 d_cfg->rows = raidPtr->numRow;
1178 d_cfg->cols = raidPtr->numCol;
1179 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1180 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1181 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1182 return (ENOMEM);
1183 }
1184 d_cfg->nspares = raidPtr->numSpare;
1185 if (d_cfg->nspares >= RF_MAX_DISKS) {
1186 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1187 return (ENOMEM);
1188 }
1189 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1190 d = 0;
1191 for (i = 0; i < d_cfg->rows; i++) {
1192 for (j = 0; j < d_cfg->cols; j++) {
1193 d_cfg->devs[d] = raidPtr->Disks[i][j];
1194 d++;
1195 }
1196 }
1197 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1198 d_cfg->spares[i] = raidPtr->Disks[0][j];
1199 }
1200 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1201 sizeof(RF_DeviceConfig_t));
1202 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1203
1204 return (retcode);
1205
1206 case RAIDFRAME_CHECK_PARITY:
1207 *(int *) data = raidPtr->parity_good;
1208 return (0);
1209
1210 case RAIDFRAME_RESET_ACCTOTALS:
1211 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1212 return (0);
1213
1214 case RAIDFRAME_GET_ACCTOTALS:
1215 totals = (RF_AccTotals_t *) data;
1216 *totals = raidPtr->acc_totals;
1217 return (0);
1218
1219 case RAIDFRAME_KEEP_ACCTOTALS:
1220 raidPtr->keep_acc_totals = *(int *)data;
1221 return (0);
1222
1223 case RAIDFRAME_GET_SIZE:
1224 *(int *) data = raidPtr->totalSectors;
1225 return (0);
1226
1227 /* fail a disk & optionally start reconstruction */
1228 case RAIDFRAME_FAIL_DISK:
1229
1230 if (raidPtr->Layout.map->faultsTolerated == 0) {
1231 /* Can't do this on a RAID 0!! */
1232 return(EINVAL);
1233 }
1234
1235 rr = (struct rf_recon_req *) data;
1236
1237 if (rr->row < 0 || rr->row >= raidPtr->numRow
1238 || rr->col < 0 || rr->col >= raidPtr->numCol)
1239 return (EINVAL);
1240
1241 printf("raid%d: Failing the disk: row: %d col: %d\n",
1242 unit, rr->row, rr->col);
1243
1244 /* make a copy of the recon request so that we don't rely on
1245 * the user's buffer */
1246 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1247 if (rrcopy == NULL)
1248 return(ENOMEM);
1249 bcopy(rr, rrcopy, sizeof(*rr));
1250 rrcopy->raidPtr = (void *) raidPtr;
1251
1252 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1253 rf_ReconThread,
1254 rrcopy,"raid_recon");
1255 return (0);
1256
1257 /* invoke a copyback operation after recon on whatever disk
1258 * needs it, if any */
1259 case RAIDFRAME_COPYBACK:
1260
1261 if (raidPtr->Layout.map->faultsTolerated == 0) {
1262 /* This makes no sense on a RAID 0!! */
1263 return(EINVAL);
1264 }
1265
1266 if (raidPtr->copyback_in_progress == 1) {
1267 /* Copyback is already in progress! */
1268 return(EINVAL);
1269 }
1270
1271 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1272 rf_CopybackThread,
1273 raidPtr,"raid_copyback");
1274 return (retcode);
1275
1276 /* return the percentage completion of reconstruction */
1277 case RAIDFRAME_CHECK_RECON_STATUS:
1278 if (raidPtr->Layout.map->faultsTolerated == 0) {
1279 /* This makes no sense on a RAID 0 */
1280 return(EINVAL);
1281 }
1282 row = 0; /* XXX we only consider a single row... */
1283 if (raidPtr->status[row] != rf_rs_reconstructing)
1284 *(int *) data = 100;
1285 else
1286 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1287 return (0);
1288
1289 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1290 if (raidPtr->Layout.map->faultsTolerated == 0) {
1291 /* This makes no sense on a RAID 0 */
1292 return(EINVAL);
1293 }
1294 if (raidPtr->parity_rewrite_in_progress == 1) {
1295 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1296 } else {
1297 *(int *) data = 100;
1298 }
1299 return (0);
1300
1301 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1302 if (raidPtr->Layout.map->faultsTolerated == 0) {
1303 /* This makes no sense on a RAID 0 */
1304 return(EINVAL);
1305 }
1306 if (raidPtr->copyback_in_progress == 1) {
1307 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1308 raidPtr->Layout.numStripe;
1309 } else {
1310 *(int *) data = 100;
1311 }
1312 return (0);
1313
1314
1315 /* the sparetable daemon calls this to wait for the kernel to
1316 * need a spare table. this ioctl does not return until a
1317 * spare table is needed. XXX -- calling mpsleep here in the
1318 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1319 * -- I should either compute the spare table in the kernel,
1320 * or have a different -- XXX XXX -- interface (a different
1321 * character device) for delivering the table -- XXX */
1322 #if 0
1323 case RAIDFRAME_SPARET_WAIT:
1324 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1325 while (!rf_sparet_wait_queue)
1326 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1327 waitreq = rf_sparet_wait_queue;
1328 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1329 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1330
1331 /* structure assignment */
1332 *((RF_SparetWait_t *) data) = *waitreq;
1333
1334 RF_Free(waitreq, sizeof(*waitreq));
1335 return (0);
1336
1337 /* wakes up a process waiting on SPARET_WAIT and puts an error
1338 * code in it that will cause the dameon to exit */
1339 case RAIDFRAME_ABORT_SPARET_WAIT:
1340 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1341 waitreq->fcol = -1;
1342 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1343 waitreq->next = rf_sparet_wait_queue;
1344 rf_sparet_wait_queue = waitreq;
1345 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1346 wakeup(&rf_sparet_wait_queue);
1347 return (0);
1348
1349 /* used by the spare table daemon to deliver a spare table
1350 * into the kernel */
1351 case RAIDFRAME_SEND_SPARET:
1352
1353 /* install the spare table */
1354 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1355
1356 /* respond to the requestor. the return status of the spare
1357 * table installation is passed in the "fcol" field */
1358 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1359 waitreq->fcol = retcode;
1360 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1361 waitreq->next = rf_sparet_resp_queue;
1362 rf_sparet_resp_queue = waitreq;
1363 wakeup(&rf_sparet_resp_queue);
1364 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1365
1366 return (retcode);
1367 #endif
1368
1369 default:
1370 break; /* fall through to the os-specific code below */
1371
1372 }
1373
1374 if (!raidPtr->valid)
1375 return (EINVAL);
1376
1377 /*
1378 * Add support for "regular" device ioctls here.
1379 */
1380
1381 switch (cmd) {
1382 case DIOCGDINFO:
1383 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1384 break;
1385
1386 case DIOCGPART:
1387 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1388 ((struct partinfo *) data)->part =
1389 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1390 break;
1391
1392 case DIOCWDINFO:
1393 case DIOCSDINFO:
1394 if ((error = raidlock(rs)) != 0)
1395 return (error);
1396
1397 rs->sc_flags |= RAIDF_LABELLING;
1398
1399 error = setdisklabel(rs->sc_dkdev.dk_label,
1400 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1401 if (error == 0) {
1402 if (cmd == DIOCWDINFO)
1403 error = writedisklabel(RAIDLABELDEV(dev),
1404 raidstrategy, rs->sc_dkdev.dk_label,
1405 rs->sc_dkdev.dk_cpulabel);
1406 }
1407 rs->sc_flags &= ~RAIDF_LABELLING;
1408
1409 raidunlock(rs);
1410
1411 if (error)
1412 return (error);
1413 break;
1414
1415 case DIOCWLABEL:
1416 if (*(int *) data != 0)
1417 rs->sc_flags |= RAIDF_WLABEL;
1418 else
1419 rs->sc_flags &= ~RAIDF_WLABEL;
1420 break;
1421
1422 case DIOCGDEFLABEL:
1423 raidgetdefaultlabel(raidPtr, rs,
1424 (struct disklabel *) data);
1425 break;
1426
1427 default:
1428 retcode = ENOTTY;
1429 }
1430 return (retcode);
1431
1432 }
1433
1434
1435 /* raidinit -- complete the rest of the initialization for the
1436 RAIDframe device. */
1437
1438
1439 static int
1440 raidinit(dev, raidPtr, unit)
1441 dev_t dev;
1442 RF_Raid_t *raidPtr;
1443 int unit;
1444 {
1445 int retcode;
1446 struct raid_softc *rs;
1447
1448 retcode = 0;
1449
1450 rs = &raid_softc[unit];
1451 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1452 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1453
1454
1455 /* XXX should check return code first... */
1456 rs->sc_flags |= RAIDF_INITED;
1457
1458 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1459
1460 rs->sc_dkdev.dk_name = rs->sc_xname;
1461
1462 /* disk_attach actually creates space for the CPU disklabel, among
1463 * other things, so it's critical to call this *BEFORE* we try putzing
1464 * with disklabels. */
1465
1466 disk_attach(&rs->sc_dkdev);
1467
1468 /* XXX There may be a weird interaction here between this, and
1469 * protectedSectors, as used in RAIDframe. */
1470
1471 rs->sc_size = raidPtr->totalSectors;
1472 rs->sc_dev = dev;
1473
1474 return (retcode);
1475 }
1476
1477 /* wake up the daemon & tell it to get us a spare table
1478 * XXX
1479 * the entries in the queues should be tagged with the raidPtr
1480 * so that in the extremely rare case that two recons happen at once,
1481 * we know for which device were requesting a spare table
1482 * XXX
1483 *
1484 * XXX This code is not currently used. GO
1485 */
1486 int
1487 rf_GetSpareTableFromDaemon(req)
1488 RF_SparetWait_t *req;
1489 {
1490 int retcode;
1491
1492 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1493 req->next = rf_sparet_wait_queue;
1494 rf_sparet_wait_queue = req;
1495 wakeup(&rf_sparet_wait_queue);
1496
1497 /* mpsleep unlocks the mutex */
1498 while (!rf_sparet_resp_queue) {
1499 tsleep(&rf_sparet_resp_queue, PRIBIO,
1500 "raidframe getsparetable", 0);
1501 }
1502 req = rf_sparet_resp_queue;
1503 rf_sparet_resp_queue = req->next;
1504 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1505
1506 retcode = req->fcol;
1507 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1508 * alloc'd */
1509 return (retcode);
1510 }
1511
1512 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1513 * bp & passes it down.
1514 * any calls originating in the kernel must use non-blocking I/O
1515 * do some extra sanity checking to return "appropriate" error values for
1516 * certain conditions (to make some standard utilities work)
1517 *
1518 * Formerly known as: rf_DoAccessKernel
1519 */
1520 void
1521 raidstart(raidPtr)
1522 RF_Raid_t *raidPtr;
1523 {
1524 RF_SectorCount_t num_blocks, pb, sum;
1525 RF_RaidAddr_t raid_addr;
1526 int retcode;
1527 struct partition *pp;
1528 daddr_t blocknum;
1529 int unit;
1530 struct raid_softc *rs;
1531 int do_async;
1532 struct buf *bp;
1533
1534 unit = raidPtr->raidid;
1535 rs = &raid_softc[unit];
1536
1537 /* Check to see if we're at the limit... */
1538 RF_LOCK_MUTEX(raidPtr->mutex);
1539 while (raidPtr->openings > 0) {
1540 RF_UNLOCK_MUTEX(raidPtr->mutex);
1541
1542 /* get the next item, if any, from the queue */
1543 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1544 /* nothing more to do */
1545 return;
1546 }
1547 BUFQ_REMOVE(&rs->buf_queue, bp);
1548
1549 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1550 * partition.. Need to make it absolute to the underlying
1551 * device.. */
1552
1553 blocknum = bp->b_blkno;
1554 if (DISKPART(bp->b_dev) != RAW_PART) {
1555 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1556 blocknum += pp->p_offset;
1557 }
1558
1559 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1560 (int) blocknum));
1561
1562 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1563 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1564
1565 /* *THIS* is where we adjust what block we're going to...
1566 * but DO NOT TOUCH bp->b_blkno!!! */
1567 raid_addr = blocknum;
1568
1569 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1570 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1571 sum = raid_addr + num_blocks + pb;
1572 if (1 || rf_debugKernelAccess) {
1573 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1574 (int) raid_addr, (int) sum, (int) num_blocks,
1575 (int) pb, (int) bp->b_resid));
1576 }
1577 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1578 || (sum < num_blocks) || (sum < pb)) {
1579 bp->b_error = ENOSPC;
1580 bp->b_flags |= B_ERROR;
1581 bp->b_resid = bp->b_bcount;
1582 biodone(bp);
1583 RF_LOCK_MUTEX(raidPtr->mutex);
1584 continue;
1585 }
1586 /*
1587 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1588 */
1589
1590 if (bp->b_bcount & raidPtr->sectorMask) {
1591 bp->b_error = EINVAL;
1592 bp->b_flags |= B_ERROR;
1593 bp->b_resid = bp->b_bcount;
1594 biodone(bp);
1595 RF_LOCK_MUTEX(raidPtr->mutex);
1596 continue;
1597
1598 }
1599 db1_printf(("Calling DoAccess..\n"));
1600
1601
1602 RF_LOCK_MUTEX(raidPtr->mutex);
1603 raidPtr->openings--;
1604 RF_UNLOCK_MUTEX(raidPtr->mutex);
1605
1606 /*
1607 * Everything is async.
1608 */
1609 do_async = 1;
1610
1611 /* don't ever condition on bp->b_flags & B_WRITE.
1612 * always condition on B_READ instead */
1613
1614 /* XXX we're still at splbio() here... do we *really*
1615 need to be? */
1616
1617
1618 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1619 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1620 do_async, raid_addr, num_blocks,
1621 bp->b_un.b_addr, bp, NULL, NULL,
1622 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1623
1624
1625 RF_LOCK_MUTEX(raidPtr->mutex);
1626 }
1627 RF_UNLOCK_MUTEX(raidPtr->mutex);
1628 }
1629
1630
1631
1632
1633 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1634
1635 int
1636 rf_DispatchKernelIO(queue, req)
1637 RF_DiskQueue_t *queue;
1638 RF_DiskQueueData_t *req;
1639 {
1640 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1641 struct buf *bp;
1642 struct raidbuf *raidbp = NULL;
1643 struct raid_softc *rs;
1644 int unit;
1645 int s;
1646
1647 s=0;
1648 /* s = splbio();*/ /* want to test this */
1649 /* XXX along with the vnode, we also need the softc associated with
1650 * this device.. */
1651
1652 req->queue = queue;
1653
1654 unit = queue->raidPtr->raidid;
1655
1656 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1657
1658 if (unit >= numraid) {
1659 printf("Invalid unit number: %d %d\n", unit, numraid);
1660 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1661 }
1662 rs = &raid_softc[unit];
1663
1664 /* XXX is this the right place? */
1665 disk_busy(&rs->sc_dkdev);
1666
1667 bp = req->bp;
1668 #if 1
1669 /* XXX when there is a physical disk failure, someone is passing us a
1670 * buffer that contains old stuff!! Attempt to deal with this problem
1671 * without taking a performance hit... (not sure where the real bug
1672 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1673
1674 if (bp->b_flags & B_ERROR) {
1675 bp->b_flags &= ~B_ERROR;
1676 }
1677 if (bp->b_error != 0) {
1678 bp->b_error = 0;
1679 }
1680 #endif
1681 raidbp = RAIDGETBUF(rs);
1682
1683 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1684
1685 /*
1686 * context for raidiodone
1687 */
1688 raidbp->rf_obp = bp;
1689 raidbp->req = req;
1690
1691 LIST_INIT(&raidbp->rf_buf.b_dep);
1692
1693 switch (req->type) {
1694 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1695 /* XXX need to do something extra here.. */
1696 /* I'm leaving this in, as I've never actually seen it used,
1697 * and I'd like folks to report it... GO */
1698 printf(("WAKEUP CALLED\n"));
1699 queue->numOutstanding++;
1700
1701 /* XXX need to glue the original buffer into this?? */
1702
1703 KernelWakeupFunc(&raidbp->rf_buf);
1704 break;
1705
1706 case RF_IO_TYPE_READ:
1707 case RF_IO_TYPE_WRITE:
1708
1709 if (req->tracerec) {
1710 RF_ETIMER_START(req->tracerec->timer);
1711 }
1712 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1713 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1714 req->sectorOffset, req->numSector,
1715 req->buf, KernelWakeupFunc, (void *) req,
1716 queue->raidPtr->logBytesPerSector, req->b_proc);
1717
1718 if (rf_debugKernelAccess) {
1719 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1720 (long) bp->b_blkno));
1721 }
1722 queue->numOutstanding++;
1723 queue->last_deq_sector = req->sectorOffset;
1724 /* acc wouldn't have been let in if there were any pending
1725 * reqs at any other priority */
1726 queue->curPriority = req->priority;
1727
1728 db1_printf(("Going for %c to unit %d row %d col %d\n",
1729 req->type, unit, queue->row, queue->col));
1730 db1_printf(("sector %d count %d (%d bytes) %d\n",
1731 (int) req->sectorOffset, (int) req->numSector,
1732 (int) (req->numSector <<
1733 queue->raidPtr->logBytesPerSector),
1734 (int) queue->raidPtr->logBytesPerSector));
1735 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1736 raidbp->rf_buf.b_vp->v_numoutput++;
1737 }
1738 VOP_STRATEGY(&raidbp->rf_buf);
1739
1740 break;
1741
1742 default:
1743 panic("bad req->type in rf_DispatchKernelIO");
1744 }
1745 db1_printf(("Exiting from DispatchKernelIO\n"));
1746 /* splx(s); */ /* want to test this */
1747 return (0);
1748 }
1749 /* this is the callback function associated with a I/O invoked from
1750 kernel code.
1751 */
1752 static void
1753 KernelWakeupFunc(vbp)
1754 struct buf *vbp;
1755 {
1756 RF_DiskQueueData_t *req = NULL;
1757 RF_DiskQueue_t *queue;
1758 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1759 struct buf *bp;
1760 struct raid_softc *rs;
1761 int unit;
1762 register int s;
1763
1764 s = splbio();
1765 db1_printf(("recovering the request queue:\n"));
1766 req = raidbp->req;
1767
1768 bp = raidbp->rf_obp;
1769
1770 queue = (RF_DiskQueue_t *) req->queue;
1771
1772 if (raidbp->rf_buf.b_flags & B_ERROR) {
1773 bp->b_flags |= B_ERROR;
1774 bp->b_error = raidbp->rf_buf.b_error ?
1775 raidbp->rf_buf.b_error : EIO;
1776 }
1777
1778 /* XXX methinks this could be wrong... */
1779 #if 1
1780 bp->b_resid = raidbp->rf_buf.b_resid;
1781 #endif
1782
1783 if (req->tracerec) {
1784 RF_ETIMER_STOP(req->tracerec->timer);
1785 RF_ETIMER_EVAL(req->tracerec->timer);
1786 RF_LOCK_MUTEX(rf_tracing_mutex);
1787 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1788 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1789 req->tracerec->num_phys_ios++;
1790 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1791 }
1792 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1793
1794 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1795
1796
1797 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1798 * ballistic, and mark the component as hosed... */
1799
1800 if (bp->b_flags & B_ERROR) {
1801 /* Mark the disk as dead */
1802 /* but only mark it once... */
1803 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1804 rf_ds_optimal) {
1805 printf("raid%d: IO Error. Marking %s as failed.\n",
1806 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1807 queue->raidPtr->Disks[queue->row][queue->col].status =
1808 rf_ds_failed;
1809 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1810 queue->raidPtr->numFailures++;
1811 /* XXX here we should bump the version number for each component, and write that data out */
1812 } else { /* Disk is already dead... */
1813 /* printf("Disk already marked as dead!\n"); */
1814 }
1815
1816 }
1817
1818 rs = &raid_softc[unit];
1819 RAIDPUTBUF(rs, raidbp);
1820
1821
1822 if (bp->b_resid == 0) {
1823 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1824 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1825 }
1826
1827 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1828 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1829
1830 splx(s);
1831 }
1832
1833
1834
1835 /*
1836 * initialize a buf structure for doing an I/O in the kernel.
1837 */
1838 static void
1839 InitBP(
1840 struct buf * bp,
1841 struct vnode * b_vp,
1842 unsigned rw_flag,
1843 dev_t dev,
1844 RF_SectorNum_t startSect,
1845 RF_SectorCount_t numSect,
1846 caddr_t buf,
1847 void (*cbFunc) (struct buf *),
1848 void *cbArg,
1849 int logBytesPerSector,
1850 struct proc * b_proc)
1851 {
1852 /* bp->b_flags = B_PHYS | rw_flag; */
1853 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1854 bp->b_bcount = numSect << logBytesPerSector;
1855 bp->b_bufsize = bp->b_bcount;
1856 bp->b_error = 0;
1857 bp->b_dev = dev;
1858 bp->b_un.b_addr = buf;
1859 bp->b_blkno = startSect;
1860 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1861 if (bp->b_bcount == 0) {
1862 panic("bp->b_bcount is zero in InitBP!!\n");
1863 }
1864 bp->b_proc = b_proc;
1865 bp->b_iodone = cbFunc;
1866 bp->b_vp = b_vp;
1867
1868 }
1869
1870 static void
1871 raidgetdefaultlabel(raidPtr, rs, lp)
1872 RF_Raid_t *raidPtr;
1873 struct raid_softc *rs;
1874 struct disklabel *lp;
1875 {
1876 db1_printf(("Building a default label...\n"));
1877 bzero(lp, sizeof(*lp));
1878
1879 /* fabricate a label... */
1880 lp->d_secperunit = raidPtr->totalSectors;
1881 lp->d_secsize = raidPtr->bytesPerSector;
1882 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1883 lp->d_ntracks = 1;
1884 lp->d_ncylinders = raidPtr->totalSectors /
1885 (lp->d_nsectors * lp->d_ntracks);
1886 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1887
1888 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1889 lp->d_type = DTYPE_RAID;
1890 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1891 lp->d_rpm = 3600;
1892 lp->d_interleave = 1;
1893 lp->d_flags = 0;
1894
1895 lp->d_partitions[RAW_PART].p_offset = 0;
1896 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1897 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1898 lp->d_npartitions = RAW_PART + 1;
1899
1900 lp->d_magic = DISKMAGIC;
1901 lp->d_magic2 = DISKMAGIC;
1902 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1903
1904 }
1905 /*
1906 * Read the disklabel from the raid device. If one is not present, fake one
1907 * up.
1908 */
1909 static void
1910 raidgetdisklabel(dev)
1911 dev_t dev;
1912 {
1913 int unit = raidunit(dev);
1914 struct raid_softc *rs = &raid_softc[unit];
1915 char *errstring;
1916 struct disklabel *lp = rs->sc_dkdev.dk_label;
1917 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1918 RF_Raid_t *raidPtr;
1919
1920 db1_printf(("Getting the disklabel...\n"));
1921
1922 bzero(clp, sizeof(*clp));
1923
1924 raidPtr = raidPtrs[unit];
1925
1926 raidgetdefaultlabel(raidPtr, rs, lp);
1927
1928 /*
1929 * Call the generic disklabel extraction routine.
1930 */
1931 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1932 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1933 if (errstring)
1934 raidmakedisklabel(rs);
1935 else {
1936 int i;
1937 struct partition *pp;
1938
1939 /*
1940 * Sanity check whether the found disklabel is valid.
1941 *
1942 * This is necessary since total size of the raid device
1943 * may vary when an interleave is changed even though exactly
1944 * same componets are used, and old disklabel may used
1945 * if that is found.
1946 */
1947 if (lp->d_secperunit != rs->sc_size)
1948 printf("WARNING: %s: "
1949 "total sector size in disklabel (%d) != "
1950 "the size of raid (%ld)\n", rs->sc_xname,
1951 lp->d_secperunit, (long) rs->sc_size);
1952 for (i = 0; i < lp->d_npartitions; i++) {
1953 pp = &lp->d_partitions[i];
1954 if (pp->p_offset + pp->p_size > rs->sc_size)
1955 printf("WARNING: %s: end of partition `%c' "
1956 "exceeds the size of raid (%ld)\n",
1957 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1958 }
1959 }
1960
1961 }
1962 /*
1963 * Take care of things one might want to take care of in the event
1964 * that a disklabel isn't present.
1965 */
1966 static void
1967 raidmakedisklabel(rs)
1968 struct raid_softc *rs;
1969 {
1970 struct disklabel *lp = rs->sc_dkdev.dk_label;
1971 db1_printf(("Making a label..\n"));
1972
1973 /*
1974 * For historical reasons, if there's no disklabel present
1975 * the raw partition must be marked FS_BSDFFS.
1976 */
1977
1978 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1979
1980 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1981
1982 lp->d_checksum = dkcksum(lp);
1983 }
1984 /*
1985 * Lookup the provided name in the filesystem. If the file exists,
1986 * is a valid block device, and isn't being used by anyone else,
1987 * set *vpp to the file's vnode.
1988 * You'll find the original of this in ccd.c
1989 */
1990 int
1991 raidlookup(path, p, vpp)
1992 char *path;
1993 struct proc *p;
1994 struct vnode **vpp; /* result */
1995 {
1996 struct nameidata nd;
1997 struct vnode *vp;
1998 struct vattr va;
1999 int error;
2000
2001 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2002 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2003 #ifdef DEBUG
2004 printf("RAIDframe: vn_open returned %d\n", error);
2005 #endif
2006 return (error);
2007 }
2008 vp = nd.ni_vp;
2009 if (vp->v_usecount > 1) {
2010 VOP_UNLOCK(vp, 0);
2011 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2012 return (EBUSY);
2013 }
2014 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2015 VOP_UNLOCK(vp, 0);
2016 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2017 return (error);
2018 }
2019 /* XXX: eventually we should handle VREG, too. */
2020 if (va.va_type != VBLK) {
2021 VOP_UNLOCK(vp, 0);
2022 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2023 return (ENOTBLK);
2024 }
2025 VOP_UNLOCK(vp, 0);
2026 *vpp = vp;
2027 return (0);
2028 }
2029 /*
2030 * Wait interruptibly for an exclusive lock.
2031 *
2032 * XXX
2033 * Several drivers do this; it should be abstracted and made MP-safe.
2034 * (Hmm... where have we seen this warning before :-> GO )
2035 */
2036 static int
2037 raidlock(rs)
2038 struct raid_softc *rs;
2039 {
2040 int error;
2041
2042 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2043 rs->sc_flags |= RAIDF_WANTED;
2044 if ((error =
2045 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2046 return (error);
2047 }
2048 rs->sc_flags |= RAIDF_LOCKED;
2049 return (0);
2050 }
2051 /*
2052 * Unlock and wake up any waiters.
2053 */
2054 static void
2055 raidunlock(rs)
2056 struct raid_softc *rs;
2057 {
2058
2059 rs->sc_flags &= ~RAIDF_LOCKED;
2060 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2061 rs->sc_flags &= ~RAIDF_WANTED;
2062 wakeup(rs);
2063 }
2064 }
2065
2066
/*
 * Where the RAIDframe component label lives on each component and how
 * much space is transferred when it is read or written.
 */
#define RF_COMPONENT_INFO_OFFSET	16384	/* bytes */
#define RF_COMPONENT_INFO_SIZE		1024	/* bytes */
2069
2070 int
2071 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2072 {
2073 RF_ComponentLabel_t clabel;
2074 raidread_component_label(dev, b_vp, &clabel);
2075 clabel.mod_counter = mod_counter;
2076 clabel.clean = RF_RAID_CLEAN;
2077 raidwrite_component_label(dev, b_vp, &clabel);
2078 return(0);
2079 }
2080
2081
2082 int
2083 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2084 {
2085 RF_ComponentLabel_t clabel;
2086 raidread_component_label(dev, b_vp, &clabel);
2087 clabel.mod_counter = mod_counter;
2088 clabel.clean = RF_RAID_DIRTY;
2089 raidwrite_component_label(dev, b_vp, &clabel);
2090 return(0);
2091 }
2092
2093 /* ARGSUSED */
2094 int
2095 raidread_component_label(dev, b_vp, clabel)
2096 dev_t dev;
2097 struct vnode *b_vp;
2098 RF_ComponentLabel_t *clabel;
2099 {
2100 struct buf *bp;
2101 int error;
2102
2103 /* XXX should probably ensure that we don't try to do this if
2104 someone has changed rf_protected_sectors. */
2105
2106 /* get a block of the appropriate size... */
2107 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2108 bp->b_dev = dev;
2109
2110 /* get our ducks in a row for the read */
2111 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2112 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2113 bp->b_flags = B_BUSY | B_READ;
2114 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2115
2116 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2117
2118 error = biowait(bp);
2119
2120 if (!error) {
2121 memcpy(clabel, bp->b_un.b_addr,
2122 sizeof(RF_ComponentLabel_t));
2123 #if 0
2124 print_component_label( clabel );
2125 #endif
2126 } else {
2127 #if 0
2128 printf("Failed to read RAID component label!\n");
2129 #endif
2130 }
2131
2132 bp->b_flags = B_INVAL | B_AGE;
2133 brelse(bp);
2134 return(error);
2135 }
2136 /* ARGSUSED */
2137 int
2138 raidwrite_component_label(dev, b_vp, clabel)
2139 dev_t dev;
2140 struct vnode *b_vp;
2141 RF_ComponentLabel_t *clabel;
2142 {
2143 struct buf *bp;
2144 int error;
2145
2146 /* get a block of the appropriate size... */
2147 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2148 bp->b_dev = dev;
2149
2150 /* get our ducks in a row for the write */
2151 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2152 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2153 bp->b_flags = B_BUSY | B_WRITE;
2154 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2155
2156 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2157
2158 memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
2159
2160 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2161 error = biowait(bp);
2162 bp->b_flags = B_INVAL | B_AGE;
2163 brelse(bp);
2164 if (error) {
2165 #if 1
2166 printf("Failed to write RAID component info!\n");
2167 #endif
2168 }
2169
2170 return(error);
2171 }
2172
2173 void
2174 rf_markalldirty( raidPtr )
2175 RF_Raid_t *raidPtr;
2176 {
2177 RF_ComponentLabel_t clabel;
2178 int r,c;
2179
2180 raidPtr->mod_counter++;
2181 for (r = 0; r < raidPtr->numRow; r++) {
2182 for (c = 0; c < raidPtr->numCol; c++) {
2183 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2184 raidread_component_label(
2185 raidPtr->Disks[r][c].dev,
2186 raidPtr->raid_cinfo[r][c].ci_vp,
2187 &clabel);
2188 if (clabel.status == rf_ds_spared) {
2189 /* XXX do something special...
2190 but whatever you do, don't
2191 try to access it!! */
2192 } else {
2193 #if 0
2194 clabel.status =
2195 raidPtr->Disks[r][c].status;
2196 raidwrite_component_label(
2197 raidPtr->Disks[r][c].dev,
2198 raidPtr->raid_cinfo[r][c].ci_vp,
2199 &clabel);
2200 #endif
2201 raidmarkdirty(
2202 raidPtr->Disks[r][c].dev,
2203 raidPtr->raid_cinfo[r][c].ci_vp,
2204 raidPtr->mod_counter);
2205 }
2206 }
2207 }
2208 }
2209 /* printf("Component labels marked dirty.\n"); */
2210 #if 0
2211 for( c = 0; c < raidPtr->numSpare ; c++) {
2212 sparecol = raidPtr->numCol + c;
2213 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2214 /*
2215
2216 XXX this is where we get fancy and map this spare
2217 into it's correct spot in the array.
2218
2219 */
2220 /*
2221
2222 we claim this disk is "optimal" if it's
2223 rf_ds_used_spare, as that means it should be
2224 directly substitutable for the disk it replaced.
2225 We note that too...
2226
2227 */
2228
2229 for(i=0;i<raidPtr->numRow;i++) {
2230 for(j=0;j<raidPtr->numCol;j++) {
2231 if ((raidPtr->Disks[i][j].spareRow ==
2232 r) &&
2233 (raidPtr->Disks[i][j].spareCol ==
2234 sparecol)) {
2235 srow = r;
2236 scol = sparecol;
2237 break;
2238 }
2239 }
2240 }
2241
2242 raidread_component_label(
2243 raidPtr->Disks[r][sparecol].dev,
2244 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2245 &clabel);
2246 /* make sure status is noted */
2247 clabel.version = RF_COMPONENT_LABEL_VERSION;
2248 clabel.mod_counter = raidPtr->mod_counter;
2249 clabel.serial_number = raidPtr->serial_number;
2250 clabel.row = srow;
2251 clabel.column = scol;
2252 clabel.num_rows = raidPtr->numRow;
2253 clabel.num_columns = raidPtr->numCol;
2254 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2255 clabel.status = rf_ds_optimal;
2256 raidwrite_component_label(
2257 raidPtr->Disks[r][sparecol].dev,
2258 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2259 &clabel);
2260 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2261 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2262 }
2263 }
2264
2265 #endif
2266 }
2267
2268
2269 void
2270 rf_update_component_labels( raidPtr )
2271 RF_Raid_t *raidPtr;
2272 {
2273 RF_ComponentLabel_t clabel;
2274 int sparecol;
2275 int r,c;
2276 int i,j;
2277 int srow, scol;
2278
2279 srow = -1;
2280 scol = -1;
2281
2282 /* XXX should do extra checks to make sure things really are clean,
2283 rather than blindly setting the clean bit... */
2284
2285 raidPtr->mod_counter++;
2286
2287 for (r = 0; r < raidPtr->numRow; r++) {
2288 for (c = 0; c < raidPtr->numCol; c++) {
2289 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2290 raidread_component_label(
2291 raidPtr->Disks[r][c].dev,
2292 raidPtr->raid_cinfo[r][c].ci_vp,
2293 &clabel);
2294 /* make sure status is noted */
2295 clabel.status = rf_ds_optimal;
2296 raidwrite_component_label(
2297 raidPtr->Disks[r][c].dev,
2298 raidPtr->raid_cinfo[r][c].ci_vp,
2299 &clabel);
2300 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2301 raidmarkclean(
2302 raidPtr->Disks[r][c].dev,
2303 raidPtr->raid_cinfo[r][c].ci_vp,
2304 raidPtr->mod_counter);
2305 }
2306 }
2307 /* else we don't touch it.. */
2308 #if 0
2309 else if (raidPtr->Disks[r][c].status !=
2310 rf_ds_failed) {
2311 raidread_component_label(
2312 raidPtr->Disks[r][c].dev,
2313 raidPtr->raid_cinfo[r][c].ci_vp,
2314 &clabel);
2315 /* make sure status is noted */
2316 clabel.status =
2317 raidPtr->Disks[r][c].status;
2318 raidwrite_component_label(
2319 raidPtr->Disks[r][c].dev,
2320 raidPtr->raid_cinfo[r][c].ci_vp,
2321 &clabel);
2322 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2323 raidmarkclean(
2324 raidPtr->Disks[r][c].dev,
2325 raidPtr->raid_cinfo[r][c].ci_vp,
2326 raidPtr->mod_counter);
2327 }
2328 }
2329 #endif
2330 }
2331 }
2332
2333 for( c = 0; c < raidPtr->numSpare ; c++) {
2334 sparecol = raidPtr->numCol + c;
2335 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2336 /*
2337
2338 we claim this disk is "optimal" if it's
2339 rf_ds_used_spare, as that means it should be
2340 directly substitutable for the disk it replaced.
2341 We note that too...
2342
2343 */
2344
2345 for(i=0;i<raidPtr->numRow;i++) {
2346 for(j=0;j<raidPtr->numCol;j++) {
2347 if ((raidPtr->Disks[i][j].spareRow ==
2348 0) &&
2349 (raidPtr->Disks[i][j].spareCol ==
2350 sparecol)) {
2351 srow = i;
2352 scol = j;
2353 break;
2354 }
2355 }
2356 }
2357
2358 raidread_component_label(
2359 raidPtr->Disks[0][sparecol].dev,
2360 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2361 &clabel);
2362 /* make sure status is noted */
2363 clabel.version = RF_COMPONENT_LABEL_VERSION;
2364 clabel.mod_counter = raidPtr->mod_counter;
2365 clabel.serial_number = raidPtr->serial_number;
2366 clabel.row = srow;
2367 clabel.column = scol;
2368 clabel.num_rows = raidPtr->numRow;
2369 clabel.num_columns = raidPtr->numCol;
2370 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2371 clabel.status = rf_ds_optimal;
2372 raidwrite_component_label(
2373 raidPtr->Disks[0][sparecol].dev,
2374 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2375 &clabel);
2376 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2377 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2378 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2379 raidPtr->mod_counter);
2380 }
2381 }
2382 }
2383 /* printf("Component labels updated\n"); */
2384 }
2385
2386 void
2387 rf_ReconThread(req)
2388 struct rf_recon_req *req;
2389 {
2390 int s;
2391 RF_Raid_t *raidPtr;
2392
2393 s = splbio();
2394 raidPtr = (RF_Raid_t *) req->raidPtr;
2395 raidPtr->recon_in_progress = 1;
2396
2397 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2398 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2399
2400 /* XXX get rid of this! we don't need it at all.. */
2401 RF_Free(req, sizeof(*req));
2402
2403 raidPtr->recon_in_progress = 0;
2404 splx(s);
2405
2406 /* That's all... */
2407 kthread_exit(0); /* does not return */
2408 }
2409
2410 void
2411 rf_RewriteParityThread(raidPtr)
2412 RF_Raid_t *raidPtr;
2413 {
2414 int retcode;
2415 int s;
2416
2417 raidPtr->parity_rewrite_in_progress = 1;
2418 s = splbio();
2419 retcode = rf_RewriteParity(raidPtr);
2420 splx(s);
2421 if (retcode) {
2422 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2423 } else {
2424 /* set the clean bit! If we shutdown correctly,
2425 the clean bit on each component label will get
2426 set */
2427 raidPtr->parity_good = RF_RAID_CLEAN;
2428 }
2429 raidPtr->parity_rewrite_in_progress = 0;
2430
2431 /* That's all... */
2432 kthread_exit(0); /* does not return */
2433 }
2434
2435
2436 void
2437 rf_CopybackThread(raidPtr)
2438 RF_Raid_t *raidPtr;
2439 {
2440 int s;
2441
2442 raidPtr->copyback_in_progress = 1;
2443 s = splbio();
2444 rf_CopybackReconstructedData(raidPtr);
2445 splx(s);
2446 raidPtr->copyback_in_progress = 0;
2447
2448 /* That's all... */
2449 kthread_exit(0); /* does not return */
2450 }
2451
2452
2453 void
2454 rf_ReconstructInPlaceThread(req)
2455 struct rf_recon_req *req;
2456 {
2457 int retcode;
2458 int s;
2459 RF_Raid_t *raidPtr;
2460
2461 s = splbio();
2462 raidPtr = req->raidPtr;
2463 raidPtr->recon_in_progress = 1;
2464 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2465 RF_Free(req, sizeof(*req));
2466 raidPtr->recon_in_progress = 0;
2467 splx(s);
2468
2469 /* That's all... */
2470 kthread_exit(0); /* does not return */
2471 }
2472
2473 void
2474 rf_mountroot_hook(dev)
2475 struct device *dev;
2476 {
2477 #if 1
2478 printf("rf_mountroot_hook called for %s\n",dev->dv_xname);
2479 #endif
2480 if (boothowto & RB_ASKNAME) {
2481 /* We don't auto-config... */
2482 } else {
2483 /* They didn't ask, and we found something bootable... */
2484 /* XXX pretend for now.. */
2485 if (raidautoconfig) {
2486 rootspec = raid_rooty;
2487 }
2488 }
2489 }
2490
2491
2492 RF_AutoConfig_t *
2493 rf_find_raid_components()
2494 {
2495 struct devnametobdevmaj *dtobdm;
2496 struct vnode *vp;
2497 struct disklabel label;
2498 struct device *dv;
2499 char *cd_name;
2500 dev_t dev;
2501 int error;
2502 int i;
2503 int good_one;
2504 RF_ComponentLabel_t *clabel;
2505 RF_AutoConfig_t *ac_list;
2506 RF_AutoConfig_t *ac;
2507
2508
2509 /* initialize the AutoConfig list */
2510 ac_list = NULL;
2511
2512 if (raidautoconfig) {
2513
2514 /* we begin by trolling through *all* the devices on the system */
2515
2516 for (dv = alldevs.tqh_first; dv != NULL;
2517 dv = dv->dv_list.tqe_next) {
2518
2519 /* we are only interested in disks... */
2520 if (dv->dv_class != DV_DISK)
2521 continue;
2522
2523 /* we don't care about floppies... */
2524 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2525 continue;
2526 }
2527
2528 /* need to find the device_name_to_block_device_major stuff */
2529 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2530 dtobdm = dev_name2blk;
2531 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2532 dtobdm++;
2533 }
2534
2535 /* get a vnode for the raw partition of this disk */
2536
2537 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2538 if (bdevvp(dev, &vp))
2539 panic("RAID can't alloc vnode");
2540
2541 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2542
2543 if (error) {
2544 /* "Who cares." Continue looking
2545 for something that exists*/
2546 vput(vp);
2547 continue;
2548 }
2549
2550 /* Ok, the disk exists. Go get the disklabel. */
2551 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2552 FREAD, NOCRED, 0);
2553 if (error) {
2554 /*
2555 * XXX can't happen - open() would
2556 * have errored out (or faked up one)
2557 */
2558 printf("can't get label for dev %s%c (%d)!?!?\n",
2559 dv->dv_xname, 'a' + RAW_PART, error);
2560 }
2561
2562 /* don't need this any more. We'll allocate it again
2563 a little later if we really do... */
2564 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2565 vput(vp);
2566
2567 for (i=0; i < label.d_npartitions; i++) {
2568 /* We only support partitions marked as RAID */
2569 if (label.d_partitions[i].p_fstype != FS_RAID)
2570 continue;
2571
2572 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2573 if (bdevvp(dev, &vp))
2574 panic("RAID can't alloc vnode");
2575
2576 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2577 if (error) {
2578 /* Whatever... */
2579 vput(vp);
2580 continue;
2581 }
2582
2583 good_one = 0;
2584
2585 clabel = (RF_ComponentLabel_t *)
2586 malloc(sizeof(RF_ComponentLabel_t),
2587 M_RAIDFRAME, M_NOWAIT);
2588 if (clabel == NULL) {
2589 /* XXX CLEANUP HERE */
2590 printf("RAID auto config: out of memory!\n");
2591 return(NULL); /* XXX probably should panic? */
2592 }
2593
2594 if (!raidread_component_label(dev, vp, clabel)) {
2595 /* Got the label. Does it look reasonable? */
2596 if (rf_reasonable_label(clabel) &&
2597 (clabel->partitionSize ==
2598 label.d_partitions[i].p_size)) {
2599 #if DEBUG
2600 printf("Component on: %s%c: %d\n",
2601 dv->dv_xname, 'a'+i,
2602 label.d_partitions[i].p_size);
2603 print_component_label(clabel);
2604 #endif
2605 /* if it's reasonable, add it,
2606 else ignore it. */
2607 ac = (RF_AutoConfig_t *)
2608 malloc(sizeof(RF_AutoConfig_t),
2609 M_RAIDFRAME,
2610 M_NOWAIT);
2611 if (ac == NULL) {
2612 /* XXX should panic?? */
2613 return(NULL);
2614 }
2615
2616 sprintf(ac->devname, "%s%c",
2617 dv->dv_xname, 'a'+i);
2618 ac->dev = dev;
2619 ac->vp = vp;
2620 ac->clabel = clabel;
2621 ac->next = ac_list;
2622 ac_list = ac;
2623 good_one = 1;
2624 }
2625 }
2626 if (!good_one) {
2627 /* cleanup */
2628 free(clabel, M_RAIDFRAME);
2629 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2630 vput(vp);
2631 }
2632 }
2633 }
2634 }
2635 return(ac_list);
2636 }
2637
2638 static int
2639 rf_reasonable_label(clabel)
2640 RF_ComponentLabel_t *clabel;
2641 {
2642
2643 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2644 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2645 ((clabel->clean == RF_RAID_CLEAN) ||
2646 (clabel->clean == RF_RAID_DIRTY)) &&
2647 clabel->row >=0 &&
2648 clabel->column >= 0 &&
2649 clabel->num_rows > 0 &&
2650 clabel->num_columns > 0 &&
2651 clabel->row < clabel->num_rows &&
2652 clabel->column < clabel->num_columns &&
2653 clabel->blockSize > 0 &&
2654 clabel->numBlocks > 0) {
2655 /* label looks reasonable enough... */
2656 return(1);
2657 }
2658 return(0);
2659 }
2660
2661
2662 void
2663 print_component_label(clabel)
2664 RF_ComponentLabel_t *clabel;
2665 {
2666 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2667 clabel->row, clabel->column,
2668 clabel->num_rows, clabel->num_columns);
2669 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2670 clabel->version, clabel->serial_number,
2671 clabel->mod_counter);
2672 printf(" Clean: %s Status: %d\n",
2673 clabel->clean ? "Yes" : "No", clabel->status );
2674 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2675 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2676 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2677 (char) clabel->parityConfig, clabel->blockSize,
2678 clabel->numBlocks);
2679 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2680 printf(" Last configured as: raid%d\n", clabel->last_unit );
2681 #if 0
2682 printf(" Config order: %d\n", clabel->config_order);
2683 #endif
2684
2685 }
2686
2687 RF_ConfigSet_t *
2688 rf_create_auto_sets(ac_list)
2689 RF_AutoConfig_t *ac_list;
2690 {
2691 RF_AutoConfig_t *ac;
2692 RF_ConfigSet_t *config_sets;
2693 RF_ConfigSet_t *cset;
2694 RF_AutoConfig_t *ac_next;
2695
2696
2697 config_sets = NULL;
2698
2699 /* Go through the AutoConfig list, and figure out which components
2700 belong to what sets. */
2701 ac = ac_list;
2702 while(ac!=NULL) {
2703 /* we're going to putz with ac->next, so save it here
2704 for use at the end of the loop */
2705 ac_next = ac->next;
2706
2707 if (config_sets == NULL) {
2708 /* will need at least this one... */
2709 config_sets = (RF_ConfigSet_t *)
2710 malloc(sizeof(RF_ConfigSet_t),
2711 M_RAIDFRAME, M_NOWAIT);
2712 if (config_sets == NULL) {
2713 panic("rf_create_auto_sets: No memory!\n");
2714 }
2715 /* this one is easy :) */
2716 config_sets->ac = ac;
2717 config_sets->next = NULL;
2718 config_sets->rootable = 0;
2719 ac->next = NULL;
2720 } else {
2721 /* which set does this component fit into? */
2722 cset = config_sets;
2723 while(cset!=NULL) {
2724 if (rf_does_it_fit(cset, ac)) {
2725 /* looks like it matches */
2726 ac->next = cset->ac;
2727 cset->ac = ac;
2728 break;
2729 }
2730 cset = cset->next;
2731 }
2732 if (cset==NULL) {
2733 /* didn't find a match above... new set..*/
2734 cset = (RF_ConfigSet_t *)
2735 malloc(sizeof(RF_ConfigSet_t),
2736 M_RAIDFRAME, M_NOWAIT);
2737 if (cset == NULL) {
2738 panic("rf_create_auto_sets: No memory!\n");
2739 }
2740 cset->ac = ac;
2741 ac->next = NULL;
2742 cset->next = config_sets;
2743 cset->rootable = 0;
2744 config_sets = cset;
2745 }
2746 }
2747 ac = ac_next;
2748 }
2749
2750
2751 return(config_sets);
2752 }
2753
2754 static int
2755 rf_does_it_fit(cset, ac)
2756 RF_ConfigSet_t *cset;
2757 RF_AutoConfig_t *ac;
2758 {
2759 RF_ComponentLabel_t *clabel1, *clabel2;
2760
2761 /* If this one matches the *first* one in the set, that's good
2762 enough, since the other members of the set would have been
2763 through here too... */
2764
2765 clabel1 = cset->ac->clabel;
2766 clabel2 = ac->clabel;
2767 if ((clabel1->version == clabel2->version) &&
2768 (clabel1->serial_number == clabel2->serial_number) &&
2769 (clabel1->mod_counter == clabel2->mod_counter) &&
2770 (clabel1->num_rows == clabel2->num_rows) &&
2771 (clabel1->num_columns == clabel2->num_columns) &&
2772 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2773 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2774 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2775 (clabel1->parityConfig == clabel2->parityConfig) &&
2776 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2777 (clabel1->blockSize == clabel2->blockSize) &&
2778 (clabel1->numBlocks == clabel2->numBlocks) &&
2779 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2780 (clabel1->root_partition == clabel2->root_partition) &&
2781 (clabel1->last_unit == clabel2->last_unit) &&
2782 (clabel1->config_order == clabel2->config_order)) {
2783 /* if it get's here, it almost *has* to be a match */
2784 } else {
2785 /* it's not consistent with somebody in the set..
2786 punt */
2787 return(0);
2788 }
2789 /* all was fine.. it must fit... */
2790 return(1);
2791 }
2792
2793 int
2794 rf_have_enough_components(cset)
2795 RF_ConfigSet_t *cset;
2796 {
2797 RF_AutoConfig_t *ac;
2798 RF_AutoConfig_t *auto_config;
2799 RF_ComponentLabel_t *clabel;
2800 int r,c;
2801 int num_rows;
2802 int num_cols;
2803 int num_missing;
2804
2805 /* check to see that we have enough 'live' components
2806 of this set. If so, we can configure it if necessary */
2807
2808 num_rows = cset->ac->clabel->num_rows;
2809 num_cols = cset->ac->clabel->num_columns;
2810
2811 /* XXX Check for duplicate components!?!?!? */
2812
2813 num_missing = 0;
2814 auto_config = cset->ac;
2815
2816 for(r=0; r<num_rows; r++) {
2817 for(c=0; c<num_cols; c++) {
2818 ac = auto_config;
2819 while(ac!=NULL) {
2820 if (ac->clabel==NULL) {
2821 /* big-time bad news. */
2822 goto fail;
2823 }
2824 if ((ac->clabel->row == r) &&
2825 (ac->clabel->column == c)) {
2826 /* it's this one... */
2827 #if DEBUG
2828 printf("Found: %s at %d,%d\n",
2829 ac->devname,r,c);
2830 #endif
2831 break;
2832 }
2833 ac=ac->next;
2834 }
2835 if (ac==NULL) {
2836 /* Didn't find one here! */
2837 num_missing++;
2838 }
2839 }
2840 }
2841
2842 clabel = cset->ac->clabel;
2843
2844 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
2845 ((clabel->parityConfig == '1') && (num_missing > 1)) ||
2846 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
2847 ((clabel->parityConfig == '5') && (num_missing > 1))) {
2848 /* XXX this needs to be made *much* more general */
2849 /* Too many failures */
2850 return(0);
2851 }
2852 /* otherwise, all is well, and we've got enough to take a kick
2853 at autoconfiguring this set */
2854 return(1);
2855 fail:
2856 return(0);
2857
2858 }
2859
2860 void
2861 rf_create_configuration(ac,config,raidPtr)
2862 RF_AutoConfig_t *ac;
2863 RF_Config_t *config;
2864 RF_Raid_t *raidPtr;
2865 {
2866 RF_ComponentLabel_t *clabel;
2867
2868 clabel = ac->clabel;
2869
2870 /* 1. Fill in the common stuff */
2871 config->numRow = clabel->num_rows;
2872 config->numCol = clabel->num_columns;
2873 config->numSpare = 0; /* XXX should this be set here? */
2874 config->sectPerSU = clabel->sectPerSU;
2875 config->SUsPerPU = clabel->SUsPerPU;
2876 config->SUsPerRU = clabel->SUsPerRU;
2877 config->parityConfig = clabel->parityConfig;
2878 /* XXX... */
2879 strcpy(config->diskQueueType,"fifo");
2880 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
2881 config->layoutSpecificSize = 0; /* XXX ?? */
2882
2883 while(ac!=NULL) {
2884 /* row/col values will be in range due to the checks
2885 in reasonable_label() */
2886 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
2887 ac->devname);
2888 ac = ac->next;
2889 }
2890
2891 }
2892
2893 int
2894 rf_set_autoconfig(raidPtr, new_value)
2895 RF_Raid_t *raidPtr;
2896 int new_value;
2897 {
2898 RF_ComponentLabel_t clabel;
2899 struct vnode *vp;
2900 dev_t dev;
2901 int row, column;
2902
2903 for(row=0; row<raidPtr->numRow; row++) {
2904 for(column=0; column<raidPtr->numCol; column++) {
2905 dev = raidPtr->Disks[row][column].dev;
2906 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2907 raidread_component_label(dev, vp, &clabel);
2908 clabel.autoconfigure = new_value;
2909 raidwrite_component_label(dev, vp, &clabel);
2910 }
2911 }
2912 return(new_value);
2913 }
2914
2915 int
2916 rf_set_rootpartition(raidPtr, new_value)
2917 RF_Raid_t *raidPtr;
2918 int new_value;
2919 {
2920 RF_ComponentLabel_t clabel;
2921 struct vnode *vp;
2922 dev_t dev;
2923 int row, column;
2924
2925 for(row=0; row<raidPtr->numRow; row++) {
2926 for(column=0; column<raidPtr->numCol; column++) {
2927 dev = raidPtr->Disks[row][column].dev;
2928 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2929 raidread_component_label(dev, vp, &clabel);
2930 clabel.root_partition = new_value;
2931 raidwrite_component_label(dev, vp, &clabel);
2932 }
2933 }
2934 return(new_value);
2935 }
2936
2937 void
2938 rf_release_all_vps(cset)
2939 RF_ConfigSet_t *cset;
2940 {
2941 RF_AutoConfig_t *ac;
2942
2943 ac = cset->ac;
2944 while(ac!=NULL) {
2945 /* Close the vp, and give it back */
2946 if (ac->vp) {
2947 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
2948 vput(ac->vp);
2949 }
2950 ac = ac->next;
2951 }
2952 }
2953
2954
2955 void
2956 rf_cleanup_config_set(cset)
2957 RF_ConfigSet_t *cset;
2958 {
2959 RF_AutoConfig_t *ac;
2960 RF_AutoConfig_t *next_ac;
2961
2962 ac = cset->ac;
2963 while(ac!=NULL) {
2964 next_ac = ac->next;
2965 /* nuke the label */
2966 free(ac->clabel, M_RAIDFRAME);
2967 /* cleanup the config structure */
2968 free(ac, M_RAIDFRAME);
2969 /* "next.." */
2970 ac = next_ac;
2971 }
2972 /* and, finally, nuke the config set */
2973 free(cset, M_RAIDFRAME);
2974 }
2975
2976
2977 void
2978 raid_init_component_label(raidPtr, clabel)
2979 RF_Raid_t *raidPtr;
2980 RF_ComponentLabel_t *clabel;
2981 {
2982 /* current version number */
2983 clabel->version = RF_COMPONENT_LABEL_VERSION;
2984 clabel->serial_number = clabel->serial_number;
2985 clabel->mod_counter = raidPtr->mod_counter;
2986 clabel->num_rows = raidPtr->numRow;
2987 clabel->num_columns = raidPtr->numCol;
2988 clabel->clean = RF_RAID_DIRTY; /* not clean */
2989 clabel->status = rf_ds_optimal; /* "It's good!" */
2990
2991 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
2992 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
2993 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
2994 /* XXX not portable */
2995 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
2996 /* XXX THIS SHOULD BE SET RIGHT!! */
2997 clabel->maxOutstanding = 100;
2998 clabel->autoconfigure = 0;
2999 clabel->root_partition = 0;
3000 clabel->last_unit = raidPtr->raidid;
3001 clabel->config_order = 0;
3002 }
3003
3004 int
3005 rf_auto_config_set(cset,unit)
3006 RF_ConfigSet_t *cset;
3007 int *unit;
3008 {
3009 RF_Raid_t *raidPtr;
3010 RF_Config_t *config;
3011 int raidID;
3012 int retcode;
3013
3014 printf("Starting autoconfigure on raid%d\n",raidID);
3015
3016 retcode = 0;
3017 *unit = -1;
3018
3019 /* 1. Create a config structure */
3020
3021 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3022 M_RAIDFRAME,
3023 M_NOWAIT);
3024 if (config==NULL) {
3025 printf("Out of mem!?!?\n");
3026 /* XXX do something more intelligent here. */
3027 return(1);
3028 }
3029 /* XXX raidID needs to be set correctly.. */
3030
3031 /*
3032 2. Figure out what RAID ID this one is supposed to live at
3033 See if we can get the same RAID dev that it was configured
3034 on last time..
3035 */
3036
3037 raidID = cset->ac->clabel->last_unit;
3038 if (raidID >= numraid) {
3039 /* let's not wander off into lala land. */
3040 raidID = numraid - 1;
3041 }
3042 if (raidPtrs[raidID]->valid != 0) {
3043
3044 /*
3045 Nope... Go looking for an alternative...
3046 Start high so we don't immediately use raid0 if that's
3047 not taken.
3048 */
3049
3050 for(raidID = numraid; raidID >= 0; raidID--) {
3051 if (raidPtrs[raidID]->valid == 0) {
3052 /* can use this one! */
3053 break;
3054 }
3055 }
3056 }
3057
3058 if (raidID < 0) {
3059 /* punt... */
3060 printf("Unable to auto configure this set!\n");
3061 printf("(Out of RAID devs!)\n");
3062 return(1);
3063 }
3064
3065 raidPtr = raidPtrs[raidID];
3066
3067 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3068 raidPtr->raidid = raidID;
3069 raidPtr->openings = RAIDOUTSTANDING;
3070
3071 /* 3. Build the configuration structure */
3072 rf_create_configuration(cset->ac, config, raidPtr);
3073
3074 /* 4. Do the configuration */
3075 retcode = rf_Configure(raidPtr, config, cset->ac);
3076
3077 if (retcode == 0) {
3078 #if DEBUG
3079 printf("Calling raidinit()\n");
3080 #endif
3081 /* XXX the 0 below is bogus! */
3082 retcode = raidinit(0, raidPtrs[raidID], raidID);
3083 if (retcode) {
3084 printf("init returned: %d\n",retcode);
3085 }
3086 rf_markalldirty( raidPtrs[raidID] );
3087 if (cset->ac->clabel->root_partition==1) {
3088 /* everything configured just fine. Make a note
3089 that this set is eligible to be root. */
3090 cset->rootable = 1;
3091 }
3092 }
3093
3094 /* 5. Cleanup */
3095 free(config, M_RAIDFRAME);
3096
3097 *unit = raidID;
3098 return(retcode);
3099 }
3100