rf_netbsdkintf.c revision 1.62 1 /* $NetBSD: rf_netbsdkintf.c,v 1.62 2000/02/26 17:35:43 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "opt_raid_autoconfig.h"
139 #include "rf_raid.h"
140 #include "rf_raidframe.h"
141 #include "rf_copyback.h"
142 #include "rf_dag.h"
143 #include "rf_dagflags.h"
144 #include "rf_diskqueue.h"
145 #include "rf_acctrace.h"
146 #include "rf_etimer.h"
147 #include "rf_general.h"
148 #include "rf_debugMem.h"
149 #include "rf_kintf.h"
150 #include "rf_options.h"
151 #include "rf_driver.h"
152 #include "rf_parityscan.h"
153 #include "rf_debugprint.h"
154 #include "rf_threadstuff.h"
155
/*
 * Verbosity of the kernel-interface debug output.  db1_printf() only
 * prints when this is greater than zero and the kernel is built with DEBUG.
 */
int rf_kdebug_level = 0;

/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.
 *
 * The single macro argument is a complete, parenthesized printf argument
 * list.  Both variants expand to one do { } while (0) statement so that
 * the macro followed by a semicolon behaves like an ordinary statement,
 * including as the unbraced body of an if that has an else branch.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else /* DEBUG */
#define db1_printf(a) do { } while (0)
#endif /* DEBUG */
163
164 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
165
166 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
167
168 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
169 * spare table */
170 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
171 * installation process */
172
173 /* prototypes */
174 static void KernelWakeupFunc(struct buf * bp);
175 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
176 dev_t dev, RF_SectorNum_t startSect,
177 RF_SectorCount_t numSect, caddr_t buf,
178 void (*cbFunc) (struct buf *), void *cbArg,
179 int logBytesPerSector, struct proc * b_proc);
180 static void raidinit __P((RF_Raid_t *));
181
182 void raidattach __P((int));
183 int raidsize __P((dev_t));
184 int raidopen __P((dev_t, int, int, struct proc *));
185 int raidclose __P((dev_t, int, int, struct proc *));
186 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
187 int raidwrite __P((dev_t, struct uio *, int));
188 int raidread __P((dev_t, struct uio *, int));
189 void raidstrategy __P((struct buf *));
190 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
191
192 /*
193 * Pilfered from ccd.c
194 */
195
196 struct raidbuf {
197 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
198 struct buf *rf_obp; /* ptr. to original I/O buf */
199 int rf_flags; /* misc. flags */
200 RF_DiskQueueData_t *req;/* the request that this was part of.. */
201 };
202
203
204 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
205 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
206
207 /* XXX Not sure if the following should be replacing the raidPtrs above,
208 or if it should be used in conjunction with that...
209 */
210
211 struct raid_softc {
212 int sc_flags; /* flags */
213 int sc_cflags; /* configuration flags */
214 size_t sc_size; /* size of the raid device */
215 char sc_xname[20]; /* XXX external name */
216 struct disk sc_dkdev; /* generic disk device info */
217 struct pool sc_cbufpool; /* component buffer pool */
218 struct buf_queue buf_queue; /* used for the device queue */
219 };
220 /* sc_flags */
221 #define RAIDF_INITED 0x01 /* unit has been initialized */
222 #define RAIDF_WLABEL 0x02 /* label area is writable */
223 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
224 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
225 #define RAIDF_LOCKED 0x80 /* unit is locked */
226
227 #define raidunit(x) DISKUNIT(x)
228 int numraid = 0;
229
230 /*
231 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
232 * Be aware that large numbers can allow the driver to consume a lot of
233 * kernel memory, especially on writes, and in degraded mode reads.
234 *
235 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
236 * a single 64K write will typically require 64K for the old data,
237 * 64K for the old parity, and 64K for the new parity, for a total
238 * of 192K (if the parity buffer is not re-used immediately).
239 * Even if it is used immediately, that's still 128K, which when multiplied
240 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
241 *
242 * Now in degraded mode, for example, a 64K read on the above setup may
243 * require data reconstruction, which will require *all* of the 4 remaining
244 * disks to participate -- 4 * 32K/disk == 128K again.
245 */
246
/*
 * Default number of simultaneous I/Os allowed per RAID device; a value
 * defined at build time takes precedence.
 */
#ifndef RAIDOUTSTANDING
#define	RAIDOUTSTANDING	6
#endif

/* Device number of the raw partition on the unit that `dev' refers to. */
#define	RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
253
254 /* declared here, and made public, for the benefit of KVM stuff.. */
255 struct raid_softc *raid_softc;
256
257 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
258 struct disklabel *));
259 static void raidgetdisklabel __P((dev_t));
260 static void raidmakedisklabel __P((struct raid_softc *));
261
262 static int raidlock __P((struct raid_softc *));
263 static void raidunlock __P((struct raid_softc *));
264
265 static void rf_markalldirty __P((RF_Raid_t *));
266 void rf_mountroot_hook __P((struct device *));
267
268 struct device *raidrootdev;
269 struct cfdata cf_raidrootdev;
270 struct cfdriver cfdrv;
271 /* XXX these should be moved up */
272 #include "rf_configure.h"
273 #include <sys/reboot.h>
274
275 void rf_ReconThread __P((struct rf_recon_req *));
276 /* XXX what I want is: */
277 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
278 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
279 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
280 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
281 void rf_buildroothack __P((void *));
282
283 RF_AutoConfig_t *rf_find_raid_components __P((void));
284 void print_component_label __P((RF_ComponentLabel_t *));
285 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
286 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
287 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
288 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
289 RF_Raid_t *));
290 int rf_set_autoconfig __P((RF_Raid_t *, int));
291 int rf_set_rootpartition __P((RF_Raid_t *, int));
292 void rf_release_all_vps __P((RF_ConfigSet_t *));
293 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
294 int rf_have_enough_components __P((RF_ConfigSet_t *));
295 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
296
297 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
298 allow autoconfig to take place.
299 Note that this is overridden by having
300 RAID_AUTOCONFIG as an option in the
301 kernel config file. */
302 extern struct device *booted_device;
303
304 void
305 raidattach(num)
306 int num;
307 {
308 int raidID;
309 int i, rc;
310 RF_AutoConfig_t *ac_list; /* autoconfig list */
311 RF_ConfigSet_t *config_sets;
312
313 #ifdef DEBUG
314 printf("raidattach: Asked for %d units\n", num);
315 #endif
316
317 if (num <= 0) {
318 #ifdef DIAGNOSTIC
319 panic("raidattach: count <= 0");
320 #endif
321 return;
322 }
323 /* This is where all the initialization stuff gets done. */
324
325 numraid = num;
326
327 /* Make some space for requested number of units... */
328
329 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
330 if (raidPtrs == NULL) {
331 panic("raidPtrs is NULL!!\n");
332 }
333
334 rc = rf_mutex_init(&rf_sparet_wait_mutex);
335 if (rc) {
336 RF_PANIC();
337 }
338
339 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
340
341 for (i = 0; i < num; i++)
342 raidPtrs[i] = NULL;
343 rc = rf_BootRaidframe();
344 if (rc == 0)
345 printf("Kernelized RAIDframe activated\n");
346 else
347 panic("Serious error booting RAID!!\n");
348
349 /* put together some datastructures like the CCD device does.. This
350 * lets us lock the device and what-not when it gets opened. */
351
352 raid_softc = (struct raid_softc *)
353 malloc(num * sizeof(struct raid_softc),
354 M_RAIDFRAME, M_NOWAIT);
355 if (raid_softc == NULL) {
356 printf("WARNING: no memory for RAIDframe driver\n");
357 return;
358 }
359
360 bzero(raid_softc, num * sizeof(struct raid_softc));
361
362 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
363 M_RAIDFRAME, M_NOWAIT);
364 if (raidrootdev == NULL) {
365 panic("No memory for RAIDframe driver!!?!?!\n");
366 }
367
368 for (raidID = 0; raidID < num; raidID++) {
369 BUFQ_INIT(&raid_softc[raidID].buf_queue);
370
371 raidrootdev[raidID].dv_class = DV_DISK;
372 raidrootdev[raidID].dv_cfdata = NULL;
373 raidrootdev[raidID].dv_unit = raidID;
374 raidrootdev[raidID].dv_parent = NULL;
375 raidrootdev[raidID].dv_flags = 0;
376 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
377
378 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
379 (RF_Raid_t *));
380 if (raidPtrs[raidID] == NULL) {
381 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
382 numraid = raidID;
383 return;
384 }
385 }
386
387 #if RAID_AUTOCONFIG
388 raidautoconfig = 1;
389 #endif
390
391 if (raidautoconfig) {
392 /* 1. locate all RAID components on the system */
393
394 #if DEBUG
395 printf("Searching for raid components...\n");
396 #endif
397 ac_list = rf_find_raid_components();
398
399 /* 2. sort them into their respective sets */
400
401 config_sets = rf_create_auto_sets(ac_list);
402
403 /* 3. evaluate each set and configure the valid ones
404 This gets done in rf_buildroothack() */
405
406 /* schedule the creation of the thread to do the
407 "/ on RAID" stuff */
408
409 kthread_create(rf_buildroothack,config_sets);
410
411 #if 0
412 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
413 #endif
414 }
415
416 }
417
418 void
419 rf_buildroothack(arg)
420 void *arg;
421 {
422 RF_ConfigSet_t *config_sets = arg;
423 RF_ConfigSet_t *cset;
424 RF_ConfigSet_t *next_cset;
425 int retcode;
426 int raidID;
427 int rootID;
428 int num_root;
429
430 num_root = 0;
431 cset = config_sets;
432 while(cset != NULL ) {
433 next_cset = cset->next;
434 if (rf_have_enough_components(cset) &&
435 cset->ac->clabel->autoconfigure==1) {
436 retcode = rf_auto_config_set(cset,&raidID);
437 if (!retcode) {
438 if (cset->rootable) {
439 rootID = raidID;
440 num_root++;
441 }
442 } else {
443 /* The autoconfig didn't work :( */
444 #if DEBUG
445 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
446 #endif
447 rf_release_all_vps(cset);
448 }
449 } else {
450 /* we're not autoconfiguring this set...
451 release the associated resources */
452 rf_release_all_vps(cset);
453 }
454 /* cleanup */
455 rf_cleanup_config_set(cset);
456 cset = next_cset;
457 }
458 if (boothowto & RB_ASKNAME) {
459 /* We don't auto-config... */
460 } else {
461 /* They didn't ask, and we found something bootable... */
462
463 if (num_root == 1) {
464 booted_device = &raidrootdev[rootID];
465 } else if (num_root > 1) {
466 /* we can't guess.. require the user to answer... */
467 boothowto |= RB_ASKNAME;
468 }
469 }
470 }
471
472
473 int
474 raidsize(dev)
475 dev_t dev;
476 {
477 struct raid_softc *rs;
478 struct disklabel *lp;
479 int part, unit, omask, size;
480
481 unit = raidunit(dev);
482 if (unit >= numraid)
483 return (-1);
484 rs = &raid_softc[unit];
485
486 if ((rs->sc_flags & RAIDF_INITED) == 0)
487 return (-1);
488
489 part = DISKPART(dev);
490 omask = rs->sc_dkdev.dk_openmask & (1 << part);
491 lp = rs->sc_dkdev.dk_label;
492
493 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
494 return (-1);
495
496 if (lp->d_partitions[part].p_fstype != FS_SWAP)
497 size = -1;
498 else
499 size = lp->d_partitions[part].p_size *
500 (lp->d_secsize / DEV_BSIZE);
501
502 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
503 return (-1);
504
505 return (size);
506
507 }
508
509 int
510 raiddump(dev, blkno, va, size)
511 dev_t dev;
512 daddr_t blkno;
513 caddr_t va;
514 size_t size;
515 {
516 /* Not implemented. */
517 return ENXIO;
518 }
519 /* ARGSUSED */
520 int
521 raidopen(dev, flags, fmt, p)
522 dev_t dev;
523 int flags, fmt;
524 struct proc *p;
525 {
526 int unit = raidunit(dev);
527 struct raid_softc *rs;
528 struct disklabel *lp;
529 int part, pmask;
530 int error = 0;
531
532 if (unit >= numraid)
533 return (ENXIO);
534 rs = &raid_softc[unit];
535
536 if ((error = raidlock(rs)) != 0)
537 return (error);
538 lp = rs->sc_dkdev.dk_label;
539
540 part = DISKPART(dev);
541 pmask = (1 << part);
542
543 db1_printf(("Opening raid device number: %d partition: %d\n",
544 unit, part));
545
546
547 if ((rs->sc_flags & RAIDF_INITED) &&
548 (rs->sc_dkdev.dk_openmask == 0))
549 raidgetdisklabel(dev);
550
551 /* make sure that this partition exists */
552
553 if (part != RAW_PART) {
554 db1_printf(("Not a raw partition..\n"));
555 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
556 ((part >= lp->d_npartitions) ||
557 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
558 error = ENXIO;
559 raidunlock(rs);
560 db1_printf(("Bailing out...\n"));
561 return (error);
562 }
563 }
564 /* Prevent this unit from being unconfigured while open. */
565 switch (fmt) {
566 case S_IFCHR:
567 rs->sc_dkdev.dk_copenmask |= pmask;
568 break;
569
570 case S_IFBLK:
571 rs->sc_dkdev.dk_bopenmask |= pmask;
572 break;
573 }
574
575 if ((rs->sc_dkdev.dk_openmask == 0) &&
576 ((rs->sc_flags & RAIDF_INITED) != 0)) {
577 /* First one... mark things as dirty... Note that we *MUST*
578 have done a configure before this. I DO NOT WANT TO BE
579 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
580 THAT THEY BELONG TOGETHER!!!!! */
581 /* XXX should check to see if we're only open for reading
582 here... If so, we needn't do this, but then need some
583 other way of keeping track of what's happened.. */
584
585 rf_markalldirty( raidPtrs[unit] );
586 }
587
588
589 rs->sc_dkdev.dk_openmask =
590 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
591
592 raidunlock(rs);
593
594 return (error);
595
596
597 }
598 /* ARGSUSED */
599 int
600 raidclose(dev, flags, fmt, p)
601 dev_t dev;
602 int flags, fmt;
603 struct proc *p;
604 {
605 int unit = raidunit(dev);
606 struct raid_softc *rs;
607 int error = 0;
608 int part;
609
610 if (unit >= numraid)
611 return (ENXIO);
612 rs = &raid_softc[unit];
613
614 if ((error = raidlock(rs)) != 0)
615 return (error);
616
617 part = DISKPART(dev);
618
619 /* ...that much closer to allowing unconfiguration... */
620 switch (fmt) {
621 case S_IFCHR:
622 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
623 break;
624
625 case S_IFBLK:
626 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
627 break;
628 }
629 rs->sc_dkdev.dk_openmask =
630 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
631
632 if ((rs->sc_dkdev.dk_openmask == 0) &&
633 ((rs->sc_flags & RAIDF_INITED) != 0)) {
634 /* Last one... device is not unconfigured yet.
635 Device shutdown has taken care of setting the
636 clean bits if RAIDF_INITED is not set
637 mark things as clean... */
638 #if 1
639 printf("Last one on raid%d. Updating status.\n",unit);
640 #endif
641 rf_update_component_labels( raidPtrs[unit] );
642 }
643
644 raidunlock(rs);
645 return (0);
646
647 }
648
649 void
650 raidstrategy(bp)
651 register struct buf *bp;
652 {
653 register int s;
654
655 unsigned int raidID = raidunit(bp->b_dev);
656 RF_Raid_t *raidPtr;
657 struct raid_softc *rs = &raid_softc[raidID];
658 struct disklabel *lp;
659 int wlabel;
660
661 if ((rs->sc_flags & RAIDF_INITED) ==0) {
662 bp->b_error = ENXIO;
663 bp->b_flags = B_ERROR;
664 bp->b_resid = bp->b_bcount;
665 biodone(bp);
666 return;
667 }
668 if (raidID >= numraid || !raidPtrs[raidID]) {
669 bp->b_error = ENODEV;
670 bp->b_flags |= B_ERROR;
671 bp->b_resid = bp->b_bcount;
672 biodone(bp);
673 return;
674 }
675 raidPtr = raidPtrs[raidID];
676 if (!raidPtr->valid) {
677 bp->b_error = ENODEV;
678 bp->b_flags |= B_ERROR;
679 bp->b_resid = bp->b_bcount;
680 biodone(bp);
681 return;
682 }
683 if (bp->b_bcount == 0) {
684 db1_printf(("b_bcount is zero..\n"));
685 biodone(bp);
686 return;
687 }
688 lp = rs->sc_dkdev.dk_label;
689
690 /*
691 * Do bounds checking and adjust transfer. If there's an
692 * error, the bounds check will flag that for us.
693 */
694
695 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
696 if (DISKPART(bp->b_dev) != RAW_PART)
697 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
698 db1_printf(("Bounds check failed!!:%d %d\n",
699 (int) bp->b_blkno, (int) wlabel));
700 biodone(bp);
701 return;
702 }
703 s = splbio();
704
705 bp->b_resid = 0;
706
707 /* stuff it onto our queue */
708 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
709
710 raidstart(raidPtrs[raidID]);
711
712 splx(s);
713 }
714 /* ARGSUSED */
715 int
716 raidread(dev, uio, flags)
717 dev_t dev;
718 struct uio *uio;
719 int flags;
720 {
721 int unit = raidunit(dev);
722 struct raid_softc *rs;
723 int part;
724
725 if (unit >= numraid)
726 return (ENXIO);
727 rs = &raid_softc[unit];
728
729 if ((rs->sc_flags & RAIDF_INITED) == 0)
730 return (ENXIO);
731 part = DISKPART(dev);
732
733 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
734
735 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
736
737 }
738 /* ARGSUSED */
739 int
740 raidwrite(dev, uio, flags)
741 dev_t dev;
742 struct uio *uio;
743 int flags;
744 {
745 int unit = raidunit(dev);
746 struct raid_softc *rs;
747
748 if (unit >= numraid)
749 return (ENXIO);
750 rs = &raid_softc[unit];
751
752 if ((rs->sc_flags & RAIDF_INITED) == 0)
753 return (ENXIO);
754 db1_printf(("raidwrite\n"));
755 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
756
757 }
758
759 int
760 raidioctl(dev, cmd, data, flag, p)
761 dev_t dev;
762 u_long cmd;
763 caddr_t data;
764 int flag;
765 struct proc *p;
766 {
767 int unit = raidunit(dev);
768 int error = 0;
769 int part, pmask;
770 struct raid_softc *rs;
771 RF_Config_t *k_cfg, *u_cfg;
772 RF_Raid_t *raidPtr;
773 RF_RaidDisk_t *diskPtr;
774 RF_AccTotals_t *totals;
775 RF_DeviceConfig_t *d_cfg, **ucfgp;
776 u_char *specific_buf;
777 int retcode = 0;
778 int row;
779 int column;
780 struct rf_recon_req *rrcopy, *rr;
781 RF_ComponentLabel_t *clabel;
782 RF_ComponentLabel_t ci_label;
783 RF_ComponentLabel_t **clabel_ptr;
784 RF_SingleComponent_t *sparePtr,*componentPtr;
785 RF_SingleComponent_t hot_spare;
786 RF_SingleComponent_t component;
787 int i, j, d;
788
789 if (unit >= numraid)
790 return (ENXIO);
791 rs = &raid_softc[unit];
792 raidPtr = raidPtrs[unit];
793
794 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
795 (int) DISKPART(dev), (int) unit, (int) cmd));
796
797 /* Must be open for writes for these commands... */
798 switch (cmd) {
799 case DIOCSDINFO:
800 case DIOCWDINFO:
801 case DIOCWLABEL:
802 if ((flag & FWRITE) == 0)
803 return (EBADF);
804 }
805
806 /* Must be initialized for these... */
807 switch (cmd) {
808 case DIOCGDINFO:
809 case DIOCSDINFO:
810 case DIOCWDINFO:
811 case DIOCGPART:
812 case DIOCWLABEL:
813 case DIOCGDEFLABEL:
814 case RAIDFRAME_SHUTDOWN:
815 case RAIDFRAME_REWRITEPARITY:
816 case RAIDFRAME_GET_INFO:
817 case RAIDFRAME_RESET_ACCTOTALS:
818 case RAIDFRAME_GET_ACCTOTALS:
819 case RAIDFRAME_KEEP_ACCTOTALS:
820 case RAIDFRAME_GET_SIZE:
821 case RAIDFRAME_FAIL_DISK:
822 case RAIDFRAME_COPYBACK:
823 case RAIDFRAME_CHECK_RECON_STATUS:
824 case RAIDFRAME_GET_COMPONENT_LABEL:
825 case RAIDFRAME_SET_COMPONENT_LABEL:
826 case RAIDFRAME_ADD_HOT_SPARE:
827 case RAIDFRAME_REMOVE_HOT_SPARE:
828 case RAIDFRAME_INIT_LABELS:
829 case RAIDFRAME_REBUILD_IN_PLACE:
830 case RAIDFRAME_CHECK_PARITY:
831 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
832 case RAIDFRAME_CHECK_COPYBACK_STATUS:
833 case RAIDFRAME_SET_AUTOCONFIG:
834 case RAIDFRAME_SET_ROOT:
835 if ((rs->sc_flags & RAIDF_INITED) == 0)
836 return (ENXIO);
837 }
838
839 switch (cmd) {
840
841 /* configure the system */
842 case RAIDFRAME_CONFIGURE:
843
844 if (raidPtr->valid) {
845 /* There is a valid RAID set running on this unit! */
846 printf("raid%d: Device already configured!\n",unit);
847 }
848
849 /* copy-in the configuration information */
850 /* data points to a pointer to the configuration structure */
851
852 u_cfg = *((RF_Config_t **) data);
853 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
854 if (k_cfg == NULL) {
855 return (ENOMEM);
856 }
857 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
858 sizeof(RF_Config_t));
859 if (retcode) {
860 RF_Free(k_cfg, sizeof(RF_Config_t));
861 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
862 retcode));
863 return (retcode);
864 }
865 /* allocate a buffer for the layout-specific data, and copy it
866 * in */
867 if (k_cfg->layoutSpecificSize) {
868 if (k_cfg->layoutSpecificSize > 10000) {
869 /* sanity check */
870 RF_Free(k_cfg, sizeof(RF_Config_t));
871 return (EINVAL);
872 }
873 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
874 (u_char *));
875 if (specific_buf == NULL) {
876 RF_Free(k_cfg, sizeof(RF_Config_t));
877 return (ENOMEM);
878 }
879 retcode = copyin(k_cfg->layoutSpecific,
880 (caddr_t) specific_buf,
881 k_cfg->layoutSpecificSize);
882 if (retcode) {
883 RF_Free(k_cfg, sizeof(RF_Config_t));
884 RF_Free(specific_buf,
885 k_cfg->layoutSpecificSize);
886 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
887 retcode));
888 return (retcode);
889 }
890 } else
891 specific_buf = NULL;
892 k_cfg->layoutSpecific = specific_buf;
893
894 /* should do some kind of sanity check on the configuration.
895 * Store the sum of all the bytes in the last byte? */
896
897 /* configure the system */
898
899 /*
900 * Clear the entire RAID descriptor, just to make sure
901 * there is no stale data left in the case of a
902 * reconfiguration
903 */
904 bzero((char *) raidPtr, sizeof(RF_Raid_t));
905 raidPtr->raidid = unit;
906
907 retcode = rf_Configure(raidPtr, k_cfg, NULL);
908
909 if (retcode == 0) {
910
911 /* allow this many simultaneous IO's to
912 this RAID device */
913 raidPtr->openings = RAIDOUTSTANDING;
914
915 raidinit(raidPtr);
916 rf_markalldirty(raidPtr);
917 }
918 /* free the buffers. No return code here. */
919 if (k_cfg->layoutSpecificSize) {
920 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
921 }
922 RF_Free(k_cfg, sizeof(RF_Config_t));
923
924 return (retcode);
925
926 /* shutdown the system */
927 case RAIDFRAME_SHUTDOWN:
928
929 if ((error = raidlock(rs)) != 0)
930 return (error);
931
932 /*
933 * If somebody has a partition mounted, we shouldn't
934 * shutdown.
935 */
936
937 part = DISKPART(dev);
938 pmask = (1 << part);
939 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
940 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
941 (rs->sc_dkdev.dk_copenmask & pmask))) {
942 raidunlock(rs);
943 return (EBUSY);
944 }
945
946 retcode = rf_Shutdown(raidPtr);
947
948 pool_destroy(&rs->sc_cbufpool);
949
950 /* It's no longer initialized... */
951 rs->sc_flags &= ~RAIDF_INITED;
952
953 /* Detach the disk. */
954 disk_detach(&rs->sc_dkdev);
955
956 raidunlock(rs);
957
958 return (retcode);
959 case RAIDFRAME_GET_COMPONENT_LABEL:
960 clabel_ptr = (RF_ComponentLabel_t **) data;
961 /* need to read the component label for the disk indicated
962 by row,column in clabel */
963
964 /* For practice, let's get it directly fromdisk, rather
965 than from the in-core copy */
966 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
967 (RF_ComponentLabel_t *));
968 if (clabel == NULL)
969 return (ENOMEM);
970
971 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
972
973 retcode = copyin( *clabel_ptr, clabel,
974 sizeof(RF_ComponentLabel_t));
975
976 if (retcode) {
977 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
978 return(retcode);
979 }
980
981 row = clabel->row;
982 column = clabel->column;
983
984 if ((row < 0) || (row >= raidPtr->numRow) ||
985 (column < 0) || (column >= raidPtr->numCol)) {
986 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
987 return(EINVAL);
988 }
989
990 raidread_component_label(raidPtr->Disks[row][column].dev,
991 raidPtr->raid_cinfo[row][column].ci_vp,
992 clabel );
993
994 retcode = copyout((caddr_t) clabel,
995 (caddr_t) *clabel_ptr,
996 sizeof(RF_ComponentLabel_t));
997 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
998 return (retcode);
999
1000 case RAIDFRAME_SET_COMPONENT_LABEL:
1001 clabel = (RF_ComponentLabel_t *) data;
1002
1003 /* XXX check the label for valid stuff... */
1004 /* Note that some things *should not* get modified --
1005 the user should be re-initing the labels instead of
1006 trying to patch things.
1007 */
1008
1009 printf("Got component label:\n");
1010 printf("Version: %d\n",clabel->version);
1011 printf("Serial Number: %d\n",clabel->serial_number);
1012 printf("Mod counter: %d\n",clabel->mod_counter);
1013 printf("Row: %d\n", clabel->row);
1014 printf("Column: %d\n", clabel->column);
1015 printf("Num Rows: %d\n", clabel->num_rows);
1016 printf("Num Columns: %d\n", clabel->num_columns);
1017 printf("Clean: %d\n", clabel->clean);
1018 printf("Status: %d\n", clabel->status);
1019
1020 row = clabel->row;
1021 column = clabel->column;
1022
1023 if ((row < 0) || (row >= raidPtr->numRow) ||
1024 (column < 0) || (column >= raidPtr->numCol)) {
1025 return(EINVAL);
1026 }
1027
1028 /* XXX this isn't allowed to do anything for now :-) */
1029
1030 /* XXX and before it is, we need to fill in the rest
1031 of the fields!?!?!?! */
1032 #if 0
1033 raidwrite_component_label(
1034 raidPtr->Disks[row][column].dev,
1035 raidPtr->raid_cinfo[row][column].ci_vp,
1036 clabel );
1037 #endif
1038 return (0);
1039
1040 case RAIDFRAME_INIT_LABELS:
1041 clabel = (RF_ComponentLabel_t *) data;
1042 /*
1043 we only want the serial number from
1044 the above. We get all the rest of the information
1045 from the config that was used to create this RAID
1046 set.
1047 */
1048
1049 raidPtr->serial_number = clabel->serial_number;
1050
1051 raid_init_component_label(raidPtr, &ci_label);
1052 ci_label.serial_number = clabel->serial_number;
1053
1054 for(row=0;row<raidPtr->numRow;row++) {
1055 ci_label.row = row;
1056 for(column=0;column<raidPtr->numCol;column++) {
1057 diskPtr = &raidPtr->Disks[row][column];
1058 ci_label.partitionSize = diskPtr->partitionSize;
1059 ci_label.column = column;
1060 raidwrite_component_label(
1061 raidPtr->Disks[row][column].dev,
1062 raidPtr->raid_cinfo[row][column].ci_vp,
1063 &ci_label );
1064 }
1065 }
1066
1067 return (retcode);
1068 case RAIDFRAME_SET_AUTOCONFIG:
1069 d = rf_set_autoconfig(raidPtr, *data);
1070 printf("New autoconfig value is: %d\n", d);
1071 *data = d;
1072 return (retcode);
1073
1074 case RAIDFRAME_SET_ROOT:
1075 d = rf_set_rootpartition(raidPtr, *data);
1076 printf("New rootpartition value is: %d\n", d);
1077 *data = d;
1078 return (retcode);
1079
1080 /* initialize all parity */
1081 case RAIDFRAME_REWRITEPARITY:
1082
1083 if (raidPtr->Layout.map->faultsTolerated == 0) {
1084 /* Parity for RAID 0 is trivially correct */
1085 raidPtr->parity_good = RF_RAID_CLEAN;
1086 return(0);
1087 }
1088
1089 if (raidPtr->parity_rewrite_in_progress == 1) {
1090 /* Re-write is already in progress! */
1091 return(EINVAL);
1092 }
1093
1094 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1095 rf_RewriteParityThread,
1096 raidPtr,"raid_parity");
1097 return (retcode);
1098
1099
1100 case RAIDFRAME_ADD_HOT_SPARE:
1101 sparePtr = (RF_SingleComponent_t *) data;
1102 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1103 printf("Adding spare\n");
1104 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1105 return(retcode);
1106
1107 case RAIDFRAME_REMOVE_HOT_SPARE:
1108 return(retcode);
1109
1110 case RAIDFRAME_REBUILD_IN_PLACE:
1111
1112 if (raidPtr->Layout.map->faultsTolerated == 0) {
1113 /* Can't do this on a RAID 0!! */
1114 return(EINVAL);
1115 }
1116
1117 if (raidPtr->recon_in_progress == 1) {
1118 /* a reconstruct is already in progress! */
1119 return(EINVAL);
1120 }
1121
1122 componentPtr = (RF_SingleComponent_t *) data;
1123 memcpy( &component, componentPtr,
1124 sizeof(RF_SingleComponent_t));
1125 row = component.row;
1126 column = component.column;
1127 printf("Rebuild: %d %d\n",row, column);
1128 if ((row < 0) || (row >= raidPtr->numRow) ||
1129 (column < 0) || (column >= raidPtr->numCol)) {
1130 return(EINVAL);
1131 }
1132
1133 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1134 if (rrcopy == NULL)
1135 return(ENOMEM);
1136
1137 rrcopy->raidPtr = (void *) raidPtr;
1138 rrcopy->row = row;
1139 rrcopy->col = column;
1140
1141 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1142 rf_ReconstructInPlaceThread,
1143 rrcopy,"raid_reconip");
1144 return(retcode);
1145
1146 case RAIDFRAME_GET_INFO:
1147 if (!raidPtr->valid)
1148 return (ENODEV);
1149 ucfgp = (RF_DeviceConfig_t **) data;
1150 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1151 (RF_DeviceConfig_t *));
1152 if (d_cfg == NULL)
1153 return (ENOMEM);
1154 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1155 d_cfg->rows = raidPtr->numRow;
1156 d_cfg->cols = raidPtr->numCol;
1157 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1158 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1159 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1160 return (ENOMEM);
1161 }
1162 d_cfg->nspares = raidPtr->numSpare;
1163 if (d_cfg->nspares >= RF_MAX_DISKS) {
1164 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1165 return (ENOMEM);
1166 }
1167 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1168 d = 0;
1169 for (i = 0; i < d_cfg->rows; i++) {
1170 for (j = 0; j < d_cfg->cols; j++) {
1171 d_cfg->devs[d] = raidPtr->Disks[i][j];
1172 d++;
1173 }
1174 }
1175 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1176 d_cfg->spares[i] = raidPtr->Disks[0][j];
1177 }
1178 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1179 sizeof(RF_DeviceConfig_t));
1180 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1181
1182 return (retcode);
1183
1184 case RAIDFRAME_CHECK_PARITY:
1185 *(int *) data = raidPtr->parity_good;
1186 return (0);
1187
1188 case RAIDFRAME_RESET_ACCTOTALS:
1189 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1190 return (0);
1191
1192 case RAIDFRAME_GET_ACCTOTALS:
1193 totals = (RF_AccTotals_t *) data;
1194 *totals = raidPtr->acc_totals;
1195 return (0);
1196
1197 case RAIDFRAME_KEEP_ACCTOTALS:
1198 raidPtr->keep_acc_totals = *(int *)data;
1199 return (0);
1200
1201 case RAIDFRAME_GET_SIZE:
1202 *(int *) data = raidPtr->totalSectors;
1203 return (0);
1204
1205 /* fail a disk & optionally start reconstruction */
1206 case RAIDFRAME_FAIL_DISK:
1207
1208 if (raidPtr->Layout.map->faultsTolerated == 0) {
1209 /* Can't do this on a RAID 0!! */
1210 return(EINVAL);
1211 }
1212
1213 rr = (struct rf_recon_req *) data;
1214
1215 if (rr->row < 0 || rr->row >= raidPtr->numRow
1216 || rr->col < 0 || rr->col >= raidPtr->numCol)
1217 return (EINVAL);
1218
1219 printf("raid%d: Failing the disk: row: %d col: %d\n",
1220 unit, rr->row, rr->col);
1221
1222 /* make a copy of the recon request so that we don't rely on
1223 * the user's buffer */
1224 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1225 if (rrcopy == NULL)
1226 return(ENOMEM);
1227 bcopy(rr, rrcopy, sizeof(*rr));
1228 rrcopy->raidPtr = (void *) raidPtr;
1229
1230 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1231 rf_ReconThread,
1232 rrcopy,"raid_recon");
1233 return (0);
1234
1235 /* invoke a copyback operation after recon on whatever disk
1236 * needs it, if any */
1237 case RAIDFRAME_COPYBACK:
1238
1239 if (raidPtr->Layout.map->faultsTolerated == 0) {
1240 /* This makes no sense on a RAID 0!! */
1241 return(EINVAL);
1242 }
1243
1244 if (raidPtr->copyback_in_progress == 1) {
1245 /* Copyback is already in progress! */
1246 return(EINVAL);
1247 }
1248
1249 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1250 rf_CopybackThread,
1251 raidPtr,"raid_copyback");
1252 return (retcode);
1253
1254 /* return the percentage completion of reconstruction */
1255 case RAIDFRAME_CHECK_RECON_STATUS:
1256 if (raidPtr->Layout.map->faultsTolerated == 0) {
1257 /* This makes no sense on a RAID 0 */
1258 return(EINVAL);
1259 }
1260 row = 0; /* XXX we only consider a single row... */
1261 if (raidPtr->status[row] != rf_rs_reconstructing)
1262 *(int *) data = 100;
1263 else
1264 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1265 return (0);
1266
1267 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1268 if (raidPtr->Layout.map->faultsTolerated == 0) {
1269 /* This makes no sense on a RAID 0 */
1270 return(EINVAL);
1271 }
1272 if (raidPtr->parity_rewrite_in_progress == 1) {
1273 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1274 } else {
1275 *(int *) data = 100;
1276 }
1277 return (0);
1278
1279 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1280 if (raidPtr->Layout.map->faultsTolerated == 0) {
1281 /* This makes no sense on a RAID 0 */
1282 return(EINVAL);
1283 }
1284 if (raidPtr->copyback_in_progress == 1) {
1285 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1286 raidPtr->Layout.numStripe;
1287 } else {
1288 *(int *) data = 100;
1289 }
1290 return (0);
1291
1292
1293 /* the sparetable daemon calls this to wait for the kernel to
1294 * need a spare table. this ioctl does not return until a
1295 * spare table is needed. XXX -- calling mpsleep here in the
1296 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1297 * -- I should either compute the spare table in the kernel,
1298 * or have a different -- XXX XXX -- interface (a different
1299 * character device) for delivering the table -- XXX */
1300 #if 0
1301 case RAIDFRAME_SPARET_WAIT:
1302 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1303 while (!rf_sparet_wait_queue)
1304 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1305 waitreq = rf_sparet_wait_queue;
1306 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1307 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1308
1309 /* structure assignment */
1310 *((RF_SparetWait_t *) data) = *waitreq;
1311
1312 RF_Free(waitreq, sizeof(*waitreq));
1313 return (0);
1314
1315 /* wakes up a process waiting on SPARET_WAIT and puts an error
1316 * code in it that will cause the dameon to exit */
1317 case RAIDFRAME_ABORT_SPARET_WAIT:
1318 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1319 waitreq->fcol = -1;
1320 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1321 waitreq->next = rf_sparet_wait_queue;
1322 rf_sparet_wait_queue = waitreq;
1323 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1324 wakeup(&rf_sparet_wait_queue);
1325 return (0);
1326
1327 /* used by the spare table daemon to deliver a spare table
1328 * into the kernel */
1329 case RAIDFRAME_SEND_SPARET:
1330
1331 /* install the spare table */
1332 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1333
1334 /* respond to the requestor. the return status of the spare
1335 * table installation is passed in the "fcol" field */
1336 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1337 waitreq->fcol = retcode;
1338 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1339 waitreq->next = rf_sparet_resp_queue;
1340 rf_sparet_resp_queue = waitreq;
1341 wakeup(&rf_sparet_resp_queue);
1342 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1343
1344 return (retcode);
1345 #endif
1346
1347 default:
1348 break; /* fall through to the os-specific code below */
1349
1350 }
1351
1352 if (!raidPtr->valid)
1353 return (EINVAL);
1354
1355 /*
1356 * Add support for "regular" device ioctls here.
1357 */
1358
1359 switch (cmd) {
1360 case DIOCGDINFO:
1361 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1362 break;
1363
1364 case DIOCGPART:
1365 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1366 ((struct partinfo *) data)->part =
1367 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1368 break;
1369
1370 case DIOCWDINFO:
1371 case DIOCSDINFO:
1372 if ((error = raidlock(rs)) != 0)
1373 return (error);
1374
1375 rs->sc_flags |= RAIDF_LABELLING;
1376
1377 error = setdisklabel(rs->sc_dkdev.dk_label,
1378 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1379 if (error == 0) {
1380 if (cmd == DIOCWDINFO)
1381 error = writedisklabel(RAIDLABELDEV(dev),
1382 raidstrategy, rs->sc_dkdev.dk_label,
1383 rs->sc_dkdev.dk_cpulabel);
1384 }
1385 rs->sc_flags &= ~RAIDF_LABELLING;
1386
1387 raidunlock(rs);
1388
1389 if (error)
1390 return (error);
1391 break;
1392
1393 case DIOCWLABEL:
1394 if (*(int *) data != 0)
1395 rs->sc_flags |= RAIDF_WLABEL;
1396 else
1397 rs->sc_flags &= ~RAIDF_WLABEL;
1398 break;
1399
1400 case DIOCGDEFLABEL:
1401 raidgetdefaultlabel(raidPtr, rs,
1402 (struct disklabel *) data);
1403 break;
1404
1405 default:
1406 retcode = ENOTTY;
1407 }
1408 return (retcode);
1409
1410 }
1411
1412
1413 /* raidinit -- complete the rest of the initialization for the
1414 RAIDframe device. */
1415
1416
1417 static void
1418 raidinit(raidPtr)
1419 RF_Raid_t *raidPtr;
1420 {
1421 struct raid_softc *rs;
1422 int unit;
1423
1424 unit = raidPtr->raidid;
1425
1426 rs = &raid_softc[unit];
1427 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1428 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1429
1430
1431 /* XXX should check return code first... */
1432 rs->sc_flags |= RAIDF_INITED;
1433
1434 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1435
1436 rs->sc_dkdev.dk_name = rs->sc_xname;
1437
1438 /* disk_attach actually creates space for the CPU disklabel, among
1439 * other things, so it's critical to call this *BEFORE* we try putzing
1440 * with disklabels. */
1441
1442 disk_attach(&rs->sc_dkdev);
1443
1444 /* XXX There may be a weird interaction here between this, and
1445 * protectedSectors, as used in RAIDframe. */
1446
1447 rs->sc_size = raidPtr->totalSectors;
1448
1449 }
1450
1451 /* wake up the daemon & tell it to get us a spare table
1452 * XXX
1453 * the entries in the queues should be tagged with the raidPtr
1454 * so that in the extremely rare case that two recons happen at once,
1455 * we know for which device were requesting a spare table
1456 * XXX
1457 *
1458 * XXX This code is not currently used. GO
1459 */
1460 int
1461 rf_GetSpareTableFromDaemon(req)
1462 RF_SparetWait_t *req;
1463 {
1464 int retcode;
1465
1466 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1467 req->next = rf_sparet_wait_queue;
1468 rf_sparet_wait_queue = req;
1469 wakeup(&rf_sparet_wait_queue);
1470
1471 /* mpsleep unlocks the mutex */
1472 while (!rf_sparet_resp_queue) {
1473 tsleep(&rf_sparet_resp_queue, PRIBIO,
1474 "raidframe getsparetable", 0);
1475 }
1476 req = rf_sparet_resp_queue;
1477 rf_sparet_resp_queue = req->next;
1478 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1479
1480 retcode = req->fcol;
1481 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1482 * alloc'd */
1483 return (retcode);
1484 }
1485
/*
 * raidstart -- a wrapper around rf_DoAccess that extracts the
 * appropriate info from each queued bp and passes it down.
 *
 * While raidPtr->openings is positive, buffers are taken from the
 * unit's buf_queue one at a time.  For each buffer the partition
 * offset is added (unless it targets RAW_PART), the request is checked
 * against the size of the array and against the sector size, and it is
 * then handed to rf_DoAccess() as an asynchronous, non-blocking access.
 * Requests that fail a check are completed immediately with biodone()
 * and B_ERROR set (ENOSPC for range errors, EINVAL for a byte count
 * that is not a multiple of the sector size).
 *
 * Any calls originating in the kernel must use non-blocking I/O.
 * The extra sanity checking returns "appropriate" error values for
 * certain conditions (to make some standard utilities work).
 *
 * Locking: raidPtr->mutex is held only while raidPtr->openings (and
 * numNewFailures) are examined or changed; it is dropped at the top of
 * every loop iteration and re-taken before the loop condition is
 * evaluated again.  The early return on an empty queue therefore
 * happens with the mutex released.
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/*
	 * Quick check to see if anything has died recently; if so, update
	 * the component labels once per call and consume one pending
	 * failure notification.
	 */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		rf_update_component_labels(raidPtr);
		raidPtr->numNewFailures--;
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);

	/* Check to see if we're at the limit... */
	RF_LOCK_MUTEX(raidPtr->mutex);
	while (raidPtr->openings > 0) {
		/* The mutex is not held while a buffer is being processed. */
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
			/* nothing more to do (mutex already released) */
			return;
		}
		BUFQ_REMOVE(&rs->buf_queue, bp);

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/*
		 * num_blocks is the number of whole sectors in the request;
		 * pb is 1 when there is a trailing partial sector, else 0.
		 * sum is one past the last sector the request would touch.
		 */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/*
		 * Reject requests that run past the end of the array.  The
		 * "sum < ..." comparisons catch wrap-around in the addition
		 * above.
		 */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* re-take the mutex for the loop condition */
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject byte counts that are not a whole number of sectors. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* re-take the mutex for the loop condition */
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Account for the access we are about to start. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */


		/*
		 * NOTE(review): the value returned by rf_DoAccess() is stored
		 * in retcode but never examined in this function.
		 */
		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				      do_async, raid_addr, num_blocks,
				      bp->b_un.b_addr, bp, NULL, NULL,
				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);


		/* re-take the mutex for the loop condition */
		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1611
1612
1613
1614
1615 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1616
1617 int
1618 rf_DispatchKernelIO(queue, req)
1619 RF_DiskQueue_t *queue;
1620 RF_DiskQueueData_t *req;
1621 {
1622 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1623 struct buf *bp;
1624 struct raidbuf *raidbp = NULL;
1625 struct raid_softc *rs;
1626 int unit;
1627 int s;
1628
1629 s=0;
1630 /* s = splbio();*/ /* want to test this */
1631 /* XXX along with the vnode, we also need the softc associated with
1632 * this device.. */
1633
1634 req->queue = queue;
1635
1636 unit = queue->raidPtr->raidid;
1637
1638 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1639
1640 if (unit >= numraid) {
1641 printf("Invalid unit number: %d %d\n", unit, numraid);
1642 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1643 }
1644 rs = &raid_softc[unit];
1645
1646 /* XXX is this the right place? */
1647 disk_busy(&rs->sc_dkdev);
1648
1649 bp = req->bp;
1650 #if 1
1651 /* XXX when there is a physical disk failure, someone is passing us a
1652 * buffer that contains old stuff!! Attempt to deal with this problem
1653 * without taking a performance hit... (not sure where the real bug
1654 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1655
1656 if (bp->b_flags & B_ERROR) {
1657 bp->b_flags &= ~B_ERROR;
1658 }
1659 if (bp->b_error != 0) {
1660 bp->b_error = 0;
1661 }
1662 #endif
1663 raidbp = RAIDGETBUF(rs);
1664
1665 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1666
1667 /*
1668 * context for raidiodone
1669 */
1670 raidbp->rf_obp = bp;
1671 raidbp->req = req;
1672
1673 LIST_INIT(&raidbp->rf_buf.b_dep);
1674
1675 switch (req->type) {
1676 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1677 /* XXX need to do something extra here.. */
1678 /* I'm leaving this in, as I've never actually seen it used,
1679 * and I'd like folks to report it... GO */
1680 printf(("WAKEUP CALLED\n"));
1681 queue->numOutstanding++;
1682
1683 /* XXX need to glue the original buffer into this?? */
1684
1685 KernelWakeupFunc(&raidbp->rf_buf);
1686 break;
1687
1688 case RF_IO_TYPE_READ:
1689 case RF_IO_TYPE_WRITE:
1690
1691 if (req->tracerec) {
1692 RF_ETIMER_START(req->tracerec->timer);
1693 }
1694 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1695 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1696 req->sectorOffset, req->numSector,
1697 req->buf, KernelWakeupFunc, (void *) req,
1698 queue->raidPtr->logBytesPerSector, req->b_proc);
1699
1700 if (rf_debugKernelAccess) {
1701 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1702 (long) bp->b_blkno));
1703 }
1704 queue->numOutstanding++;
1705 queue->last_deq_sector = req->sectorOffset;
1706 /* acc wouldn't have been let in if there were any pending
1707 * reqs at any other priority */
1708 queue->curPriority = req->priority;
1709
1710 db1_printf(("Going for %c to unit %d row %d col %d\n",
1711 req->type, unit, queue->row, queue->col));
1712 db1_printf(("sector %d count %d (%d bytes) %d\n",
1713 (int) req->sectorOffset, (int) req->numSector,
1714 (int) (req->numSector <<
1715 queue->raidPtr->logBytesPerSector),
1716 (int) queue->raidPtr->logBytesPerSector));
1717 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1718 raidbp->rf_buf.b_vp->v_numoutput++;
1719 }
1720 VOP_STRATEGY(&raidbp->rf_buf);
1721
1722 break;
1723
1724 default:
1725 panic("bad req->type in rf_DispatchKernelIO");
1726 }
1727 db1_printf(("Exiting from DispatchKernelIO\n"));
1728 /* splx(s); */ /* want to test this */
1729 return (0);
1730 }
/*
 * KernelWakeupFunc -- the callback function associated with an I/O
 * invoked from kernel code.
 *
 * vbp is the buffer that was handed to the component; it is cast back
 * to the enclosing struct raidbuf to recover the original buffer
 * (rf_obp) and the disk-queue request (req).
 * NOTE(review): the cast assumes rf_buf is the first member of
 * struct raidbuf -- confirm against the structure definition.
 *
 * The function copies error state, residual and byte count back to the
 * original buffer, updates trace timers when tracing is active, marks
 * the component failed on the first I/O error seen against an optimal
 * disk, returns the raidbuf to the pool, and finally notifies the disk
 * queue and the request's completion function.  The whole body runs at
 * splbio().
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	/* bp is the original buffer, not the one sent to the component */
	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	/* Propagate a component error; default to EIO if none was given. */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* Stop the per-request timer and add it to the trace record. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures is what raidstart() checks to trigger
			 * a component label update */
			queue->raidPtr->numNewFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Done with the wrapper; raidbp must not be used after this. */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	/* NOTE(review): disk_unbusy() is only called when nothing is left
	 * over (b_resid == 0) -- confirm this balances the dispatch side. */
	if (bp->b_resid == 0) {
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	}

	/* Report completion (1 = failed, 0 = ok) to the queue and the caller. */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);

	splx(s);
}
1815
1816
1817
1818 /*
1819 * initialize a buf structure for doing an I/O in the kernel.
1820 */
1821 static void
1822 InitBP(
1823 struct buf * bp,
1824 struct vnode * b_vp,
1825 unsigned rw_flag,
1826 dev_t dev,
1827 RF_SectorNum_t startSect,
1828 RF_SectorCount_t numSect,
1829 caddr_t buf,
1830 void (*cbFunc) (struct buf *),
1831 void *cbArg,
1832 int logBytesPerSector,
1833 struct proc * b_proc)
1834 {
1835 /* bp->b_flags = B_PHYS | rw_flag; */
1836 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1837 bp->b_bcount = numSect << logBytesPerSector;
1838 bp->b_bufsize = bp->b_bcount;
1839 bp->b_error = 0;
1840 bp->b_dev = dev;
1841 bp->b_un.b_addr = buf;
1842 bp->b_blkno = startSect;
1843 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1844 if (bp->b_bcount == 0) {
1845 panic("bp->b_bcount is zero in InitBP!!\n");
1846 }
1847 bp->b_proc = b_proc;
1848 bp->b_iodone = cbFunc;
1849 bp->b_vp = b_vp;
1850
1851 }
1852
1853 static void
1854 raidgetdefaultlabel(raidPtr, rs, lp)
1855 RF_Raid_t *raidPtr;
1856 struct raid_softc *rs;
1857 struct disklabel *lp;
1858 {
1859 db1_printf(("Building a default label...\n"));
1860 bzero(lp, sizeof(*lp));
1861
1862 /* fabricate a label... */
1863 lp->d_secperunit = raidPtr->totalSectors;
1864 lp->d_secsize = raidPtr->bytesPerSector;
1865 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1866 lp->d_ntracks = 1;
1867 lp->d_ncylinders = raidPtr->totalSectors /
1868 (lp->d_nsectors * lp->d_ntracks);
1869 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1870
1871 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1872 lp->d_type = DTYPE_RAID;
1873 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1874 lp->d_rpm = 3600;
1875 lp->d_interleave = 1;
1876 lp->d_flags = 0;
1877
1878 lp->d_partitions[RAW_PART].p_offset = 0;
1879 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1880 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1881 lp->d_npartitions = RAW_PART + 1;
1882
1883 lp->d_magic = DISKMAGIC;
1884 lp->d_magic2 = DISKMAGIC;
1885 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1886
1887 }
1888 /*
1889 * Read the disklabel from the raid device. If one is not present, fake one
1890 * up.
1891 */
1892 static void
1893 raidgetdisklabel(dev)
1894 dev_t dev;
1895 {
1896 int unit = raidunit(dev);
1897 struct raid_softc *rs = &raid_softc[unit];
1898 char *errstring;
1899 struct disklabel *lp = rs->sc_dkdev.dk_label;
1900 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1901 RF_Raid_t *raidPtr;
1902
1903 db1_printf(("Getting the disklabel...\n"));
1904
1905 bzero(clp, sizeof(*clp));
1906
1907 raidPtr = raidPtrs[unit];
1908
1909 raidgetdefaultlabel(raidPtr, rs, lp);
1910
1911 /*
1912 * Call the generic disklabel extraction routine.
1913 */
1914 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1915 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1916 if (errstring)
1917 raidmakedisklabel(rs);
1918 else {
1919 int i;
1920 struct partition *pp;
1921
1922 /*
1923 * Sanity check whether the found disklabel is valid.
1924 *
1925 * This is necessary since total size of the raid device
1926 * may vary when an interleave is changed even though exactly
1927 * same componets are used, and old disklabel may used
1928 * if that is found.
1929 */
1930 if (lp->d_secperunit != rs->sc_size)
1931 printf("WARNING: %s: "
1932 "total sector size in disklabel (%d) != "
1933 "the size of raid (%ld)\n", rs->sc_xname,
1934 lp->d_secperunit, (long) rs->sc_size);
1935 for (i = 0; i < lp->d_npartitions; i++) {
1936 pp = &lp->d_partitions[i];
1937 if (pp->p_offset + pp->p_size > rs->sc_size)
1938 printf("WARNING: %s: end of partition `%c' "
1939 "exceeds the size of raid (%ld)\n",
1940 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1941 }
1942 }
1943
1944 }
1945 /*
1946 * Take care of things one might want to take care of in the event
1947 * that a disklabel isn't present.
1948 */
1949 static void
1950 raidmakedisklabel(rs)
1951 struct raid_softc *rs;
1952 {
1953 struct disklabel *lp = rs->sc_dkdev.dk_label;
1954 db1_printf(("Making a label..\n"));
1955
1956 /*
1957 * For historical reasons, if there's no disklabel present
1958 * the raw partition must be marked FS_BSDFFS.
1959 */
1960
1961 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1962
1963 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1964
1965 lp->d_checksum = dkcksum(lp);
1966 }
1967 /*
1968 * Lookup the provided name in the filesystem. If the file exists,
1969 * is a valid block device, and isn't being used by anyone else,
1970 * set *vpp to the file's vnode.
1971 * You'll find the original of this in ccd.c
1972 */
1973 int
1974 raidlookup(path, p, vpp)
1975 char *path;
1976 struct proc *p;
1977 struct vnode **vpp; /* result */
1978 {
1979 struct nameidata nd;
1980 struct vnode *vp;
1981 struct vattr va;
1982 int error;
1983
1984 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1985 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1986 #ifdef DEBUG
1987 printf("RAIDframe: vn_open returned %d\n", error);
1988 #endif
1989 return (error);
1990 }
1991 vp = nd.ni_vp;
1992 if (vp->v_usecount > 1) {
1993 VOP_UNLOCK(vp, 0);
1994 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1995 return (EBUSY);
1996 }
1997 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1998 VOP_UNLOCK(vp, 0);
1999 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2000 return (error);
2001 }
2002 /* XXX: eventually we should handle VREG, too. */
2003 if (va.va_type != VBLK) {
2004 VOP_UNLOCK(vp, 0);
2005 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2006 return (ENOTBLK);
2007 }
2008 VOP_UNLOCK(vp, 0);
2009 *vpp = vp;
2010 return (0);
2011 }
2012 /*
2013 * Wait interruptibly for an exclusive lock.
2014 *
2015 * XXX
2016 * Several drivers do this; it should be abstracted and made MP-safe.
2017 * (Hmm... where have we seen this warning before :-> GO )
2018 */
2019 static int
2020 raidlock(rs)
2021 struct raid_softc *rs;
2022 {
2023 int error;
2024
2025 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2026 rs->sc_flags |= RAIDF_WANTED;
2027 if ((error =
2028 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2029 return (error);
2030 }
2031 rs->sc_flags |= RAIDF_LOCKED;
2032 return (0);
2033 }
2034 /*
2035 * Unlock and wake up any waiters.
2036 */
2037 static void
2038 raidunlock(rs)
2039 struct raid_softc *rs;
2040 {
2041
2042 rs->sc_flags &= ~RAIDF_LOCKED;
2043 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2044 rs->sc_flags &= ~RAIDF_WANTED;
2045 wakeup(rs);
2046 }
2047 }
2048
2049
/*
 * Placement of the component label on each component device, as used by
 * raidread_component_label() and raidwrite_component_label(): the label
 * I/O starts at block RF_COMPONENT_INFO_OFFSET / DEV_BSIZE and
 * transfers RF_COMPONENT_INFO_SIZE bytes.
 */
#define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2052
2053 int
2054 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2055 {
2056 RF_ComponentLabel_t clabel;
2057 raidread_component_label(dev, b_vp, &clabel);
2058 clabel.mod_counter = mod_counter;
2059 clabel.clean = RF_RAID_CLEAN;
2060 raidwrite_component_label(dev, b_vp, &clabel);
2061 return(0);
2062 }
2063
2064
2065 int
2066 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2067 {
2068 RF_ComponentLabel_t clabel;
2069 raidread_component_label(dev, b_vp, &clabel);
2070 clabel.mod_counter = mod_counter;
2071 clabel.clean = RF_RAID_DIRTY;
2072 raidwrite_component_label(dev, b_vp, &clabel);
2073 return(0);
2074 }
2075
2076 /* ARGSUSED */
2077 int
2078 raidread_component_label(dev, b_vp, clabel)
2079 dev_t dev;
2080 struct vnode *b_vp;
2081 RF_ComponentLabel_t *clabel;
2082 {
2083 struct buf *bp;
2084 int error;
2085
2086 /* XXX should probably ensure that we don't try to do this if
2087 someone has changed rf_protected_sectors. */
2088
2089 /* get a block of the appropriate size... */
2090 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2091 bp->b_dev = dev;
2092
2093 /* get our ducks in a row for the read */
2094 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2095 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2096 bp->b_flags = B_BUSY | B_READ;
2097 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2098
2099 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2100
2101 error = biowait(bp);
2102
2103 if (!error) {
2104 memcpy(clabel, bp->b_un.b_addr,
2105 sizeof(RF_ComponentLabel_t));
2106 #if 0
2107 print_component_label( clabel );
2108 #endif
2109 } else {
2110 #if 0
2111 printf("Failed to read RAID component label!\n");
2112 #endif
2113 }
2114
2115 bp->b_flags = B_INVAL | B_AGE;
2116 brelse(bp);
2117 return(error);
2118 }
2119 /* ARGSUSED */
2120 int
2121 raidwrite_component_label(dev, b_vp, clabel)
2122 dev_t dev;
2123 struct vnode *b_vp;
2124 RF_ComponentLabel_t *clabel;
2125 {
2126 struct buf *bp;
2127 int error;
2128
2129 /* get a block of the appropriate size... */
2130 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2131 bp->b_dev = dev;
2132
2133 /* get our ducks in a row for the write */
2134 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2135 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2136 bp->b_flags = B_BUSY | B_WRITE;
2137 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2138
2139 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2140
2141 memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
2142
2143 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2144 error = biowait(bp);
2145 bp->b_flags = B_INVAL | B_AGE;
2146 brelse(bp);
2147 if (error) {
2148 #if 1
2149 printf("Failed to write RAID component info!\n");
2150 #endif
2151 }
2152
2153 return(error);
2154 }
2155
2156 void
2157 rf_markalldirty( raidPtr )
2158 RF_Raid_t *raidPtr;
2159 {
2160 RF_ComponentLabel_t clabel;
2161 int r,c;
2162
2163 raidPtr->mod_counter++;
2164 for (r = 0; r < raidPtr->numRow; r++) {
2165 for (c = 0; c < raidPtr->numCol; c++) {
2166 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2167 raidread_component_label(
2168 raidPtr->Disks[r][c].dev,
2169 raidPtr->raid_cinfo[r][c].ci_vp,
2170 &clabel);
2171 if (clabel.status == rf_ds_spared) {
2172 /* XXX do something special...
2173 but whatever you do, don't
2174 try to access it!! */
2175 } else {
2176 #if 0
2177 clabel.status =
2178 raidPtr->Disks[r][c].status;
2179 raidwrite_component_label(
2180 raidPtr->Disks[r][c].dev,
2181 raidPtr->raid_cinfo[r][c].ci_vp,
2182 &clabel);
2183 #endif
2184 raidmarkdirty(
2185 raidPtr->Disks[r][c].dev,
2186 raidPtr->raid_cinfo[r][c].ci_vp,
2187 raidPtr->mod_counter);
2188 }
2189 }
2190 }
2191 }
2192 /* printf("Component labels marked dirty.\n"); */
2193 #if 0
2194 for( c = 0; c < raidPtr->numSpare ; c++) {
2195 sparecol = raidPtr->numCol + c;
2196 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2197 /*
2198
2199 XXX this is where we get fancy and map this spare
2200 into it's correct spot in the array.
2201
2202 */
2203 /*
2204
2205 we claim this disk is "optimal" if it's
2206 rf_ds_used_spare, as that means it should be
2207 directly substitutable for the disk it replaced.
2208 We note that too...
2209
2210 */
2211
2212 for(i=0;i<raidPtr->numRow;i++) {
2213 for(j=0;j<raidPtr->numCol;j++) {
2214 if ((raidPtr->Disks[i][j].spareRow ==
2215 r) &&
2216 (raidPtr->Disks[i][j].spareCol ==
2217 sparecol)) {
2218 srow = r;
2219 scol = sparecol;
2220 break;
2221 }
2222 }
2223 }
2224
2225 raidread_component_label(
2226 raidPtr->Disks[r][sparecol].dev,
2227 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2228 &clabel);
2229 /* make sure status is noted */
2230 clabel.version = RF_COMPONENT_LABEL_VERSION;
2231 clabel.mod_counter = raidPtr->mod_counter;
2232 clabel.serial_number = raidPtr->serial_number;
2233 clabel.row = srow;
2234 clabel.column = scol;
2235 clabel.num_rows = raidPtr->numRow;
2236 clabel.num_columns = raidPtr->numCol;
2237 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2238 clabel.status = rf_ds_optimal;
2239 raidwrite_component_label(
2240 raidPtr->Disks[r][sparecol].dev,
2241 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2242 &clabel);
2243 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2244 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2245 }
2246 }
2247
2248 #endif
2249 }
2250
2251
2252 void
2253 rf_update_component_labels( raidPtr )
2254 RF_Raid_t *raidPtr;
2255 {
2256 RF_ComponentLabel_t clabel;
2257 int sparecol;
2258 int r,c;
2259 int i,j;
2260 int srow, scol;
2261
2262 srow = -1;
2263 scol = -1;
2264
2265 /* XXX should do extra checks to make sure things really are clean,
2266 rather than blindly setting the clean bit... */
2267
2268 raidPtr->mod_counter++;
2269
2270 for (r = 0; r < raidPtr->numRow; r++) {
2271 for (c = 0; c < raidPtr->numCol; c++) {
2272 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2273 raidread_component_label(
2274 raidPtr->Disks[r][c].dev,
2275 raidPtr->raid_cinfo[r][c].ci_vp,
2276 &clabel);
2277 /* make sure status is noted */
2278 clabel.status = rf_ds_optimal;
2279 /* bump the counter */
2280 clabel.mod_counter = raidPtr->mod_counter;
2281 #if 0
2282 /* note where this set was configured last */
2283 clabel.last_unit = raidPtr->raidid;
2284 #endif
2285
2286 raidwrite_component_label(
2287 raidPtr->Disks[r][c].dev,
2288 raidPtr->raid_cinfo[r][c].ci_vp,
2289 &clabel);
2290 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2291 raidmarkclean(
2292 raidPtr->Disks[r][c].dev,
2293 raidPtr->raid_cinfo[r][c].ci_vp,
2294 raidPtr->mod_counter);
2295 }
2296 }
2297 /* else we don't touch it.. */
2298 #if 0
2299 else if (raidPtr->Disks[r][c].status !=
2300 rf_ds_failed) {
2301 raidread_component_label(
2302 raidPtr->Disks[r][c].dev,
2303 raidPtr->raid_cinfo[r][c].ci_vp,
2304 &clabel);
2305 /* make sure status is noted */
2306 clabel.status =
2307 raidPtr->Disks[r][c].status;
2308 raidwrite_component_label(
2309 raidPtr->Disks[r][c].dev,
2310 raidPtr->raid_cinfo[r][c].ci_vp,
2311 &clabel);
2312 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2313 raidmarkclean(
2314 raidPtr->Disks[r][c].dev,
2315 raidPtr->raid_cinfo[r][c].ci_vp,
2316 raidPtr->mod_counter);
2317 }
2318 }
2319 #endif
2320 }
2321 }
2322
2323 for( c = 0; c < raidPtr->numSpare ; c++) {
2324 sparecol = raidPtr->numCol + c;
2325 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2326 /*
2327
2328 we claim this disk is "optimal" if it's
2329 rf_ds_used_spare, as that means it should be
2330 directly substitutable for the disk it replaced.
2331 We note that too...
2332
2333 */
2334
2335 for(i=0;i<raidPtr->numRow;i++) {
2336 for(j=0;j<raidPtr->numCol;j++) {
2337 if ((raidPtr->Disks[i][j].spareRow ==
2338 0) &&
2339 (raidPtr->Disks[i][j].spareCol ==
2340 sparecol)) {
2341 srow = i;
2342 scol = j;
2343 break;
2344 }
2345 }
2346 }
2347
2348 /* XXX shouldn't *really* need this... */
2349 raidread_component_label(
2350 raidPtr->Disks[0][sparecol].dev,
2351 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2352 &clabel);
2353 /* make sure status is noted */
2354
2355 raid_init_component_label(raidPtr, &clabel);
2356
2357 clabel.mod_counter = raidPtr->mod_counter;
2358 clabel.row = srow;
2359 clabel.column = scol;
2360 clabel.status = rf_ds_optimal;
2361
2362 raidwrite_component_label(
2363 raidPtr->Disks[0][sparecol].dev,
2364 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2365 &clabel);
2366 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2367 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2368 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2369 raidPtr->mod_counter);
2370 }
2371 }
2372 }
2373 /* printf("Component labels updated\n"); */
2374 }
2375
2376 void
2377 rf_ReconThread(req)
2378 struct rf_recon_req *req;
2379 {
2380 int s;
2381 RF_Raid_t *raidPtr;
2382
2383 s = splbio();
2384 raidPtr = (RF_Raid_t *) req->raidPtr;
2385 raidPtr->recon_in_progress = 1;
2386
2387 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2388 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2389
2390 /* XXX get rid of this! we don't need it at all.. */
2391 RF_Free(req, sizeof(*req));
2392
2393 raidPtr->recon_in_progress = 0;
2394 splx(s);
2395
2396 /* That's all... */
2397 kthread_exit(0); /* does not return */
2398 }
2399
2400 void
2401 rf_RewriteParityThread(raidPtr)
2402 RF_Raid_t *raidPtr;
2403 {
2404 int retcode;
2405 int s;
2406
2407 raidPtr->parity_rewrite_in_progress = 1;
2408 s = splbio();
2409 retcode = rf_RewriteParity(raidPtr);
2410 splx(s);
2411 if (retcode) {
2412 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2413 } else {
2414 /* set the clean bit! If we shutdown correctly,
2415 the clean bit on each component label will get
2416 set */
2417 raidPtr->parity_good = RF_RAID_CLEAN;
2418 }
2419 raidPtr->parity_rewrite_in_progress = 0;
2420
2421 /* That's all... */
2422 kthread_exit(0); /* does not return */
2423 }
2424
2425
2426 void
2427 rf_CopybackThread(raidPtr)
2428 RF_Raid_t *raidPtr;
2429 {
2430 int s;
2431
2432 raidPtr->copyback_in_progress = 1;
2433 s = splbio();
2434 rf_CopybackReconstructedData(raidPtr);
2435 splx(s);
2436 raidPtr->copyback_in_progress = 0;
2437
2438 /* That's all... */
2439 kthread_exit(0); /* does not return */
2440 }
2441
2442
2443 void
2444 rf_ReconstructInPlaceThread(req)
2445 struct rf_recon_req *req;
2446 {
2447 int retcode;
2448 int s;
2449 RF_Raid_t *raidPtr;
2450
2451 s = splbio();
2452 raidPtr = req->raidPtr;
2453 raidPtr->recon_in_progress = 1;
2454 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2455 RF_Free(req, sizeof(*req));
2456 raidPtr->recon_in_progress = 0;
2457 splx(s);
2458
2459 /* That's all... */
2460 kthread_exit(0); /* does not return */
2461 }
2462
/*
 * Mount-root hook.  Currently a placeholder: `dev' is ignored and
 * nothing is done.
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{

}
2469
2470
2471 RF_AutoConfig_t *
2472 rf_find_raid_components()
2473 {
2474 struct devnametobdevmaj *dtobdm;
2475 struct vnode *vp;
2476 struct disklabel label;
2477 struct device *dv;
2478 char *cd_name;
2479 dev_t dev;
2480 int error;
2481 int i;
2482 int good_one;
2483 RF_ComponentLabel_t *clabel;
2484 RF_AutoConfig_t *ac_list;
2485 RF_AutoConfig_t *ac;
2486
2487
2488 /* initialize the AutoConfig list */
2489 ac_list = NULL;
2490
2491 if (raidautoconfig) {
2492
2493 /* we begin by trolling through *all* the devices on the system */
2494
2495 for (dv = alldevs.tqh_first; dv != NULL;
2496 dv = dv->dv_list.tqe_next) {
2497
2498 /* we are only interested in disks... */
2499 if (dv->dv_class != DV_DISK)
2500 continue;
2501
2502 /* we don't care about floppies... */
2503 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2504 continue;
2505 }
2506
2507 /* need to find the device_name_to_block_device_major stuff */
2508 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2509 dtobdm = dev_name2blk;
2510 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2511 dtobdm++;
2512 }
2513
2514 /* get a vnode for the raw partition of this disk */
2515
2516 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2517 if (bdevvp(dev, &vp))
2518 panic("RAID can't alloc vnode");
2519
2520 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2521
2522 if (error) {
2523 /* "Who cares." Continue looking
2524 for something that exists*/
2525 vput(vp);
2526 continue;
2527 }
2528
2529 /* Ok, the disk exists. Go get the disklabel. */
2530 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2531 FREAD, NOCRED, 0);
2532 if (error) {
2533 /*
2534 * XXX can't happen - open() would
2535 * have errored out (or faked up one)
2536 */
2537 printf("can't get label for dev %s%c (%d)!?!?\n",
2538 dv->dv_xname, 'a' + RAW_PART, error);
2539 }
2540
2541 /* don't need this any more. We'll allocate it again
2542 a little later if we really do... */
2543 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2544 vput(vp);
2545
2546 for (i=0; i < label.d_npartitions; i++) {
2547 /* We only support partitions marked as RAID */
2548 if (label.d_partitions[i].p_fstype != FS_RAID)
2549 continue;
2550
2551 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2552 if (bdevvp(dev, &vp))
2553 panic("RAID can't alloc vnode");
2554
2555 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2556 if (error) {
2557 /* Whatever... */
2558 vput(vp);
2559 continue;
2560 }
2561
2562 good_one = 0;
2563
2564 clabel = (RF_ComponentLabel_t *)
2565 malloc(sizeof(RF_ComponentLabel_t),
2566 M_RAIDFRAME, M_NOWAIT);
2567 if (clabel == NULL) {
2568 /* XXX CLEANUP HERE */
2569 printf("RAID auto config: out of memory!\n");
2570 return(NULL); /* XXX probably should panic? */
2571 }
2572
2573 if (!raidread_component_label(dev, vp, clabel)) {
2574 /* Got the label. Does it look reasonable? */
2575 if (rf_reasonable_label(clabel) &&
2576 (clabel->partitionSize <=
2577 label.d_partitions[i].p_size)) {
2578 #if DEBUG
2579 printf("Component on: %s%c: %d\n",
2580 dv->dv_xname, 'a'+i,
2581 label.d_partitions[i].p_size);
2582 print_component_label(clabel);
2583 #endif
2584 /* if it's reasonable, add it,
2585 else ignore it. */
2586 ac = (RF_AutoConfig_t *)
2587 malloc(sizeof(RF_AutoConfig_t),
2588 M_RAIDFRAME,
2589 M_NOWAIT);
2590 if (ac == NULL) {
2591 /* XXX should panic?? */
2592 return(NULL);
2593 }
2594
2595 sprintf(ac->devname, "%s%c",
2596 dv->dv_xname, 'a'+i);
2597 ac->dev = dev;
2598 ac->vp = vp;
2599 ac->clabel = clabel;
2600 ac->next = ac_list;
2601 ac_list = ac;
2602 good_one = 1;
2603 }
2604 }
2605 if (!good_one) {
2606 /* cleanup */
2607 free(clabel, M_RAIDFRAME);
2608 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2609 vput(vp);
2610 }
2611 }
2612 }
2613 }
2614 return(ac_list);
2615 }
2616
2617 static int
2618 rf_reasonable_label(clabel)
2619 RF_ComponentLabel_t *clabel;
2620 {
2621
2622 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2623 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2624 ((clabel->clean == RF_RAID_CLEAN) ||
2625 (clabel->clean == RF_RAID_DIRTY)) &&
2626 clabel->row >=0 &&
2627 clabel->column >= 0 &&
2628 clabel->num_rows > 0 &&
2629 clabel->num_columns > 0 &&
2630 clabel->row < clabel->num_rows &&
2631 clabel->column < clabel->num_columns &&
2632 clabel->blockSize > 0 &&
2633 clabel->numBlocks > 0) {
2634 /* label looks reasonable enough... */
2635 return(1);
2636 }
2637 return(0);
2638 }
2639
2640
2641 void
2642 print_component_label(clabel)
2643 RF_ComponentLabel_t *clabel;
2644 {
2645 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2646 clabel->row, clabel->column,
2647 clabel->num_rows, clabel->num_columns);
2648 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2649 clabel->version, clabel->serial_number,
2650 clabel->mod_counter);
2651 printf(" Clean: %s Status: %d\n",
2652 clabel->clean ? "Yes" : "No", clabel->status );
2653 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2654 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2655 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2656 (char) clabel->parityConfig, clabel->blockSize,
2657 clabel->numBlocks);
2658 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2659 printf(" Last configured as: raid%d\n", clabel->last_unit );
2660 #if 0
2661 printf(" Config order: %d\n", clabel->config_order);
2662 #endif
2663
2664 }
2665
2666 RF_ConfigSet_t *
2667 rf_create_auto_sets(ac_list)
2668 RF_AutoConfig_t *ac_list;
2669 {
2670 RF_AutoConfig_t *ac;
2671 RF_ConfigSet_t *config_sets;
2672 RF_ConfigSet_t *cset;
2673 RF_AutoConfig_t *ac_next;
2674
2675
2676 config_sets = NULL;
2677
2678 /* Go through the AutoConfig list, and figure out which components
2679 belong to what sets. */
2680 ac = ac_list;
2681 while(ac!=NULL) {
2682 /* we're going to putz with ac->next, so save it here
2683 for use at the end of the loop */
2684 ac_next = ac->next;
2685
2686 if (config_sets == NULL) {
2687 /* will need at least this one... */
2688 config_sets = (RF_ConfigSet_t *)
2689 malloc(sizeof(RF_ConfigSet_t),
2690 M_RAIDFRAME, M_NOWAIT);
2691 if (config_sets == NULL) {
2692 panic("rf_create_auto_sets: No memory!\n");
2693 }
2694 /* this one is easy :) */
2695 config_sets->ac = ac;
2696 config_sets->next = NULL;
2697 config_sets->rootable = 0;
2698 ac->next = NULL;
2699 } else {
2700 /* which set does this component fit into? */
2701 cset = config_sets;
2702 while(cset!=NULL) {
2703 if (rf_does_it_fit(cset, ac)) {
2704 /* looks like it matches */
2705 ac->next = cset->ac;
2706 cset->ac = ac;
2707 break;
2708 }
2709 cset = cset->next;
2710 }
2711 if (cset==NULL) {
2712 /* didn't find a match above... new set..*/
2713 cset = (RF_ConfigSet_t *)
2714 malloc(sizeof(RF_ConfigSet_t),
2715 M_RAIDFRAME, M_NOWAIT);
2716 if (cset == NULL) {
2717 panic("rf_create_auto_sets: No memory!\n");
2718 }
2719 cset->ac = ac;
2720 ac->next = NULL;
2721 cset->next = config_sets;
2722 cset->rootable = 0;
2723 config_sets = cset;
2724 }
2725 }
2726 ac = ac_next;
2727 }
2728
2729
2730 return(config_sets);
2731 }
2732
2733 static int
2734 rf_does_it_fit(cset, ac)
2735 RF_ConfigSet_t *cset;
2736 RF_AutoConfig_t *ac;
2737 {
2738 RF_ComponentLabel_t *clabel1, *clabel2;
2739
2740 /* If this one matches the *first* one in the set, that's good
2741 enough, since the other members of the set would have been
2742 through here too... */
2743 /* note that we are not checking partitionSize here..
2744
2745 Note that we are also not checking the mod_counters here.
2746 If everything else matches execpt the mod_counter, that's
2747 good enough for this test. We will deal with the mod_counters
2748 a little later in the autoconfiguration process.
2749
2750 (clabel1->mod_counter == clabel2->mod_counter) &&
2751
2752 */
2753
2754 clabel1 = cset->ac->clabel;
2755 clabel2 = ac->clabel;
2756 if ((clabel1->version == clabel2->version) &&
2757 (clabel1->serial_number == clabel2->serial_number) &&
2758 (clabel1->num_rows == clabel2->num_rows) &&
2759 (clabel1->num_columns == clabel2->num_columns) &&
2760 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2761 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2762 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2763 (clabel1->parityConfig == clabel2->parityConfig) &&
2764 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2765 (clabel1->blockSize == clabel2->blockSize) &&
2766 (clabel1->numBlocks == clabel2->numBlocks) &&
2767 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2768 (clabel1->root_partition == clabel2->root_partition) &&
2769 (clabel1->last_unit == clabel2->last_unit) &&
2770 (clabel1->config_order == clabel2->config_order)) {
2771 /* if it get's here, it almost *has* to be a match */
2772 } else {
2773 /* it's not consistent with somebody in the set..
2774 punt */
2775 return(0);
2776 }
2777 /* all was fine.. it must fit... */
2778 return(1);
2779 }
2780
2781 int
2782 rf_have_enough_components(cset)
2783 RF_ConfigSet_t *cset;
2784 {
2785 RF_AutoConfig_t *ac;
2786 RF_AutoConfig_t *auto_config;
2787 RF_ComponentLabel_t *clabel;
2788 int r,c;
2789 int num_rows;
2790 int num_cols;
2791 int num_missing;
2792
2793 /* check to see that we have enough 'live' components
2794 of this set. If so, we can configure it if necessary */
2795
2796 num_rows = cset->ac->clabel->num_rows;
2797 num_cols = cset->ac->clabel->num_columns;
2798
2799 /* XXX Check for duplicate components!?!?!? */
2800
2801 num_missing = 0;
2802 auto_config = cset->ac;
2803
2804 for(r=0; r<num_rows; r++) {
2805 for(c=0; c<num_cols; c++) {
2806 ac = auto_config;
2807 while(ac!=NULL) {
2808 if (ac->clabel==NULL) {
2809 /* big-time bad news. */
2810 goto fail;
2811 }
2812 if ((ac->clabel->row == r) &&
2813 (ac->clabel->column == c)) {
2814 /* it's this one... */
2815 #if DEBUG
2816 printf("Found: %s at %d,%d\n",
2817 ac->devname,r,c);
2818 #endif
2819 break;
2820 }
2821 ac=ac->next;
2822 }
2823 if (ac==NULL) {
2824 /* Didn't find one here! */
2825 num_missing++;
2826 }
2827 }
2828 }
2829
2830 clabel = cset->ac->clabel;
2831
2832 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
2833 ((clabel->parityConfig == '1') && (num_missing > 1)) ||
2834 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
2835 ((clabel->parityConfig == '5') && (num_missing > 1))) {
2836 /* XXX this needs to be made *much* more general */
2837 /* Too many failures */
2838 return(0);
2839 }
2840 /* otherwise, all is well, and we've got enough to take a kick
2841 at autoconfiguring this set */
2842 return(1);
2843 fail:
2844 return(0);
2845
2846 }
2847
2848 void
2849 rf_create_configuration(ac,config,raidPtr)
2850 RF_AutoConfig_t *ac;
2851 RF_Config_t *config;
2852 RF_Raid_t *raidPtr;
2853 {
2854 RF_ComponentLabel_t *clabel;
2855
2856 clabel = ac->clabel;
2857
2858 /* 1. Fill in the common stuff */
2859 config->numRow = clabel->num_rows;
2860 config->numCol = clabel->num_columns;
2861 config->numSpare = 0; /* XXX should this be set here? */
2862 config->sectPerSU = clabel->sectPerSU;
2863 config->SUsPerPU = clabel->SUsPerPU;
2864 config->SUsPerRU = clabel->SUsPerRU;
2865 config->parityConfig = clabel->parityConfig;
2866 /* XXX... */
2867 strcpy(config->diskQueueType,"fifo");
2868 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
2869 config->layoutSpecificSize = 0; /* XXX ?? */
2870
2871 while(ac!=NULL) {
2872 /* row/col values will be in range due to the checks
2873 in reasonable_label() */
2874 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
2875 ac->devname);
2876 ac = ac->next;
2877 }
2878
2879 }
2880
2881 int
2882 rf_set_autoconfig(raidPtr, new_value)
2883 RF_Raid_t *raidPtr;
2884 int new_value;
2885 {
2886 RF_ComponentLabel_t clabel;
2887 struct vnode *vp;
2888 dev_t dev;
2889 int row, column;
2890
2891 raidPtr->autoconfigure = new_value;
2892 for(row=0; row<raidPtr->numRow; row++) {
2893 for(column=0; column<raidPtr->numCol; column++) {
2894 dev = raidPtr->Disks[row][column].dev;
2895 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2896 raidread_component_label(dev, vp, &clabel);
2897 clabel.autoconfigure = new_value;
2898 raidwrite_component_label(dev, vp, &clabel);
2899 }
2900 }
2901 return(new_value);
2902 }
2903
2904 int
2905 rf_set_rootpartition(raidPtr, new_value)
2906 RF_Raid_t *raidPtr;
2907 int new_value;
2908 {
2909 RF_ComponentLabel_t clabel;
2910 struct vnode *vp;
2911 dev_t dev;
2912 int row, column;
2913
2914 raidPtr->root_partition = new_value;
2915 for(row=0; row<raidPtr->numRow; row++) {
2916 for(column=0; column<raidPtr->numCol; column++) {
2917 dev = raidPtr->Disks[row][column].dev;
2918 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2919 raidread_component_label(dev, vp, &clabel);
2920 clabel.root_partition = new_value;
2921 raidwrite_component_label(dev, vp, &clabel);
2922 }
2923 }
2924 return(new_value);
2925 }
2926
2927 void
2928 rf_release_all_vps(cset)
2929 RF_ConfigSet_t *cset;
2930 {
2931 RF_AutoConfig_t *ac;
2932
2933 ac = cset->ac;
2934 while(ac!=NULL) {
2935 /* Close the vp, and give it back */
2936 if (ac->vp) {
2937 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
2938 vput(ac->vp);
2939 }
2940 ac = ac->next;
2941 }
2942 }
2943
2944
2945 void
2946 rf_cleanup_config_set(cset)
2947 RF_ConfigSet_t *cset;
2948 {
2949 RF_AutoConfig_t *ac;
2950 RF_AutoConfig_t *next_ac;
2951
2952 ac = cset->ac;
2953 while(ac!=NULL) {
2954 next_ac = ac->next;
2955 /* nuke the label */
2956 free(ac->clabel, M_RAIDFRAME);
2957 /* cleanup the config structure */
2958 free(ac, M_RAIDFRAME);
2959 /* "next.." */
2960 ac = next_ac;
2961 }
2962 /* and, finally, nuke the config set */
2963 free(cset, M_RAIDFRAME);
2964 }
2965
2966
2967 void
2968 raid_init_component_label(raidPtr, clabel)
2969 RF_Raid_t *raidPtr;
2970 RF_ComponentLabel_t *clabel;
2971 {
2972 /* current version number */
2973 clabel->version = RF_COMPONENT_LABEL_VERSION;
2974 clabel->serial_number = raidPtr->serial_number;
2975 clabel->mod_counter = raidPtr->mod_counter;
2976 clabel->num_rows = raidPtr->numRow;
2977 clabel->num_columns = raidPtr->numCol;
2978 clabel->clean = RF_RAID_DIRTY; /* not clean */
2979 clabel->status = rf_ds_optimal; /* "It's good!" */
2980
2981 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
2982 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
2983 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
2984
2985 clabel->blockSize = raidPtr->bytesPerSector;
2986 clabel->numBlocks = raidPtr->sectorsPerDisk;
2987
2988 /* XXX not portable */
2989 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
2990 clabel->maxOutstanding = raidPtr->maxOutstanding;
2991 clabel->autoconfigure = raidPtr->autoconfigure;
2992 clabel->root_partition = raidPtr->root_partition;
2993 clabel->last_unit = raidPtr->raidid;
2994 clabel->config_order = raidPtr->config_order;
2995 }
2996
2997 int
2998 rf_auto_config_set(cset,unit)
2999 RF_ConfigSet_t *cset;
3000 int *unit;
3001 {
3002 RF_Raid_t *raidPtr;
3003 RF_Config_t *config;
3004 int raidID;
3005 int retcode;
3006
3007 printf("Starting autoconfigure on raid%d\n",raidID);
3008
3009 retcode = 0;
3010 *unit = -1;
3011
3012 /* 1. Create a config structure */
3013
3014 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3015 M_RAIDFRAME,
3016 M_NOWAIT);
3017 if (config==NULL) {
3018 printf("Out of mem!?!?\n");
3019 /* XXX do something more intelligent here. */
3020 return(1);
3021 }
3022 /* XXX raidID needs to be set correctly.. */
3023
3024 /*
3025 2. Figure out what RAID ID this one is supposed to live at
3026 See if we can get the same RAID dev that it was configured
3027 on last time..
3028 */
3029
3030 raidID = cset->ac->clabel->last_unit;
3031 if ((raidID < 0) || (raidID >= numraid)) {
3032 /* let's not wander off into lala land. */
3033 raidID = numraid - 1;
3034 }
3035 if (raidPtrs[raidID]->valid != 0) {
3036
3037 /*
3038 Nope... Go looking for an alternative...
3039 Start high so we don't immediately use raid0 if that's
3040 not taken.
3041 */
3042
3043 for(raidID = numraid; raidID >= 0; raidID--) {
3044 if (raidPtrs[raidID]->valid == 0) {
3045 /* can use this one! */
3046 break;
3047 }
3048 }
3049 }
3050
3051 if (raidID < 0) {
3052 /* punt... */
3053 printf("Unable to auto configure this set!\n");
3054 printf("(Out of RAID devs!)\n");
3055 return(1);
3056 }
3057
3058 raidPtr = raidPtrs[raidID];
3059
3060 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3061 raidPtr->raidid = raidID;
3062 raidPtr->openings = RAIDOUTSTANDING;
3063
3064 /* 3. Build the configuration structure */
3065 rf_create_configuration(cset->ac, config, raidPtr);
3066
3067 /* 4. Do the configuration */
3068 retcode = rf_Configure(raidPtr, config, cset->ac);
3069
3070 if (retcode == 0) {
3071
3072 raidinit(raidPtrs[raidID]);
3073
3074 rf_markalldirty(raidPtrs[raidID]);
3075 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3076 if (cset->ac->clabel->root_partition==1) {
3077 /* everything configured just fine. Make a note
3078 that this set is eligible to be root. */
3079 cset->rootable = 1;
3080 /* XXX do this here? */
3081 raidPtrs[raidID]->root_partition = 1;
3082 }
3083 }
3084
3085 /* 5. Cleanup */
3086 free(config, M_RAIDFRAME);
3087
3088 *unit = raidID;
3089 return(retcode);
3090 }
3091