rf_netbsdkintf.c revision 1.61 1 /* $NetBSD: rf_netbsdkintf.c,v 1.61 2000/02/25 20:11:00 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_copyback.h"
141 #include "rf_dag.h"
142 #include "rf_dagflags.h"
143 #include "rf_diskqueue.h"
144 #include "rf_acctrace.h"
145 #include "rf_etimer.h"
146 #include "rf_general.h"
147 #include "rf_debugMem.h"
148 #include "rf_kintf.h"
149 #include "rf_options.h"
150 #include "rf_driver.h"
151 #include "rf_parityscan.h"
152 #include "rf_debugprint.h"
153 #include "rf_threadstuff.h"
154
155 int rf_kdebug_level = 0;
156
/*
 * db1_printf((fmt, args...)) -- level-1 debug printf.  Note the doubled
 * parentheses: the single macro argument is a complete printf argument list.
 *
 * With DEBUG defined the message is printed only while rf_kdebug_level is
 * greater than zero; without DEBUG the macro expands to an empty statement.
 * Both forms are wrapped in do { } while (0) so that the macro behaves as
 * exactly one statement and can be used in an unbraced if/else without
 * capturing the else branch or producing a syntax error.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else /* DEBUG */
#define db1_printf(a) do { } while (0)
#endif /* DEBUG */
162
163 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
164
165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
166
167 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
168 * spare table */
169 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
170 * installation process */
171
172 /* prototypes */
173 static void KernelWakeupFunc(struct buf * bp);
174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
175 dev_t dev, RF_SectorNum_t startSect,
176 RF_SectorCount_t numSect, caddr_t buf,
177 void (*cbFunc) (struct buf *), void *cbArg,
178 int logBytesPerSector, struct proc * b_proc);
179 static void raidinit __P((RF_Raid_t *));
180
181 void raidattach __P((int));
182 int raidsize __P((dev_t));
183 int raidopen __P((dev_t, int, int, struct proc *));
184 int raidclose __P((dev_t, int, int, struct proc *));
185 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
186 int raidwrite __P((dev_t, struct uio *, int));
187 int raidread __P((dev_t, struct uio *, int));
188 void raidstrategy __P((struct buf *));
189 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
190
191 /*
192 * Pilfered from ccd.c
193 */
194
195 struct raidbuf {
196 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
197 struct buf *rf_obp; /* ptr. to original I/O buf */
198 int rf_flags; /* misc. flags */
199 RF_DiskQueueData_t *req;/* the request that this was part of.. */
200 };
201
202
203 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
204 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
205
206 /* XXX Not sure if the following should be replacing the raidPtrs above,
207 or if it should be used in conjunction with that...
208 */
209
210 struct raid_softc {
211 int sc_flags; /* flags */
212 int sc_cflags; /* configuration flags */
213 size_t sc_size; /* size of the raid device */
214 char sc_xname[20]; /* XXX external name */
215 struct disk sc_dkdev; /* generic disk device info */
216 struct pool sc_cbufpool; /* component buffer pool */
217 struct buf_queue buf_queue; /* used for the device queue */
218 };
219 /* sc_flags */
220 #define RAIDF_INITED 0x01 /* unit has been initialized */
221 #define RAIDF_WLABEL 0x02 /* label area is writable */
222 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
223 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
224 #define RAIDF_LOCKED 0x80 /* unit is locked */
225
226 #define raidunit(x) DISKUNIT(x)
227 int numraid = 0;
228
229 /*
230 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
231 * Be aware that large numbers can allow the driver to consume a lot of
232 * kernel memory, especially on writes, and in degraded mode reads.
233 *
234 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
235 * a single 64K write will typically require 64K for the old data,
236 * 64K for the old parity, and 64K for the new parity, for a total
237 * of 192K (if the parity buffer is not re-used immediately).
238 * Even if it is used immediately, that's still 128K, which when multiplied
239 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
240 *
241 * Now in degraded mode, for example, a 64K read on the above setup may
242 * require data reconstruction, which will require *all* of the 4 remaining
243 * disks to participate -- 4 * 32K/disk == 128K again.
244 */
245
246 #ifndef RAIDOUTSTANDING
247 #define RAIDOUTSTANDING 6
248 #endif
249
250 #define RAIDLABELDEV(dev) \
251 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
252
253 /* declared here, and made public, for the benefit of KVM stuff.. */
254 struct raid_softc *raid_softc;
255
256 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
257 struct disklabel *));
258 static void raidgetdisklabel __P((dev_t));
259 static void raidmakedisklabel __P((struct raid_softc *));
260
261 static int raidlock __P((struct raid_softc *));
262 static void raidunlock __P((struct raid_softc *));
263
264 static void rf_markalldirty __P((RF_Raid_t *));
265 void rf_mountroot_hook __P((struct device *));
266
267 struct device *raidrootdev;
268 struct cfdata cf_raidrootdev;
269 struct cfdriver cfdrv;
270 /* XXX these should be moved up */
271 #include "rf_configure.h"
272 #include <sys/reboot.h>
273
274 void rf_ReconThread __P((struct rf_recon_req *));
275 /* XXX what I want is: */
276 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
277 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
278 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
279 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
280 void rf_buildroothack __P((void *));
281
282 RF_AutoConfig_t *rf_find_raid_components __P((void));
283 void print_component_label __P((RF_ComponentLabel_t *));
284 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
285 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
286 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
287 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
288 RF_Raid_t *));
289 int rf_set_autoconfig __P((RF_Raid_t *, int));
290 int rf_set_rootpartition __P((RF_Raid_t *, int));
291 void rf_release_all_vps __P((RF_ConfigSet_t *));
292 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
293 int rf_have_enough_components __P((RF_ConfigSet_t *));
294 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
295
296 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
297 allow autoconfig to take place */
298 extern struct device *booted_device;
299
300 void
301 raidattach(num)
302 int num;
303 {
304 int raidID;
305 int i, rc;
306 RF_AutoConfig_t *ac_list; /* autoconfig list */
307 RF_ConfigSet_t *config_sets;
308
309 #ifdef DEBUG
310 printf("raidattach: Asked for %d units\n", num);
311 #endif
312
313 if (num <= 0) {
314 #ifdef DIAGNOSTIC
315 panic("raidattach: count <= 0");
316 #endif
317 return;
318 }
319 /* This is where all the initialization stuff gets done. */
320
321 numraid = num;
322
323 /* Make some space for requested number of units... */
324
325 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
326 if (raidPtrs == NULL) {
327 panic("raidPtrs is NULL!!\n");
328 }
329
330 rc = rf_mutex_init(&rf_sparet_wait_mutex);
331 if (rc) {
332 RF_PANIC();
333 }
334
335 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
336
337 for (i = 0; i < num; i++)
338 raidPtrs[i] = NULL;
339 rc = rf_BootRaidframe();
340 if (rc == 0)
341 printf("Kernelized RAIDframe activated\n");
342 else
343 panic("Serious error booting RAID!!\n");
344
345 /* put together some datastructures like the CCD device does.. This
346 * lets us lock the device and what-not when it gets opened. */
347
348 raid_softc = (struct raid_softc *)
349 malloc(num * sizeof(struct raid_softc),
350 M_RAIDFRAME, M_NOWAIT);
351 if (raid_softc == NULL) {
352 printf("WARNING: no memory for RAIDframe driver\n");
353 return;
354 }
355
356 bzero(raid_softc, num * sizeof(struct raid_softc));
357
358 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
359 M_RAIDFRAME, M_NOWAIT);
360 if (raidrootdev == NULL) {
361 panic("No memory for RAIDframe driver!!?!?!\n");
362 }
363
364 for (raidID = 0; raidID < num; raidID++) {
365 BUFQ_INIT(&raid_softc[raidID].buf_queue);
366
367 raidrootdev[raidID].dv_class = DV_DISK;
368 raidrootdev[raidID].dv_cfdata = NULL;
369 raidrootdev[raidID].dv_unit = raidID;
370 raidrootdev[raidID].dv_parent = NULL;
371 raidrootdev[raidID].dv_flags = 0;
372 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
373
374 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
375 (RF_Raid_t *));
376 if (raidPtrs[raidID] == NULL) {
377 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
378 numraid = raidID;
379 return;
380 }
381 }
382
383 if (raidautoconfig) {
384 /* 1. locate all RAID components on the system */
385
386 #if DEBUG
387 printf("Searching for raid components...\n");
388 #endif
389 ac_list = rf_find_raid_components();
390
391 /* 2. sort them into their respective sets */
392
393 config_sets = rf_create_auto_sets(ac_list);
394
395 /* 3. evaluate each set and configure the valid ones
396 This gets done in rf_buildroothack() */
397
398 /* schedule the creation of the thread to do the
399 "/ on RAID" stuff */
400
401 kthread_create(rf_buildroothack,config_sets);
402
403 #if 0
404 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
405 #endif
406 }
407
408 }
409
410 void
411 rf_buildroothack(arg)
412 void *arg;
413 {
414 RF_ConfigSet_t *config_sets = arg;
415 RF_ConfigSet_t *cset;
416 RF_ConfigSet_t *next_cset;
417 int retcode;
418 int raidID;
419 int rootID;
420 int num_root;
421
422 num_root = 0;
423 cset = config_sets;
424 while(cset != NULL ) {
425 next_cset = cset->next;
426 if (rf_have_enough_components(cset) &&
427 cset->ac->clabel->autoconfigure==1) {
428 retcode = rf_auto_config_set(cset,&raidID);
429 if (!retcode) {
430 if (cset->rootable) {
431 rootID = raidID;
432 num_root++;
433 }
434 } else {
435 /* The autoconfig didn't work :( */
436 #if DEBUG
437 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
438 #endif
439 rf_release_all_vps(cset);
440 }
441 } else {
442 /* we're not autoconfiguring this set...
443 release the associated resources */
444 rf_release_all_vps(cset);
445 }
446 /* cleanup */
447 rf_cleanup_config_set(cset);
448 cset = next_cset;
449 }
450 if (boothowto & RB_ASKNAME) {
451 /* We don't auto-config... */
452 } else {
453 /* They didn't ask, and we found something bootable... */
454
455 if (num_root == 1) {
456 booted_device = &raidrootdev[rootID];
457 } else if (num_root > 1) {
458 /* we can't guess.. require the user to answer... */
459 boothowto |= RB_ASKNAME;
460 }
461 }
462 }
463
464
465 int
466 raidsize(dev)
467 dev_t dev;
468 {
469 struct raid_softc *rs;
470 struct disklabel *lp;
471 int part, unit, omask, size;
472
473 unit = raidunit(dev);
474 if (unit >= numraid)
475 return (-1);
476 rs = &raid_softc[unit];
477
478 if ((rs->sc_flags & RAIDF_INITED) == 0)
479 return (-1);
480
481 part = DISKPART(dev);
482 omask = rs->sc_dkdev.dk_openmask & (1 << part);
483 lp = rs->sc_dkdev.dk_label;
484
485 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
486 return (-1);
487
488 if (lp->d_partitions[part].p_fstype != FS_SWAP)
489 size = -1;
490 else
491 size = lp->d_partitions[part].p_size *
492 (lp->d_secsize / DEV_BSIZE);
493
494 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
495 return (-1);
496
497 return (size);
498
499 }
500
501 int
502 raiddump(dev, blkno, va, size)
503 dev_t dev;
504 daddr_t blkno;
505 caddr_t va;
506 size_t size;
507 {
508 /* Not implemented. */
509 return ENXIO;
510 }
511 /* ARGSUSED */
512 int
513 raidopen(dev, flags, fmt, p)
514 dev_t dev;
515 int flags, fmt;
516 struct proc *p;
517 {
518 int unit = raidunit(dev);
519 struct raid_softc *rs;
520 struct disklabel *lp;
521 int part, pmask;
522 int error = 0;
523
524 if (unit >= numraid)
525 return (ENXIO);
526 rs = &raid_softc[unit];
527
528 if ((error = raidlock(rs)) != 0)
529 return (error);
530 lp = rs->sc_dkdev.dk_label;
531
532 part = DISKPART(dev);
533 pmask = (1 << part);
534
535 db1_printf(("Opening raid device number: %d partition: %d\n",
536 unit, part));
537
538
539 if ((rs->sc_flags & RAIDF_INITED) &&
540 (rs->sc_dkdev.dk_openmask == 0))
541 raidgetdisklabel(dev);
542
543 /* make sure that this partition exists */
544
545 if (part != RAW_PART) {
546 db1_printf(("Not a raw partition..\n"));
547 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
548 ((part >= lp->d_npartitions) ||
549 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
550 error = ENXIO;
551 raidunlock(rs);
552 db1_printf(("Bailing out...\n"));
553 return (error);
554 }
555 }
556 /* Prevent this unit from being unconfigured while open. */
557 switch (fmt) {
558 case S_IFCHR:
559 rs->sc_dkdev.dk_copenmask |= pmask;
560 break;
561
562 case S_IFBLK:
563 rs->sc_dkdev.dk_bopenmask |= pmask;
564 break;
565 }
566
567 if ((rs->sc_dkdev.dk_openmask == 0) &&
568 ((rs->sc_flags & RAIDF_INITED) != 0)) {
569 /* First one... mark things as dirty... Note that we *MUST*
570 have done a configure before this. I DO NOT WANT TO BE
571 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
572 THAT THEY BELONG TOGETHER!!!!! */
573 /* XXX should check to see if we're only open for reading
574 here... If so, we needn't do this, but then need some
575 other way of keeping track of what's happened.. */
576
577 rf_markalldirty( raidPtrs[unit] );
578 }
579
580
581 rs->sc_dkdev.dk_openmask =
582 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
583
584 raidunlock(rs);
585
586 return (error);
587
588
589 }
590 /* ARGSUSED */
591 int
592 raidclose(dev, flags, fmt, p)
593 dev_t dev;
594 int flags, fmt;
595 struct proc *p;
596 {
597 int unit = raidunit(dev);
598 struct raid_softc *rs;
599 int error = 0;
600 int part;
601
602 if (unit >= numraid)
603 return (ENXIO);
604 rs = &raid_softc[unit];
605
606 if ((error = raidlock(rs)) != 0)
607 return (error);
608
609 part = DISKPART(dev);
610
611 /* ...that much closer to allowing unconfiguration... */
612 switch (fmt) {
613 case S_IFCHR:
614 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
615 break;
616
617 case S_IFBLK:
618 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
619 break;
620 }
621 rs->sc_dkdev.dk_openmask =
622 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
623
624 if ((rs->sc_dkdev.dk_openmask == 0) &&
625 ((rs->sc_flags & RAIDF_INITED) != 0)) {
626 /* Last one... device is not unconfigured yet.
627 Device shutdown has taken care of setting the
628 clean bits if RAIDF_INITED is not set
629 mark things as clean... */
630 #if 0
631 printf("Last one on raid%d. Updating status.\n",unit);
632 #endif
633 rf_update_component_labels( raidPtrs[unit] );
634 }
635
636 raidunlock(rs);
637 return (0);
638
639 }
640
641 void
642 raidstrategy(bp)
643 register struct buf *bp;
644 {
645 register int s;
646
647 unsigned int raidID = raidunit(bp->b_dev);
648 RF_Raid_t *raidPtr;
649 struct raid_softc *rs = &raid_softc[raidID];
650 struct disklabel *lp;
651 int wlabel;
652
653 if ((rs->sc_flags & RAIDF_INITED) ==0) {
654 bp->b_error = ENXIO;
655 bp->b_flags = B_ERROR;
656 bp->b_resid = bp->b_bcount;
657 biodone(bp);
658 return;
659 }
660 if (raidID >= numraid || !raidPtrs[raidID]) {
661 bp->b_error = ENODEV;
662 bp->b_flags |= B_ERROR;
663 bp->b_resid = bp->b_bcount;
664 biodone(bp);
665 return;
666 }
667 raidPtr = raidPtrs[raidID];
668 if (!raidPtr->valid) {
669 bp->b_error = ENODEV;
670 bp->b_flags |= B_ERROR;
671 bp->b_resid = bp->b_bcount;
672 biodone(bp);
673 return;
674 }
675 if (bp->b_bcount == 0) {
676 db1_printf(("b_bcount is zero..\n"));
677 biodone(bp);
678 return;
679 }
680 lp = rs->sc_dkdev.dk_label;
681
682 /*
683 * Do bounds checking and adjust transfer. If there's an
684 * error, the bounds check will flag that for us.
685 */
686
687 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
688 if (DISKPART(bp->b_dev) != RAW_PART)
689 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
690 db1_printf(("Bounds check failed!!:%d %d\n",
691 (int) bp->b_blkno, (int) wlabel));
692 biodone(bp);
693 return;
694 }
695 s = splbio();
696
697 bp->b_resid = 0;
698
699 /* stuff it onto our queue */
700 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
701
702 raidstart(raidPtrs[raidID]);
703
704 splx(s);
705 }
706 /* ARGSUSED */
707 int
708 raidread(dev, uio, flags)
709 dev_t dev;
710 struct uio *uio;
711 int flags;
712 {
713 int unit = raidunit(dev);
714 struct raid_softc *rs;
715 int part;
716
717 if (unit >= numraid)
718 return (ENXIO);
719 rs = &raid_softc[unit];
720
721 if ((rs->sc_flags & RAIDF_INITED) == 0)
722 return (ENXIO);
723 part = DISKPART(dev);
724
725 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
726
727 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
728
729 }
730 /* ARGSUSED */
731 int
732 raidwrite(dev, uio, flags)
733 dev_t dev;
734 struct uio *uio;
735 int flags;
736 {
737 int unit = raidunit(dev);
738 struct raid_softc *rs;
739
740 if (unit >= numraid)
741 return (ENXIO);
742 rs = &raid_softc[unit];
743
744 if ((rs->sc_flags & RAIDF_INITED) == 0)
745 return (ENXIO);
746 db1_printf(("raidwrite\n"));
747 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
748
749 }
750
751 int
752 raidioctl(dev, cmd, data, flag, p)
753 dev_t dev;
754 u_long cmd;
755 caddr_t data;
756 int flag;
757 struct proc *p;
758 {
759 int unit = raidunit(dev);
760 int error = 0;
761 int part, pmask;
762 struct raid_softc *rs;
763 RF_Config_t *k_cfg, *u_cfg;
764 RF_Raid_t *raidPtr;
765 RF_RaidDisk_t *diskPtr;
766 RF_AccTotals_t *totals;
767 RF_DeviceConfig_t *d_cfg, **ucfgp;
768 u_char *specific_buf;
769 int retcode = 0;
770 int row;
771 int column;
772 struct rf_recon_req *rrcopy, *rr;
773 RF_ComponentLabel_t *clabel;
774 RF_ComponentLabel_t ci_label;
775 RF_ComponentLabel_t **clabel_ptr;
776 RF_SingleComponent_t *sparePtr,*componentPtr;
777 RF_SingleComponent_t hot_spare;
778 RF_SingleComponent_t component;
779 int i, j, d;
780
781 if (unit >= numraid)
782 return (ENXIO);
783 rs = &raid_softc[unit];
784 raidPtr = raidPtrs[unit];
785
786 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
787 (int) DISKPART(dev), (int) unit, (int) cmd));
788
789 /* Must be open for writes for these commands... */
790 switch (cmd) {
791 case DIOCSDINFO:
792 case DIOCWDINFO:
793 case DIOCWLABEL:
794 if ((flag & FWRITE) == 0)
795 return (EBADF);
796 }
797
798 /* Must be initialized for these... */
799 switch (cmd) {
800 case DIOCGDINFO:
801 case DIOCSDINFO:
802 case DIOCWDINFO:
803 case DIOCGPART:
804 case DIOCWLABEL:
805 case DIOCGDEFLABEL:
806 case RAIDFRAME_SHUTDOWN:
807 case RAIDFRAME_REWRITEPARITY:
808 case RAIDFRAME_GET_INFO:
809 case RAIDFRAME_RESET_ACCTOTALS:
810 case RAIDFRAME_GET_ACCTOTALS:
811 case RAIDFRAME_KEEP_ACCTOTALS:
812 case RAIDFRAME_GET_SIZE:
813 case RAIDFRAME_FAIL_DISK:
814 case RAIDFRAME_COPYBACK:
815 case RAIDFRAME_CHECK_RECON_STATUS:
816 case RAIDFRAME_GET_COMPONENT_LABEL:
817 case RAIDFRAME_SET_COMPONENT_LABEL:
818 case RAIDFRAME_ADD_HOT_SPARE:
819 case RAIDFRAME_REMOVE_HOT_SPARE:
820 case RAIDFRAME_INIT_LABELS:
821 case RAIDFRAME_REBUILD_IN_PLACE:
822 case RAIDFRAME_CHECK_PARITY:
823 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
824 case RAIDFRAME_CHECK_COPYBACK_STATUS:
825 case RAIDFRAME_SET_AUTOCONFIG:
826 case RAIDFRAME_SET_ROOT:
827 if ((rs->sc_flags & RAIDF_INITED) == 0)
828 return (ENXIO);
829 }
830
831 switch (cmd) {
832
833 /* configure the system */
834 case RAIDFRAME_CONFIGURE:
835
836 if (raidPtr->valid) {
837 /* There is a valid RAID set running on this unit! */
838 printf("raid%d: Device already configured!\n",unit);
839 }
840
841 /* copy-in the configuration information */
842 /* data points to a pointer to the configuration structure */
843
844 u_cfg = *((RF_Config_t **) data);
845 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
846 if (k_cfg == NULL) {
847 return (ENOMEM);
848 }
849 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
850 sizeof(RF_Config_t));
851 if (retcode) {
852 RF_Free(k_cfg, sizeof(RF_Config_t));
853 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
854 retcode));
855 return (retcode);
856 }
857 /* allocate a buffer for the layout-specific data, and copy it
858 * in */
859 if (k_cfg->layoutSpecificSize) {
860 if (k_cfg->layoutSpecificSize > 10000) {
861 /* sanity check */
862 RF_Free(k_cfg, sizeof(RF_Config_t));
863 return (EINVAL);
864 }
865 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
866 (u_char *));
867 if (specific_buf == NULL) {
868 RF_Free(k_cfg, sizeof(RF_Config_t));
869 return (ENOMEM);
870 }
871 retcode = copyin(k_cfg->layoutSpecific,
872 (caddr_t) specific_buf,
873 k_cfg->layoutSpecificSize);
874 if (retcode) {
875 RF_Free(k_cfg, sizeof(RF_Config_t));
876 RF_Free(specific_buf,
877 k_cfg->layoutSpecificSize);
878 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
879 retcode));
880 return (retcode);
881 }
882 } else
883 specific_buf = NULL;
884 k_cfg->layoutSpecific = specific_buf;
885
886 /* should do some kind of sanity check on the configuration.
887 * Store the sum of all the bytes in the last byte? */
888
889 /* configure the system */
890
891 /*
892 * Clear the entire RAID descriptor, just to make sure
893 * there is no stale data left in the case of a
894 * reconfiguration
895 */
896 bzero((char *) raidPtr, sizeof(RF_Raid_t));
897 raidPtr->raidid = unit;
898
899 retcode = rf_Configure(raidPtr, k_cfg, NULL);
900
901 if (retcode == 0) {
902
903 /* allow this many simultaneous IO's to
904 this RAID device */
905 raidPtr->openings = RAIDOUTSTANDING;
906
907 raidinit(raidPtr);
908 rf_markalldirty(raidPtr);
909 }
910 /* free the buffers. No return code here. */
911 if (k_cfg->layoutSpecificSize) {
912 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
913 }
914 RF_Free(k_cfg, sizeof(RF_Config_t));
915
916 return (retcode);
917
918 /* shutdown the system */
919 case RAIDFRAME_SHUTDOWN:
920
921 if ((error = raidlock(rs)) != 0)
922 return (error);
923
924 /*
925 * If somebody has a partition mounted, we shouldn't
926 * shutdown.
927 */
928
929 part = DISKPART(dev);
930 pmask = (1 << part);
931 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
932 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
933 (rs->sc_dkdev.dk_copenmask & pmask))) {
934 raidunlock(rs);
935 return (EBUSY);
936 }
937
938 retcode = rf_Shutdown(raidPtr);
939
940 pool_destroy(&rs->sc_cbufpool);
941
942 /* It's no longer initialized... */
943 rs->sc_flags &= ~RAIDF_INITED;
944
945 /* Detach the disk. */
946 disk_detach(&rs->sc_dkdev);
947
948 raidunlock(rs);
949
950 return (retcode);
951 case RAIDFRAME_GET_COMPONENT_LABEL:
952 clabel_ptr = (RF_ComponentLabel_t **) data;
953 /* need to read the component label for the disk indicated
954 by row,column in clabel */
955
956 /* For practice, let's get it directly fromdisk, rather
957 than from the in-core copy */
958 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
959 (RF_ComponentLabel_t *));
960 if (clabel == NULL)
961 return (ENOMEM);
962
963 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
964
965 retcode = copyin( *clabel_ptr, clabel,
966 sizeof(RF_ComponentLabel_t));
967
968 if (retcode) {
969 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
970 return(retcode);
971 }
972
973 row = clabel->row;
974 column = clabel->column;
975
976 if ((row < 0) || (row >= raidPtr->numRow) ||
977 (column < 0) || (column >= raidPtr->numCol)) {
978 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
979 return(EINVAL);
980 }
981
982 raidread_component_label(raidPtr->Disks[row][column].dev,
983 raidPtr->raid_cinfo[row][column].ci_vp,
984 clabel );
985
986 retcode = copyout((caddr_t) clabel,
987 (caddr_t) *clabel_ptr,
988 sizeof(RF_ComponentLabel_t));
989 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
990 return (retcode);
991
992 case RAIDFRAME_SET_COMPONENT_LABEL:
993 clabel = (RF_ComponentLabel_t *) data;
994
995 /* XXX check the label for valid stuff... */
996 /* Note that some things *should not* get modified --
997 the user should be re-initing the labels instead of
998 trying to patch things.
999 */
1000
1001 printf("Got component label:\n");
1002 printf("Version: %d\n",clabel->version);
1003 printf("Serial Number: %d\n",clabel->serial_number);
1004 printf("Mod counter: %d\n",clabel->mod_counter);
1005 printf("Row: %d\n", clabel->row);
1006 printf("Column: %d\n", clabel->column);
1007 printf("Num Rows: %d\n", clabel->num_rows);
1008 printf("Num Columns: %d\n", clabel->num_columns);
1009 printf("Clean: %d\n", clabel->clean);
1010 printf("Status: %d\n", clabel->status);
1011
1012 row = clabel->row;
1013 column = clabel->column;
1014
1015 if ((row < 0) || (row >= raidPtr->numRow) ||
1016 (column < 0) || (column >= raidPtr->numCol)) {
1017 return(EINVAL);
1018 }
1019
1020 /* XXX this isn't allowed to do anything for now :-) */
1021
1022 /* XXX and before it is, we need to fill in the rest
1023 of the fields!?!?!?! */
1024 #if 0
1025 raidwrite_component_label(
1026 raidPtr->Disks[row][column].dev,
1027 raidPtr->raid_cinfo[row][column].ci_vp,
1028 clabel );
1029 #endif
1030 return (0);
1031
1032 case RAIDFRAME_INIT_LABELS:
1033 clabel = (RF_ComponentLabel_t *) data;
1034 /*
1035 we only want the serial number from
1036 the above. We get all the rest of the information
1037 from the config that was used to create this RAID
1038 set.
1039 */
1040
1041 raidPtr->serial_number = clabel->serial_number;
1042
1043 raid_init_component_label(raidPtr, &ci_label);
1044 ci_label.serial_number = clabel->serial_number;
1045
1046 for(row=0;row<raidPtr->numRow;row++) {
1047 ci_label.row = row;
1048 for(column=0;column<raidPtr->numCol;column++) {
1049 diskPtr = &raidPtr->Disks[row][column];
1050 ci_label.partitionSize = diskPtr->partitionSize;
1051 ci_label.column = column;
1052 raidwrite_component_label(
1053 raidPtr->Disks[row][column].dev,
1054 raidPtr->raid_cinfo[row][column].ci_vp,
1055 &ci_label );
1056 }
1057 }
1058
1059 return (retcode);
1060 case RAIDFRAME_SET_AUTOCONFIG:
1061 d = rf_set_autoconfig(raidPtr, *data);
1062 printf("New autoconfig value is: %d\n", d);
1063 *data = d;
1064 return (retcode);
1065
1066 case RAIDFRAME_SET_ROOT:
1067 d = rf_set_rootpartition(raidPtr, *data);
1068 printf("New rootpartition value is: %d\n", d);
1069 *data = d;
1070 return (retcode);
1071
1072 /* initialize all parity */
1073 case RAIDFRAME_REWRITEPARITY:
1074
1075 if (raidPtr->Layout.map->faultsTolerated == 0) {
1076 /* Parity for RAID 0 is trivially correct */
1077 raidPtr->parity_good = RF_RAID_CLEAN;
1078 return(0);
1079 }
1080
1081 if (raidPtr->parity_rewrite_in_progress == 1) {
1082 /* Re-write is already in progress! */
1083 return(EINVAL);
1084 }
1085
1086 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1087 rf_RewriteParityThread,
1088 raidPtr,"raid_parity");
1089 return (retcode);
1090
1091
1092 case RAIDFRAME_ADD_HOT_SPARE:
1093 sparePtr = (RF_SingleComponent_t *) data;
1094 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1095 printf("Adding spare\n");
1096 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1097 return(retcode);
1098
1099 case RAIDFRAME_REMOVE_HOT_SPARE:
1100 return(retcode);
1101
1102 case RAIDFRAME_REBUILD_IN_PLACE:
1103
1104 if (raidPtr->Layout.map->faultsTolerated == 0) {
1105 /* Can't do this on a RAID 0!! */
1106 return(EINVAL);
1107 }
1108
1109 if (raidPtr->recon_in_progress == 1) {
1110 /* a reconstruct is already in progress! */
1111 return(EINVAL);
1112 }
1113
1114 componentPtr = (RF_SingleComponent_t *) data;
1115 memcpy( &component, componentPtr,
1116 sizeof(RF_SingleComponent_t));
1117 row = component.row;
1118 column = component.column;
1119 printf("Rebuild: %d %d\n",row, column);
1120 if ((row < 0) || (row >= raidPtr->numRow) ||
1121 (column < 0) || (column >= raidPtr->numCol)) {
1122 return(EINVAL);
1123 }
1124
1125 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1126 if (rrcopy == NULL)
1127 return(ENOMEM);
1128
1129 rrcopy->raidPtr = (void *) raidPtr;
1130 rrcopy->row = row;
1131 rrcopy->col = column;
1132
1133 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1134 rf_ReconstructInPlaceThread,
1135 rrcopy,"raid_reconip");
1136 return(retcode);
1137
1138 case RAIDFRAME_GET_INFO:
1139 if (!raidPtr->valid)
1140 return (ENODEV);
1141 ucfgp = (RF_DeviceConfig_t **) data;
1142 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1143 (RF_DeviceConfig_t *));
1144 if (d_cfg == NULL)
1145 return (ENOMEM);
1146 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1147 d_cfg->rows = raidPtr->numRow;
1148 d_cfg->cols = raidPtr->numCol;
1149 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1150 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1151 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1152 return (ENOMEM);
1153 }
1154 d_cfg->nspares = raidPtr->numSpare;
1155 if (d_cfg->nspares >= RF_MAX_DISKS) {
1156 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1157 return (ENOMEM);
1158 }
1159 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1160 d = 0;
1161 for (i = 0; i < d_cfg->rows; i++) {
1162 for (j = 0; j < d_cfg->cols; j++) {
1163 d_cfg->devs[d] = raidPtr->Disks[i][j];
1164 d++;
1165 }
1166 }
1167 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1168 d_cfg->spares[i] = raidPtr->Disks[0][j];
1169 }
1170 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1171 sizeof(RF_DeviceConfig_t));
1172 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1173
1174 return (retcode);
1175
1176 case RAIDFRAME_CHECK_PARITY:
1177 *(int *) data = raidPtr->parity_good;
1178 return (0);
1179
1180 case RAIDFRAME_RESET_ACCTOTALS:
1181 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1182 return (0);
1183
1184 case RAIDFRAME_GET_ACCTOTALS:
1185 totals = (RF_AccTotals_t *) data;
1186 *totals = raidPtr->acc_totals;
1187 return (0);
1188
1189 case RAIDFRAME_KEEP_ACCTOTALS:
1190 raidPtr->keep_acc_totals = *(int *)data;
1191 return (0);
1192
1193 case RAIDFRAME_GET_SIZE:
1194 *(int *) data = raidPtr->totalSectors;
1195 return (0);
1196
1197 /* fail a disk & optionally start reconstruction */
1198 case RAIDFRAME_FAIL_DISK:
1199
1200 if (raidPtr->Layout.map->faultsTolerated == 0) {
1201 /* Can't do this on a RAID 0!! */
1202 return(EINVAL);
1203 }
1204
1205 rr = (struct rf_recon_req *) data;
1206
1207 if (rr->row < 0 || rr->row >= raidPtr->numRow
1208 || rr->col < 0 || rr->col >= raidPtr->numCol)
1209 return (EINVAL);
1210
1211 printf("raid%d: Failing the disk: row: %d col: %d\n",
1212 unit, rr->row, rr->col);
1213
1214 /* make a copy of the recon request so that we don't rely on
1215 * the user's buffer */
1216 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1217 if (rrcopy == NULL)
1218 return(ENOMEM);
1219 bcopy(rr, rrcopy, sizeof(*rr));
1220 rrcopy->raidPtr = (void *) raidPtr;
1221
1222 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1223 rf_ReconThread,
1224 rrcopy,"raid_recon");
1225 return (0);
1226
1227 /* invoke a copyback operation after recon on whatever disk
1228 * needs it, if any */
1229 case RAIDFRAME_COPYBACK:
1230
1231 if (raidPtr->Layout.map->faultsTolerated == 0) {
1232 /* This makes no sense on a RAID 0!! */
1233 return(EINVAL);
1234 }
1235
1236 if (raidPtr->copyback_in_progress == 1) {
1237 /* Copyback is already in progress! */
1238 return(EINVAL);
1239 }
1240
1241 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1242 rf_CopybackThread,
1243 raidPtr,"raid_copyback");
1244 return (retcode);
1245
1246 /* return the percentage completion of reconstruction */
1247 case RAIDFRAME_CHECK_RECON_STATUS:
1248 if (raidPtr->Layout.map->faultsTolerated == 0) {
1249 /* This makes no sense on a RAID 0 */
1250 return(EINVAL);
1251 }
1252 row = 0; /* XXX we only consider a single row... */
1253 if (raidPtr->status[row] != rf_rs_reconstructing)
1254 *(int *) data = 100;
1255 else
1256 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1257 return (0);
1258
1259 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1260 if (raidPtr->Layout.map->faultsTolerated == 0) {
1261 /* This makes no sense on a RAID 0 */
1262 return(EINVAL);
1263 }
1264 if (raidPtr->parity_rewrite_in_progress == 1) {
1265 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1266 } else {
1267 *(int *) data = 100;
1268 }
1269 return (0);
1270
1271 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1272 if (raidPtr->Layout.map->faultsTolerated == 0) {
1273 /* This makes no sense on a RAID 0 */
1274 return(EINVAL);
1275 }
1276 if (raidPtr->copyback_in_progress == 1) {
1277 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1278 raidPtr->Layout.numStripe;
1279 } else {
1280 *(int *) data = 100;
1281 }
1282 return (0);
1283
1284
1285 /* the sparetable daemon calls this to wait for the kernel to
1286 * need a spare table. this ioctl does not return until a
1287 * spare table is needed. XXX -- calling mpsleep here in the
1288 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1289 * -- I should either compute the spare table in the kernel,
1290 * or have a different -- XXX XXX -- interface (a different
1291 * character device) for delivering the table -- XXX */
1292 #if 0
1293 case RAIDFRAME_SPARET_WAIT:
1294 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1295 while (!rf_sparet_wait_queue)
1296 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1297 waitreq = rf_sparet_wait_queue;
1298 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1299 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1300
1301 /* structure assignment */
1302 *((RF_SparetWait_t *) data) = *waitreq;
1303
1304 RF_Free(waitreq, sizeof(*waitreq));
1305 return (0);
1306
1307 /* wakes up a process waiting on SPARET_WAIT and puts an error
1308 * code in it that will cause the dameon to exit */
1309 case RAIDFRAME_ABORT_SPARET_WAIT:
1310 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1311 waitreq->fcol = -1;
1312 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1313 waitreq->next = rf_sparet_wait_queue;
1314 rf_sparet_wait_queue = waitreq;
1315 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1316 wakeup(&rf_sparet_wait_queue);
1317 return (0);
1318
1319 /* used by the spare table daemon to deliver a spare table
1320 * into the kernel */
1321 case RAIDFRAME_SEND_SPARET:
1322
1323 /* install the spare table */
1324 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1325
1326 /* respond to the requestor. the return status of the spare
1327 * table installation is passed in the "fcol" field */
1328 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1329 waitreq->fcol = retcode;
1330 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1331 waitreq->next = rf_sparet_resp_queue;
1332 rf_sparet_resp_queue = waitreq;
1333 wakeup(&rf_sparet_resp_queue);
1334 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1335
1336 return (retcode);
1337 #endif
1338
1339 default:
1340 break; /* fall through to the os-specific code below */
1341
1342 }
1343
1344 if (!raidPtr->valid)
1345 return (EINVAL);
1346
1347 /*
1348 * Add support for "regular" device ioctls here.
1349 */
1350
1351 switch (cmd) {
1352 case DIOCGDINFO:
1353 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1354 break;
1355
1356 case DIOCGPART:
1357 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1358 ((struct partinfo *) data)->part =
1359 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1360 break;
1361
1362 case DIOCWDINFO:
1363 case DIOCSDINFO:
1364 if ((error = raidlock(rs)) != 0)
1365 return (error);
1366
1367 rs->sc_flags |= RAIDF_LABELLING;
1368
1369 error = setdisklabel(rs->sc_dkdev.dk_label,
1370 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1371 if (error == 0) {
1372 if (cmd == DIOCWDINFO)
1373 error = writedisklabel(RAIDLABELDEV(dev),
1374 raidstrategy, rs->sc_dkdev.dk_label,
1375 rs->sc_dkdev.dk_cpulabel);
1376 }
1377 rs->sc_flags &= ~RAIDF_LABELLING;
1378
1379 raidunlock(rs);
1380
1381 if (error)
1382 return (error);
1383 break;
1384
1385 case DIOCWLABEL:
1386 if (*(int *) data != 0)
1387 rs->sc_flags |= RAIDF_WLABEL;
1388 else
1389 rs->sc_flags &= ~RAIDF_WLABEL;
1390 break;
1391
1392 case DIOCGDEFLABEL:
1393 raidgetdefaultlabel(raidPtr, rs,
1394 (struct disklabel *) data);
1395 break;
1396
1397 default:
1398 retcode = ENOTTY;
1399 }
1400 return (retcode);
1401
1402 }
1403
1404
1405 /* raidinit -- complete the rest of the initialization for the
1406 RAIDframe device. */
1407
1408
1409 static void
1410 raidinit(raidPtr)
1411 RF_Raid_t *raidPtr;
1412 {
1413 struct raid_softc *rs;
1414 int unit;
1415
1416 unit = raidPtr->raidid;
1417
1418 rs = &raid_softc[unit];
1419 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1420 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1421
1422
1423 /* XXX should check return code first... */
1424 rs->sc_flags |= RAIDF_INITED;
1425
1426 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1427
1428 rs->sc_dkdev.dk_name = rs->sc_xname;
1429
1430 /* disk_attach actually creates space for the CPU disklabel, among
1431 * other things, so it's critical to call this *BEFORE* we try putzing
1432 * with disklabels. */
1433
1434 disk_attach(&rs->sc_dkdev);
1435
1436 /* XXX There may be a weird interaction here between this, and
1437 * protectedSectors, as used in RAIDframe. */
1438
1439 rs->sc_size = raidPtr->totalSectors;
1440
1441 }
1442
1443 /* wake up the daemon & tell it to get us a spare table
1444 * XXX
1445 * the entries in the queues should be tagged with the raidPtr
1446 * so that in the extremely rare case that two recons happen at once,
1447 * we know for which device were requesting a spare table
1448 * XXX
1449 *
1450 * XXX This code is not currently used. GO
1451 */
1452 int
1453 rf_GetSpareTableFromDaemon(req)
1454 RF_SparetWait_t *req;
1455 {
1456 int retcode;
1457
1458 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1459 req->next = rf_sparet_wait_queue;
1460 rf_sparet_wait_queue = req;
1461 wakeup(&rf_sparet_wait_queue);
1462
1463 /* mpsleep unlocks the mutex */
1464 while (!rf_sparet_resp_queue) {
1465 tsleep(&rf_sparet_resp_queue, PRIBIO,
1466 "raidframe getsparetable", 0);
1467 }
1468 req = rf_sparet_resp_queue;
1469 rf_sparet_resp_queue = req->next;
1470 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1471
1472 retcode = req->fcol;
1473 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1474 * alloc'd */
1475 return (retcode);
1476 }
1477
1478 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1479 * bp & passes it down.
1480 * any calls originating in the kernel must use non-blocking I/O
1481 * do some extra sanity checking to return "appropriate" error values for
1482 * certain conditions (to make some standard utilities work)
1483 *
1484 * Formerly known as: rf_DoAccessKernel
1485 */
1486 void
1487 raidstart(raidPtr)
1488 RF_Raid_t *raidPtr;
1489 {
1490 RF_SectorCount_t num_blocks, pb, sum;
1491 RF_RaidAddr_t raid_addr;
1492 int retcode;
1493 struct partition *pp;
1494 daddr_t blocknum;
1495 int unit;
1496 struct raid_softc *rs;
1497 int do_async;
1498 struct buf *bp;
1499
1500 unit = raidPtr->raidid;
1501 rs = &raid_softc[unit];
1502
1503 /* quick check to see if anything has died recently */
1504 RF_LOCK_MUTEX(raidPtr->mutex);
1505 if (raidPtr->numNewFailures > 0) {
1506 rf_update_component_labels(raidPtr);
1507 raidPtr->numNewFailures--;
1508 }
1509 RF_UNLOCK_MUTEX(raidPtr->mutex);
1510
1511 /* Check to see if we're at the limit... */
1512 RF_LOCK_MUTEX(raidPtr->mutex);
1513 while (raidPtr->openings > 0) {
1514 RF_UNLOCK_MUTEX(raidPtr->mutex);
1515
1516 /* get the next item, if any, from the queue */
1517 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1518 /* nothing more to do */
1519 return;
1520 }
1521 BUFQ_REMOVE(&rs->buf_queue, bp);
1522
1523 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1524 * partition.. Need to make it absolute to the underlying
1525 * device.. */
1526
1527 blocknum = bp->b_blkno;
1528 if (DISKPART(bp->b_dev) != RAW_PART) {
1529 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1530 blocknum += pp->p_offset;
1531 }
1532
1533 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1534 (int) blocknum));
1535
1536 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1537 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1538
1539 /* *THIS* is where we adjust what block we're going to...
1540 * but DO NOT TOUCH bp->b_blkno!!! */
1541 raid_addr = blocknum;
1542
1543 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1544 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1545 sum = raid_addr + num_blocks + pb;
1546 if (1 || rf_debugKernelAccess) {
1547 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1548 (int) raid_addr, (int) sum, (int) num_blocks,
1549 (int) pb, (int) bp->b_resid));
1550 }
1551 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1552 || (sum < num_blocks) || (sum < pb)) {
1553 bp->b_error = ENOSPC;
1554 bp->b_flags |= B_ERROR;
1555 bp->b_resid = bp->b_bcount;
1556 biodone(bp);
1557 RF_LOCK_MUTEX(raidPtr->mutex);
1558 continue;
1559 }
1560 /*
1561 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1562 */
1563
1564 if (bp->b_bcount & raidPtr->sectorMask) {
1565 bp->b_error = EINVAL;
1566 bp->b_flags |= B_ERROR;
1567 bp->b_resid = bp->b_bcount;
1568 biodone(bp);
1569 RF_LOCK_MUTEX(raidPtr->mutex);
1570 continue;
1571
1572 }
1573 db1_printf(("Calling DoAccess..\n"));
1574
1575
1576 RF_LOCK_MUTEX(raidPtr->mutex);
1577 raidPtr->openings--;
1578 RF_UNLOCK_MUTEX(raidPtr->mutex);
1579
1580 /*
1581 * Everything is async.
1582 */
1583 do_async = 1;
1584
1585 /* don't ever condition on bp->b_flags & B_WRITE.
1586 * always condition on B_READ instead */
1587
1588 /* XXX we're still at splbio() here... do we *really*
1589 need to be? */
1590
1591
1592 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1593 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1594 do_async, raid_addr, num_blocks,
1595 bp->b_un.b_addr, bp, NULL, NULL,
1596 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1597
1598
1599 RF_LOCK_MUTEX(raidPtr->mutex);
1600 }
1601 RF_UNLOCK_MUTEX(raidPtr->mutex);
1602 }
1603
1604
1605
1606
1607 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1608
1609 int
1610 rf_DispatchKernelIO(queue, req)
1611 RF_DiskQueue_t *queue;
1612 RF_DiskQueueData_t *req;
1613 {
1614 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1615 struct buf *bp;
1616 struct raidbuf *raidbp = NULL;
1617 struct raid_softc *rs;
1618 int unit;
1619 int s;
1620
1621 s=0;
1622 /* s = splbio();*/ /* want to test this */
1623 /* XXX along with the vnode, we also need the softc associated with
1624 * this device.. */
1625
1626 req->queue = queue;
1627
1628 unit = queue->raidPtr->raidid;
1629
1630 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1631
1632 if (unit >= numraid) {
1633 printf("Invalid unit number: %d %d\n", unit, numraid);
1634 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1635 }
1636 rs = &raid_softc[unit];
1637
1638 /* XXX is this the right place? */
1639 disk_busy(&rs->sc_dkdev);
1640
1641 bp = req->bp;
1642 #if 1
1643 /* XXX when there is a physical disk failure, someone is passing us a
1644 * buffer that contains old stuff!! Attempt to deal with this problem
1645 * without taking a performance hit... (not sure where the real bug
1646 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1647
1648 if (bp->b_flags & B_ERROR) {
1649 bp->b_flags &= ~B_ERROR;
1650 }
1651 if (bp->b_error != 0) {
1652 bp->b_error = 0;
1653 }
1654 #endif
1655 raidbp = RAIDGETBUF(rs);
1656
1657 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1658
1659 /*
1660 * context for raidiodone
1661 */
1662 raidbp->rf_obp = bp;
1663 raidbp->req = req;
1664
1665 LIST_INIT(&raidbp->rf_buf.b_dep);
1666
1667 switch (req->type) {
1668 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1669 /* XXX need to do something extra here.. */
1670 /* I'm leaving this in, as I've never actually seen it used,
1671 * and I'd like folks to report it... GO */
1672 printf(("WAKEUP CALLED\n"));
1673 queue->numOutstanding++;
1674
1675 /* XXX need to glue the original buffer into this?? */
1676
1677 KernelWakeupFunc(&raidbp->rf_buf);
1678 break;
1679
1680 case RF_IO_TYPE_READ:
1681 case RF_IO_TYPE_WRITE:
1682
1683 if (req->tracerec) {
1684 RF_ETIMER_START(req->tracerec->timer);
1685 }
1686 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1687 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1688 req->sectorOffset, req->numSector,
1689 req->buf, KernelWakeupFunc, (void *) req,
1690 queue->raidPtr->logBytesPerSector, req->b_proc);
1691
1692 if (rf_debugKernelAccess) {
1693 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1694 (long) bp->b_blkno));
1695 }
1696 queue->numOutstanding++;
1697 queue->last_deq_sector = req->sectorOffset;
1698 /* acc wouldn't have been let in if there were any pending
1699 * reqs at any other priority */
1700 queue->curPriority = req->priority;
1701
1702 db1_printf(("Going for %c to unit %d row %d col %d\n",
1703 req->type, unit, queue->row, queue->col));
1704 db1_printf(("sector %d count %d (%d bytes) %d\n",
1705 (int) req->sectorOffset, (int) req->numSector,
1706 (int) (req->numSector <<
1707 queue->raidPtr->logBytesPerSector),
1708 (int) queue->raidPtr->logBytesPerSector));
1709 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1710 raidbp->rf_buf.b_vp->v_numoutput++;
1711 }
1712 VOP_STRATEGY(&raidbp->rf_buf);
1713
1714 break;
1715
1716 default:
1717 panic("bad req->type in rf_DispatchKernelIO");
1718 }
1719 db1_printf(("Exiting from DispatchKernelIO\n"));
1720 /* splx(s); */ /* want to test this */
1721 return (0);
1722 }
1723 /* this is the callback function associated with a I/O invoked from
1724 kernel code.
1725 */
1726 static void
1727 KernelWakeupFunc(vbp)
1728 struct buf *vbp;
1729 {
1730 RF_DiskQueueData_t *req = NULL;
1731 RF_DiskQueue_t *queue;
1732 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1733 struct buf *bp;
1734 struct raid_softc *rs;
1735 int unit;
1736 register int s;
1737
1738 s = splbio();
1739 db1_printf(("recovering the request queue:\n"));
1740 req = raidbp->req;
1741
1742 bp = raidbp->rf_obp;
1743
1744 queue = (RF_DiskQueue_t *) req->queue;
1745
1746 if (raidbp->rf_buf.b_flags & B_ERROR) {
1747 bp->b_flags |= B_ERROR;
1748 bp->b_error = raidbp->rf_buf.b_error ?
1749 raidbp->rf_buf.b_error : EIO;
1750 }
1751
1752 /* XXX methinks this could be wrong... */
1753 #if 1
1754 bp->b_resid = raidbp->rf_buf.b_resid;
1755 #endif
1756
1757 if (req->tracerec) {
1758 RF_ETIMER_STOP(req->tracerec->timer);
1759 RF_ETIMER_EVAL(req->tracerec->timer);
1760 RF_LOCK_MUTEX(rf_tracing_mutex);
1761 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1762 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1763 req->tracerec->num_phys_ios++;
1764 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1765 }
1766 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1767
1768 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1769
1770
1771 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1772 * ballistic, and mark the component as hosed... */
1773
1774 if (bp->b_flags & B_ERROR) {
1775 /* Mark the disk as dead */
1776 /* but only mark it once... */
1777 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1778 rf_ds_optimal) {
1779 printf("raid%d: IO Error. Marking %s as failed.\n",
1780 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1781 queue->raidPtr->Disks[queue->row][queue->col].status =
1782 rf_ds_failed;
1783 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1784 queue->raidPtr->numFailures++;
1785 queue->raidPtr->numNewFailures++;
1786 /* XXX here we should bump the version number for each component, and write that data out */
1787 } else { /* Disk is already dead... */
1788 /* printf("Disk already marked as dead!\n"); */
1789 }
1790
1791 }
1792
1793 rs = &raid_softc[unit];
1794 RAIDPUTBUF(rs, raidbp);
1795
1796
1797 if (bp->b_resid == 0) {
1798 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1799 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1800 }
1801
1802 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1803 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1804
1805 splx(s);
1806 }
1807
1808
1809
1810 /*
1811 * initialize a buf structure for doing an I/O in the kernel.
1812 */
1813 static void
1814 InitBP(
1815 struct buf * bp,
1816 struct vnode * b_vp,
1817 unsigned rw_flag,
1818 dev_t dev,
1819 RF_SectorNum_t startSect,
1820 RF_SectorCount_t numSect,
1821 caddr_t buf,
1822 void (*cbFunc) (struct buf *),
1823 void *cbArg,
1824 int logBytesPerSector,
1825 struct proc * b_proc)
1826 {
1827 /* bp->b_flags = B_PHYS | rw_flag; */
1828 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1829 bp->b_bcount = numSect << logBytesPerSector;
1830 bp->b_bufsize = bp->b_bcount;
1831 bp->b_error = 0;
1832 bp->b_dev = dev;
1833 bp->b_un.b_addr = buf;
1834 bp->b_blkno = startSect;
1835 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1836 if (bp->b_bcount == 0) {
1837 panic("bp->b_bcount is zero in InitBP!!\n");
1838 }
1839 bp->b_proc = b_proc;
1840 bp->b_iodone = cbFunc;
1841 bp->b_vp = b_vp;
1842
1843 }
1844
1845 static void
1846 raidgetdefaultlabel(raidPtr, rs, lp)
1847 RF_Raid_t *raidPtr;
1848 struct raid_softc *rs;
1849 struct disklabel *lp;
1850 {
1851 db1_printf(("Building a default label...\n"));
1852 bzero(lp, sizeof(*lp));
1853
1854 /* fabricate a label... */
1855 lp->d_secperunit = raidPtr->totalSectors;
1856 lp->d_secsize = raidPtr->bytesPerSector;
1857 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1858 lp->d_ntracks = 1;
1859 lp->d_ncylinders = raidPtr->totalSectors /
1860 (lp->d_nsectors * lp->d_ntracks);
1861 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1862
1863 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1864 lp->d_type = DTYPE_RAID;
1865 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1866 lp->d_rpm = 3600;
1867 lp->d_interleave = 1;
1868 lp->d_flags = 0;
1869
1870 lp->d_partitions[RAW_PART].p_offset = 0;
1871 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1872 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1873 lp->d_npartitions = RAW_PART + 1;
1874
1875 lp->d_magic = DISKMAGIC;
1876 lp->d_magic2 = DISKMAGIC;
1877 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1878
1879 }
1880 /*
1881 * Read the disklabel from the raid device. If one is not present, fake one
1882 * up.
1883 */
1884 static void
1885 raidgetdisklabel(dev)
1886 dev_t dev;
1887 {
1888 int unit = raidunit(dev);
1889 struct raid_softc *rs = &raid_softc[unit];
1890 char *errstring;
1891 struct disklabel *lp = rs->sc_dkdev.dk_label;
1892 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1893 RF_Raid_t *raidPtr;
1894
1895 db1_printf(("Getting the disklabel...\n"));
1896
1897 bzero(clp, sizeof(*clp));
1898
1899 raidPtr = raidPtrs[unit];
1900
1901 raidgetdefaultlabel(raidPtr, rs, lp);
1902
1903 /*
1904 * Call the generic disklabel extraction routine.
1905 */
1906 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1907 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1908 if (errstring)
1909 raidmakedisklabel(rs);
1910 else {
1911 int i;
1912 struct partition *pp;
1913
1914 /*
1915 * Sanity check whether the found disklabel is valid.
1916 *
1917 * This is necessary since total size of the raid device
1918 * may vary when an interleave is changed even though exactly
1919 * same componets are used, and old disklabel may used
1920 * if that is found.
1921 */
1922 if (lp->d_secperunit != rs->sc_size)
1923 printf("WARNING: %s: "
1924 "total sector size in disklabel (%d) != "
1925 "the size of raid (%ld)\n", rs->sc_xname,
1926 lp->d_secperunit, (long) rs->sc_size);
1927 for (i = 0; i < lp->d_npartitions; i++) {
1928 pp = &lp->d_partitions[i];
1929 if (pp->p_offset + pp->p_size > rs->sc_size)
1930 printf("WARNING: %s: end of partition `%c' "
1931 "exceeds the size of raid (%ld)\n",
1932 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1933 }
1934 }
1935
1936 }
1937 /*
1938 * Take care of things one might want to take care of in the event
1939 * that a disklabel isn't present.
1940 */
1941 static void
1942 raidmakedisklabel(rs)
1943 struct raid_softc *rs;
1944 {
1945 struct disklabel *lp = rs->sc_dkdev.dk_label;
1946 db1_printf(("Making a label..\n"));
1947
1948 /*
1949 * For historical reasons, if there's no disklabel present
1950 * the raw partition must be marked FS_BSDFFS.
1951 */
1952
1953 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1954
1955 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1956
1957 lp->d_checksum = dkcksum(lp);
1958 }
1959 /*
1960 * Lookup the provided name in the filesystem. If the file exists,
1961 * is a valid block device, and isn't being used by anyone else,
1962 * set *vpp to the file's vnode.
1963 * You'll find the original of this in ccd.c
1964 */
1965 int
1966 raidlookup(path, p, vpp)
1967 char *path;
1968 struct proc *p;
1969 struct vnode **vpp; /* result */
1970 {
1971 struct nameidata nd;
1972 struct vnode *vp;
1973 struct vattr va;
1974 int error;
1975
1976 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1977 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1978 #ifdef DEBUG
1979 printf("RAIDframe: vn_open returned %d\n", error);
1980 #endif
1981 return (error);
1982 }
1983 vp = nd.ni_vp;
1984 if (vp->v_usecount > 1) {
1985 VOP_UNLOCK(vp, 0);
1986 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1987 return (EBUSY);
1988 }
1989 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1990 VOP_UNLOCK(vp, 0);
1991 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1992 return (error);
1993 }
1994 /* XXX: eventually we should handle VREG, too. */
1995 if (va.va_type != VBLK) {
1996 VOP_UNLOCK(vp, 0);
1997 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1998 return (ENOTBLK);
1999 }
2000 VOP_UNLOCK(vp, 0);
2001 *vpp = vp;
2002 return (0);
2003 }
2004 /*
2005 * Wait interruptibly for an exclusive lock.
2006 *
2007 * XXX
2008 * Several drivers do this; it should be abstracted and made MP-safe.
2009 * (Hmm... where have we seen this warning before :-> GO )
2010 */
2011 static int
2012 raidlock(rs)
2013 struct raid_softc *rs;
2014 {
2015 int error;
2016
2017 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2018 rs->sc_flags |= RAIDF_WANTED;
2019 if ((error =
2020 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2021 return (error);
2022 }
2023 rs->sc_flags |= RAIDF_LOCKED;
2024 return (0);
2025 }
2026 /*
2027 * Unlock and wake up any waiters.
2028 */
2029 static void
2030 raidunlock(rs)
2031 struct raid_softc *rs;
2032 {
2033
2034 rs->sc_flags &= ~RAIDF_LOCKED;
2035 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2036 rs->sc_flags &= ~RAIDF_WANTED;
2037 wakeup(rs);
2038 }
2039 }
2040
2041
/*
 * Where the component label lives on each component: the label I/O
 * routines in this file start at block RF_COMPONENT_INFO_OFFSET / DEV_BSIZE
 * and transfer RF_COMPONENT_INFO_SIZE bytes.
 */
#define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2044
2045 int
2046 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2047 {
2048 RF_ComponentLabel_t clabel;
2049 raidread_component_label(dev, b_vp, &clabel);
2050 clabel.mod_counter = mod_counter;
2051 clabel.clean = RF_RAID_CLEAN;
2052 raidwrite_component_label(dev, b_vp, &clabel);
2053 return(0);
2054 }
2055
2056
2057 int
2058 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2059 {
2060 RF_ComponentLabel_t clabel;
2061 raidread_component_label(dev, b_vp, &clabel);
2062 clabel.mod_counter = mod_counter;
2063 clabel.clean = RF_RAID_DIRTY;
2064 raidwrite_component_label(dev, b_vp, &clabel);
2065 return(0);
2066 }
2067
2068 /* ARGSUSED */
2069 int
2070 raidread_component_label(dev, b_vp, clabel)
2071 dev_t dev;
2072 struct vnode *b_vp;
2073 RF_ComponentLabel_t *clabel;
2074 {
2075 struct buf *bp;
2076 int error;
2077
2078 /* XXX should probably ensure that we don't try to do this if
2079 someone has changed rf_protected_sectors. */
2080
2081 /* get a block of the appropriate size... */
2082 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2083 bp->b_dev = dev;
2084
2085 /* get our ducks in a row for the read */
2086 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2087 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2088 bp->b_flags = B_BUSY | B_READ;
2089 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2090
2091 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2092
2093 error = biowait(bp);
2094
2095 if (!error) {
2096 memcpy(clabel, bp->b_un.b_addr,
2097 sizeof(RF_ComponentLabel_t));
2098 #if 0
2099 print_component_label( clabel );
2100 #endif
2101 } else {
2102 #if 0
2103 printf("Failed to read RAID component label!\n");
2104 #endif
2105 }
2106
2107 bp->b_flags = B_INVAL | B_AGE;
2108 brelse(bp);
2109 return(error);
2110 }
2111 /* ARGSUSED */
2112 int
2113 raidwrite_component_label(dev, b_vp, clabel)
2114 dev_t dev;
2115 struct vnode *b_vp;
2116 RF_ComponentLabel_t *clabel;
2117 {
2118 struct buf *bp;
2119 int error;
2120
2121 /* get a block of the appropriate size... */
2122 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2123 bp->b_dev = dev;
2124
2125 /* get our ducks in a row for the write */
2126 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2127 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2128 bp->b_flags = B_BUSY | B_WRITE;
2129 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2130
2131 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2132
2133 memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
2134
2135 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2136 error = biowait(bp);
2137 bp->b_flags = B_INVAL | B_AGE;
2138 brelse(bp);
2139 if (error) {
2140 #if 1
2141 printf("Failed to write RAID component info!\n");
2142 #endif
2143 }
2144
2145 return(error);
2146 }
2147
2148 void
2149 rf_markalldirty( raidPtr )
2150 RF_Raid_t *raidPtr;
2151 {
2152 RF_ComponentLabel_t clabel;
2153 int r,c;
2154
2155 raidPtr->mod_counter++;
2156 for (r = 0; r < raidPtr->numRow; r++) {
2157 for (c = 0; c < raidPtr->numCol; c++) {
2158 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2159 raidread_component_label(
2160 raidPtr->Disks[r][c].dev,
2161 raidPtr->raid_cinfo[r][c].ci_vp,
2162 &clabel);
2163 if (clabel.status == rf_ds_spared) {
2164 /* XXX do something special...
2165 but whatever you do, don't
2166 try to access it!! */
2167 } else {
2168 #if 0
2169 clabel.status =
2170 raidPtr->Disks[r][c].status;
2171 raidwrite_component_label(
2172 raidPtr->Disks[r][c].dev,
2173 raidPtr->raid_cinfo[r][c].ci_vp,
2174 &clabel);
2175 #endif
2176 raidmarkdirty(
2177 raidPtr->Disks[r][c].dev,
2178 raidPtr->raid_cinfo[r][c].ci_vp,
2179 raidPtr->mod_counter);
2180 }
2181 }
2182 }
2183 }
2184 /* printf("Component labels marked dirty.\n"); */
2185 #if 0
2186 for( c = 0; c < raidPtr->numSpare ; c++) {
2187 sparecol = raidPtr->numCol + c;
2188 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2189 /*
2190
2191 XXX this is where we get fancy and map this spare
2192 into it's correct spot in the array.
2193
2194 */
2195 /*
2196
2197 we claim this disk is "optimal" if it's
2198 rf_ds_used_spare, as that means it should be
2199 directly substitutable for the disk it replaced.
2200 We note that too...
2201
2202 */
2203
2204 for(i=0;i<raidPtr->numRow;i++) {
2205 for(j=0;j<raidPtr->numCol;j++) {
2206 if ((raidPtr->Disks[i][j].spareRow ==
2207 r) &&
2208 (raidPtr->Disks[i][j].spareCol ==
2209 sparecol)) {
2210 srow = r;
2211 scol = sparecol;
2212 break;
2213 }
2214 }
2215 }
2216
2217 raidread_component_label(
2218 raidPtr->Disks[r][sparecol].dev,
2219 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2220 &clabel);
2221 /* make sure status is noted */
2222 clabel.version = RF_COMPONENT_LABEL_VERSION;
2223 clabel.mod_counter = raidPtr->mod_counter;
2224 clabel.serial_number = raidPtr->serial_number;
2225 clabel.row = srow;
2226 clabel.column = scol;
2227 clabel.num_rows = raidPtr->numRow;
2228 clabel.num_columns = raidPtr->numCol;
2229 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2230 clabel.status = rf_ds_optimal;
2231 raidwrite_component_label(
2232 raidPtr->Disks[r][sparecol].dev,
2233 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2234 &clabel);
2235 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2236 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2237 }
2238 }
2239
2240 #endif
2241 }
2242
2243
2244 void
2245 rf_update_component_labels( raidPtr )
2246 RF_Raid_t *raidPtr;
2247 {
2248 RF_ComponentLabel_t clabel;
2249 int sparecol;
2250 int r,c;
2251 int i,j;
2252 int srow, scol;
2253
2254 srow = -1;
2255 scol = -1;
2256
2257 /* XXX should do extra checks to make sure things really are clean,
2258 rather than blindly setting the clean bit... */
2259
2260 raidPtr->mod_counter++;
2261
2262 for (r = 0; r < raidPtr->numRow; r++) {
2263 for (c = 0; c < raidPtr->numCol; c++) {
2264 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2265 raidread_component_label(
2266 raidPtr->Disks[r][c].dev,
2267 raidPtr->raid_cinfo[r][c].ci_vp,
2268 &clabel);
2269 /* make sure status is noted */
2270 clabel.status = rf_ds_optimal;
2271 /* bump the counter */
2272 clabel.mod_counter = raidPtr->mod_counter;
2273 #if 0
2274 /* note where this set was configured last */
2275 clabel.last_unit = raidPtr->raidid;
2276 #endif
2277
2278 raidwrite_component_label(
2279 raidPtr->Disks[r][c].dev,
2280 raidPtr->raid_cinfo[r][c].ci_vp,
2281 &clabel);
2282 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2283 raidmarkclean(
2284 raidPtr->Disks[r][c].dev,
2285 raidPtr->raid_cinfo[r][c].ci_vp,
2286 raidPtr->mod_counter);
2287 }
2288 }
2289 /* else we don't touch it.. */
2290 #if 0
2291 else if (raidPtr->Disks[r][c].status !=
2292 rf_ds_failed) {
2293 raidread_component_label(
2294 raidPtr->Disks[r][c].dev,
2295 raidPtr->raid_cinfo[r][c].ci_vp,
2296 &clabel);
2297 /* make sure status is noted */
2298 clabel.status =
2299 raidPtr->Disks[r][c].status;
2300 raidwrite_component_label(
2301 raidPtr->Disks[r][c].dev,
2302 raidPtr->raid_cinfo[r][c].ci_vp,
2303 &clabel);
2304 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2305 raidmarkclean(
2306 raidPtr->Disks[r][c].dev,
2307 raidPtr->raid_cinfo[r][c].ci_vp,
2308 raidPtr->mod_counter);
2309 }
2310 }
2311 #endif
2312 }
2313 }
2314
2315 for( c = 0; c < raidPtr->numSpare ; c++) {
2316 sparecol = raidPtr->numCol + c;
2317 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2318 /*
2319
2320 we claim this disk is "optimal" if it's
2321 rf_ds_used_spare, as that means it should be
2322 directly substitutable for the disk it replaced.
2323 We note that too...
2324
2325 */
2326
2327 for(i=0;i<raidPtr->numRow;i++) {
2328 for(j=0;j<raidPtr->numCol;j++) {
2329 if ((raidPtr->Disks[i][j].spareRow ==
2330 0) &&
2331 (raidPtr->Disks[i][j].spareCol ==
2332 sparecol)) {
2333 srow = i;
2334 scol = j;
2335 break;
2336 }
2337 }
2338 }
2339
2340 /* XXX shouldn't *really* need this... */
2341 raidread_component_label(
2342 raidPtr->Disks[0][sparecol].dev,
2343 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2344 &clabel);
2345 /* make sure status is noted */
2346
2347 raid_init_component_label(raidPtr, &clabel);
2348
2349 clabel.mod_counter = raidPtr->mod_counter;
2350 clabel.row = srow;
2351 clabel.column = scol;
2352 clabel.status = rf_ds_optimal;
2353
2354 raidwrite_component_label(
2355 raidPtr->Disks[0][sparecol].dev,
2356 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2357 &clabel);
2358 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2359 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2360 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2361 raidPtr->mod_counter);
2362 }
2363 }
2364 }
2365 /* printf("Component labels updated\n"); */
2366 }
2367
2368 void
2369 rf_ReconThread(req)
2370 struct rf_recon_req *req;
2371 {
2372 int s;
2373 RF_Raid_t *raidPtr;
2374
2375 s = splbio();
2376 raidPtr = (RF_Raid_t *) req->raidPtr;
2377 raidPtr->recon_in_progress = 1;
2378
2379 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2380 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2381
2382 /* XXX get rid of this! we don't need it at all.. */
2383 RF_Free(req, sizeof(*req));
2384
2385 raidPtr->recon_in_progress = 0;
2386 splx(s);
2387
2388 /* That's all... */
2389 kthread_exit(0); /* does not return */
2390 }
2391
2392 void
2393 rf_RewriteParityThread(raidPtr)
2394 RF_Raid_t *raidPtr;
2395 {
2396 int retcode;
2397 int s;
2398
2399 raidPtr->parity_rewrite_in_progress = 1;
2400 s = splbio();
2401 retcode = rf_RewriteParity(raidPtr);
2402 splx(s);
2403 if (retcode) {
2404 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2405 } else {
2406 /* set the clean bit! If we shutdown correctly,
2407 the clean bit on each component label will get
2408 set */
2409 raidPtr->parity_good = RF_RAID_CLEAN;
2410 }
2411 raidPtr->parity_rewrite_in_progress = 0;
2412
2413 /* That's all... */
2414 kthread_exit(0); /* does not return */
2415 }
2416
2417
2418 void
2419 rf_CopybackThread(raidPtr)
2420 RF_Raid_t *raidPtr;
2421 {
2422 int s;
2423
2424 raidPtr->copyback_in_progress = 1;
2425 s = splbio();
2426 rf_CopybackReconstructedData(raidPtr);
2427 splx(s);
2428 raidPtr->copyback_in_progress = 0;
2429
2430 /* That's all... */
2431 kthread_exit(0); /* does not return */
2432 }
2433
2434
2435 void
2436 rf_ReconstructInPlaceThread(req)
2437 struct rf_recon_req *req;
2438 {
2439 int retcode;
2440 int s;
2441 RF_Raid_t *raidPtr;
2442
2443 s = splbio();
2444 raidPtr = req->raidPtr;
2445 raidPtr->recon_in_progress = 1;
2446 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2447 RF_Free(req, sizeof(*req));
2448 raidPtr->recon_in_progress = 0;
2449 splx(s);
2450
2451 /* That's all... */
2452 kthread_exit(0); /* does not return */
2453 }
2454
/*
 * Placeholder: the function has no body and ignores `dev'.
 */
/* ARGSUSED */
void
rf_mountroot_hook(dev)
	struct device *dev;
{

	/* nothing to do */
}
2461
2462
2463 RF_AutoConfig_t *
2464 rf_find_raid_components()
2465 {
2466 struct devnametobdevmaj *dtobdm;
2467 struct vnode *vp;
2468 struct disklabel label;
2469 struct device *dv;
2470 char *cd_name;
2471 dev_t dev;
2472 int error;
2473 int i;
2474 int good_one;
2475 RF_ComponentLabel_t *clabel;
2476 RF_AutoConfig_t *ac_list;
2477 RF_AutoConfig_t *ac;
2478
2479
2480 /* initialize the AutoConfig list */
2481 ac_list = NULL;
2482
2483 if (raidautoconfig) {
2484
2485 /* we begin by trolling through *all* the devices on the system */
2486
2487 for (dv = alldevs.tqh_first; dv != NULL;
2488 dv = dv->dv_list.tqe_next) {
2489
2490 /* we are only interested in disks... */
2491 if (dv->dv_class != DV_DISK)
2492 continue;
2493
2494 /* we don't care about floppies... */
2495 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2496 continue;
2497 }
2498
2499 /* need to find the device_name_to_block_device_major stuff */
2500 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2501 dtobdm = dev_name2blk;
2502 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2503 dtobdm++;
2504 }
2505
2506 /* get a vnode for the raw partition of this disk */
2507
2508 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2509 if (bdevvp(dev, &vp))
2510 panic("RAID can't alloc vnode");
2511
2512 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2513
2514 if (error) {
2515 /* "Who cares." Continue looking
2516 for something that exists*/
2517 vput(vp);
2518 continue;
2519 }
2520
2521 /* Ok, the disk exists. Go get the disklabel. */
2522 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2523 FREAD, NOCRED, 0);
2524 if (error) {
2525 /*
2526 * XXX can't happen - open() would
2527 * have errored out (or faked up one)
2528 */
2529 printf("can't get label for dev %s%c (%d)!?!?\n",
2530 dv->dv_xname, 'a' + RAW_PART, error);
2531 }
2532
2533 /* don't need this any more. We'll allocate it again
2534 a little later if we really do... */
2535 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2536 vput(vp);
2537
2538 for (i=0; i < label.d_npartitions; i++) {
2539 /* We only support partitions marked as RAID */
2540 if (label.d_partitions[i].p_fstype != FS_RAID)
2541 continue;
2542
2543 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2544 if (bdevvp(dev, &vp))
2545 panic("RAID can't alloc vnode");
2546
2547 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2548 if (error) {
2549 /* Whatever... */
2550 vput(vp);
2551 continue;
2552 }
2553
2554 good_one = 0;
2555
2556 clabel = (RF_ComponentLabel_t *)
2557 malloc(sizeof(RF_ComponentLabel_t),
2558 M_RAIDFRAME, M_NOWAIT);
2559 if (clabel == NULL) {
2560 /* XXX CLEANUP HERE */
2561 printf("RAID auto config: out of memory!\n");
2562 return(NULL); /* XXX probably should panic? */
2563 }
2564
2565 if (!raidread_component_label(dev, vp, clabel)) {
2566 /* Got the label. Does it look reasonable? */
2567 if (rf_reasonable_label(clabel) &&
2568 (clabel->partitionSize <=
2569 label.d_partitions[i].p_size)) {
2570 #if DEBUG
2571 printf("Component on: %s%c: %d\n",
2572 dv->dv_xname, 'a'+i,
2573 label.d_partitions[i].p_size);
2574 print_component_label(clabel);
2575 #endif
2576 /* if it's reasonable, add it,
2577 else ignore it. */
2578 ac = (RF_AutoConfig_t *)
2579 malloc(sizeof(RF_AutoConfig_t),
2580 M_RAIDFRAME,
2581 M_NOWAIT);
2582 if (ac == NULL) {
2583 /* XXX should panic?? */
2584 return(NULL);
2585 }
2586
2587 sprintf(ac->devname, "%s%c",
2588 dv->dv_xname, 'a'+i);
2589 ac->dev = dev;
2590 ac->vp = vp;
2591 ac->clabel = clabel;
2592 ac->next = ac_list;
2593 ac_list = ac;
2594 good_one = 1;
2595 }
2596 }
2597 if (!good_one) {
2598 /* cleanup */
2599 free(clabel, M_RAIDFRAME);
2600 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2601 vput(vp);
2602 }
2603 }
2604 }
2605 }
2606 return(ac_list);
2607 }
2608
2609 static int
2610 rf_reasonable_label(clabel)
2611 RF_ComponentLabel_t *clabel;
2612 {
2613
2614 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2615 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2616 ((clabel->clean == RF_RAID_CLEAN) ||
2617 (clabel->clean == RF_RAID_DIRTY)) &&
2618 clabel->row >=0 &&
2619 clabel->column >= 0 &&
2620 clabel->num_rows > 0 &&
2621 clabel->num_columns > 0 &&
2622 clabel->row < clabel->num_rows &&
2623 clabel->column < clabel->num_columns &&
2624 clabel->blockSize > 0 &&
2625 clabel->numBlocks > 0) {
2626 /* label looks reasonable enough... */
2627 return(1);
2628 }
2629 return(0);
2630 }
2631
2632
2633 void
2634 print_component_label(clabel)
2635 RF_ComponentLabel_t *clabel;
2636 {
2637 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2638 clabel->row, clabel->column,
2639 clabel->num_rows, clabel->num_columns);
2640 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2641 clabel->version, clabel->serial_number,
2642 clabel->mod_counter);
2643 printf(" Clean: %s Status: %d\n",
2644 clabel->clean ? "Yes" : "No", clabel->status );
2645 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2646 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2647 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2648 (char) clabel->parityConfig, clabel->blockSize,
2649 clabel->numBlocks);
2650 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2651 printf(" Last configured as: raid%d\n", clabel->last_unit );
2652 #if 0
2653 printf(" Config order: %d\n", clabel->config_order);
2654 #endif
2655
2656 }
2657
2658 RF_ConfigSet_t *
2659 rf_create_auto_sets(ac_list)
2660 RF_AutoConfig_t *ac_list;
2661 {
2662 RF_AutoConfig_t *ac;
2663 RF_ConfigSet_t *config_sets;
2664 RF_ConfigSet_t *cset;
2665 RF_AutoConfig_t *ac_next;
2666
2667
2668 config_sets = NULL;
2669
2670 /* Go through the AutoConfig list, and figure out which components
2671 belong to what sets. */
2672 ac = ac_list;
2673 while(ac!=NULL) {
2674 /* we're going to putz with ac->next, so save it here
2675 for use at the end of the loop */
2676 ac_next = ac->next;
2677
2678 if (config_sets == NULL) {
2679 /* will need at least this one... */
2680 config_sets = (RF_ConfigSet_t *)
2681 malloc(sizeof(RF_ConfigSet_t),
2682 M_RAIDFRAME, M_NOWAIT);
2683 if (config_sets == NULL) {
2684 panic("rf_create_auto_sets: No memory!\n");
2685 }
2686 /* this one is easy :) */
2687 config_sets->ac = ac;
2688 config_sets->next = NULL;
2689 config_sets->rootable = 0;
2690 ac->next = NULL;
2691 } else {
2692 /* which set does this component fit into? */
2693 cset = config_sets;
2694 while(cset!=NULL) {
2695 if (rf_does_it_fit(cset, ac)) {
2696 /* looks like it matches */
2697 ac->next = cset->ac;
2698 cset->ac = ac;
2699 break;
2700 }
2701 cset = cset->next;
2702 }
2703 if (cset==NULL) {
2704 /* didn't find a match above... new set..*/
2705 cset = (RF_ConfigSet_t *)
2706 malloc(sizeof(RF_ConfigSet_t),
2707 M_RAIDFRAME, M_NOWAIT);
2708 if (cset == NULL) {
2709 panic("rf_create_auto_sets: No memory!\n");
2710 }
2711 cset->ac = ac;
2712 ac->next = NULL;
2713 cset->next = config_sets;
2714 cset->rootable = 0;
2715 config_sets = cset;
2716 }
2717 }
2718 ac = ac_next;
2719 }
2720
2721
2722 return(config_sets);
2723 }
2724
2725 static int
2726 rf_does_it_fit(cset, ac)
2727 RF_ConfigSet_t *cset;
2728 RF_AutoConfig_t *ac;
2729 {
2730 RF_ComponentLabel_t *clabel1, *clabel2;
2731
2732 /* If this one matches the *first* one in the set, that's good
2733 enough, since the other members of the set would have been
2734 through here too... */
2735 /* note that we are not checking partitionSize here..
2736
2737 Note that we are also not checking the mod_counters here.
2738 If everything else matches execpt the mod_counter, that's
2739 good enough for this test. We will deal with the mod_counters
2740 a little later in the autoconfiguration process.
2741
2742 (clabel1->mod_counter == clabel2->mod_counter) &&
2743
2744 */
2745
2746 clabel1 = cset->ac->clabel;
2747 clabel2 = ac->clabel;
2748 if ((clabel1->version == clabel2->version) &&
2749 (clabel1->serial_number == clabel2->serial_number) &&
2750 (clabel1->num_rows == clabel2->num_rows) &&
2751 (clabel1->num_columns == clabel2->num_columns) &&
2752 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2753 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2754 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2755 (clabel1->parityConfig == clabel2->parityConfig) &&
2756 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2757 (clabel1->blockSize == clabel2->blockSize) &&
2758 (clabel1->numBlocks == clabel2->numBlocks) &&
2759 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2760 (clabel1->root_partition == clabel2->root_partition) &&
2761 (clabel1->last_unit == clabel2->last_unit) &&
2762 (clabel1->config_order == clabel2->config_order)) {
2763 /* if it get's here, it almost *has* to be a match */
2764 } else {
2765 /* it's not consistent with somebody in the set..
2766 punt */
2767 return(0);
2768 }
2769 /* all was fine.. it must fit... */
2770 return(1);
2771 }
2772
2773 int
2774 rf_have_enough_components(cset)
2775 RF_ConfigSet_t *cset;
2776 {
2777 RF_AutoConfig_t *ac;
2778 RF_AutoConfig_t *auto_config;
2779 RF_ComponentLabel_t *clabel;
2780 int r,c;
2781 int num_rows;
2782 int num_cols;
2783 int num_missing;
2784
2785 /* check to see that we have enough 'live' components
2786 of this set. If so, we can configure it if necessary */
2787
2788 num_rows = cset->ac->clabel->num_rows;
2789 num_cols = cset->ac->clabel->num_columns;
2790
2791 /* XXX Check for duplicate components!?!?!? */
2792
2793 num_missing = 0;
2794 auto_config = cset->ac;
2795
2796 for(r=0; r<num_rows; r++) {
2797 for(c=0; c<num_cols; c++) {
2798 ac = auto_config;
2799 while(ac!=NULL) {
2800 if (ac->clabel==NULL) {
2801 /* big-time bad news. */
2802 goto fail;
2803 }
2804 if ((ac->clabel->row == r) &&
2805 (ac->clabel->column == c)) {
2806 /* it's this one... */
2807 #if DEBUG
2808 printf("Found: %s at %d,%d\n",
2809 ac->devname,r,c);
2810 #endif
2811 break;
2812 }
2813 ac=ac->next;
2814 }
2815 if (ac==NULL) {
2816 /* Didn't find one here! */
2817 num_missing++;
2818 }
2819 }
2820 }
2821
2822 clabel = cset->ac->clabel;
2823
2824 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
2825 ((clabel->parityConfig == '1') && (num_missing > 1)) ||
2826 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
2827 ((clabel->parityConfig == '5') && (num_missing > 1))) {
2828 /* XXX this needs to be made *much* more general */
2829 /* Too many failures */
2830 return(0);
2831 }
2832 /* otherwise, all is well, and we've got enough to take a kick
2833 at autoconfiguring this set */
2834 return(1);
2835 fail:
2836 return(0);
2837
2838 }
2839
2840 void
2841 rf_create_configuration(ac,config,raidPtr)
2842 RF_AutoConfig_t *ac;
2843 RF_Config_t *config;
2844 RF_Raid_t *raidPtr;
2845 {
2846 RF_ComponentLabel_t *clabel;
2847
2848 clabel = ac->clabel;
2849
2850 /* 1. Fill in the common stuff */
2851 config->numRow = clabel->num_rows;
2852 config->numCol = clabel->num_columns;
2853 config->numSpare = 0; /* XXX should this be set here? */
2854 config->sectPerSU = clabel->sectPerSU;
2855 config->SUsPerPU = clabel->SUsPerPU;
2856 config->SUsPerRU = clabel->SUsPerRU;
2857 config->parityConfig = clabel->parityConfig;
2858 /* XXX... */
2859 strcpy(config->diskQueueType,"fifo");
2860 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
2861 config->layoutSpecificSize = 0; /* XXX ?? */
2862
2863 while(ac!=NULL) {
2864 /* row/col values will be in range due to the checks
2865 in reasonable_label() */
2866 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
2867 ac->devname);
2868 ac = ac->next;
2869 }
2870
2871 }
2872
2873 int
2874 rf_set_autoconfig(raidPtr, new_value)
2875 RF_Raid_t *raidPtr;
2876 int new_value;
2877 {
2878 RF_ComponentLabel_t clabel;
2879 struct vnode *vp;
2880 dev_t dev;
2881 int row, column;
2882
2883 raidPtr->autoconfigure = new_value;
2884 for(row=0; row<raidPtr->numRow; row++) {
2885 for(column=0; column<raidPtr->numCol; column++) {
2886 dev = raidPtr->Disks[row][column].dev;
2887 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2888 raidread_component_label(dev, vp, &clabel);
2889 clabel.autoconfigure = new_value;
2890 raidwrite_component_label(dev, vp, &clabel);
2891 }
2892 }
2893 return(new_value);
2894 }
2895
2896 int
2897 rf_set_rootpartition(raidPtr, new_value)
2898 RF_Raid_t *raidPtr;
2899 int new_value;
2900 {
2901 RF_ComponentLabel_t clabel;
2902 struct vnode *vp;
2903 dev_t dev;
2904 int row, column;
2905
2906 raidPtr->root_partition = new_value;
2907 for(row=0; row<raidPtr->numRow; row++) {
2908 for(column=0; column<raidPtr->numCol; column++) {
2909 dev = raidPtr->Disks[row][column].dev;
2910 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2911 raidread_component_label(dev, vp, &clabel);
2912 clabel.root_partition = new_value;
2913 raidwrite_component_label(dev, vp, &clabel);
2914 }
2915 }
2916 return(new_value);
2917 }
2918
2919 void
2920 rf_release_all_vps(cset)
2921 RF_ConfigSet_t *cset;
2922 {
2923 RF_AutoConfig_t *ac;
2924
2925 ac = cset->ac;
2926 while(ac!=NULL) {
2927 /* Close the vp, and give it back */
2928 if (ac->vp) {
2929 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
2930 vput(ac->vp);
2931 }
2932 ac = ac->next;
2933 }
2934 }
2935
2936
2937 void
2938 rf_cleanup_config_set(cset)
2939 RF_ConfigSet_t *cset;
2940 {
2941 RF_AutoConfig_t *ac;
2942 RF_AutoConfig_t *next_ac;
2943
2944 ac = cset->ac;
2945 while(ac!=NULL) {
2946 next_ac = ac->next;
2947 /* nuke the label */
2948 free(ac->clabel, M_RAIDFRAME);
2949 /* cleanup the config structure */
2950 free(ac, M_RAIDFRAME);
2951 /* "next.." */
2952 ac = next_ac;
2953 }
2954 /* and, finally, nuke the config set */
2955 free(cset, M_RAIDFRAME);
2956 }
2957
2958
2959 void
2960 raid_init_component_label(raidPtr, clabel)
2961 RF_Raid_t *raidPtr;
2962 RF_ComponentLabel_t *clabel;
2963 {
2964 /* current version number */
2965 clabel->version = RF_COMPONENT_LABEL_VERSION;
2966 clabel->serial_number = raidPtr->serial_number;
2967 clabel->mod_counter = raidPtr->mod_counter;
2968 clabel->num_rows = raidPtr->numRow;
2969 clabel->num_columns = raidPtr->numCol;
2970 clabel->clean = RF_RAID_DIRTY; /* not clean */
2971 clabel->status = rf_ds_optimal; /* "It's good!" */
2972
2973 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
2974 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
2975 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
2976
2977 clabel->blockSize = raidPtr->bytesPerSector;
2978 clabel->numBlocks = raidPtr->sectorsPerDisk;
2979
2980 /* XXX not portable */
2981 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
2982 clabel->maxOutstanding = raidPtr->maxOutstanding;
2983 clabel->autoconfigure = raidPtr->autoconfigure;
2984 clabel->root_partition = raidPtr->root_partition;
2985 clabel->last_unit = raidPtr->raidid;
2986 clabel->config_order = raidPtr->config_order;
2987 }
2988
2989 int
2990 rf_auto_config_set(cset,unit)
2991 RF_ConfigSet_t *cset;
2992 int *unit;
2993 {
2994 RF_Raid_t *raidPtr;
2995 RF_Config_t *config;
2996 int raidID;
2997 int retcode;
2998
2999 printf("Starting autoconfigure on raid%d\n",raidID);
3000
3001 retcode = 0;
3002 *unit = -1;
3003
3004 /* 1. Create a config structure */
3005
3006 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3007 M_RAIDFRAME,
3008 M_NOWAIT);
3009 if (config==NULL) {
3010 printf("Out of mem!?!?\n");
3011 /* XXX do something more intelligent here. */
3012 return(1);
3013 }
3014 /* XXX raidID needs to be set correctly.. */
3015
3016 /*
3017 2. Figure out what RAID ID this one is supposed to live at
3018 See if we can get the same RAID dev that it was configured
3019 on last time..
3020 */
3021
3022 raidID = cset->ac->clabel->last_unit;
3023 if ((raidID < 0) || (raidID >= numraid)) {
3024 /* let's not wander off into lala land. */
3025 raidID = numraid - 1;
3026 }
3027 if (raidPtrs[raidID]->valid != 0) {
3028
3029 /*
3030 Nope... Go looking for an alternative...
3031 Start high so we don't immediately use raid0 if that's
3032 not taken.
3033 */
3034
3035 for(raidID = numraid; raidID >= 0; raidID--) {
3036 if (raidPtrs[raidID]->valid == 0) {
3037 /* can use this one! */
3038 break;
3039 }
3040 }
3041 }
3042
3043 if (raidID < 0) {
3044 /* punt... */
3045 printf("Unable to auto configure this set!\n");
3046 printf("(Out of RAID devs!)\n");
3047 return(1);
3048 }
3049
3050 raidPtr = raidPtrs[raidID];
3051
3052 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3053 raidPtr->raidid = raidID;
3054 raidPtr->openings = RAIDOUTSTANDING;
3055
3056 /* 3. Build the configuration structure */
3057 rf_create_configuration(cset->ac, config, raidPtr);
3058
3059 /* 4. Do the configuration */
3060 retcode = rf_Configure(raidPtr, config, cset->ac);
3061
3062 if (retcode == 0) {
3063
3064 raidinit(raidPtrs[raidID]);
3065
3066 rf_markalldirty(raidPtrs[raidID]);
3067 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3068 if (cset->ac->clabel->root_partition==1) {
3069 /* everything configured just fine. Make a note
3070 that this set is eligible to be root. */
3071 cset->rootable = 1;
3072 /* XXX do this here? */
3073 raidPtrs[raidID]->root_partition = 1;
3074 }
3075 }
3076
3077 /* 5. Cleanup */
3078 free(config, M_RAIDFRAME);
3079
3080 *unit = raidID;
3081 return(retcode);
3082 }
3083