rf_netbsdkintf.c revision 1.64 1 /* $NetBSD: rf_netbsdkintf.c,v 1.64 2000/02/27 02:35:33 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "opt_raid_autoconfig.h"
139 #include "rf_raid.h"
140 #include "rf_raidframe.h"
141 #include "rf_copyback.h"
142 #include "rf_dag.h"
143 #include "rf_dagflags.h"
144 #include "rf_diskqueue.h"
145 #include "rf_acctrace.h"
146 #include "rf_etimer.h"
147 #include "rf_general.h"
148 #include "rf_debugMem.h"
149 #include "rf_kintf.h"
150 #include "rf_options.h"
151 #include "rf_driver.h"
152 #include "rf_parityscan.h"
153 #include "rf_debugprint.h"
154 #include "rf_threadstuff.h"
155
/* Debug verbosity for this file; db1_printf() only prints when it is > 0. */
int rf_kdebug_level = 0;

/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.  The argument list must
 * be wrapped in its own parentheses because it is pasted after `printf'.
 *
 * Both variants expand to exactly one statement (do { } while (0)) so the
 * macro can be used, followed by a semicolon, as the body of an unbraced
 * if/else without capturing the `else' or producing a stray empty
 * statement.  Without DEBUG the arguments are not evaluated at all.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else /* DEBUG */
#define db1_printf(a) do { } while (0)
#endif /* DEBUG */
163
164 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
165
166 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
167
168 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
169 * spare table */
170 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
171 * installation process */
172
173 /* prototypes */
174 static void KernelWakeupFunc(struct buf * bp);
175 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
176 dev_t dev, RF_SectorNum_t startSect,
177 RF_SectorCount_t numSect, caddr_t buf,
178 void (*cbFunc) (struct buf *), void *cbArg,
179 int logBytesPerSector, struct proc * b_proc);
180 static void raidinit __P((RF_Raid_t *));
181
182 void raidattach __P((int));
183 int raidsize __P((dev_t));
184 int raidopen __P((dev_t, int, int, struct proc *));
185 int raidclose __P((dev_t, int, int, struct proc *));
186 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
187 int raidwrite __P((dev_t, struct uio *, int));
188 int raidread __P((dev_t, struct uio *, int));
189 void raidstrategy __P((struct buf *));
190 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
191
192 /*
193 * Pilfered from ccd.c
194 */
195
196 struct raidbuf {
197 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
198 struct buf *rf_obp; /* ptr. to original I/O buf */
199 int rf_flags; /* misc. flags */
200 RF_DiskQueueData_t *req;/* the request that this was part of.. */
201 };
202
203
204 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
205 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
206
207 /* XXX Not sure if the following should be replacing the raidPtrs above,
208 or if it should be used in conjunction with that...
209 */
210
211 struct raid_softc {
212 int sc_flags; /* flags */
213 int sc_cflags; /* configuration flags */
214 size_t sc_size; /* size of the raid device */
215 char sc_xname[20]; /* XXX external name */
216 struct disk sc_dkdev; /* generic disk device info */
217 struct pool sc_cbufpool; /* component buffer pool */
218 struct buf_queue buf_queue; /* used for the device queue */
219 };
220 /* sc_flags */
221 #define RAIDF_INITED 0x01 /* unit has been initialized */
222 #define RAIDF_WLABEL 0x02 /* label area is writable */
223 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
224 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
225 #define RAIDF_LOCKED 0x80 /* unit is locked */
226
227 #define raidunit(x) DISKUNIT(x)
228 int numraid = 0;
229
/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is re-used immediately, that's still 128K, which when
 * multiplied by, say, 10 requests, is 1280K, *on top* of the 640K of
 * incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */
246
/* Default number of simultaneous I/Os per RAID device; may be predefined. */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING	6
#endif

/* dev_t naming the raw partition (RAW_PART) of the unit that `dev' is on. */
#define RAIDLABELDEV(dev) \
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
254 /* declared here, and made public, for the benefit of KVM stuff.. */
255 struct raid_softc *raid_softc;
256
257 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
258 struct disklabel *));
259 static void raidgetdisklabel __P((dev_t));
260 static void raidmakedisklabel __P((struct raid_softc *));
261
262 static int raidlock __P((struct raid_softc *));
263 static void raidunlock __P((struct raid_softc *));
264
265 static void rf_markalldirty __P((RF_Raid_t *));
266 void rf_mountroot_hook __P((struct device *));
267
268 struct device *raidrootdev;
269 struct cfdata cf_raidrootdev;
270 struct cfdriver cfdrv;
271 /* XXX these should be moved up */
272 #include "rf_configure.h"
273 #include <sys/reboot.h>
274
275 void rf_ReconThread __P((struct rf_recon_req *));
276 /* XXX what I want is: */
277 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
278 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
279 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
280 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
281 void rf_buildroothack __P((void *));
282 void rf_final_update_component_labels __P((RF_Raid_t *));
283
284 RF_AutoConfig_t *rf_find_raid_components __P((void));
285 void print_component_label __P((RF_ComponentLabel_t *));
286 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
287 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
288 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
289 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
290 RF_Raid_t *));
291 int rf_set_autoconfig __P((RF_Raid_t *, int));
292 int rf_set_rootpartition __P((RF_Raid_t *, int));
293 void rf_release_all_vps __P((RF_ConfigSet_t *));
294 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
295 int rf_have_enough_components __P((RF_ConfigSet_t *));
296 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
297
298 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
299 allow autoconfig to take place.
300 Note that this is overridden by having
301 RAID_AUTOCONFIG as an option in the
302 kernel config file. */
303 extern struct device *booted_device;
304
305 void
306 raidattach(num)
307 int num;
308 {
309 int raidID;
310 int i, rc;
311 RF_AutoConfig_t *ac_list; /* autoconfig list */
312 RF_ConfigSet_t *config_sets;
313
314 #ifdef DEBUG
315 printf("raidattach: Asked for %d units\n", num);
316 #endif
317
318 if (num <= 0) {
319 #ifdef DIAGNOSTIC
320 panic("raidattach: count <= 0");
321 #endif
322 return;
323 }
324 /* This is where all the initialization stuff gets done. */
325
326 numraid = num;
327
328 /* Make some space for requested number of units... */
329
330 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
331 if (raidPtrs == NULL) {
332 panic("raidPtrs is NULL!!\n");
333 }
334
335 rc = rf_mutex_init(&rf_sparet_wait_mutex);
336 if (rc) {
337 RF_PANIC();
338 }
339
340 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
341
342 for (i = 0; i < num; i++)
343 raidPtrs[i] = NULL;
344 rc = rf_BootRaidframe();
345 if (rc == 0)
346 printf("Kernelized RAIDframe activated\n");
347 else
348 panic("Serious error booting RAID!!\n");
349
350 /* put together some datastructures like the CCD device does.. This
351 * lets us lock the device and what-not when it gets opened. */
352
353 raid_softc = (struct raid_softc *)
354 malloc(num * sizeof(struct raid_softc),
355 M_RAIDFRAME, M_NOWAIT);
356 if (raid_softc == NULL) {
357 printf("WARNING: no memory for RAIDframe driver\n");
358 return;
359 }
360
361 bzero(raid_softc, num * sizeof(struct raid_softc));
362
363 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
364 M_RAIDFRAME, M_NOWAIT);
365 if (raidrootdev == NULL) {
366 panic("No memory for RAIDframe driver!!?!?!\n");
367 }
368
369 for (raidID = 0; raidID < num; raidID++) {
370 BUFQ_INIT(&raid_softc[raidID].buf_queue);
371
372 raidrootdev[raidID].dv_class = DV_DISK;
373 raidrootdev[raidID].dv_cfdata = NULL;
374 raidrootdev[raidID].dv_unit = raidID;
375 raidrootdev[raidID].dv_parent = NULL;
376 raidrootdev[raidID].dv_flags = 0;
377 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
378
379 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
380 (RF_Raid_t *));
381 if (raidPtrs[raidID] == NULL) {
382 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
383 numraid = raidID;
384 return;
385 }
386 }
387
388 #if RAID_AUTOCONFIG
389 raidautoconfig = 1;
390 #endif
391
392 if (raidautoconfig) {
393 /* 1. locate all RAID components on the system */
394
395 #if DEBUG
396 printf("Searching for raid components...\n");
397 #endif
398 ac_list = rf_find_raid_components();
399
400 /* 2. sort them into their respective sets */
401
402 config_sets = rf_create_auto_sets(ac_list);
403
404 /* 3. evaluate each set and configure the valid ones
405 This gets done in rf_buildroothack() */
406
407 /* schedule the creation of the thread to do the
408 "/ on RAID" stuff */
409
410 kthread_create(rf_buildroothack,config_sets);
411
412 #if 0
413 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
414 #endif
415 }
416
417 }
418
419 void
420 rf_buildroothack(arg)
421 void *arg;
422 {
423 RF_ConfigSet_t *config_sets = arg;
424 RF_ConfigSet_t *cset;
425 RF_ConfigSet_t *next_cset;
426 int retcode;
427 int raidID;
428 int rootID;
429 int num_root;
430
431 num_root = 0;
432 cset = config_sets;
433 while(cset != NULL ) {
434 next_cset = cset->next;
435 if (rf_have_enough_components(cset) &&
436 cset->ac->clabel->autoconfigure==1) {
437 retcode = rf_auto_config_set(cset,&raidID);
438 if (!retcode) {
439 if (cset->rootable) {
440 rootID = raidID;
441 num_root++;
442 }
443 } else {
444 /* The autoconfig didn't work :( */
445 #if DEBUG
446 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
447 #endif
448 rf_release_all_vps(cset);
449 }
450 } else {
451 /* we're not autoconfiguring this set...
452 release the associated resources */
453 rf_release_all_vps(cset);
454 }
455 /* cleanup */
456 rf_cleanup_config_set(cset);
457 cset = next_cset;
458 }
459 if (boothowto & RB_ASKNAME) {
460 /* We don't auto-config... */
461 } else {
462 /* They didn't ask, and we found something bootable... */
463
464 if (num_root == 1) {
465 booted_device = &raidrootdev[rootID];
466 } else if (num_root > 1) {
467 /* we can't guess.. require the user to answer... */
468 boothowto |= RB_ASKNAME;
469 }
470 }
471 }
472
473
474 int
475 raidsize(dev)
476 dev_t dev;
477 {
478 struct raid_softc *rs;
479 struct disklabel *lp;
480 int part, unit, omask, size;
481
482 unit = raidunit(dev);
483 if (unit >= numraid)
484 return (-1);
485 rs = &raid_softc[unit];
486
487 if ((rs->sc_flags & RAIDF_INITED) == 0)
488 return (-1);
489
490 part = DISKPART(dev);
491 omask = rs->sc_dkdev.dk_openmask & (1 << part);
492 lp = rs->sc_dkdev.dk_label;
493
494 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
495 return (-1);
496
497 if (lp->d_partitions[part].p_fstype != FS_SWAP)
498 size = -1;
499 else
500 size = lp->d_partitions[part].p_size *
501 (lp->d_secsize / DEV_BSIZE);
502
503 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
504 return (-1);
505
506 return (size);
507
508 }
509
510 int
511 raiddump(dev, blkno, va, size)
512 dev_t dev;
513 daddr_t blkno;
514 caddr_t va;
515 size_t size;
516 {
517 /* Not implemented. */
518 return ENXIO;
519 }
520 /* ARGSUSED */
521 int
522 raidopen(dev, flags, fmt, p)
523 dev_t dev;
524 int flags, fmt;
525 struct proc *p;
526 {
527 int unit = raidunit(dev);
528 struct raid_softc *rs;
529 struct disklabel *lp;
530 int part, pmask;
531 int error = 0;
532
533 if (unit >= numraid)
534 return (ENXIO);
535 rs = &raid_softc[unit];
536
537 if ((error = raidlock(rs)) != 0)
538 return (error);
539 lp = rs->sc_dkdev.dk_label;
540
541 part = DISKPART(dev);
542 pmask = (1 << part);
543
544 db1_printf(("Opening raid device number: %d partition: %d\n",
545 unit, part));
546
547
548 if ((rs->sc_flags & RAIDF_INITED) &&
549 (rs->sc_dkdev.dk_openmask == 0))
550 raidgetdisklabel(dev);
551
552 /* make sure that this partition exists */
553
554 if (part != RAW_PART) {
555 db1_printf(("Not a raw partition..\n"));
556 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
557 ((part >= lp->d_npartitions) ||
558 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
559 error = ENXIO;
560 raidunlock(rs);
561 db1_printf(("Bailing out...\n"));
562 return (error);
563 }
564 }
565 /* Prevent this unit from being unconfigured while open. */
566 switch (fmt) {
567 case S_IFCHR:
568 rs->sc_dkdev.dk_copenmask |= pmask;
569 break;
570
571 case S_IFBLK:
572 rs->sc_dkdev.dk_bopenmask |= pmask;
573 break;
574 }
575
576 if ((rs->sc_dkdev.dk_openmask == 0) &&
577 ((rs->sc_flags & RAIDF_INITED) != 0)) {
578 /* First one... mark things as dirty... Note that we *MUST*
579 have done a configure before this. I DO NOT WANT TO BE
580 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
581 THAT THEY BELONG TOGETHER!!!!! */
582 /* XXX should check to see if we're only open for reading
583 here... If so, we needn't do this, but then need some
584 other way of keeping track of what's happened.. */
585
586 rf_markalldirty( raidPtrs[unit] );
587 }
588
589
590 rs->sc_dkdev.dk_openmask =
591 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
592
593 raidunlock(rs);
594
595 return (error);
596
597
598 }
599 /* ARGSUSED */
600 int
601 raidclose(dev, flags, fmt, p)
602 dev_t dev;
603 int flags, fmt;
604 struct proc *p;
605 {
606 int unit = raidunit(dev);
607 struct raid_softc *rs;
608 int error = 0;
609 int part;
610
611 if (unit >= numraid)
612 return (ENXIO);
613 rs = &raid_softc[unit];
614
615 if ((error = raidlock(rs)) != 0)
616 return (error);
617
618 part = DISKPART(dev);
619
620 /* ...that much closer to allowing unconfiguration... */
621 switch (fmt) {
622 case S_IFCHR:
623 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
624 break;
625
626 case S_IFBLK:
627 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
628 break;
629 }
630 rs->sc_dkdev.dk_openmask =
631 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
632
633 if ((rs->sc_dkdev.dk_openmask == 0) &&
634 ((rs->sc_flags & RAIDF_INITED) != 0)) {
635 /* Last one... device is not unconfigured yet.
636 Device shutdown has taken care of setting the
637 clean bits if RAIDF_INITED is not set
638 mark things as clean... */
639 #if 0
640 printf("Last one on raid%d. Updating status.\n",unit);
641 #endif
642 rf_final_update_component_labels( raidPtrs[unit] );
643 }
644
645 raidunlock(rs);
646 return (0);
647
648 }
649
650 void
651 raidstrategy(bp)
652 register struct buf *bp;
653 {
654 register int s;
655
656 unsigned int raidID = raidunit(bp->b_dev);
657 RF_Raid_t *raidPtr;
658 struct raid_softc *rs = &raid_softc[raidID];
659 struct disklabel *lp;
660 int wlabel;
661
662 if ((rs->sc_flags & RAIDF_INITED) ==0) {
663 bp->b_error = ENXIO;
664 bp->b_flags = B_ERROR;
665 bp->b_resid = bp->b_bcount;
666 biodone(bp);
667 return;
668 }
669 if (raidID >= numraid || !raidPtrs[raidID]) {
670 bp->b_error = ENODEV;
671 bp->b_flags |= B_ERROR;
672 bp->b_resid = bp->b_bcount;
673 biodone(bp);
674 return;
675 }
676 raidPtr = raidPtrs[raidID];
677 if (!raidPtr->valid) {
678 bp->b_error = ENODEV;
679 bp->b_flags |= B_ERROR;
680 bp->b_resid = bp->b_bcount;
681 biodone(bp);
682 return;
683 }
684 if (bp->b_bcount == 0) {
685 db1_printf(("b_bcount is zero..\n"));
686 biodone(bp);
687 return;
688 }
689 lp = rs->sc_dkdev.dk_label;
690
691 /*
692 * Do bounds checking and adjust transfer. If there's an
693 * error, the bounds check will flag that for us.
694 */
695
696 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
697 if (DISKPART(bp->b_dev) != RAW_PART)
698 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
699 db1_printf(("Bounds check failed!!:%d %d\n",
700 (int) bp->b_blkno, (int) wlabel));
701 biodone(bp);
702 return;
703 }
704 s = splbio();
705
706 bp->b_resid = 0;
707
708 /* stuff it onto our queue */
709 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
710
711 raidstart(raidPtrs[raidID]);
712
713 splx(s);
714 }
715 /* ARGSUSED */
716 int
717 raidread(dev, uio, flags)
718 dev_t dev;
719 struct uio *uio;
720 int flags;
721 {
722 int unit = raidunit(dev);
723 struct raid_softc *rs;
724 int part;
725
726 if (unit >= numraid)
727 return (ENXIO);
728 rs = &raid_softc[unit];
729
730 if ((rs->sc_flags & RAIDF_INITED) == 0)
731 return (ENXIO);
732 part = DISKPART(dev);
733
734 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
735
736 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
737
738 }
739 /* ARGSUSED */
740 int
741 raidwrite(dev, uio, flags)
742 dev_t dev;
743 struct uio *uio;
744 int flags;
745 {
746 int unit = raidunit(dev);
747 struct raid_softc *rs;
748
749 if (unit >= numraid)
750 return (ENXIO);
751 rs = &raid_softc[unit];
752
753 if ((rs->sc_flags & RAIDF_INITED) == 0)
754 return (ENXIO);
755 db1_printf(("raidwrite\n"));
756 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
757
758 }
759
760 int
761 raidioctl(dev, cmd, data, flag, p)
762 dev_t dev;
763 u_long cmd;
764 caddr_t data;
765 int flag;
766 struct proc *p;
767 {
768 int unit = raidunit(dev);
769 int error = 0;
770 int part, pmask;
771 struct raid_softc *rs;
772 RF_Config_t *k_cfg, *u_cfg;
773 RF_Raid_t *raidPtr;
774 RF_RaidDisk_t *diskPtr;
775 RF_AccTotals_t *totals;
776 RF_DeviceConfig_t *d_cfg, **ucfgp;
777 u_char *specific_buf;
778 int retcode = 0;
779 int row;
780 int column;
781 struct rf_recon_req *rrcopy, *rr;
782 RF_ComponentLabel_t *clabel;
783 RF_ComponentLabel_t ci_label;
784 RF_ComponentLabel_t **clabel_ptr;
785 RF_SingleComponent_t *sparePtr,*componentPtr;
786 RF_SingleComponent_t hot_spare;
787 RF_SingleComponent_t component;
788 int i, j, d;
789
790 if (unit >= numraid)
791 return (ENXIO);
792 rs = &raid_softc[unit];
793 raidPtr = raidPtrs[unit];
794
795 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
796 (int) DISKPART(dev), (int) unit, (int) cmd));
797
798 /* Must be open for writes for these commands... */
799 switch (cmd) {
800 case DIOCSDINFO:
801 case DIOCWDINFO:
802 case DIOCWLABEL:
803 if ((flag & FWRITE) == 0)
804 return (EBADF);
805 }
806
807 /* Must be initialized for these... */
808 switch (cmd) {
809 case DIOCGDINFO:
810 case DIOCSDINFO:
811 case DIOCWDINFO:
812 case DIOCGPART:
813 case DIOCWLABEL:
814 case DIOCGDEFLABEL:
815 case RAIDFRAME_SHUTDOWN:
816 case RAIDFRAME_REWRITEPARITY:
817 case RAIDFRAME_GET_INFO:
818 case RAIDFRAME_RESET_ACCTOTALS:
819 case RAIDFRAME_GET_ACCTOTALS:
820 case RAIDFRAME_KEEP_ACCTOTALS:
821 case RAIDFRAME_GET_SIZE:
822 case RAIDFRAME_FAIL_DISK:
823 case RAIDFRAME_COPYBACK:
824 case RAIDFRAME_CHECK_RECON_STATUS:
825 case RAIDFRAME_GET_COMPONENT_LABEL:
826 case RAIDFRAME_SET_COMPONENT_LABEL:
827 case RAIDFRAME_ADD_HOT_SPARE:
828 case RAIDFRAME_REMOVE_HOT_SPARE:
829 case RAIDFRAME_INIT_LABELS:
830 case RAIDFRAME_REBUILD_IN_PLACE:
831 case RAIDFRAME_CHECK_PARITY:
832 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
833 case RAIDFRAME_CHECK_COPYBACK_STATUS:
834 case RAIDFRAME_SET_AUTOCONFIG:
835 case RAIDFRAME_SET_ROOT:
836 if ((rs->sc_flags & RAIDF_INITED) == 0)
837 return (ENXIO);
838 }
839
840 switch (cmd) {
841
842 /* configure the system */
843 case RAIDFRAME_CONFIGURE:
844
845 if (raidPtr->valid) {
846 /* There is a valid RAID set running on this unit! */
847 printf("raid%d: Device already configured!\n",unit);
848 }
849
850 /* copy-in the configuration information */
851 /* data points to a pointer to the configuration structure */
852
853 u_cfg = *((RF_Config_t **) data);
854 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
855 if (k_cfg == NULL) {
856 return (ENOMEM);
857 }
858 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
859 sizeof(RF_Config_t));
860 if (retcode) {
861 RF_Free(k_cfg, sizeof(RF_Config_t));
862 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
863 retcode));
864 return (retcode);
865 }
866 /* allocate a buffer for the layout-specific data, and copy it
867 * in */
868 if (k_cfg->layoutSpecificSize) {
869 if (k_cfg->layoutSpecificSize > 10000) {
870 /* sanity check */
871 RF_Free(k_cfg, sizeof(RF_Config_t));
872 return (EINVAL);
873 }
874 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
875 (u_char *));
876 if (specific_buf == NULL) {
877 RF_Free(k_cfg, sizeof(RF_Config_t));
878 return (ENOMEM);
879 }
880 retcode = copyin(k_cfg->layoutSpecific,
881 (caddr_t) specific_buf,
882 k_cfg->layoutSpecificSize);
883 if (retcode) {
884 RF_Free(k_cfg, sizeof(RF_Config_t));
885 RF_Free(specific_buf,
886 k_cfg->layoutSpecificSize);
887 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
888 retcode));
889 return (retcode);
890 }
891 } else
892 specific_buf = NULL;
893 k_cfg->layoutSpecific = specific_buf;
894
895 /* should do some kind of sanity check on the configuration.
896 * Store the sum of all the bytes in the last byte? */
897
898 /* configure the system */
899
900 /*
901 * Clear the entire RAID descriptor, just to make sure
902 * there is no stale data left in the case of a
903 * reconfiguration
904 */
905 bzero((char *) raidPtr, sizeof(RF_Raid_t));
906 raidPtr->raidid = unit;
907
908 retcode = rf_Configure(raidPtr, k_cfg, NULL);
909
910 if (retcode == 0) {
911
912 /* allow this many simultaneous IO's to
913 this RAID device */
914 raidPtr->openings = RAIDOUTSTANDING;
915
916 raidinit(raidPtr);
917 rf_markalldirty(raidPtr);
918 }
919 /* free the buffers. No return code here. */
920 if (k_cfg->layoutSpecificSize) {
921 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
922 }
923 RF_Free(k_cfg, sizeof(RF_Config_t));
924
925 return (retcode);
926
927 /* shutdown the system */
928 case RAIDFRAME_SHUTDOWN:
929
930 if ((error = raidlock(rs)) != 0)
931 return (error);
932
933 /*
934 * If somebody has a partition mounted, we shouldn't
935 * shutdown.
936 */
937
938 part = DISKPART(dev);
939 pmask = (1 << part);
940 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
941 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
942 (rs->sc_dkdev.dk_copenmask & pmask))) {
943 raidunlock(rs);
944 return (EBUSY);
945 }
946
947 retcode = rf_Shutdown(raidPtr);
948
949 pool_destroy(&rs->sc_cbufpool);
950
951 /* It's no longer initialized... */
952 rs->sc_flags &= ~RAIDF_INITED;
953
954 /* Detach the disk. */
955 disk_detach(&rs->sc_dkdev);
956
957 raidunlock(rs);
958
959 return (retcode);
960 case RAIDFRAME_GET_COMPONENT_LABEL:
961 clabel_ptr = (RF_ComponentLabel_t **) data;
962 /* need to read the component label for the disk indicated
963 by row,column in clabel */
964
965 /* For practice, let's get it directly fromdisk, rather
966 than from the in-core copy */
967 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
968 (RF_ComponentLabel_t *));
969 if (clabel == NULL)
970 return (ENOMEM);
971
972 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
973
974 retcode = copyin( *clabel_ptr, clabel,
975 sizeof(RF_ComponentLabel_t));
976
977 if (retcode) {
978 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
979 return(retcode);
980 }
981
982 row = clabel->row;
983 column = clabel->column;
984
985 if ((row < 0) || (row >= raidPtr->numRow) ||
986 (column < 0) || (column >= raidPtr->numCol)) {
987 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
988 return(EINVAL);
989 }
990
991 raidread_component_label(raidPtr->Disks[row][column].dev,
992 raidPtr->raid_cinfo[row][column].ci_vp,
993 clabel );
994
995 retcode = copyout((caddr_t) clabel,
996 (caddr_t) *clabel_ptr,
997 sizeof(RF_ComponentLabel_t));
998 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
999 return (retcode);
1000
1001 case RAIDFRAME_SET_COMPONENT_LABEL:
1002 clabel = (RF_ComponentLabel_t *) data;
1003
1004 /* XXX check the label for valid stuff... */
1005 /* Note that some things *should not* get modified --
1006 the user should be re-initing the labels instead of
1007 trying to patch things.
1008 */
1009
1010 printf("Got component label:\n");
1011 printf("Version: %d\n",clabel->version);
1012 printf("Serial Number: %d\n",clabel->serial_number);
1013 printf("Mod counter: %d\n",clabel->mod_counter);
1014 printf("Row: %d\n", clabel->row);
1015 printf("Column: %d\n", clabel->column);
1016 printf("Num Rows: %d\n", clabel->num_rows);
1017 printf("Num Columns: %d\n", clabel->num_columns);
1018 printf("Clean: %d\n", clabel->clean);
1019 printf("Status: %d\n", clabel->status);
1020
1021 row = clabel->row;
1022 column = clabel->column;
1023
1024 if ((row < 0) || (row >= raidPtr->numRow) ||
1025 (column < 0) || (column >= raidPtr->numCol)) {
1026 return(EINVAL);
1027 }
1028
1029 /* XXX this isn't allowed to do anything for now :-) */
1030
1031 /* XXX and before it is, we need to fill in the rest
1032 of the fields!?!?!?! */
1033 #if 0
1034 raidwrite_component_label(
1035 raidPtr->Disks[row][column].dev,
1036 raidPtr->raid_cinfo[row][column].ci_vp,
1037 clabel );
1038 #endif
1039 return (0);
1040
1041 case RAIDFRAME_INIT_LABELS:
1042 clabel = (RF_ComponentLabel_t *) data;
1043 /*
1044 we only want the serial number from
1045 the above. We get all the rest of the information
1046 from the config that was used to create this RAID
1047 set.
1048 */
1049
1050 raidPtr->serial_number = clabel->serial_number;
1051
1052 raid_init_component_label(raidPtr, &ci_label);
1053 ci_label.serial_number = clabel->serial_number;
1054
1055 for(row=0;row<raidPtr->numRow;row++) {
1056 ci_label.row = row;
1057 for(column=0;column<raidPtr->numCol;column++) {
1058 diskPtr = &raidPtr->Disks[row][column];
1059 ci_label.partitionSize = diskPtr->partitionSize;
1060 ci_label.column = column;
1061 raidwrite_component_label(
1062 raidPtr->Disks[row][column].dev,
1063 raidPtr->raid_cinfo[row][column].ci_vp,
1064 &ci_label );
1065 }
1066 }
1067
1068 return (retcode);
1069 case RAIDFRAME_SET_AUTOCONFIG:
1070 d = rf_set_autoconfig(raidPtr, *data);
1071 printf("New autoconfig value is: %d\n", d);
1072 *data = d;
1073 return (retcode);
1074
1075 case RAIDFRAME_SET_ROOT:
1076 d = rf_set_rootpartition(raidPtr, *data);
1077 printf("New rootpartition value is: %d\n", d);
1078 *data = d;
1079 return (retcode);
1080
1081 /* initialize all parity */
1082 case RAIDFRAME_REWRITEPARITY:
1083
1084 if (raidPtr->Layout.map->faultsTolerated == 0) {
1085 /* Parity for RAID 0 is trivially correct */
1086 raidPtr->parity_good = RF_RAID_CLEAN;
1087 return(0);
1088 }
1089
1090 if (raidPtr->parity_rewrite_in_progress == 1) {
1091 /* Re-write is already in progress! */
1092 return(EINVAL);
1093 }
1094
1095 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1096 rf_RewriteParityThread,
1097 raidPtr,"raid_parity");
1098 return (retcode);
1099
1100
1101 case RAIDFRAME_ADD_HOT_SPARE:
1102 sparePtr = (RF_SingleComponent_t *) data;
1103 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1104 printf("Adding spare\n");
1105 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1106 return(retcode);
1107
1108 case RAIDFRAME_REMOVE_HOT_SPARE:
1109 return(retcode);
1110
1111 case RAIDFRAME_REBUILD_IN_PLACE:
1112
1113 if (raidPtr->Layout.map->faultsTolerated == 0) {
1114 /* Can't do this on a RAID 0!! */
1115 return(EINVAL);
1116 }
1117
1118 if (raidPtr->recon_in_progress == 1) {
1119 /* a reconstruct is already in progress! */
1120 return(EINVAL);
1121 }
1122
1123 componentPtr = (RF_SingleComponent_t *) data;
1124 memcpy( &component, componentPtr,
1125 sizeof(RF_SingleComponent_t));
1126 row = component.row;
1127 column = component.column;
1128 printf("Rebuild: %d %d\n",row, column);
1129 if ((row < 0) || (row >= raidPtr->numRow) ||
1130 (column < 0) || (column >= raidPtr->numCol)) {
1131 return(EINVAL);
1132 }
1133
1134 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1135 if (rrcopy == NULL)
1136 return(ENOMEM);
1137
1138 rrcopy->raidPtr = (void *) raidPtr;
1139 rrcopy->row = row;
1140 rrcopy->col = column;
1141
1142 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1143 rf_ReconstructInPlaceThread,
1144 rrcopy,"raid_reconip");
1145 return(retcode);
1146
1147 case RAIDFRAME_GET_INFO:
1148 if (!raidPtr->valid)
1149 return (ENODEV);
1150 ucfgp = (RF_DeviceConfig_t **) data;
1151 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1152 (RF_DeviceConfig_t *));
1153 if (d_cfg == NULL)
1154 return (ENOMEM);
1155 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1156 d_cfg->rows = raidPtr->numRow;
1157 d_cfg->cols = raidPtr->numCol;
1158 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1159 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1160 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1161 return (ENOMEM);
1162 }
1163 d_cfg->nspares = raidPtr->numSpare;
1164 if (d_cfg->nspares >= RF_MAX_DISKS) {
1165 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1166 return (ENOMEM);
1167 }
1168 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1169 d = 0;
1170 for (i = 0; i < d_cfg->rows; i++) {
1171 for (j = 0; j < d_cfg->cols; j++) {
1172 d_cfg->devs[d] = raidPtr->Disks[i][j];
1173 d++;
1174 }
1175 }
1176 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1177 d_cfg->spares[i] = raidPtr->Disks[0][j];
1178 }
1179 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1180 sizeof(RF_DeviceConfig_t));
1181 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1182
1183 return (retcode);
1184
1185 case RAIDFRAME_CHECK_PARITY:
1186 *(int *) data = raidPtr->parity_good;
1187 return (0);
1188
1189 case RAIDFRAME_RESET_ACCTOTALS:
1190 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1191 return (0);
1192
1193 case RAIDFRAME_GET_ACCTOTALS:
1194 totals = (RF_AccTotals_t *) data;
1195 *totals = raidPtr->acc_totals;
1196 return (0);
1197
1198 case RAIDFRAME_KEEP_ACCTOTALS:
1199 raidPtr->keep_acc_totals = *(int *)data;
1200 return (0);
1201
1202 case RAIDFRAME_GET_SIZE:
1203 *(int *) data = raidPtr->totalSectors;
1204 return (0);
1205
1206 /* fail a disk & optionally start reconstruction */
1207 case RAIDFRAME_FAIL_DISK:
1208
1209 if (raidPtr->Layout.map->faultsTolerated == 0) {
1210 /* Can't do this on a RAID 0!! */
1211 return(EINVAL);
1212 }
1213
1214 rr = (struct rf_recon_req *) data;
1215
1216 if (rr->row < 0 || rr->row >= raidPtr->numRow
1217 || rr->col < 0 || rr->col >= raidPtr->numCol)
1218 return (EINVAL);
1219
1220 printf("raid%d: Failing the disk: row: %d col: %d\n",
1221 unit, rr->row, rr->col);
1222
1223 /* make a copy of the recon request so that we don't rely on
1224 * the user's buffer */
1225 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1226 if (rrcopy == NULL)
1227 return(ENOMEM);
1228 bcopy(rr, rrcopy, sizeof(*rr));
1229 rrcopy->raidPtr = (void *) raidPtr;
1230
1231 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1232 rf_ReconThread,
1233 rrcopy,"raid_recon");
1234 return (0);
1235
1236 /* invoke a copyback operation after recon on whatever disk
1237 * needs it, if any */
1238 case RAIDFRAME_COPYBACK:
1239
1240 if (raidPtr->Layout.map->faultsTolerated == 0) {
1241 /* This makes no sense on a RAID 0!! */
1242 return(EINVAL);
1243 }
1244
1245 if (raidPtr->copyback_in_progress == 1) {
1246 /* Copyback is already in progress! */
1247 return(EINVAL);
1248 }
1249
1250 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1251 rf_CopybackThread,
1252 raidPtr,"raid_copyback");
1253 return (retcode);
1254
1255 /* return the percentage completion of reconstruction */
1256 case RAIDFRAME_CHECK_RECON_STATUS:
1257 if (raidPtr->Layout.map->faultsTolerated == 0) {
1258 /* This makes no sense on a RAID 0 */
1259 return(EINVAL);
1260 }
1261 row = 0; /* XXX we only consider a single row... */
1262 if (raidPtr->status[row] != rf_rs_reconstructing)
1263 *(int *) data = 100;
1264 else
1265 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1266 return (0);
1267
1268 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1269 if (raidPtr->Layout.map->faultsTolerated == 0) {
1270 /* This makes no sense on a RAID 0 */
1271 return(EINVAL);
1272 }
1273 if (raidPtr->parity_rewrite_in_progress == 1) {
1274 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1275 } else {
1276 *(int *) data = 100;
1277 }
1278 return (0);
1279
1280 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1281 if (raidPtr->Layout.map->faultsTolerated == 0) {
1282 /* This makes no sense on a RAID 0 */
1283 return(EINVAL);
1284 }
1285 if (raidPtr->copyback_in_progress == 1) {
1286 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1287 raidPtr->Layout.numStripe;
1288 } else {
1289 *(int *) data = 100;
1290 }
1291 return (0);
1292
1293
1294 /* the sparetable daemon calls this to wait for the kernel to
1295 * need a spare table. this ioctl does not return until a
1296 * spare table is needed. XXX -- calling mpsleep here in the
1297 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1298 * -- I should either compute the spare table in the kernel,
1299 * or have a different -- XXX XXX -- interface (a different
1300 * character device) for delivering the table -- XXX */
1301 #if 0
1302 case RAIDFRAME_SPARET_WAIT:
1303 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1304 while (!rf_sparet_wait_queue)
1305 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1306 waitreq = rf_sparet_wait_queue;
1307 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1308 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1309
1310 /* structure assignment */
1311 *((RF_SparetWait_t *) data) = *waitreq;
1312
1313 RF_Free(waitreq, sizeof(*waitreq));
1314 return (0);
1315
1316 /* wakes up a process waiting on SPARET_WAIT and puts an error
1317 * code in it that will cause the dameon to exit */
1318 case RAIDFRAME_ABORT_SPARET_WAIT:
1319 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1320 waitreq->fcol = -1;
1321 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1322 waitreq->next = rf_sparet_wait_queue;
1323 rf_sparet_wait_queue = waitreq;
1324 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1325 wakeup(&rf_sparet_wait_queue);
1326 return (0);
1327
1328 /* used by the spare table daemon to deliver a spare table
1329 * into the kernel */
1330 case RAIDFRAME_SEND_SPARET:
1331
1332 /* install the spare table */
1333 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1334
1335 /* respond to the requestor. the return status of the spare
1336 * table installation is passed in the "fcol" field */
1337 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1338 waitreq->fcol = retcode;
1339 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1340 waitreq->next = rf_sparet_resp_queue;
1341 rf_sparet_resp_queue = waitreq;
1342 wakeup(&rf_sparet_resp_queue);
1343 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1344
1345 return (retcode);
1346 #endif
1347
1348 default:
1349 break; /* fall through to the os-specific code below */
1350
1351 }
1352
1353 if (!raidPtr->valid)
1354 return (EINVAL);
1355
1356 /*
1357 * Add support for "regular" device ioctls here.
1358 */
1359
1360 switch (cmd) {
1361 case DIOCGDINFO:
1362 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1363 break;
1364
1365 case DIOCGPART:
1366 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1367 ((struct partinfo *) data)->part =
1368 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1369 break;
1370
1371 case DIOCWDINFO:
1372 case DIOCSDINFO:
1373 if ((error = raidlock(rs)) != 0)
1374 return (error);
1375
1376 rs->sc_flags |= RAIDF_LABELLING;
1377
1378 error = setdisklabel(rs->sc_dkdev.dk_label,
1379 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1380 if (error == 0) {
1381 if (cmd == DIOCWDINFO)
1382 error = writedisklabel(RAIDLABELDEV(dev),
1383 raidstrategy, rs->sc_dkdev.dk_label,
1384 rs->sc_dkdev.dk_cpulabel);
1385 }
1386 rs->sc_flags &= ~RAIDF_LABELLING;
1387
1388 raidunlock(rs);
1389
1390 if (error)
1391 return (error);
1392 break;
1393
1394 case DIOCWLABEL:
1395 if (*(int *) data != 0)
1396 rs->sc_flags |= RAIDF_WLABEL;
1397 else
1398 rs->sc_flags &= ~RAIDF_WLABEL;
1399 break;
1400
1401 case DIOCGDEFLABEL:
1402 raidgetdefaultlabel(raidPtr, rs,
1403 (struct disklabel *) data);
1404 break;
1405
1406 default:
1407 retcode = ENOTTY;
1408 }
1409 return (retcode);
1410
1411 }
1412
1413
1414 /* raidinit -- complete the rest of the initialization for the
1415 RAIDframe device. */
1416
1417
1418 static void
1419 raidinit(raidPtr)
1420 RF_Raid_t *raidPtr;
1421 {
1422 struct raid_softc *rs;
1423 int unit;
1424
1425 unit = raidPtr->raidid;
1426
1427 rs = &raid_softc[unit];
1428 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1429 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1430
1431
1432 /* XXX should check return code first... */
1433 rs->sc_flags |= RAIDF_INITED;
1434
1435 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1436
1437 rs->sc_dkdev.dk_name = rs->sc_xname;
1438
1439 /* disk_attach actually creates space for the CPU disklabel, among
1440 * other things, so it's critical to call this *BEFORE* we try putzing
1441 * with disklabels. */
1442
1443 disk_attach(&rs->sc_dkdev);
1444
1445 /* XXX There may be a weird interaction here between this, and
1446 * protectedSectors, as used in RAIDframe. */
1447
1448 rs->sc_size = raidPtr->totalSectors;
1449
1450 }
1451
1452 /* wake up the daemon & tell it to get us a spare table
1453 * XXX
1454 * the entries in the queues should be tagged with the raidPtr
1455 * so that in the extremely rare case that two recons happen at once,
1456 * we know for which device were requesting a spare table
1457 * XXX
1458 *
1459 * XXX This code is not currently used. GO
1460 */
1461 int
1462 rf_GetSpareTableFromDaemon(req)
1463 RF_SparetWait_t *req;
1464 {
1465 int retcode;
1466
1467 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1468 req->next = rf_sparet_wait_queue;
1469 rf_sparet_wait_queue = req;
1470 wakeup(&rf_sparet_wait_queue);
1471
1472 /* mpsleep unlocks the mutex */
1473 while (!rf_sparet_resp_queue) {
1474 tsleep(&rf_sparet_resp_queue, PRIBIO,
1475 "raidframe getsparetable", 0);
1476 }
1477 req = rf_sparet_resp_queue;
1478 rf_sparet_resp_queue = req->next;
1479 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1480
1481 retcode = req->fcol;
1482 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1483 * alloc'd */
1484 return (retcode);
1485 }
1486
1487 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1488 * bp & passes it down.
1489 * any calls originating in the kernel must use non-blocking I/O
1490 * do some extra sanity checking to return "appropriate" error values for
1491 * certain conditions (to make some standard utilities work)
1492 *
1493 * Formerly known as: rf_DoAccessKernel
1494 */
1495 void
1496 raidstart(raidPtr)
1497 RF_Raid_t *raidPtr;
1498 {
1499 RF_SectorCount_t num_blocks, pb, sum;
1500 RF_RaidAddr_t raid_addr;
1501 int retcode;
1502 struct partition *pp;
1503 daddr_t blocknum;
1504 int unit;
1505 struct raid_softc *rs;
1506 int do_async;
1507 struct buf *bp;
1508
1509 unit = raidPtr->raidid;
1510 rs = &raid_softc[unit];
1511
1512 /* quick check to see if anything has died recently */
1513 RF_LOCK_MUTEX(raidPtr->mutex);
1514 if (raidPtr->numNewFailures > 0) {
1515 rf_update_component_labels(raidPtr);
1516 raidPtr->numNewFailures--;
1517 }
1518 RF_UNLOCK_MUTEX(raidPtr->mutex);
1519
1520 /* Check to see if we're at the limit... */
1521 RF_LOCK_MUTEX(raidPtr->mutex);
1522 while (raidPtr->openings > 0) {
1523 RF_UNLOCK_MUTEX(raidPtr->mutex);
1524
1525 /* get the next item, if any, from the queue */
1526 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1527 /* nothing more to do */
1528 return;
1529 }
1530 BUFQ_REMOVE(&rs->buf_queue, bp);
1531
1532 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1533 * partition.. Need to make it absolute to the underlying
1534 * device.. */
1535
1536 blocknum = bp->b_blkno;
1537 if (DISKPART(bp->b_dev) != RAW_PART) {
1538 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1539 blocknum += pp->p_offset;
1540 }
1541
1542 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1543 (int) blocknum));
1544
1545 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1546 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1547
1548 /* *THIS* is where we adjust what block we're going to...
1549 * but DO NOT TOUCH bp->b_blkno!!! */
1550 raid_addr = blocknum;
1551
1552 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1553 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1554 sum = raid_addr + num_blocks + pb;
1555 if (1 || rf_debugKernelAccess) {
1556 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1557 (int) raid_addr, (int) sum, (int) num_blocks,
1558 (int) pb, (int) bp->b_resid));
1559 }
1560 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1561 || (sum < num_blocks) || (sum < pb)) {
1562 bp->b_error = ENOSPC;
1563 bp->b_flags |= B_ERROR;
1564 bp->b_resid = bp->b_bcount;
1565 biodone(bp);
1566 RF_LOCK_MUTEX(raidPtr->mutex);
1567 continue;
1568 }
1569 /*
1570 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1571 */
1572
1573 if (bp->b_bcount & raidPtr->sectorMask) {
1574 bp->b_error = EINVAL;
1575 bp->b_flags |= B_ERROR;
1576 bp->b_resid = bp->b_bcount;
1577 biodone(bp);
1578 RF_LOCK_MUTEX(raidPtr->mutex);
1579 continue;
1580
1581 }
1582 db1_printf(("Calling DoAccess..\n"));
1583
1584
1585 RF_LOCK_MUTEX(raidPtr->mutex);
1586 raidPtr->openings--;
1587 RF_UNLOCK_MUTEX(raidPtr->mutex);
1588
1589 /*
1590 * Everything is async.
1591 */
1592 do_async = 1;
1593
1594 /* don't ever condition on bp->b_flags & B_WRITE.
1595 * always condition on B_READ instead */
1596
1597 /* XXX we're still at splbio() here... do we *really*
1598 need to be? */
1599
1600
1601 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1602 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1603 do_async, raid_addr, num_blocks,
1604 bp->b_un.b_addr, bp, NULL, NULL,
1605 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1606
1607
1608 RF_LOCK_MUTEX(raidPtr->mutex);
1609 }
1610 RF_UNLOCK_MUTEX(raidPtr->mutex);
1611 }
1612
1613
1614
1615
1616 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1617
1618 int
1619 rf_DispatchKernelIO(queue, req)
1620 RF_DiskQueue_t *queue;
1621 RF_DiskQueueData_t *req;
1622 {
1623 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1624 struct buf *bp;
1625 struct raidbuf *raidbp = NULL;
1626 struct raid_softc *rs;
1627 int unit;
1628 int s;
1629
1630 s=0;
1631 /* s = splbio();*/ /* want to test this */
1632 /* XXX along with the vnode, we also need the softc associated with
1633 * this device.. */
1634
1635 req->queue = queue;
1636
1637 unit = queue->raidPtr->raidid;
1638
1639 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1640
1641 if (unit >= numraid) {
1642 printf("Invalid unit number: %d %d\n", unit, numraid);
1643 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1644 }
1645 rs = &raid_softc[unit];
1646
1647 /* XXX is this the right place? */
1648 disk_busy(&rs->sc_dkdev);
1649
1650 bp = req->bp;
1651 #if 1
1652 /* XXX when there is a physical disk failure, someone is passing us a
1653 * buffer that contains old stuff!! Attempt to deal with this problem
1654 * without taking a performance hit... (not sure where the real bug
1655 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1656
1657 if (bp->b_flags & B_ERROR) {
1658 bp->b_flags &= ~B_ERROR;
1659 }
1660 if (bp->b_error != 0) {
1661 bp->b_error = 0;
1662 }
1663 #endif
1664 raidbp = RAIDGETBUF(rs);
1665
1666 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1667
1668 /*
1669 * context for raidiodone
1670 */
1671 raidbp->rf_obp = bp;
1672 raidbp->req = req;
1673
1674 LIST_INIT(&raidbp->rf_buf.b_dep);
1675
1676 switch (req->type) {
1677 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1678 /* XXX need to do something extra here.. */
1679 /* I'm leaving this in, as I've never actually seen it used,
1680 * and I'd like folks to report it... GO */
1681 printf(("WAKEUP CALLED\n"));
1682 queue->numOutstanding++;
1683
1684 /* XXX need to glue the original buffer into this?? */
1685
1686 KernelWakeupFunc(&raidbp->rf_buf);
1687 break;
1688
1689 case RF_IO_TYPE_READ:
1690 case RF_IO_TYPE_WRITE:
1691
1692 if (req->tracerec) {
1693 RF_ETIMER_START(req->tracerec->timer);
1694 }
1695 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1696 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1697 req->sectorOffset, req->numSector,
1698 req->buf, KernelWakeupFunc, (void *) req,
1699 queue->raidPtr->logBytesPerSector, req->b_proc);
1700
1701 if (rf_debugKernelAccess) {
1702 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1703 (long) bp->b_blkno));
1704 }
1705 queue->numOutstanding++;
1706 queue->last_deq_sector = req->sectorOffset;
1707 /* acc wouldn't have been let in if there were any pending
1708 * reqs at any other priority */
1709 queue->curPriority = req->priority;
1710
1711 db1_printf(("Going for %c to unit %d row %d col %d\n",
1712 req->type, unit, queue->row, queue->col));
1713 db1_printf(("sector %d count %d (%d bytes) %d\n",
1714 (int) req->sectorOffset, (int) req->numSector,
1715 (int) (req->numSector <<
1716 queue->raidPtr->logBytesPerSector),
1717 (int) queue->raidPtr->logBytesPerSector));
1718 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1719 raidbp->rf_buf.b_vp->v_numoutput++;
1720 }
1721 VOP_STRATEGY(&raidbp->rf_buf);
1722
1723 break;
1724
1725 default:
1726 panic("bad req->type in rf_DispatchKernelIO");
1727 }
1728 db1_printf(("Exiting from DispatchKernelIO\n"));
1729 /* splx(s); */ /* want to test this */
1730 return (0);
1731 }
1732 /* this is the callback function associated with a I/O invoked from
1733 kernel code.
1734 */
1735 static void
1736 KernelWakeupFunc(vbp)
1737 struct buf *vbp;
1738 {
1739 RF_DiskQueueData_t *req = NULL;
1740 RF_DiskQueue_t *queue;
1741 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1742 struct buf *bp;
1743 struct raid_softc *rs;
1744 int unit;
1745 register int s;
1746
1747 s = splbio();
1748 db1_printf(("recovering the request queue:\n"));
1749 req = raidbp->req;
1750
1751 bp = raidbp->rf_obp;
1752
1753 queue = (RF_DiskQueue_t *) req->queue;
1754
1755 if (raidbp->rf_buf.b_flags & B_ERROR) {
1756 bp->b_flags |= B_ERROR;
1757 bp->b_error = raidbp->rf_buf.b_error ?
1758 raidbp->rf_buf.b_error : EIO;
1759 }
1760
1761 /* XXX methinks this could be wrong... */
1762 #if 1
1763 bp->b_resid = raidbp->rf_buf.b_resid;
1764 #endif
1765
1766 if (req->tracerec) {
1767 RF_ETIMER_STOP(req->tracerec->timer);
1768 RF_ETIMER_EVAL(req->tracerec->timer);
1769 RF_LOCK_MUTEX(rf_tracing_mutex);
1770 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1771 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1772 req->tracerec->num_phys_ios++;
1773 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1774 }
1775 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1776
1777 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1778
1779
1780 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1781 * ballistic, and mark the component as hosed... */
1782
1783 if (bp->b_flags & B_ERROR) {
1784 /* Mark the disk as dead */
1785 /* but only mark it once... */
1786 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1787 rf_ds_optimal) {
1788 printf("raid%d: IO Error. Marking %s as failed.\n",
1789 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1790 queue->raidPtr->Disks[queue->row][queue->col].status =
1791 rf_ds_failed;
1792 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1793 queue->raidPtr->numFailures++;
1794 queue->raidPtr->numNewFailures++;
1795 /* XXX here we should bump the version number for each component, and write that data out */
1796 } else { /* Disk is already dead... */
1797 /* printf("Disk already marked as dead!\n"); */
1798 }
1799
1800 }
1801
1802 rs = &raid_softc[unit];
1803 RAIDPUTBUF(rs, raidbp);
1804
1805
1806 if (bp->b_resid == 0) {
1807 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1808 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1809 }
1810
1811 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1812 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1813
1814 splx(s);
1815 }
1816
1817
1818
1819 /*
1820 * initialize a buf structure for doing an I/O in the kernel.
1821 */
1822 static void
1823 InitBP(
1824 struct buf * bp,
1825 struct vnode * b_vp,
1826 unsigned rw_flag,
1827 dev_t dev,
1828 RF_SectorNum_t startSect,
1829 RF_SectorCount_t numSect,
1830 caddr_t buf,
1831 void (*cbFunc) (struct buf *),
1832 void *cbArg,
1833 int logBytesPerSector,
1834 struct proc * b_proc)
1835 {
1836 /* bp->b_flags = B_PHYS | rw_flag; */
1837 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1838 bp->b_bcount = numSect << logBytesPerSector;
1839 bp->b_bufsize = bp->b_bcount;
1840 bp->b_error = 0;
1841 bp->b_dev = dev;
1842 bp->b_un.b_addr = buf;
1843 bp->b_blkno = startSect;
1844 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1845 if (bp->b_bcount == 0) {
1846 panic("bp->b_bcount is zero in InitBP!!\n");
1847 }
1848 bp->b_proc = b_proc;
1849 bp->b_iodone = cbFunc;
1850 bp->b_vp = b_vp;
1851
1852 }
1853
1854 static void
1855 raidgetdefaultlabel(raidPtr, rs, lp)
1856 RF_Raid_t *raidPtr;
1857 struct raid_softc *rs;
1858 struct disklabel *lp;
1859 {
1860 db1_printf(("Building a default label...\n"));
1861 bzero(lp, sizeof(*lp));
1862
1863 /* fabricate a label... */
1864 lp->d_secperunit = raidPtr->totalSectors;
1865 lp->d_secsize = raidPtr->bytesPerSector;
1866 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1867 lp->d_ntracks = 1;
1868 lp->d_ncylinders = raidPtr->totalSectors /
1869 (lp->d_nsectors * lp->d_ntracks);
1870 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1871
1872 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1873 lp->d_type = DTYPE_RAID;
1874 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1875 lp->d_rpm = 3600;
1876 lp->d_interleave = 1;
1877 lp->d_flags = 0;
1878
1879 lp->d_partitions[RAW_PART].p_offset = 0;
1880 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1881 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1882 lp->d_npartitions = RAW_PART + 1;
1883
1884 lp->d_magic = DISKMAGIC;
1885 lp->d_magic2 = DISKMAGIC;
1886 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1887
1888 }
1889 /*
1890 * Read the disklabel from the raid device. If one is not present, fake one
1891 * up.
1892 */
1893 static void
1894 raidgetdisklabel(dev)
1895 dev_t dev;
1896 {
1897 int unit = raidunit(dev);
1898 struct raid_softc *rs = &raid_softc[unit];
1899 char *errstring;
1900 struct disklabel *lp = rs->sc_dkdev.dk_label;
1901 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1902 RF_Raid_t *raidPtr;
1903
1904 db1_printf(("Getting the disklabel...\n"));
1905
1906 bzero(clp, sizeof(*clp));
1907
1908 raidPtr = raidPtrs[unit];
1909
1910 raidgetdefaultlabel(raidPtr, rs, lp);
1911
1912 /*
1913 * Call the generic disklabel extraction routine.
1914 */
1915 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1916 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1917 if (errstring)
1918 raidmakedisklabel(rs);
1919 else {
1920 int i;
1921 struct partition *pp;
1922
1923 /*
1924 * Sanity check whether the found disklabel is valid.
1925 *
1926 * This is necessary since total size of the raid device
1927 * may vary when an interleave is changed even though exactly
1928 * same componets are used, and old disklabel may used
1929 * if that is found.
1930 */
1931 if (lp->d_secperunit != rs->sc_size)
1932 printf("WARNING: %s: "
1933 "total sector size in disklabel (%d) != "
1934 "the size of raid (%ld)\n", rs->sc_xname,
1935 lp->d_secperunit, (long) rs->sc_size);
1936 for (i = 0; i < lp->d_npartitions; i++) {
1937 pp = &lp->d_partitions[i];
1938 if (pp->p_offset + pp->p_size > rs->sc_size)
1939 printf("WARNING: %s: end of partition `%c' "
1940 "exceeds the size of raid (%ld)\n",
1941 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1942 }
1943 }
1944
1945 }
1946 /*
1947 * Take care of things one might want to take care of in the event
1948 * that a disklabel isn't present.
1949 */
1950 static void
1951 raidmakedisklabel(rs)
1952 struct raid_softc *rs;
1953 {
1954 struct disklabel *lp = rs->sc_dkdev.dk_label;
1955 db1_printf(("Making a label..\n"));
1956
1957 /*
1958 * For historical reasons, if there's no disklabel present
1959 * the raw partition must be marked FS_BSDFFS.
1960 */
1961
1962 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1963
1964 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1965
1966 lp->d_checksum = dkcksum(lp);
1967 }
1968 /*
1969 * Lookup the provided name in the filesystem. If the file exists,
1970 * is a valid block device, and isn't being used by anyone else,
1971 * set *vpp to the file's vnode.
1972 * You'll find the original of this in ccd.c
1973 */
1974 int
1975 raidlookup(path, p, vpp)
1976 char *path;
1977 struct proc *p;
1978 struct vnode **vpp; /* result */
1979 {
1980 struct nameidata nd;
1981 struct vnode *vp;
1982 struct vattr va;
1983 int error;
1984
1985 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1986 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1987 #ifdef DEBUG
1988 printf("RAIDframe: vn_open returned %d\n", error);
1989 #endif
1990 return (error);
1991 }
1992 vp = nd.ni_vp;
1993 if (vp->v_usecount > 1) {
1994 VOP_UNLOCK(vp, 0);
1995 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1996 return (EBUSY);
1997 }
1998 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1999 VOP_UNLOCK(vp, 0);
2000 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2001 return (error);
2002 }
2003 /* XXX: eventually we should handle VREG, too. */
2004 if (va.va_type != VBLK) {
2005 VOP_UNLOCK(vp, 0);
2006 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2007 return (ENOTBLK);
2008 }
2009 VOP_UNLOCK(vp, 0);
2010 *vpp = vp;
2011 return (0);
2012 }
2013 /*
2014 * Wait interruptibly for an exclusive lock.
2015 *
2016 * XXX
2017 * Several drivers do this; it should be abstracted and made MP-safe.
2018 * (Hmm... where have we seen this warning before :-> GO )
2019 */
2020 static int
2021 raidlock(rs)
2022 struct raid_softc *rs;
2023 {
2024 int error;
2025
2026 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2027 rs->sc_flags |= RAIDF_WANTED;
2028 if ((error =
2029 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2030 return (error);
2031 }
2032 rs->sc_flags |= RAIDF_LOCKED;
2033 return (0);
2034 }
2035 /*
2036 * Unlock and wake up any waiters.
2037 */
2038 static void
2039 raidunlock(rs)
2040 struct raid_softc *rs;
2041 {
2042
2043 rs->sc_flags &= ~RAIDF_LOCKED;
2044 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2045 rs->sc_flags &= ~RAIDF_WANTED;
2046 wakeup(rs);
2047 }
2048 }
2049
2050
/*
 * Location and size of the RAIDframe component label on each component,
 * both in bytes.  The label routines below convert the offset to a block
 * number by dividing by DEV_BSIZE.
 */
#define RF_COMPONENT_INFO_OFFSET	(16 * 1024)	/* bytes */
#define RF_COMPONENT_INFO_SIZE		(1024)		/* bytes */
2053
2054 int
2055 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2056 {
2057 RF_ComponentLabel_t clabel;
2058 raidread_component_label(dev, b_vp, &clabel);
2059 clabel.mod_counter = mod_counter;
2060 clabel.clean = RF_RAID_CLEAN;
2061 raidwrite_component_label(dev, b_vp, &clabel);
2062 return(0);
2063 }
2064
2065
2066 int
2067 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2068 {
2069 RF_ComponentLabel_t clabel;
2070 raidread_component_label(dev, b_vp, &clabel);
2071 clabel.mod_counter = mod_counter;
2072 clabel.clean = RF_RAID_DIRTY;
2073 raidwrite_component_label(dev, b_vp, &clabel);
2074 return(0);
2075 }
2076
2077 /* ARGSUSED */
2078 int
2079 raidread_component_label(dev, b_vp, clabel)
2080 dev_t dev;
2081 struct vnode *b_vp;
2082 RF_ComponentLabel_t *clabel;
2083 {
2084 struct buf *bp;
2085 int error;
2086
2087 /* XXX should probably ensure that we don't try to do this if
2088 someone has changed rf_protected_sectors. */
2089
2090 /* get a block of the appropriate size... */
2091 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2092 bp->b_dev = dev;
2093
2094 /* get our ducks in a row for the read */
2095 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2096 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2097 bp->b_flags = B_BUSY | B_READ;
2098 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2099
2100 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2101
2102 error = biowait(bp);
2103
2104 if (!error) {
2105 memcpy(clabel, bp->b_un.b_addr,
2106 sizeof(RF_ComponentLabel_t));
2107 #if 0
2108 print_component_label( clabel );
2109 #endif
2110 } else {
2111 #if 0
2112 printf("Failed to read RAID component label!\n");
2113 #endif
2114 }
2115
2116 bp->b_flags = B_INVAL | B_AGE;
2117 brelse(bp);
2118 return(error);
2119 }
2120 /* ARGSUSED */
2121 int
2122 raidwrite_component_label(dev, b_vp, clabel)
2123 dev_t dev;
2124 struct vnode *b_vp;
2125 RF_ComponentLabel_t *clabel;
2126 {
2127 struct buf *bp;
2128 int error;
2129
2130 /* get a block of the appropriate size... */
2131 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2132 bp->b_dev = dev;
2133
2134 /* get our ducks in a row for the write */
2135 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2136 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2137 bp->b_flags = B_BUSY | B_WRITE;
2138 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2139
2140 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2141
2142 memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
2143
2144 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2145 error = biowait(bp);
2146 bp->b_flags = B_INVAL | B_AGE;
2147 brelse(bp);
2148 if (error) {
2149 #if 1
2150 printf("Failed to write RAID component info!\n");
2151 #endif
2152 }
2153
2154 return(error);
2155 }
2156
2157 void
2158 rf_markalldirty( raidPtr )
2159 RF_Raid_t *raidPtr;
2160 {
2161 RF_ComponentLabel_t clabel;
2162 int r,c;
2163
2164 raidPtr->mod_counter++;
2165 for (r = 0; r < raidPtr->numRow; r++) {
2166 for (c = 0; c < raidPtr->numCol; c++) {
2167 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2168 raidread_component_label(
2169 raidPtr->Disks[r][c].dev,
2170 raidPtr->raid_cinfo[r][c].ci_vp,
2171 &clabel);
2172 if (clabel.status == rf_ds_spared) {
2173 /* XXX do something special...
2174 but whatever you do, don't
2175 try to access it!! */
2176 } else {
2177 #if 0
2178 clabel.status =
2179 raidPtr->Disks[r][c].status;
2180 raidwrite_component_label(
2181 raidPtr->Disks[r][c].dev,
2182 raidPtr->raid_cinfo[r][c].ci_vp,
2183 &clabel);
2184 #endif
2185 raidmarkdirty(
2186 raidPtr->Disks[r][c].dev,
2187 raidPtr->raid_cinfo[r][c].ci_vp,
2188 raidPtr->mod_counter);
2189 }
2190 }
2191 }
2192 }
2193 /* printf("Component labels marked dirty.\n"); */
2194 #if 0
2195 for( c = 0; c < raidPtr->numSpare ; c++) {
2196 sparecol = raidPtr->numCol + c;
2197 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2198 /*
2199
2200 XXX this is where we get fancy and map this spare
2201 into it's correct spot in the array.
2202
2203 */
2204 /*
2205
2206 we claim this disk is "optimal" if it's
2207 rf_ds_used_spare, as that means it should be
2208 directly substitutable for the disk it replaced.
2209 We note that too...
2210
2211 */
2212
2213 for(i=0;i<raidPtr->numRow;i++) {
2214 for(j=0;j<raidPtr->numCol;j++) {
2215 if ((raidPtr->Disks[i][j].spareRow ==
2216 r) &&
2217 (raidPtr->Disks[i][j].spareCol ==
2218 sparecol)) {
2219 srow = r;
2220 scol = sparecol;
2221 break;
2222 }
2223 }
2224 }
2225
2226 raidread_component_label(
2227 raidPtr->Disks[r][sparecol].dev,
2228 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2229 &clabel);
2230 /* make sure status is noted */
2231 clabel.version = RF_COMPONENT_LABEL_VERSION;
2232 clabel.mod_counter = raidPtr->mod_counter;
2233 clabel.serial_number = raidPtr->serial_number;
2234 clabel.row = srow;
2235 clabel.column = scol;
2236 clabel.num_rows = raidPtr->numRow;
2237 clabel.num_columns = raidPtr->numCol;
2238 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2239 clabel.status = rf_ds_optimal;
2240 raidwrite_component_label(
2241 raidPtr->Disks[r][sparecol].dev,
2242 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2243 &clabel);
2244 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2245 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2246 }
2247 }
2248
2249 #endif
2250 }
2251
2252
2253 void
2254 rf_update_component_labels( raidPtr )
2255 RF_Raid_t *raidPtr;
2256 {
2257 RF_ComponentLabel_t clabel;
2258 int sparecol;
2259 int r,c;
2260 int i,j;
2261 int srow, scol;
2262
2263 srow = -1;
2264 scol = -1;
2265
2266 /* XXX should do extra checks to make sure things really are clean,
2267 rather than blindly setting the clean bit... */
2268
2269 raidPtr->mod_counter++;
2270
2271 for (r = 0; r < raidPtr->numRow; r++) {
2272 for (c = 0; c < raidPtr->numCol; c++) {
2273 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2274 raidread_component_label(
2275 raidPtr->Disks[r][c].dev,
2276 raidPtr->raid_cinfo[r][c].ci_vp,
2277 &clabel);
2278 /* make sure status is noted */
2279 clabel.status = rf_ds_optimal;
2280 /* bump the counter */
2281 clabel.mod_counter = raidPtr->mod_counter;
2282
2283 raidwrite_component_label(
2284 raidPtr->Disks[r][c].dev,
2285 raidPtr->raid_cinfo[r][c].ci_vp,
2286 &clabel);
2287 }
2288 /* else we don't touch it.. */
2289 }
2290 }
2291
2292 for( c = 0; c < raidPtr->numSpare ; c++) {
2293 sparecol = raidPtr->numCol + c;
2294 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2295 /*
2296
2297 we claim this disk is "optimal" if it's
2298 rf_ds_used_spare, as that means it should be
2299 directly substitutable for the disk it replaced.
2300 We note that too...
2301
2302 */
2303
2304 for(i=0;i<raidPtr->numRow;i++) {
2305 for(j=0;j<raidPtr->numCol;j++) {
2306 if ((raidPtr->Disks[i][j].spareRow ==
2307 0) &&
2308 (raidPtr->Disks[i][j].spareCol ==
2309 sparecol)) {
2310 srow = i;
2311 scol = j;
2312 break;
2313 }
2314 }
2315 }
2316
2317 /* XXX shouldn't *really* need this... */
2318 raidread_component_label(
2319 raidPtr->Disks[0][sparecol].dev,
2320 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2321 &clabel);
2322 /* make sure status is noted */
2323
2324 raid_init_component_label(raidPtr, &clabel);
2325
2326 clabel.mod_counter = raidPtr->mod_counter;
2327 clabel.row = srow;
2328 clabel.column = scol;
2329 clabel.status = rf_ds_optimal;
2330
2331 raidwrite_component_label(
2332 raidPtr->Disks[0][sparecol].dev,
2333 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2334 &clabel);
2335 }
2336 }
2337 /* printf("Component labels updated\n"); */
2338 }
2339
2340
2341 void
2342 rf_final_update_component_labels( raidPtr )
2343 RF_Raid_t *raidPtr;
2344 {
2345 RF_ComponentLabel_t clabel;
2346 int sparecol;
2347 int r,c;
2348 int i,j;
2349 int srow, scol;
2350
2351 srow = -1;
2352 scol = -1;
2353
2354 /* XXX should do extra checks to make sure things really are clean,
2355 rather than blindly setting the clean bit... */
2356
2357 raidPtr->mod_counter++;
2358
2359 for (r = 0; r < raidPtr->numRow; r++) {
2360 for (c = 0; c < raidPtr->numCol; c++) {
2361 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2362 raidread_component_label(
2363 raidPtr->Disks[r][c].dev,
2364 raidPtr->raid_cinfo[r][c].ci_vp,
2365 &clabel);
2366 /* make sure status is noted */
2367 clabel.status = rf_ds_optimal;
2368 /* bump the counter */
2369 clabel.mod_counter = raidPtr->mod_counter;
2370
2371 raidwrite_component_label(
2372 raidPtr->Disks[r][c].dev,
2373 raidPtr->raid_cinfo[r][c].ci_vp,
2374 &clabel);
2375 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2376 raidmarkclean(
2377 raidPtr->Disks[r][c].dev,
2378 raidPtr->raid_cinfo[r][c].ci_vp,
2379 raidPtr->mod_counter);
2380 }
2381 }
2382 /* else we don't touch it.. */
2383 }
2384 }
2385
2386 for( c = 0; c < raidPtr->numSpare ; c++) {
2387 sparecol = raidPtr->numCol + c;
2388 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2389 /*
2390
2391 we claim this disk is "optimal" if it's
2392 rf_ds_used_spare, as that means it should be
2393 directly substitutable for the disk it replaced.
2394 We note that too...
2395
2396 */
2397
2398 for(i=0;i<raidPtr->numRow;i++) {
2399 for(j=0;j<raidPtr->numCol;j++) {
2400 if ((raidPtr->Disks[i][j].spareRow ==
2401 0) &&
2402 (raidPtr->Disks[i][j].spareCol ==
2403 sparecol)) {
2404 srow = i;
2405 scol = j;
2406 break;
2407 }
2408 }
2409 }
2410
2411 /* XXX shouldn't *really* need this... */
2412 raidread_component_label(
2413 raidPtr->Disks[0][sparecol].dev,
2414 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2415 &clabel);
2416 /* make sure status is noted */
2417
2418 raid_init_component_label(raidPtr, &clabel);
2419
2420 clabel.mod_counter = raidPtr->mod_counter;
2421 clabel.row = srow;
2422 clabel.column = scol;
2423 clabel.status = rf_ds_optimal;
2424
2425 raidwrite_component_label(
2426 raidPtr->Disks[0][sparecol].dev,
2427 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2428 &clabel);
2429 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2430 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2431 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2432 raidPtr->mod_counter);
2433 }
2434 }
2435 }
2436 /* printf("Component labels updated\n"); */
2437 }
2438
2439
2440 void
2441 rf_ReconThread(req)
2442 struct rf_recon_req *req;
2443 {
2444 int s;
2445 RF_Raid_t *raidPtr;
2446
2447 s = splbio();
2448 raidPtr = (RF_Raid_t *) req->raidPtr;
2449 raidPtr->recon_in_progress = 1;
2450
2451 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2452 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2453
2454 /* XXX get rid of this! we don't need it at all.. */
2455 RF_Free(req, sizeof(*req));
2456
2457 raidPtr->recon_in_progress = 0;
2458 splx(s);
2459
2460 /* That's all... */
2461 kthread_exit(0); /* does not return */
2462 }
2463
2464 void
2465 rf_RewriteParityThread(raidPtr)
2466 RF_Raid_t *raidPtr;
2467 {
2468 int retcode;
2469 int s;
2470
2471 raidPtr->parity_rewrite_in_progress = 1;
2472 s = splbio();
2473 retcode = rf_RewriteParity(raidPtr);
2474 splx(s);
2475 if (retcode) {
2476 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2477 } else {
2478 /* set the clean bit! If we shutdown correctly,
2479 the clean bit on each component label will get
2480 set */
2481 raidPtr->parity_good = RF_RAID_CLEAN;
2482 }
2483 raidPtr->parity_rewrite_in_progress = 0;
2484
2485 /* That's all... */
2486 kthread_exit(0); /* does not return */
2487 }
2488
2489
2490 void
2491 rf_CopybackThread(raidPtr)
2492 RF_Raid_t *raidPtr;
2493 {
2494 int s;
2495
2496 raidPtr->copyback_in_progress = 1;
2497 s = splbio();
2498 rf_CopybackReconstructedData(raidPtr);
2499 splx(s);
2500 raidPtr->copyback_in_progress = 0;
2501
2502 /* That's all... */
2503 kthread_exit(0); /* does not return */
2504 }
2505
2506
2507 void
2508 rf_ReconstructInPlaceThread(req)
2509 struct rf_recon_req *req;
2510 {
2511 int retcode;
2512 int s;
2513 RF_Raid_t *raidPtr;
2514
2515 s = splbio();
2516 raidPtr = req->raidPtr;
2517 raidPtr->recon_in_progress = 1;
2518 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2519 RF_Free(req, sizeof(*req));
2520 raidPtr->recon_in_progress = 0;
2521 splx(s);
2522
2523 /* That's all... */
2524 kthread_exit(0); /* does not return */
2525 }
2526
/*
 * Mount-root hook for RAID devices.  Currently a stub: `dev' is ignored
 * and nothing is done.
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* nothing to do */
}
2533
2534
2535 RF_AutoConfig_t *
2536 rf_find_raid_components()
2537 {
2538 struct devnametobdevmaj *dtobdm;
2539 struct vnode *vp;
2540 struct disklabel label;
2541 struct device *dv;
2542 char *cd_name;
2543 dev_t dev;
2544 int error;
2545 int i;
2546 int good_one;
2547 RF_ComponentLabel_t *clabel;
2548 RF_AutoConfig_t *ac_list;
2549 RF_AutoConfig_t *ac;
2550
2551
2552 /* initialize the AutoConfig list */
2553 ac_list = NULL;
2554
2555 if (raidautoconfig) {
2556
2557 /* we begin by trolling through *all* the devices on the system */
2558
2559 for (dv = alldevs.tqh_first; dv != NULL;
2560 dv = dv->dv_list.tqe_next) {
2561
2562 /* we are only interested in disks... */
2563 if (dv->dv_class != DV_DISK)
2564 continue;
2565
2566 /* we don't care about floppies... */
2567 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2568 continue;
2569 }
2570
2571 /* need to find the device_name_to_block_device_major stuff */
2572 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2573 dtobdm = dev_name2blk;
2574 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2575 dtobdm++;
2576 }
2577
2578 /* get a vnode for the raw partition of this disk */
2579
2580 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2581 if (bdevvp(dev, &vp))
2582 panic("RAID can't alloc vnode");
2583
2584 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2585
2586 if (error) {
2587 /* "Who cares." Continue looking
2588 for something that exists*/
2589 vput(vp);
2590 continue;
2591 }
2592
2593 /* Ok, the disk exists. Go get the disklabel. */
2594 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2595 FREAD, NOCRED, 0);
2596 if (error) {
2597 /*
2598 * XXX can't happen - open() would
2599 * have errored out (or faked up one)
2600 */
2601 printf("can't get label for dev %s%c (%d)!?!?\n",
2602 dv->dv_xname, 'a' + RAW_PART, error);
2603 }
2604
2605 /* don't need this any more. We'll allocate it again
2606 a little later if we really do... */
2607 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2608 vput(vp);
2609
2610 for (i=0; i < label.d_npartitions; i++) {
2611 /* We only support partitions marked as RAID */
2612 if (label.d_partitions[i].p_fstype != FS_RAID)
2613 continue;
2614
2615 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2616 if (bdevvp(dev, &vp))
2617 panic("RAID can't alloc vnode");
2618
2619 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2620 if (error) {
2621 /* Whatever... */
2622 vput(vp);
2623 continue;
2624 }
2625
2626 good_one = 0;
2627
2628 clabel = (RF_ComponentLabel_t *)
2629 malloc(sizeof(RF_ComponentLabel_t),
2630 M_RAIDFRAME, M_NOWAIT);
2631 if (clabel == NULL) {
2632 /* XXX CLEANUP HERE */
2633 printf("RAID auto config: out of memory!\n");
2634 return(NULL); /* XXX probably should panic? */
2635 }
2636
2637 if (!raidread_component_label(dev, vp, clabel)) {
2638 /* Got the label. Does it look reasonable? */
2639 if (rf_reasonable_label(clabel) &&
2640 (clabel->partitionSize <=
2641 label.d_partitions[i].p_size)) {
2642 #if DEBUG
2643 printf("Component on: %s%c: %d\n",
2644 dv->dv_xname, 'a'+i,
2645 label.d_partitions[i].p_size);
2646 print_component_label(clabel);
2647 #endif
2648 /* if it's reasonable, add it,
2649 else ignore it. */
2650 ac = (RF_AutoConfig_t *)
2651 malloc(sizeof(RF_AutoConfig_t),
2652 M_RAIDFRAME,
2653 M_NOWAIT);
2654 if (ac == NULL) {
2655 /* XXX should panic?? */
2656 return(NULL);
2657 }
2658
2659 sprintf(ac->devname, "%s%c",
2660 dv->dv_xname, 'a'+i);
2661 ac->dev = dev;
2662 ac->vp = vp;
2663 ac->clabel = clabel;
2664 ac->next = ac_list;
2665 ac_list = ac;
2666 good_one = 1;
2667 }
2668 }
2669 if (!good_one) {
2670 /* cleanup */
2671 free(clabel, M_RAIDFRAME);
2672 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2673 vput(vp);
2674 }
2675 }
2676 }
2677 }
2678 return(ac_list);
2679 }
2680
2681 static int
2682 rf_reasonable_label(clabel)
2683 RF_ComponentLabel_t *clabel;
2684 {
2685
2686 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2687 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2688 ((clabel->clean == RF_RAID_CLEAN) ||
2689 (clabel->clean == RF_RAID_DIRTY)) &&
2690 clabel->row >=0 &&
2691 clabel->column >= 0 &&
2692 clabel->num_rows > 0 &&
2693 clabel->num_columns > 0 &&
2694 clabel->row < clabel->num_rows &&
2695 clabel->column < clabel->num_columns &&
2696 clabel->blockSize > 0 &&
2697 clabel->numBlocks > 0) {
2698 /* label looks reasonable enough... */
2699 return(1);
2700 }
2701 return(0);
2702 }
2703
2704
2705 void
2706 print_component_label(clabel)
2707 RF_ComponentLabel_t *clabel;
2708 {
2709 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2710 clabel->row, clabel->column,
2711 clabel->num_rows, clabel->num_columns);
2712 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2713 clabel->version, clabel->serial_number,
2714 clabel->mod_counter);
2715 printf(" Clean: %s Status: %d\n",
2716 clabel->clean ? "Yes" : "No", clabel->status );
2717 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2718 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2719 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2720 (char) clabel->parityConfig, clabel->blockSize,
2721 clabel->numBlocks);
2722 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2723 printf(" Last configured as: raid%d\n", clabel->last_unit );
2724 #if 0
2725 printf(" Config order: %d\n", clabel->config_order);
2726 #endif
2727
2728 }
2729
2730 RF_ConfigSet_t *
2731 rf_create_auto_sets(ac_list)
2732 RF_AutoConfig_t *ac_list;
2733 {
2734 RF_AutoConfig_t *ac;
2735 RF_ConfigSet_t *config_sets;
2736 RF_ConfigSet_t *cset;
2737 RF_AutoConfig_t *ac_next;
2738
2739
2740 config_sets = NULL;
2741
2742 /* Go through the AutoConfig list, and figure out which components
2743 belong to what sets. */
2744 ac = ac_list;
2745 while(ac!=NULL) {
2746 /* we're going to putz with ac->next, so save it here
2747 for use at the end of the loop */
2748 ac_next = ac->next;
2749
2750 if (config_sets == NULL) {
2751 /* will need at least this one... */
2752 config_sets = (RF_ConfigSet_t *)
2753 malloc(sizeof(RF_ConfigSet_t),
2754 M_RAIDFRAME, M_NOWAIT);
2755 if (config_sets == NULL) {
2756 panic("rf_create_auto_sets: No memory!\n");
2757 }
2758 /* this one is easy :) */
2759 config_sets->ac = ac;
2760 config_sets->next = NULL;
2761 config_sets->rootable = 0;
2762 ac->next = NULL;
2763 } else {
2764 /* which set does this component fit into? */
2765 cset = config_sets;
2766 while(cset!=NULL) {
2767 if (rf_does_it_fit(cset, ac)) {
2768 /* looks like it matches */
2769 ac->next = cset->ac;
2770 cset->ac = ac;
2771 break;
2772 }
2773 cset = cset->next;
2774 }
2775 if (cset==NULL) {
2776 /* didn't find a match above... new set..*/
2777 cset = (RF_ConfigSet_t *)
2778 malloc(sizeof(RF_ConfigSet_t),
2779 M_RAIDFRAME, M_NOWAIT);
2780 if (cset == NULL) {
2781 panic("rf_create_auto_sets: No memory!\n");
2782 }
2783 cset->ac = ac;
2784 ac->next = NULL;
2785 cset->next = config_sets;
2786 cset->rootable = 0;
2787 config_sets = cset;
2788 }
2789 }
2790 ac = ac_next;
2791 }
2792
2793
2794 return(config_sets);
2795 }
2796
2797 static int
2798 rf_does_it_fit(cset, ac)
2799 RF_ConfigSet_t *cset;
2800 RF_AutoConfig_t *ac;
2801 {
2802 RF_ComponentLabel_t *clabel1, *clabel2;
2803
2804 /* If this one matches the *first* one in the set, that's good
2805 enough, since the other members of the set would have been
2806 through here too... */
2807 /* note that we are not checking partitionSize here..
2808
2809 Note that we are also not checking the mod_counters here.
2810 If everything else matches execpt the mod_counter, that's
2811 good enough for this test. We will deal with the mod_counters
2812 a little later in the autoconfiguration process.
2813
2814 (clabel1->mod_counter == clabel2->mod_counter) &&
2815
2816 */
2817
2818 clabel1 = cset->ac->clabel;
2819 clabel2 = ac->clabel;
2820 if ((clabel1->version == clabel2->version) &&
2821 (clabel1->serial_number == clabel2->serial_number) &&
2822 (clabel1->num_rows == clabel2->num_rows) &&
2823 (clabel1->num_columns == clabel2->num_columns) &&
2824 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2825 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2826 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2827 (clabel1->parityConfig == clabel2->parityConfig) &&
2828 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2829 (clabel1->blockSize == clabel2->blockSize) &&
2830 (clabel1->numBlocks == clabel2->numBlocks) &&
2831 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2832 (clabel1->root_partition == clabel2->root_partition) &&
2833 (clabel1->last_unit == clabel2->last_unit) &&
2834 (clabel1->config_order == clabel2->config_order)) {
2835 /* if it get's here, it almost *has* to be a match */
2836 } else {
2837 /* it's not consistent with somebody in the set..
2838 punt */
2839 return(0);
2840 }
2841 /* all was fine.. it must fit... */
2842 return(1);
2843 }
2844
2845 int
2846 rf_have_enough_components(cset)
2847 RF_ConfigSet_t *cset;
2848 {
2849 RF_AutoConfig_t *ac;
2850 RF_AutoConfig_t *auto_config;
2851 RF_ComponentLabel_t *clabel;
2852 int r,c;
2853 int num_rows;
2854 int num_cols;
2855 int num_missing;
2856
2857 /* check to see that we have enough 'live' components
2858 of this set. If so, we can configure it if necessary */
2859
2860 num_rows = cset->ac->clabel->num_rows;
2861 num_cols = cset->ac->clabel->num_columns;
2862
2863 /* XXX Check for duplicate components!?!?!? */
2864
2865 num_missing = 0;
2866 auto_config = cset->ac;
2867
2868 for(r=0; r<num_rows; r++) {
2869 for(c=0; c<num_cols; c++) {
2870 ac = auto_config;
2871 while(ac!=NULL) {
2872 if (ac->clabel==NULL) {
2873 /* big-time bad news. */
2874 goto fail;
2875 }
2876 if ((ac->clabel->row == r) &&
2877 (ac->clabel->column == c)) {
2878 /* it's this one... */
2879 #if DEBUG
2880 printf("Found: %s at %d,%d\n",
2881 ac->devname,r,c);
2882 #endif
2883 break;
2884 }
2885 ac=ac->next;
2886 }
2887 if (ac==NULL) {
2888 /* Didn't find one here! */
2889 num_missing++;
2890 }
2891 }
2892 }
2893
2894 clabel = cset->ac->clabel;
2895
2896 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
2897 ((clabel->parityConfig == '1') && (num_missing > 1)) ||
2898 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
2899 ((clabel->parityConfig == '5') && (num_missing > 1))) {
2900 /* XXX this needs to be made *much* more general */
2901 /* Too many failures */
2902 return(0);
2903 }
2904 /* otherwise, all is well, and we've got enough to take a kick
2905 at autoconfiguring this set */
2906 return(1);
2907 fail:
2908 return(0);
2909
2910 }
2911
2912 void
2913 rf_create_configuration(ac,config,raidPtr)
2914 RF_AutoConfig_t *ac;
2915 RF_Config_t *config;
2916 RF_Raid_t *raidPtr;
2917 {
2918 RF_ComponentLabel_t *clabel;
2919
2920 clabel = ac->clabel;
2921
2922 /* 1. Fill in the common stuff */
2923 config->numRow = clabel->num_rows;
2924 config->numCol = clabel->num_columns;
2925 config->numSpare = 0; /* XXX should this be set here? */
2926 config->sectPerSU = clabel->sectPerSU;
2927 config->SUsPerPU = clabel->SUsPerPU;
2928 config->SUsPerRU = clabel->SUsPerRU;
2929 config->parityConfig = clabel->parityConfig;
2930 /* XXX... */
2931 strcpy(config->diskQueueType,"fifo");
2932 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
2933 config->layoutSpecificSize = 0; /* XXX ?? */
2934
2935 while(ac!=NULL) {
2936 /* row/col values will be in range due to the checks
2937 in reasonable_label() */
2938 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
2939 ac->devname);
2940 ac = ac->next;
2941 }
2942
2943 }
2944
2945 int
2946 rf_set_autoconfig(raidPtr, new_value)
2947 RF_Raid_t *raidPtr;
2948 int new_value;
2949 {
2950 RF_ComponentLabel_t clabel;
2951 struct vnode *vp;
2952 dev_t dev;
2953 int row, column;
2954
2955 raidPtr->autoconfigure = new_value;
2956 for(row=0; row<raidPtr->numRow; row++) {
2957 for(column=0; column<raidPtr->numCol; column++) {
2958 dev = raidPtr->Disks[row][column].dev;
2959 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2960 raidread_component_label(dev, vp, &clabel);
2961 clabel.autoconfigure = new_value;
2962 raidwrite_component_label(dev, vp, &clabel);
2963 }
2964 }
2965 return(new_value);
2966 }
2967
2968 int
2969 rf_set_rootpartition(raidPtr, new_value)
2970 RF_Raid_t *raidPtr;
2971 int new_value;
2972 {
2973 RF_ComponentLabel_t clabel;
2974 struct vnode *vp;
2975 dev_t dev;
2976 int row, column;
2977
2978 raidPtr->root_partition = new_value;
2979 for(row=0; row<raidPtr->numRow; row++) {
2980 for(column=0; column<raidPtr->numCol; column++) {
2981 dev = raidPtr->Disks[row][column].dev;
2982 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2983 raidread_component_label(dev, vp, &clabel);
2984 clabel.root_partition = new_value;
2985 raidwrite_component_label(dev, vp, &clabel);
2986 }
2987 }
2988 return(new_value);
2989 }
2990
2991 void
2992 rf_release_all_vps(cset)
2993 RF_ConfigSet_t *cset;
2994 {
2995 RF_AutoConfig_t *ac;
2996
2997 ac = cset->ac;
2998 while(ac!=NULL) {
2999 /* Close the vp, and give it back */
3000 if (ac->vp) {
3001 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3002 vput(ac->vp);
3003 }
3004 ac = ac->next;
3005 }
3006 }
3007
3008
3009 void
3010 rf_cleanup_config_set(cset)
3011 RF_ConfigSet_t *cset;
3012 {
3013 RF_AutoConfig_t *ac;
3014 RF_AutoConfig_t *next_ac;
3015
3016 ac = cset->ac;
3017 while(ac!=NULL) {
3018 next_ac = ac->next;
3019 /* nuke the label */
3020 free(ac->clabel, M_RAIDFRAME);
3021 /* cleanup the config structure */
3022 free(ac, M_RAIDFRAME);
3023 /* "next.." */
3024 ac = next_ac;
3025 }
3026 /* and, finally, nuke the config set */
3027 free(cset, M_RAIDFRAME);
3028 }
3029
3030
3031 void
3032 raid_init_component_label(raidPtr, clabel)
3033 RF_Raid_t *raidPtr;
3034 RF_ComponentLabel_t *clabel;
3035 {
3036 /* current version number */
3037 clabel->version = RF_COMPONENT_LABEL_VERSION;
3038 clabel->serial_number = raidPtr->serial_number;
3039 clabel->mod_counter = raidPtr->mod_counter;
3040 clabel->num_rows = raidPtr->numRow;
3041 clabel->num_columns = raidPtr->numCol;
3042 clabel->clean = RF_RAID_DIRTY; /* not clean */
3043 clabel->status = rf_ds_optimal; /* "It's good!" */
3044
3045 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3046 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3047 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3048
3049 clabel->blockSize = raidPtr->bytesPerSector;
3050 clabel->numBlocks = raidPtr->sectorsPerDisk;
3051
3052 /* XXX not portable */
3053 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3054 clabel->maxOutstanding = raidPtr->maxOutstanding;
3055 clabel->autoconfigure = raidPtr->autoconfigure;
3056 clabel->root_partition = raidPtr->root_partition;
3057 clabel->last_unit = raidPtr->raidid;
3058 clabel->config_order = raidPtr->config_order;
3059 }
3060
3061 int
3062 rf_auto_config_set(cset,unit)
3063 RF_ConfigSet_t *cset;
3064 int *unit;
3065 {
3066 RF_Raid_t *raidPtr;
3067 RF_Config_t *config;
3068 int raidID;
3069 int retcode;
3070
3071 printf("Starting autoconfigure on raid%d\n",raidID);
3072
3073 retcode = 0;
3074 *unit = -1;
3075
3076 /* 1. Create a config structure */
3077
3078 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3079 M_RAIDFRAME,
3080 M_NOWAIT);
3081 if (config==NULL) {
3082 printf("Out of mem!?!?\n");
3083 /* XXX do something more intelligent here. */
3084 return(1);
3085 }
3086 /* XXX raidID needs to be set correctly.. */
3087
3088 /*
3089 2. Figure out what RAID ID this one is supposed to live at
3090 See if we can get the same RAID dev that it was configured
3091 on last time..
3092 */
3093
3094 raidID = cset->ac->clabel->last_unit;
3095 if ((raidID < 0) || (raidID >= numraid)) {
3096 /* let's not wander off into lala land. */
3097 raidID = numraid - 1;
3098 }
3099 if (raidPtrs[raidID]->valid != 0) {
3100
3101 /*
3102 Nope... Go looking for an alternative...
3103 Start high so we don't immediately use raid0 if that's
3104 not taken.
3105 */
3106
3107 for(raidID = numraid; raidID >= 0; raidID--) {
3108 if (raidPtrs[raidID]->valid == 0) {
3109 /* can use this one! */
3110 break;
3111 }
3112 }
3113 }
3114
3115 if (raidID < 0) {
3116 /* punt... */
3117 printf("Unable to auto configure this set!\n");
3118 printf("(Out of RAID devs!)\n");
3119 return(1);
3120 }
3121
3122 raidPtr = raidPtrs[raidID];
3123
3124 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3125 raidPtr->raidid = raidID;
3126 raidPtr->openings = RAIDOUTSTANDING;
3127
3128 /* 3. Build the configuration structure */
3129 rf_create_configuration(cset->ac, config, raidPtr);
3130
3131 /* 4. Do the configuration */
3132 retcode = rf_Configure(raidPtr, config, cset->ac);
3133
3134 if (retcode == 0) {
3135
3136 raidinit(raidPtrs[raidID]);
3137
3138 rf_markalldirty(raidPtrs[raidID]);
3139 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3140 if (cset->ac->clabel->root_partition==1) {
3141 /* everything configured just fine. Make a note
3142 that this set is eligible to be root. */
3143 cset->rootable = 1;
3144 /* XXX do this here? */
3145 raidPtrs[raidID]->root_partition = 1;
3146 }
3147 }
3148
3149 /* 5. Cleanup */
3150 free(config, M_RAIDFRAME);
3151
3152 *unit = raidID;
3153 return(retcode);
3154 }
3155