rf_netbsdkintf.c revision 1.50 1 /* $NetBSD: rf_netbsdkintf.c,v 1.50 2000/02/21 23:33:45 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_copyback.h"
141 #include "rf_dag.h"
142 #include "rf_dagflags.h"
143 #include "rf_diskqueue.h"
144 #include "rf_acctrace.h"
145 #include "rf_etimer.h"
146 #include "rf_general.h"
147 #include "rf_debugMem.h"
148 #include "rf_kintf.h"
149 #include "rf_options.h"
150 #include "rf_driver.h"
151 #include "rf_parityscan.h"
152 #include "rf_debugprint.h"
153 #include "rf_threadstuff.h"
154
155 int rf_kdebug_level = 0;
156
157 #ifdef DEBUG
158 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
159 #else /* DEBUG */
160 #define db1_printf(a) { }
161 #endif /* DEBUG */
162
163 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
164
165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
166
167 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
168 * spare table */
169 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
170 * installation process */
171
172 /* prototypes */
173 static void KernelWakeupFunc(struct buf * bp);
174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
175 dev_t dev, RF_SectorNum_t startSect,
176 RF_SectorCount_t numSect, caddr_t buf,
177 void (*cbFunc) (struct buf *), void *cbArg,
178 int logBytesPerSector, struct proc * b_proc);
179 static int raidinit __P((dev_t, RF_Raid_t *, int));
180
181 void raidattach __P((int));
182 int raidsize __P((dev_t));
183 int raidopen __P((dev_t, int, int, struct proc *));
184 int raidclose __P((dev_t, int, int, struct proc *));
185 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
186 int raidwrite __P((dev_t, struct uio *, int));
187 int raidread __P((dev_t, struct uio *, int));
188 void raidstrategy __P((struct buf *));
189 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
190
191 /*
192 * Pilfered from ccd.c
193 */
194
195 struct raidbuf {
196 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
197 struct buf *rf_obp; /* ptr. to original I/O buf */
198 int rf_flags; /* misc. flags */
199 RF_DiskQueueData_t *req;/* the request that this was part of.. */
200 };
201
202
203 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
204 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
205
206 /* XXX Not sure if the following should be replacing the raidPtrs above,
207 or if it should be used in conjunction with that... */
208
209 struct raid_softc {
210 int sc_flags; /* flags */
211 int sc_cflags; /* configuration flags */
212 size_t sc_size; /* size of the raid device */
213 dev_t sc_dev; /* our device.. */
214 char sc_xname[20]; /* XXX external name */
215 struct disk sc_dkdev; /* generic disk device info */
216 struct pool sc_cbufpool; /* component buffer pool */
217 struct buf_queue buf_queue; /* used for the device queue */
218 };
219 /* sc_flags */
220 #define RAIDF_INITED 0x01 /* unit has been initialized */
221 #define RAIDF_WLABEL 0x02 /* label area is writable */
222 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
223 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
224 #define RAIDF_LOCKED 0x80 /* unit is locked */
225
226 #define raidunit(x) DISKUNIT(x)
227 int numraid = 0;
228
229 /*
230 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
231 * Be aware that large numbers can allow the driver to consume a lot of
232 * kernel memory, especially on writes, and in degraded mode reads.
233 *
234 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
235 * a single 64K write will typically require 64K for the old data,
236 * 64K for the old parity, and 64K for the new parity, for a total
237 * of 192K (if the parity buffer is not re-used immediately).
238 * Even if it is used immediately, that's still 128K, which when multiplied
239 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
240 *
241 * Now in degraded mode, for example, a 64K read on the above setup may
242 * require data reconstruction, which will require *all* of the 4 remaining
243 * disks to participate -- 4 * 32K/disk == 128K again.
244 */
245
246 #ifndef RAIDOUTSTANDING
247 #define RAIDOUTSTANDING 6
248 #endif
249
250 #define RAIDLABELDEV(dev) \
251 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
252
253 /* declared here, and made public, for the benefit of KVM stuff.. */
254 struct raid_softc *raid_softc;
255
256 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
257 struct disklabel *));
258 static void raidgetdisklabel __P((dev_t));
259 static void raidmakedisklabel __P((struct raid_softc *));
260
261 static int raidlock __P((struct raid_softc *));
262 static void raidunlock __P((struct raid_softc *));
263
264 static void rf_markalldirty __P((RF_Raid_t *));
265 void rf_mountroot_hook __P((struct device *));
266
267 struct device *raidrootdev;
268 struct cfdata cf_raidrootdev;
269 struct cfdriver cfdrv;
270 /* XXX these should be moved up */
271 #include "rf_configure.h"
272 #include <sys/reboot.h>
273
274 void rf_ReconThread __P((struct rf_recon_req *));
275 /* XXX what I want is: */
276 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
277 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
278 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
279 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
280 void rf_buildroothack __P((void *));
281
282 RF_AutoConfig_t *rf_find_raid_components __P((void));
283 void print_component_label __P((RF_ComponentLabel_t *));
284 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
285 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
286 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
287 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
288 RF_Raid_t *));
289 int rf_set_autoconfig __P((RF_Raid_t *, int));
290 int rf_set_rootpartition __P((RF_Raid_t *, int));
291 void rf_release_all_vps __P((RF_ConfigSet_t *));
292 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
293
294 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
295 allow autoconfig to take place */
296 /* XXX ugly hack. */
297 const char *raid_rooty = "raid0";
298 extern struct device *booted_device;
299
300 void
301 raidattach(num)
302 int num;
303 {
304 int raidID;
305 int i, rc;
306 RF_AutoConfig_t *ac_list; /* autoconfig list */
307 RF_ConfigSet_t *config_sets;
308
309 #ifdef DEBUG
310 printf("raidattach: Asked for %d units\n", num);
311 #endif
312
313 if (num <= 0) {
314 #ifdef DIAGNOSTIC
315 panic("raidattach: count <= 0");
316 #endif
317 return;
318 }
319 /* This is where all the initialization stuff gets done. */
320
321 numraid = num;
322
323 /* Make some space for requested number of units... */
324
325 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
326 if (raidPtrs == NULL) {
327 panic("raidPtrs is NULL!!\n");
328 }
329
330 rc = rf_mutex_init(&rf_sparet_wait_mutex);
331 if (rc) {
332 RF_PANIC();
333 }
334
335 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
336
337 for (i = 0; i < numraid; i++)
338 raidPtrs[i] = NULL;
339 rc = rf_BootRaidframe();
340 if (rc == 0)
341 printf("Kernelized RAIDframe activated\n");
342 else
343 panic("Serious error booting RAID!!\n");
344
345 /* put together some datastructures like the CCD device does.. This
346 * lets us lock the device and what-not when it gets opened. */
347
348 raid_softc = (struct raid_softc *)
349 malloc(num * sizeof(struct raid_softc),
350 M_RAIDFRAME, M_NOWAIT);
351 if (raid_softc == NULL) {
352 printf("WARNING: no memory for RAIDframe driver\n");
353 return;
354 }
355
356 bzero(raid_softc, num * sizeof(struct raid_softc));
357
358 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
359 M_RAIDFRAME, M_NOWAIT);
360 if (raidrootdev == NULL) {
361 panic("No memory for RAIDframe driver!!?!?!\n");
362 }
363
364 for (raidID = 0; raidID < num; raidID++) {
365 BUFQ_INIT(&raid_softc[raidID].buf_queue);
366
367 raidrootdev[raidID].dv_class = DV_DISK;
368 raidrootdev[raidID].dv_cfdata = NULL;
369 raidrootdev[raidID].dv_unit = raidID;
370 raidrootdev[raidID].dv_parent = NULL;
371 raidrootdev[raidID].dv_flags = 0;
372 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
373
374 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
375 (RF_Raid_t *));
376 if (raidPtrs[raidID] == NULL) {
377 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
378 numraid = raidID;
379 return;
380 }
381 }
382
383 if (raidautoconfig) {
384 /* 1. locate all RAID components on the system */
385
386 #if DEBUG
387 printf("Searching for raid components...\n");
388 #endif
389 ac_list = rf_find_raid_components();
390
391 /* 2. sort them into their respective sets */
392
393 config_sets = rf_create_auto_sets(ac_list);
394
395 /* 3. evaluate each set and configure the valid ones
396 This gets done in rf_buildroothack() */
397
398 /* schedule the creation of the thread to do the
399 "/ on RAID" stuff */
400
401 kthread_create(rf_buildroothack,config_sets);
402
403 /* 4. make sure we get our mud.. I mean root.. hooks in.. */
404 /* XXXX pick raid0 for now... and this should be only done
405 if we find something that's bootable!!! */
406 #if 0
407 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
408 #endif
409 if (boothowto & RB_ASKNAME) {
410 /* We don't auto-config... */
411 } else {
412 /* They didn't ask, and we found something bootable... */
413 /* XXX pretend for now.. */
414 #if 0
415 booted_device = &raidrootdev[0];
416 #endif
417 }
418 }
419
420 }
421
422 void
423 rf_buildroothack(arg)
424 void *arg;
425 {
426 RF_ConfigSet_t *config_sets = arg;
427 RF_ConfigSet_t *cset;
428 RF_ConfigSet_t *next_cset;
429 RF_Raid_t *raidPtr;
430 RF_Config_t *config;
431 int raidID;
432 int retcode;
433
434 raidID=0;
435 cset = config_sets;
436 while(cset != NULL ) {
437 next_cset = cset->next;
438 if (cset->ac->clabel->autoconfigure==1) {
439 printf("Starting autoconfigure on raid%d\n",raidID);
440 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
441 M_RAIDFRAME,
442 M_NOWAIT);
443 if (config==NULL) {
444 printf("Out of mem!?!?\n");
445 /* XXX do something more intelligent here. */
446 return;
447 }
448 /* XXX raidID needs to be set correctly.. */
449 raidPtr = raidPtrs[raidID];
450 /* XXX all this stuff should be done SOMEWHERE ELSE! */
451 raidPtr->raidid = raidID;
452 raidPtr->openings = RAIDOUTSTANDING;
453 rf_create_configuration(cset->ac, config, raidPtr);
454 retcode = rf_Configure( raidPtr, config, cset->ac );
455
456 if (retcode == 0) {
457 #if DEBUG
458 printf("Calling raidinit()\n");
459 #endif
460 /* XXX the 0's below are bogus! */
461 retcode = raidinit(0, raidPtrs[raidID], 0);
462 if (retcode) {
463 printf("init returned: %d\n",retcode);
464 }
465 rf_markalldirty( raidPtrs[raidID] );
466 }
467 raidID++; /* XXX for now.. */
468 free(config, M_RAIDFRAME);
469 } else {
470 /* we're not autoconfiguring this set...
471 release the associated resources */
472 #if DEBUG
473 printf("Releasing vp's\n");
474 #endif
475 rf_release_all_vps(cset);
476 #if DEBUG
477 printf("Done.\n");
478 #endif
479 }
480 /* cleanup */
481 #if DEBUG
482 printf("Cleaning up config set\n");
483 #endif
484 rf_cleanup_config_set(cset);
485 #if DEBUG
486 printf("Done cleanup\n");
487 #endif
488 cset = next_cset;
489 }
490 }
491
492
493 int
494 raidsize(dev)
495 dev_t dev;
496 {
497 struct raid_softc *rs;
498 struct disklabel *lp;
499 int part, unit, omask, size;
500
501 unit = raidunit(dev);
502 if (unit >= numraid)
503 return (-1);
504 rs = &raid_softc[unit];
505
506 if ((rs->sc_flags & RAIDF_INITED) == 0)
507 return (-1);
508
509 part = DISKPART(dev);
510 omask = rs->sc_dkdev.dk_openmask & (1 << part);
511 lp = rs->sc_dkdev.dk_label;
512
513 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
514 return (-1);
515
516 if (lp->d_partitions[part].p_fstype != FS_SWAP)
517 size = -1;
518 else
519 size = lp->d_partitions[part].p_size *
520 (lp->d_secsize / DEV_BSIZE);
521
522 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
523 return (-1);
524
525 return (size);
526
527 }
528
529 int
530 raiddump(dev, blkno, va, size)
531 dev_t dev;
532 daddr_t blkno;
533 caddr_t va;
534 size_t size;
535 {
536 /* Not implemented. */
537 return ENXIO;
538 }
539 /* ARGSUSED */
540 int
541 raidopen(dev, flags, fmt, p)
542 dev_t dev;
543 int flags, fmt;
544 struct proc *p;
545 {
546 int unit = raidunit(dev);
547 struct raid_softc *rs;
548 struct disklabel *lp;
549 int part, pmask;
550 int error = 0;
551
552 if (unit >= numraid)
553 return (ENXIO);
554 rs = &raid_softc[unit];
555
556 if ((error = raidlock(rs)) != 0)
557 return (error);
558 lp = rs->sc_dkdev.dk_label;
559
560 part = DISKPART(dev);
561 pmask = (1 << part);
562
563 db1_printf(("Opening raid device number: %d partition: %d\n",
564 unit, part));
565
566
567 if ((rs->sc_flags & RAIDF_INITED) &&
568 (rs->sc_dkdev.dk_openmask == 0))
569 raidgetdisklabel(dev);
570
571 /* make sure that this partition exists */
572
573 if (part != RAW_PART) {
574 db1_printf(("Not a raw partition..\n"));
575 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
576 ((part >= lp->d_npartitions) ||
577 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
578 error = ENXIO;
579 raidunlock(rs);
580 db1_printf(("Bailing out...\n"));
581 return (error);
582 }
583 }
584 /* Prevent this unit from being unconfigured while open. */
585 switch (fmt) {
586 case S_IFCHR:
587 rs->sc_dkdev.dk_copenmask |= pmask;
588 break;
589
590 case S_IFBLK:
591 rs->sc_dkdev.dk_bopenmask |= pmask;
592 break;
593 }
594
595 if ((rs->sc_dkdev.dk_openmask == 0) &&
596 ((rs->sc_flags & RAIDF_INITED) != 0)) {
597 /* First one... mark things as dirty... Note that we *MUST*
598 have done a configure before this. I DO NOT WANT TO BE
599 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
600 THAT THEY BELONG TOGETHER!!!!! */
601 /* XXX should check to see if we're only open for reading
602 here... If so, we needn't do this, but then need some
603 other way of keeping track of what's happened.. */
604
605 rf_markalldirty( raidPtrs[unit] );
606 }
607
608
609 rs->sc_dkdev.dk_openmask =
610 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
611
612 raidunlock(rs);
613
614 return (error);
615
616
617 }
618 /* ARGSUSED */
619 int
620 raidclose(dev, flags, fmt, p)
621 dev_t dev;
622 int flags, fmt;
623 struct proc *p;
624 {
625 int unit = raidunit(dev);
626 struct raid_softc *rs;
627 int error = 0;
628 int part;
629
630 if (unit >= numraid)
631 return (ENXIO);
632 rs = &raid_softc[unit];
633
634 if ((error = raidlock(rs)) != 0)
635 return (error);
636
637 part = DISKPART(dev);
638
639 /* ...that much closer to allowing unconfiguration... */
640 switch (fmt) {
641 case S_IFCHR:
642 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
643 break;
644
645 case S_IFBLK:
646 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
647 break;
648 }
649 rs->sc_dkdev.dk_openmask =
650 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
651
652 if ((rs->sc_dkdev.dk_openmask == 0) &&
653 ((rs->sc_flags & RAIDF_INITED) != 0)) {
654 /* Last one... device is not unconfigured yet.
655 Device shutdown has taken care of setting the
656 clean bits if RAIDF_INITED is not set
657 mark things as clean... */
658 rf_update_component_labels( raidPtrs[unit] );
659 }
660
661 raidunlock(rs);
662 return (0);
663
664 }
665
666 void
667 raidstrategy(bp)
668 register struct buf *bp;
669 {
670 register int s;
671
672 unsigned int raidID = raidunit(bp->b_dev);
673 RF_Raid_t *raidPtr;
674 struct raid_softc *rs = &raid_softc[raidID];
675 struct disklabel *lp;
676 int wlabel;
677
678 if ((rs->sc_flags & RAIDF_INITED) ==0) {
679 bp->b_error = ENXIO;
680 bp->b_flags = B_ERROR;
681 bp->b_resid = bp->b_bcount;
682 biodone(bp);
683 return;
684 }
685 if (raidID >= numraid || !raidPtrs[raidID]) {
686 bp->b_error = ENODEV;
687 bp->b_flags |= B_ERROR;
688 bp->b_resid = bp->b_bcount;
689 biodone(bp);
690 return;
691 }
692 raidPtr = raidPtrs[raidID];
693 if (!raidPtr->valid) {
694 bp->b_error = ENODEV;
695 bp->b_flags |= B_ERROR;
696 bp->b_resid = bp->b_bcount;
697 biodone(bp);
698 return;
699 }
700 if (bp->b_bcount == 0) {
701 db1_printf(("b_bcount is zero..\n"));
702 biodone(bp);
703 return;
704 }
705 lp = rs->sc_dkdev.dk_label;
706
707 /*
708 * Do bounds checking and adjust transfer. If there's an
709 * error, the bounds check will flag that for us.
710 */
711
712 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
713 if (DISKPART(bp->b_dev) != RAW_PART)
714 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
715 db1_printf(("Bounds check failed!!:%d %d\n",
716 (int) bp->b_blkno, (int) wlabel));
717 biodone(bp);
718 return;
719 }
720 s = splbio();
721
722 bp->b_resid = 0;
723
724 /* stuff it onto our queue */
725 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
726
727 raidstart(raidPtrs[raidID]);
728
729 splx(s);
730 }
731 /* ARGSUSED */
732 int
733 raidread(dev, uio, flags)
734 dev_t dev;
735 struct uio *uio;
736 int flags;
737 {
738 int unit = raidunit(dev);
739 struct raid_softc *rs;
740 int part;
741
742 if (unit >= numraid)
743 return (ENXIO);
744 rs = &raid_softc[unit];
745
746 if ((rs->sc_flags & RAIDF_INITED) == 0)
747 return (ENXIO);
748 part = DISKPART(dev);
749
750 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
751
752 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
753
754 }
755 /* ARGSUSED */
756 int
757 raidwrite(dev, uio, flags)
758 dev_t dev;
759 struct uio *uio;
760 int flags;
761 {
762 int unit = raidunit(dev);
763 struct raid_softc *rs;
764
765 if (unit >= numraid)
766 return (ENXIO);
767 rs = &raid_softc[unit];
768
769 if ((rs->sc_flags & RAIDF_INITED) == 0)
770 return (ENXIO);
771 db1_printf(("raidwrite\n"));
772 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
773
774 }
775
776 int
777 raidioctl(dev, cmd, data, flag, p)
778 dev_t dev;
779 u_long cmd;
780 caddr_t data;
781 int flag;
782 struct proc *p;
783 {
784 int unit = raidunit(dev);
785 int error = 0;
786 int part, pmask;
787 struct raid_softc *rs;
788 RF_Config_t *k_cfg, *u_cfg;
789 RF_Raid_t *raidPtr;
790 RF_RaidDisk_t *diskPtr;
791 RF_AccTotals_t *totals;
792 RF_DeviceConfig_t *d_cfg, **ucfgp;
793 u_char *specific_buf;
794 int retcode = 0;
795 int row;
796 int column;
797 struct rf_recon_req *rrcopy, *rr;
798 RF_ComponentLabel_t *clabel;
799 RF_ComponentLabel_t ci_label;
800 RF_ComponentLabel_t **clabel_ptr;
801 RF_SingleComponent_t *sparePtr,*componentPtr;
802 RF_SingleComponent_t hot_spare;
803 RF_SingleComponent_t component;
804 int i, j, d;
805
806 if (unit >= numraid)
807 return (ENXIO);
808 rs = &raid_softc[unit];
809 raidPtr = raidPtrs[unit];
810
811 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
812 (int) DISKPART(dev), (int) unit, (int) cmd));
813
814 /* Must be open for writes for these commands... */
815 switch (cmd) {
816 case DIOCSDINFO:
817 case DIOCWDINFO:
818 case DIOCWLABEL:
819 if ((flag & FWRITE) == 0)
820 return (EBADF);
821 }
822
823 /* Must be initialized for these... */
824 switch (cmd) {
825 case DIOCGDINFO:
826 case DIOCSDINFO:
827 case DIOCWDINFO:
828 case DIOCGPART:
829 case DIOCWLABEL:
830 case DIOCGDEFLABEL:
831 case RAIDFRAME_SHUTDOWN:
832 case RAIDFRAME_REWRITEPARITY:
833 case RAIDFRAME_GET_INFO:
834 case RAIDFRAME_RESET_ACCTOTALS:
835 case RAIDFRAME_GET_ACCTOTALS:
836 case RAIDFRAME_KEEP_ACCTOTALS:
837 case RAIDFRAME_GET_SIZE:
838 case RAIDFRAME_FAIL_DISK:
839 case RAIDFRAME_COPYBACK:
840 case RAIDFRAME_CHECK_RECON_STATUS:
841 case RAIDFRAME_GET_COMPONENT_LABEL:
842 case RAIDFRAME_SET_COMPONENT_LABEL:
843 case RAIDFRAME_ADD_HOT_SPARE:
844 case RAIDFRAME_REMOVE_HOT_SPARE:
845 case RAIDFRAME_INIT_LABELS:
846 case RAIDFRAME_REBUILD_IN_PLACE:
847 case RAIDFRAME_CHECK_PARITY:
848 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
849 case RAIDFRAME_CHECK_COPYBACK_STATUS:
850 case RAIDFRAME_SET_AUTOCONFIG:
851 case RAIDFRAME_SET_ROOT:
852 if ((rs->sc_flags & RAIDF_INITED) == 0)
853 return (ENXIO);
854 }
855
856 switch (cmd) {
857
858 /* configure the system */
859 case RAIDFRAME_CONFIGURE:
860
861 if (raidPtr->valid) {
862 /* There is a valid RAID set running on this unit! */
863 printf("raid%d: Device already configured!\n",unit);
864 }
865
866 /* copy-in the configuration information */
867 /* data points to a pointer to the configuration structure */
868
869 u_cfg = *((RF_Config_t **) data);
870 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
871 if (k_cfg == NULL) {
872 return (ENOMEM);
873 }
874 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
875 sizeof(RF_Config_t));
876 if (retcode) {
877 RF_Free(k_cfg, sizeof(RF_Config_t));
878 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
879 retcode));
880 return (retcode);
881 }
882 /* allocate a buffer for the layout-specific data, and copy it
883 * in */
884 if (k_cfg->layoutSpecificSize) {
885 if (k_cfg->layoutSpecificSize > 10000) {
886 /* sanity check */
887 RF_Free(k_cfg, sizeof(RF_Config_t));
888 return (EINVAL);
889 }
890 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
891 (u_char *));
892 if (specific_buf == NULL) {
893 RF_Free(k_cfg, sizeof(RF_Config_t));
894 return (ENOMEM);
895 }
896 retcode = copyin(k_cfg->layoutSpecific,
897 (caddr_t) specific_buf,
898 k_cfg->layoutSpecificSize);
899 if (retcode) {
900 RF_Free(k_cfg, sizeof(RF_Config_t));
901 RF_Free(specific_buf,
902 k_cfg->layoutSpecificSize);
903 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
904 retcode));
905 return (retcode);
906 }
907 } else
908 specific_buf = NULL;
909 k_cfg->layoutSpecific = specific_buf;
910
911 /* should do some kind of sanity check on the configuration.
912 * Store the sum of all the bytes in the last byte? */
913
914 /* configure the system */
915
916 /*
917 * Clear the entire RAID descriptor, just to make sure
918 * there is no stale data left in the case of a
919 * reconfiguration
920 */
921 bzero((char *) raidPtr, sizeof(RF_Raid_t));
922 raidPtr->raidid = unit;
923
924 retcode = rf_Configure(raidPtr, k_cfg, NULL);
925
926 if (retcode == 0) {
927
928 /* allow this many simultaneous IO's to
929 this RAID device */
930 raidPtr->openings = RAIDOUTSTANDING;
931
932 retcode = raidinit(dev, raidPtr, unit);
933 rf_markalldirty( raidPtr );
934 }
935 /* free the buffers. No return code here. */
936 if (k_cfg->layoutSpecificSize) {
937 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
938 }
939 RF_Free(k_cfg, sizeof(RF_Config_t));
940
941 return (retcode);
942
943 /* shutdown the system */
944 case RAIDFRAME_SHUTDOWN:
945
946 if ((error = raidlock(rs)) != 0)
947 return (error);
948
949 /*
950 * If somebody has a partition mounted, we shouldn't
951 * shutdown.
952 */
953
954 part = DISKPART(dev);
955 pmask = (1 << part);
956 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
957 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
958 (rs->sc_dkdev.dk_copenmask & pmask))) {
959 raidunlock(rs);
960 return (EBUSY);
961 }
962
963 retcode = rf_Shutdown(raidPtr);
964
965 pool_destroy(&rs->sc_cbufpool);
966
967 /* It's no longer initialized... */
968 rs->sc_flags &= ~RAIDF_INITED;
969
970 /* Detach the disk. */
971 disk_detach(&rs->sc_dkdev);
972
973 raidunlock(rs);
974
975 return (retcode);
976 case RAIDFRAME_GET_COMPONENT_LABEL:
977 clabel_ptr = (RF_ComponentLabel_t **) data;
978 /* need to read the component label for the disk indicated
979 by row,column in clabel */
980
981 /* For practice, let's get it directly fromdisk, rather
982 than from the in-core copy */
983 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
984 (RF_ComponentLabel_t *));
985 if (clabel == NULL)
986 return (ENOMEM);
987
988 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
989
990 retcode = copyin( *clabel_ptr, clabel,
991 sizeof(RF_ComponentLabel_t));
992
993 if (retcode) {
994 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
995 return(retcode);
996 }
997
998 row = clabel->row;
999 column = clabel->column;
1000
1001 if ((row < 0) || (row >= raidPtr->numRow) ||
1002 (column < 0) || (column >= raidPtr->numCol)) {
1003 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1004 return(EINVAL);
1005 }
1006
1007 raidread_component_label(raidPtr->Disks[row][column].dev,
1008 raidPtr->raid_cinfo[row][column].ci_vp,
1009 clabel );
1010
1011 retcode = copyout((caddr_t) clabel,
1012 (caddr_t) *clabel_ptr,
1013 sizeof(RF_ComponentLabel_t));
1014 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1015 return (retcode);
1016
1017 case RAIDFRAME_SET_COMPONENT_LABEL:
1018 clabel = (RF_ComponentLabel_t *) data;
1019
1020 /* XXX check the label for valid stuff... */
1021 /* Note that some things *should not* get modified --
1022 the user should be re-initing the labels instead of
1023 trying to patch things.
1024 */
1025
1026 printf("Got component label:\n");
1027 printf("Version: %d\n",clabel->version);
1028 printf("Serial Number: %d\n",clabel->serial_number);
1029 printf("Mod counter: %d\n",clabel->mod_counter);
1030 printf("Row: %d\n", clabel->row);
1031 printf("Column: %d\n", clabel->column);
1032 printf("Num Rows: %d\n", clabel->num_rows);
1033 printf("Num Columns: %d\n", clabel->num_columns);
1034 printf("Clean: %d\n", clabel->clean);
1035 printf("Status: %d\n", clabel->status);
1036
1037 row = clabel->row;
1038 column = clabel->column;
1039
1040 if ((row < 0) || (row >= raidPtr->numRow) ||
1041 (column < 0) || (column >= raidPtr->numCol)) {
1042 return(EINVAL);
1043 }
1044
1045 /* XXX this isn't allowed to do anything for now :-) */
1046
1047 /* XXX and before it is, we need to fill in the rest
1048 of the fields!?!?!?! */
1049 #if 0
1050 raidwrite_component_label(
1051 raidPtr->Disks[row][column].dev,
1052 raidPtr->raid_cinfo[row][column].ci_vp,
1053 clabel );
1054 #endif
1055 return (0);
1056
1057 case RAIDFRAME_INIT_LABELS:
1058 clabel = (RF_ComponentLabel_t *) data;
1059 /*
1060 we only want the serial number from
1061 the above. We get all the rest of the information
1062 from the config that was used to create this RAID
1063 set.
1064 */
1065
1066 raidPtr->serial_number = clabel->serial_number;
1067
1068 raid_init_component_label(raidPtr, clabel);
1069
1070 for(row=0;row<raidPtr->numRow;row++) {
1071 ci_label.row = row;
1072 for(column=0;column<raidPtr->numCol;column++) {
1073 diskPtr = &raidPtr->Disks[row][column];
1074 ci_label.blockSize = diskPtr->blockSize;
1075 ci_label.numBlocks = diskPtr->numBlocks;
1076 ci_label.partitionSize = diskPtr->partitionSize;
1077 ci_label.column = column;
1078 raidwrite_component_label(
1079 raidPtr->Disks[row][column].dev,
1080 raidPtr->raid_cinfo[row][column].ci_vp,
1081 &ci_label );
1082 }
1083 }
1084
1085 return (retcode);
1086 case RAIDFRAME_SET_AUTOCONFIG:
1087 d = rf_set_autoconfig(raidPtr, *data);
1088 printf("New autoconfig value is: %d\n", d);
1089 *data = d;
1090 return (retcode);
1091
1092 case RAIDFRAME_SET_ROOT:
1093 d = rf_set_rootpartition(raidPtr, *data);
1094 printf("New rootpartition value is: %d\n", d);
1095 *data = d;
1096 return (retcode);
1097
1098 /* initialize all parity */
1099 case RAIDFRAME_REWRITEPARITY:
1100
1101 if (raidPtr->Layout.map->faultsTolerated == 0) {
1102 /* Parity for RAID 0 is trivially correct */
1103 raidPtr->parity_good = RF_RAID_CLEAN;
1104 return(0);
1105 }
1106
1107 if (raidPtr->parity_rewrite_in_progress == 1) {
1108 /* Re-write is already in progress! */
1109 return(EINVAL);
1110 }
1111
1112 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1113 rf_RewriteParityThread,
1114 raidPtr,"raid_parity");
1115 return (retcode);
1116
1117
1118 case RAIDFRAME_ADD_HOT_SPARE:
1119 sparePtr = (RF_SingleComponent_t *) data;
1120 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1121 printf("Adding spare\n");
1122 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1123 return(retcode);
1124
1125 case RAIDFRAME_REMOVE_HOT_SPARE:
1126 return(retcode);
1127
1128 case RAIDFRAME_REBUILD_IN_PLACE:
1129
1130 if (raidPtr->Layout.map->faultsTolerated == 0) {
1131 /* Can't do this on a RAID 0!! */
1132 return(EINVAL);
1133 }
1134
1135 if (raidPtr->recon_in_progress == 1) {
1136 /* a reconstruct is already in progress! */
1137 return(EINVAL);
1138 }
1139
1140 componentPtr = (RF_SingleComponent_t *) data;
1141 memcpy( &component, componentPtr,
1142 sizeof(RF_SingleComponent_t));
1143 row = component.row;
1144 column = component.column;
1145 printf("Rebuild: %d %d\n",row, column);
1146 if ((row < 0) || (row >= raidPtr->numRow) ||
1147 (column < 0) || (column >= raidPtr->numCol)) {
1148 return(EINVAL);
1149 }
1150
1151 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1152 if (rrcopy == NULL)
1153 return(ENOMEM);
1154
1155 rrcopy->raidPtr = (void *) raidPtr;
1156 rrcopy->row = row;
1157 rrcopy->col = column;
1158
1159 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1160 rf_ReconstructInPlaceThread,
1161 rrcopy,"raid_reconip");
1162 return(retcode);
1163
1164 case RAIDFRAME_GET_INFO:
1165 if (!raidPtr->valid)
1166 return (ENODEV);
1167 ucfgp = (RF_DeviceConfig_t **) data;
1168 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1169 (RF_DeviceConfig_t *));
1170 if (d_cfg == NULL)
1171 return (ENOMEM);
1172 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1173 d_cfg->rows = raidPtr->numRow;
1174 d_cfg->cols = raidPtr->numCol;
1175 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1176 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1177 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1178 return (ENOMEM);
1179 }
1180 d_cfg->nspares = raidPtr->numSpare;
1181 if (d_cfg->nspares >= RF_MAX_DISKS) {
1182 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1183 return (ENOMEM);
1184 }
1185 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1186 d = 0;
1187 for (i = 0; i < d_cfg->rows; i++) {
1188 for (j = 0; j < d_cfg->cols; j++) {
1189 d_cfg->devs[d] = raidPtr->Disks[i][j];
1190 d++;
1191 }
1192 }
1193 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1194 d_cfg->spares[i] = raidPtr->Disks[0][j];
1195 }
1196 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1197 sizeof(RF_DeviceConfig_t));
1198 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1199
1200 return (retcode);
1201
1202 case RAIDFRAME_CHECK_PARITY:
1203 *(int *) data = raidPtr->parity_good;
1204 return (0);
1205
1206 case RAIDFRAME_RESET_ACCTOTALS:
1207 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1208 return (0);
1209
1210 case RAIDFRAME_GET_ACCTOTALS:
1211 totals = (RF_AccTotals_t *) data;
1212 *totals = raidPtr->acc_totals;
1213 return (0);
1214
1215 case RAIDFRAME_KEEP_ACCTOTALS:
1216 raidPtr->keep_acc_totals = *(int *)data;
1217 return (0);
1218
1219 case RAIDFRAME_GET_SIZE:
1220 *(int *) data = raidPtr->totalSectors;
1221 return (0);
1222
1223 /* fail a disk & optionally start reconstruction */
1224 case RAIDFRAME_FAIL_DISK:
1225
1226 if (raidPtr->Layout.map->faultsTolerated == 0) {
1227 /* Can't do this on a RAID 0!! */
1228 return(EINVAL);
1229 }
1230
1231 rr = (struct rf_recon_req *) data;
1232
1233 if (rr->row < 0 || rr->row >= raidPtr->numRow
1234 || rr->col < 0 || rr->col >= raidPtr->numCol)
1235 return (EINVAL);
1236
1237 printf("raid%d: Failing the disk: row: %d col: %d\n",
1238 unit, rr->row, rr->col);
1239
1240 /* make a copy of the recon request so that we don't rely on
1241 * the user's buffer */
1242 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1243 if (rrcopy == NULL)
1244 return(ENOMEM);
1245 bcopy(rr, rrcopy, sizeof(*rr));
1246 rrcopy->raidPtr = (void *) raidPtr;
1247
1248 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1249 rf_ReconThread,
1250 rrcopy,"raid_recon");
1251 return (0);
1252
1253 /* invoke a copyback operation after recon on whatever disk
1254 * needs it, if any */
1255 case RAIDFRAME_COPYBACK:
1256
1257 if (raidPtr->Layout.map->faultsTolerated == 0) {
1258 /* This makes no sense on a RAID 0!! */
1259 return(EINVAL);
1260 }
1261
1262 if (raidPtr->copyback_in_progress == 1) {
1263 /* Copyback is already in progress! */
1264 return(EINVAL);
1265 }
1266
1267 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1268 rf_CopybackThread,
1269 raidPtr,"raid_copyback");
1270 return (retcode);
1271
1272 /* return the percentage completion of reconstruction */
1273 case RAIDFRAME_CHECK_RECON_STATUS:
1274 if (raidPtr->Layout.map->faultsTolerated == 0) {
1275 /* This makes no sense on a RAID 0 */
1276 return(EINVAL);
1277 }
1278 row = 0; /* XXX we only consider a single row... */
1279 if (raidPtr->status[row] != rf_rs_reconstructing)
1280 *(int *) data = 100;
1281 else
1282 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1283 return (0);
1284
1285 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1286 if (raidPtr->Layout.map->faultsTolerated == 0) {
1287 /* This makes no sense on a RAID 0 */
1288 return(EINVAL);
1289 }
1290 if (raidPtr->parity_rewrite_in_progress == 1) {
1291 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1292 } else {
1293 *(int *) data = 100;
1294 }
1295 return (0);
1296
1297 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1298 if (raidPtr->Layout.map->faultsTolerated == 0) {
1299 /* This makes no sense on a RAID 0 */
1300 return(EINVAL);
1301 }
1302 if (raidPtr->copyback_in_progress == 1) {
1303 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1304 raidPtr->Layout.numStripe;
1305 } else {
1306 *(int *) data = 100;
1307 }
1308 return (0);
1309
1310
1311 /* the sparetable daemon calls this to wait for the kernel to
1312 * need a spare table. this ioctl does not return until a
1313 * spare table is needed. XXX -- calling mpsleep here in the
1314 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1315 * -- I should either compute the spare table in the kernel,
1316 * or have a different -- XXX XXX -- interface (a different
1317 * character device) for delivering the table -- XXX */
1318 #if 0
1319 case RAIDFRAME_SPARET_WAIT:
1320 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1321 while (!rf_sparet_wait_queue)
1322 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1323 waitreq = rf_sparet_wait_queue;
1324 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1325 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1326
1327 /* structure assignment */
1328 *((RF_SparetWait_t *) data) = *waitreq;
1329
1330 RF_Free(waitreq, sizeof(*waitreq));
1331 return (0);
1332
1333 /* wakes up a process waiting on SPARET_WAIT and puts an error
1334 * code in it that will cause the daemon to exit */
1335 case RAIDFRAME_ABORT_SPARET_WAIT:
1336 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1337 waitreq->fcol = -1;
1338 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1339 waitreq->next = rf_sparet_wait_queue;
1340 rf_sparet_wait_queue = waitreq;
1341 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1342 wakeup(&rf_sparet_wait_queue);
1343 return (0);
1344
1345 /* used by the spare table daemon to deliver a spare table
1346 * into the kernel */
1347 case RAIDFRAME_SEND_SPARET:
1348
1349 /* install the spare table */
1350 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1351
1352 /* respond to the requestor. the return status of the spare
1353 * table installation is passed in the "fcol" field */
1354 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1355 waitreq->fcol = retcode;
1356 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1357 waitreq->next = rf_sparet_resp_queue;
1358 rf_sparet_resp_queue = waitreq;
1359 wakeup(&rf_sparet_resp_queue);
1360 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1361
1362 return (retcode);
1363 #endif
1364
1365 default:
1366 break; /* fall through to the os-specific code below */
1367
1368 }
1369
1370 if (!raidPtr->valid)
1371 return (EINVAL);
1372
1373 /*
1374 * Add support for "regular" device ioctls here.
1375 */
1376
1377 switch (cmd) {
1378 case DIOCGDINFO:
1379 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1380 break;
1381
1382 case DIOCGPART:
1383 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1384 ((struct partinfo *) data)->part =
1385 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1386 break;
1387
1388 case DIOCWDINFO:
1389 case DIOCSDINFO:
1390 if ((error = raidlock(rs)) != 0)
1391 return (error);
1392
1393 rs->sc_flags |= RAIDF_LABELLING;
1394
1395 error = setdisklabel(rs->sc_dkdev.dk_label,
1396 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1397 if (error == 0) {
1398 if (cmd == DIOCWDINFO)
1399 error = writedisklabel(RAIDLABELDEV(dev),
1400 raidstrategy, rs->sc_dkdev.dk_label,
1401 rs->sc_dkdev.dk_cpulabel);
1402 }
1403 rs->sc_flags &= ~RAIDF_LABELLING;
1404
1405 raidunlock(rs);
1406
1407 if (error)
1408 return (error);
1409 break;
1410
1411 case DIOCWLABEL:
1412 if (*(int *) data != 0)
1413 rs->sc_flags |= RAIDF_WLABEL;
1414 else
1415 rs->sc_flags &= ~RAIDF_WLABEL;
1416 break;
1417
1418 case DIOCGDEFLABEL:
1419 raidgetdefaultlabel(raidPtr, rs,
1420 (struct disklabel *) data);
1421 break;
1422
1423 default:
1424 retcode = ENOTTY;
1425 }
1426 return (retcode);
1427
1428 }
1429
1430
1431 /* raidinit -- complete the rest of the initialization for the
1432 RAIDframe device. */
1433
1434
1435 static int
1436 raidinit(dev, raidPtr, unit)
1437 dev_t dev;
1438 RF_Raid_t *raidPtr;
1439 int unit;
1440 {
1441 int retcode;
1442 struct raid_softc *rs;
1443
1444 retcode = 0;
1445
1446 rs = &raid_softc[unit];
1447 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1448 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1449
1450
1451 /* XXX should check return code first... */
1452 rs->sc_flags |= RAIDF_INITED;
1453
1454 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1455
1456 rs->sc_dkdev.dk_name = rs->sc_xname;
1457
1458 /* disk_attach actually creates space for the CPU disklabel, among
1459 * other things, so it's critical to call this *BEFORE* we try putzing
1460 * with disklabels. */
1461
1462 disk_attach(&rs->sc_dkdev);
1463
1464 /* XXX There may be a weird interaction here between this, and
1465 * protectedSectors, as used in RAIDframe. */
1466
1467 rs->sc_size = raidPtr->totalSectors;
1468 rs->sc_dev = dev;
1469
1470 return (retcode);
1471 }
1472
1473 /* wake up the daemon & tell it to get us a spare table
1474 * XXX
1475 * the entries in the queues should be tagged with the raidPtr
1476 * so that in the extremely rare case that two recons happen at once,
1477 * we know for which device were requesting a spare table
1478 * XXX
1479 *
1480 * XXX This code is not currently used. GO
1481 */
1482 int
1483 rf_GetSpareTableFromDaemon(req)
1484 RF_SparetWait_t *req;
1485 {
1486 int retcode;
1487
1488 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1489 req->next = rf_sparet_wait_queue;
1490 rf_sparet_wait_queue = req;
1491 wakeup(&rf_sparet_wait_queue);
1492
1493 /* mpsleep unlocks the mutex */
1494 while (!rf_sparet_resp_queue) {
1495 tsleep(&rf_sparet_resp_queue, PRIBIO,
1496 "raidframe getsparetable", 0);
1497 }
1498 req = rf_sparet_resp_queue;
1499 rf_sparet_resp_queue = req->next;
1500 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1501
1502 retcode = req->fcol;
1503 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1504 * alloc'd */
1505 return (retcode);
1506 }
1507
1508 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1509 * bp & passes it down.
1510 * any calls originating in the kernel must use non-blocking I/O
1511 * do some extra sanity checking to return "appropriate" error values for
1512 * certain conditions (to make some standard utilities work)
1513 *
1514 * Formerly known as: rf_DoAccessKernel
1515 */
1516 void
1517 raidstart(raidPtr)
1518 RF_Raid_t *raidPtr;
1519 {
1520 RF_SectorCount_t num_blocks, pb, sum;
1521 RF_RaidAddr_t raid_addr;
1522 int retcode;
1523 struct partition *pp;
1524 daddr_t blocknum;
1525 int unit;
1526 struct raid_softc *rs;
1527 int do_async;
1528 struct buf *bp;
1529
1530 unit = raidPtr->raidid;
1531 rs = &raid_softc[unit];
1532
1533 /* Check to see if we're at the limit... */
1534 RF_LOCK_MUTEX(raidPtr->mutex);
1535 while (raidPtr->openings > 0) {
1536 RF_UNLOCK_MUTEX(raidPtr->mutex);
1537
1538 /* get the next item, if any, from the queue */
1539 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1540 /* nothing more to do */
1541 return;
1542 }
1543 BUFQ_REMOVE(&rs->buf_queue, bp);
1544
1545 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1546 * partition.. Need to make it absolute to the underlying
1547 * device.. */
1548
1549 blocknum = bp->b_blkno;
1550 if (DISKPART(bp->b_dev) != RAW_PART) {
1551 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1552 blocknum += pp->p_offset;
1553 }
1554
1555 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1556 (int) blocknum));
1557
1558 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1559 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1560
1561 /* *THIS* is where we adjust what block we're going to...
1562 * but DO NOT TOUCH bp->b_blkno!!! */
1563 raid_addr = blocknum;
1564
1565 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1566 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1567 sum = raid_addr + num_blocks + pb;
1568 if (1 || rf_debugKernelAccess) {
1569 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1570 (int) raid_addr, (int) sum, (int) num_blocks,
1571 (int) pb, (int) bp->b_resid));
1572 }
1573 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1574 || (sum < num_blocks) || (sum < pb)) {
1575 bp->b_error = ENOSPC;
1576 bp->b_flags |= B_ERROR;
1577 bp->b_resid = bp->b_bcount;
1578 biodone(bp);
1579 RF_LOCK_MUTEX(raidPtr->mutex);
1580 continue;
1581 }
1582 /*
1583 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1584 */
1585
1586 if (bp->b_bcount & raidPtr->sectorMask) {
1587 bp->b_error = EINVAL;
1588 bp->b_flags |= B_ERROR;
1589 bp->b_resid = bp->b_bcount;
1590 biodone(bp);
1591 RF_LOCK_MUTEX(raidPtr->mutex);
1592 continue;
1593
1594 }
1595 db1_printf(("Calling DoAccess..\n"));
1596
1597
1598 RF_LOCK_MUTEX(raidPtr->mutex);
1599 raidPtr->openings--;
1600 RF_UNLOCK_MUTEX(raidPtr->mutex);
1601
1602 /*
1603 * Everything is async.
1604 */
1605 do_async = 1;
1606
1607 /* don't ever condition on bp->b_flags & B_WRITE.
1608 * always condition on B_READ instead */
1609
1610 /* XXX we're still at splbio() here... do we *really*
1611 need to be? */
1612
1613
1614 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1615 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1616 do_async, raid_addr, num_blocks,
1617 bp->b_un.b_addr, bp, NULL, NULL,
1618 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1619
1620
1621 RF_LOCK_MUTEX(raidPtr->mutex);
1622 }
1623 RF_UNLOCK_MUTEX(raidPtr->mutex);
1624 }
1625
1626
1627
1628
1629 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1630
1631 int
1632 rf_DispatchKernelIO(queue, req)
1633 RF_DiskQueue_t *queue;
1634 RF_DiskQueueData_t *req;
1635 {
1636 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1637 struct buf *bp;
1638 struct raidbuf *raidbp = NULL;
1639 struct raid_softc *rs;
1640 int unit;
1641 int s;
1642
1643 s=0;
1644 /* s = splbio();*/ /* want to test this */
1645 /* XXX along with the vnode, we also need the softc associated with
1646 * this device.. */
1647
1648 req->queue = queue;
1649
1650 unit = queue->raidPtr->raidid;
1651
1652 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1653
1654 if (unit >= numraid) {
1655 printf("Invalid unit number: %d %d\n", unit, numraid);
1656 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1657 }
1658 rs = &raid_softc[unit];
1659
1660 /* XXX is this the right place? */
1661 disk_busy(&rs->sc_dkdev);
1662
1663 bp = req->bp;
1664 #if 1
1665 /* XXX when there is a physical disk failure, someone is passing us a
1666 * buffer that contains old stuff!! Attempt to deal with this problem
1667 * without taking a performance hit... (not sure where the real bug
1668 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1669
1670 if (bp->b_flags & B_ERROR) {
1671 bp->b_flags &= ~B_ERROR;
1672 }
1673 if (bp->b_error != 0) {
1674 bp->b_error = 0;
1675 }
1676 #endif
1677 raidbp = RAIDGETBUF(rs);
1678
1679 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1680
1681 /*
1682 * context for raidiodone
1683 */
1684 raidbp->rf_obp = bp;
1685 raidbp->req = req;
1686
1687 LIST_INIT(&raidbp->rf_buf.b_dep);
1688
1689 switch (req->type) {
1690 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1691 /* XXX need to do something extra here.. */
1692 /* I'm leaving this in, as I've never actually seen it used,
1693 * and I'd like folks to report it... GO */
1694 printf(("WAKEUP CALLED\n"));
1695 queue->numOutstanding++;
1696
1697 /* XXX need to glue the original buffer into this?? */
1698
1699 KernelWakeupFunc(&raidbp->rf_buf);
1700 break;
1701
1702 case RF_IO_TYPE_READ:
1703 case RF_IO_TYPE_WRITE:
1704
1705 if (req->tracerec) {
1706 RF_ETIMER_START(req->tracerec->timer);
1707 }
1708 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1709 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1710 req->sectorOffset, req->numSector,
1711 req->buf, KernelWakeupFunc, (void *) req,
1712 queue->raidPtr->logBytesPerSector, req->b_proc);
1713
1714 if (rf_debugKernelAccess) {
1715 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1716 (long) bp->b_blkno));
1717 }
1718 queue->numOutstanding++;
1719 queue->last_deq_sector = req->sectorOffset;
1720 /* acc wouldn't have been let in if there were any pending
1721 * reqs at any other priority */
1722 queue->curPriority = req->priority;
1723
1724 db1_printf(("Going for %c to unit %d row %d col %d\n",
1725 req->type, unit, queue->row, queue->col));
1726 db1_printf(("sector %d count %d (%d bytes) %d\n",
1727 (int) req->sectorOffset, (int) req->numSector,
1728 (int) (req->numSector <<
1729 queue->raidPtr->logBytesPerSector),
1730 (int) queue->raidPtr->logBytesPerSector));
1731 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1732 raidbp->rf_buf.b_vp->v_numoutput++;
1733 }
1734 VOP_STRATEGY(&raidbp->rf_buf);
1735
1736 break;
1737
1738 default:
1739 panic("bad req->type in rf_DispatchKernelIO");
1740 }
1741 db1_printf(("Exiting from DispatchKernelIO\n"));
1742 /* splx(s); */ /* want to test this */
1743 return (0);
1744 }
1745 /* this is the callback function associated with a I/O invoked from
1746 kernel code.
1747 */
1748 static void
1749 KernelWakeupFunc(vbp)
1750 struct buf *vbp;
1751 {
1752 RF_DiskQueueData_t *req = NULL;
1753 RF_DiskQueue_t *queue;
1754 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1755 struct buf *bp;
1756 struct raid_softc *rs;
1757 int unit;
1758 register int s;
1759
1760 s = splbio();
1761 db1_printf(("recovering the request queue:\n"));
1762 req = raidbp->req;
1763
1764 bp = raidbp->rf_obp;
1765
1766 queue = (RF_DiskQueue_t *) req->queue;
1767
1768 if (raidbp->rf_buf.b_flags & B_ERROR) {
1769 bp->b_flags |= B_ERROR;
1770 bp->b_error = raidbp->rf_buf.b_error ?
1771 raidbp->rf_buf.b_error : EIO;
1772 }
1773
1774 /* XXX methinks this could be wrong... */
1775 #if 1
1776 bp->b_resid = raidbp->rf_buf.b_resid;
1777 #endif
1778
1779 if (req->tracerec) {
1780 RF_ETIMER_STOP(req->tracerec->timer);
1781 RF_ETIMER_EVAL(req->tracerec->timer);
1782 RF_LOCK_MUTEX(rf_tracing_mutex);
1783 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1784 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1785 req->tracerec->num_phys_ios++;
1786 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1787 }
1788 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1789
1790 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1791
1792
1793 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1794 * ballistic, and mark the component as hosed... */
1795
1796 if (bp->b_flags & B_ERROR) {
1797 /* Mark the disk as dead */
1798 /* but only mark it once... */
1799 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1800 rf_ds_optimal) {
1801 printf("raid%d: IO Error. Marking %s as failed.\n",
1802 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1803 queue->raidPtr->Disks[queue->row][queue->col].status =
1804 rf_ds_failed;
1805 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1806 queue->raidPtr->numFailures++;
1807 /* XXX here we should bump the version number for each component, and write that data out */
1808 } else { /* Disk is already dead... */
1809 /* printf("Disk already marked as dead!\n"); */
1810 }
1811
1812 }
1813
1814 rs = &raid_softc[unit];
1815 RAIDPUTBUF(rs, raidbp);
1816
1817
1818 if (bp->b_resid == 0) {
1819 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1820 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1821 }
1822
1823 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1824 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1825
1826 splx(s);
1827 }
1828
1829
1830
1831 /*
1832 * initialize a buf structure for doing an I/O in the kernel.
1833 */
1834 static void
1835 InitBP(
1836 struct buf * bp,
1837 struct vnode * b_vp,
1838 unsigned rw_flag,
1839 dev_t dev,
1840 RF_SectorNum_t startSect,
1841 RF_SectorCount_t numSect,
1842 caddr_t buf,
1843 void (*cbFunc) (struct buf *),
1844 void *cbArg,
1845 int logBytesPerSector,
1846 struct proc * b_proc)
1847 {
1848 /* bp->b_flags = B_PHYS | rw_flag; */
1849 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1850 bp->b_bcount = numSect << logBytesPerSector;
1851 bp->b_bufsize = bp->b_bcount;
1852 bp->b_error = 0;
1853 bp->b_dev = dev;
1854 bp->b_un.b_addr = buf;
1855 bp->b_blkno = startSect;
1856 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1857 if (bp->b_bcount == 0) {
1858 panic("bp->b_bcount is zero in InitBP!!\n");
1859 }
1860 bp->b_proc = b_proc;
1861 bp->b_iodone = cbFunc;
1862 bp->b_vp = b_vp;
1863
1864 }
1865
1866 static void
1867 raidgetdefaultlabel(raidPtr, rs, lp)
1868 RF_Raid_t *raidPtr;
1869 struct raid_softc *rs;
1870 struct disklabel *lp;
1871 {
1872 db1_printf(("Building a default label...\n"));
1873 bzero(lp, sizeof(*lp));
1874
1875 /* fabricate a label... */
1876 lp->d_secperunit = raidPtr->totalSectors;
1877 lp->d_secsize = raidPtr->bytesPerSector;
1878 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1879 lp->d_ntracks = 1;
1880 lp->d_ncylinders = raidPtr->totalSectors /
1881 (lp->d_nsectors * lp->d_ntracks);
1882 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1883
1884 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1885 lp->d_type = DTYPE_RAID;
1886 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1887 lp->d_rpm = 3600;
1888 lp->d_interleave = 1;
1889 lp->d_flags = 0;
1890
1891 lp->d_partitions[RAW_PART].p_offset = 0;
1892 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1893 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1894 lp->d_npartitions = RAW_PART + 1;
1895
1896 lp->d_magic = DISKMAGIC;
1897 lp->d_magic2 = DISKMAGIC;
1898 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1899
1900 }
1901 /*
1902 * Read the disklabel from the raid device. If one is not present, fake one
1903 * up.
1904 */
1905 static void
1906 raidgetdisklabel(dev)
1907 dev_t dev;
1908 {
1909 int unit = raidunit(dev);
1910 struct raid_softc *rs = &raid_softc[unit];
1911 char *errstring;
1912 struct disklabel *lp = rs->sc_dkdev.dk_label;
1913 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1914 RF_Raid_t *raidPtr;
1915
1916 db1_printf(("Getting the disklabel...\n"));
1917
1918 bzero(clp, sizeof(*clp));
1919
1920 raidPtr = raidPtrs[unit];
1921
1922 raidgetdefaultlabel(raidPtr, rs, lp);
1923
1924 /*
1925 * Call the generic disklabel extraction routine.
1926 */
1927 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1928 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1929 if (errstring)
1930 raidmakedisklabel(rs);
1931 else {
1932 int i;
1933 struct partition *pp;
1934
1935 /*
1936 * Sanity check whether the found disklabel is valid.
1937 *
1938 * This is necessary since total size of the raid device
1939 * may vary when an interleave is changed even though exactly
1940 * same componets are used, and old disklabel may used
1941 * if that is found.
1942 */
1943 if (lp->d_secperunit != rs->sc_size)
1944 printf("WARNING: %s: "
1945 "total sector size in disklabel (%d) != "
1946 "the size of raid (%ld)\n", rs->sc_xname,
1947 lp->d_secperunit, (long) rs->sc_size);
1948 for (i = 0; i < lp->d_npartitions; i++) {
1949 pp = &lp->d_partitions[i];
1950 if (pp->p_offset + pp->p_size > rs->sc_size)
1951 printf("WARNING: %s: end of partition `%c' "
1952 "exceeds the size of raid (%ld)\n",
1953 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1954 }
1955 }
1956
1957 }
1958 /*
1959 * Take care of things one might want to take care of in the event
1960 * that a disklabel isn't present.
1961 */
1962 static void
1963 raidmakedisklabel(rs)
1964 struct raid_softc *rs;
1965 {
1966 struct disklabel *lp = rs->sc_dkdev.dk_label;
1967 db1_printf(("Making a label..\n"));
1968
1969 /*
1970 * For historical reasons, if there's no disklabel present
1971 * the raw partition must be marked FS_BSDFFS.
1972 */
1973
1974 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1975
1976 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1977
1978 lp->d_checksum = dkcksum(lp);
1979 }
1980 /*
1981 * Lookup the provided name in the filesystem. If the file exists,
1982 * is a valid block device, and isn't being used by anyone else,
1983 * set *vpp to the file's vnode.
1984 * You'll find the original of this in ccd.c
1985 */
1986 int
1987 raidlookup(path, p, vpp)
1988 char *path;
1989 struct proc *p;
1990 struct vnode **vpp; /* result */
1991 {
1992 struct nameidata nd;
1993 struct vnode *vp;
1994 struct vattr va;
1995 int error;
1996
1997 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1998 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1999 #ifdef DEBUG
2000 printf("RAIDframe: vn_open returned %d\n", error);
2001 #endif
2002 return (error);
2003 }
2004 vp = nd.ni_vp;
2005 if (vp->v_usecount > 1) {
2006 VOP_UNLOCK(vp, 0);
2007 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2008 return (EBUSY);
2009 }
2010 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2011 VOP_UNLOCK(vp, 0);
2012 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2013 return (error);
2014 }
2015 /* XXX: eventually we should handle VREG, too. */
2016 if (va.va_type != VBLK) {
2017 VOP_UNLOCK(vp, 0);
2018 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2019 return (ENOTBLK);
2020 }
2021 VOP_UNLOCK(vp, 0);
2022 *vpp = vp;
2023 return (0);
2024 }
2025 /*
2026 * Wait interruptibly for an exclusive lock.
2027 *
2028 * XXX
2029 * Several drivers do this; it should be abstracted and made MP-safe.
2030 * (Hmm... where have we seen this warning before :-> GO )
2031 */
2032 static int
2033 raidlock(rs)
2034 struct raid_softc *rs;
2035 {
2036 int error;
2037
2038 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2039 rs->sc_flags |= RAIDF_WANTED;
2040 if ((error =
2041 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2042 return (error);
2043 }
2044 rs->sc_flags |= RAIDF_LOCKED;
2045 return (0);
2046 }
2047 /*
2048 * Unlock and wake up any waiters.
2049 */
2050 static void
2051 raidunlock(rs)
2052 struct raid_softc *rs;
2053 {
2054
2055 rs->sc_flags &= ~RAIDF_LOCKED;
2056 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2057 rs->sc_flags &= ~RAIDF_WANTED;
2058 wakeup(rs);
2059 }
2060 }
2061
2062
/*
 * Location of the component label on each component.  The label I/O
 * routines in this file address the device at
 * RF_COMPONENT_INFO_OFFSET / DEV_BSIZE and transfer
 * RF_COMPONENT_INFO_SIZE bytes.
 */
#define RF_COMPONENT_INFO_OFFSET	16384	/* byte offset of the label */
#define RF_COMPONENT_INFO_SIZE		1024	/* bytes transferred per label I/O */
2065
2066 int
2067 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2068 {
2069 RF_ComponentLabel_t clabel;
2070 raidread_component_label(dev, b_vp, &clabel);
2071 clabel.mod_counter = mod_counter;
2072 clabel.clean = RF_RAID_CLEAN;
2073 raidwrite_component_label(dev, b_vp, &clabel);
2074 return(0);
2075 }
2076
2077
2078 int
2079 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2080 {
2081 RF_ComponentLabel_t clabel;
2082 raidread_component_label(dev, b_vp, &clabel);
2083 clabel.mod_counter = mod_counter;
2084 clabel.clean = RF_RAID_DIRTY;
2085 raidwrite_component_label(dev, b_vp, &clabel);
2086 return(0);
2087 }
2088
2089 /* ARGSUSED */
2090 int
2091 raidread_component_label(dev, b_vp, clabel)
2092 dev_t dev;
2093 struct vnode *b_vp;
2094 RF_ComponentLabel_t *clabel;
2095 {
2096 struct buf *bp;
2097 int error;
2098
2099 /* XXX should probably ensure that we don't try to do this if
2100 someone has changed rf_protected_sectors. */
2101
2102 /* get a block of the appropriate size... */
2103 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2104 bp->b_dev = dev;
2105
2106 /* get our ducks in a row for the read */
2107 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2108 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2109 bp->b_flags = B_BUSY | B_READ;
2110 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2111
2112 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2113
2114 error = biowait(bp);
2115
2116 if (!error) {
2117 memcpy(clabel, bp->b_un.b_addr,
2118 sizeof(RF_ComponentLabel_t));
2119 #if 0
2120 print_component_label( clabel );
2121 #endif
2122 } else {
2123 #if 0
2124 printf("Failed to read RAID component label!\n");
2125 #endif
2126 }
2127
2128 bp->b_flags = B_INVAL | B_AGE;
2129 brelse(bp);
2130 return(error);
2131 }
2132 /* ARGSUSED */
2133 int
2134 raidwrite_component_label(dev, b_vp, clabel)
2135 dev_t dev;
2136 struct vnode *b_vp;
2137 RF_ComponentLabel_t *clabel;
2138 {
2139 struct buf *bp;
2140 int error;
2141
2142 /* get a block of the appropriate size... */
2143 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2144 bp->b_dev = dev;
2145
2146 /* get our ducks in a row for the write */
2147 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2148 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2149 bp->b_flags = B_BUSY | B_WRITE;
2150 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2151
2152 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2153
2154 memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
2155
2156 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2157 error = biowait(bp);
2158 bp->b_flags = B_INVAL | B_AGE;
2159 brelse(bp);
2160 if (error) {
2161 #if 1
2162 printf("Failed to write RAID component info!\n");
2163 #endif
2164 }
2165
2166 return(error);
2167 }
2168
2169 void
2170 rf_markalldirty( raidPtr )
2171 RF_Raid_t *raidPtr;
2172 {
2173 RF_ComponentLabel_t clabel;
2174 int r,c;
2175
2176 raidPtr->mod_counter++;
2177 for (r = 0; r < raidPtr->numRow; r++) {
2178 for (c = 0; c < raidPtr->numCol; c++) {
2179 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2180 raidread_component_label(
2181 raidPtr->Disks[r][c].dev,
2182 raidPtr->raid_cinfo[r][c].ci_vp,
2183 &clabel);
2184 if (clabel.status == rf_ds_spared) {
2185 /* XXX do something special...
2186 but whatever you do, don't
2187 try to access it!! */
2188 } else {
2189 #if 0
2190 clabel.status =
2191 raidPtr->Disks[r][c].status;
2192 raidwrite_component_label(
2193 raidPtr->Disks[r][c].dev,
2194 raidPtr->raid_cinfo[r][c].ci_vp,
2195 &clabel);
2196 #endif
2197 raidmarkdirty(
2198 raidPtr->Disks[r][c].dev,
2199 raidPtr->raid_cinfo[r][c].ci_vp,
2200 raidPtr->mod_counter);
2201 }
2202 }
2203 }
2204 }
2205 /* printf("Component labels marked dirty.\n"); */
2206 #if 0
2207 for( c = 0; c < raidPtr->numSpare ; c++) {
2208 sparecol = raidPtr->numCol + c;
2209 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2210 /*
2211
2212 XXX this is where we get fancy and map this spare
2213 into it's correct spot in the array.
2214
2215 */
2216 /*
2217
2218 we claim this disk is "optimal" if it's
2219 rf_ds_used_spare, as that means it should be
2220 directly substitutable for the disk it replaced.
2221 We note that too...
2222
2223 */
2224
2225 for(i=0;i<raidPtr->numRow;i++) {
2226 for(j=0;j<raidPtr->numCol;j++) {
2227 if ((raidPtr->Disks[i][j].spareRow ==
2228 r) &&
2229 (raidPtr->Disks[i][j].spareCol ==
2230 sparecol)) {
2231 srow = r;
2232 scol = sparecol;
2233 break;
2234 }
2235 }
2236 }
2237
2238 raidread_component_label(
2239 raidPtr->Disks[r][sparecol].dev,
2240 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2241 &clabel);
2242 /* make sure status is noted */
2243 clabel.version = RF_COMPONENT_LABEL_VERSION;
2244 clabel.mod_counter = raidPtr->mod_counter;
2245 clabel.serial_number = raidPtr->serial_number;
2246 clabel.row = srow;
2247 clabel.column = scol;
2248 clabel.num_rows = raidPtr->numRow;
2249 clabel.num_columns = raidPtr->numCol;
2250 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2251 clabel.status = rf_ds_optimal;
2252 raidwrite_component_label(
2253 raidPtr->Disks[r][sparecol].dev,
2254 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2255 &clabel);
2256 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2257 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2258 }
2259 }
2260
2261 #endif
2262 }
2263
2264
2265 void
2266 rf_update_component_labels( raidPtr )
2267 RF_Raid_t *raidPtr;
2268 {
2269 RF_ComponentLabel_t clabel;
2270 int sparecol;
2271 int r,c;
2272 int i,j;
2273 int srow, scol;
2274
2275 srow = -1;
2276 scol = -1;
2277
2278 /* XXX should do extra checks to make sure things really are clean,
2279 rather than blindly setting the clean bit... */
2280
2281 raidPtr->mod_counter++;
2282
2283 for (r = 0; r < raidPtr->numRow; r++) {
2284 for (c = 0; c < raidPtr->numCol; c++) {
2285 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2286 raidread_component_label(
2287 raidPtr->Disks[r][c].dev,
2288 raidPtr->raid_cinfo[r][c].ci_vp,
2289 &clabel);
2290 /* make sure status is noted */
2291 clabel.status = rf_ds_optimal;
2292 raidwrite_component_label(
2293 raidPtr->Disks[r][c].dev,
2294 raidPtr->raid_cinfo[r][c].ci_vp,
2295 &clabel);
2296 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2297 raidmarkclean(
2298 raidPtr->Disks[r][c].dev,
2299 raidPtr->raid_cinfo[r][c].ci_vp,
2300 raidPtr->mod_counter);
2301 }
2302 }
2303 /* else we don't touch it.. */
2304 #if 0
2305 else if (raidPtr->Disks[r][c].status !=
2306 rf_ds_failed) {
2307 raidread_component_label(
2308 raidPtr->Disks[r][c].dev,
2309 raidPtr->raid_cinfo[r][c].ci_vp,
2310 &clabel);
2311 /* make sure status is noted */
2312 clabel.status =
2313 raidPtr->Disks[r][c].status;
2314 raidwrite_component_label(
2315 raidPtr->Disks[r][c].dev,
2316 raidPtr->raid_cinfo[r][c].ci_vp,
2317 &clabel);
2318 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2319 raidmarkclean(
2320 raidPtr->Disks[r][c].dev,
2321 raidPtr->raid_cinfo[r][c].ci_vp,
2322 raidPtr->mod_counter);
2323 }
2324 }
2325 #endif
2326 }
2327 }
2328
2329 for( c = 0; c < raidPtr->numSpare ; c++) {
2330 sparecol = raidPtr->numCol + c;
2331 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2332 /*
2333
2334 we claim this disk is "optimal" if it's
2335 rf_ds_used_spare, as that means it should be
2336 directly substitutable for the disk it replaced.
2337 We note that too...
2338
2339 */
2340
2341 for(i=0;i<raidPtr->numRow;i++) {
2342 for(j=0;j<raidPtr->numCol;j++) {
2343 if ((raidPtr->Disks[i][j].spareRow ==
2344 0) &&
2345 (raidPtr->Disks[i][j].spareCol ==
2346 sparecol)) {
2347 srow = i;
2348 scol = j;
2349 break;
2350 }
2351 }
2352 }
2353
2354 raidread_component_label(
2355 raidPtr->Disks[0][sparecol].dev,
2356 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2357 &clabel);
2358 /* make sure status is noted */
2359 clabel.version = RF_COMPONENT_LABEL_VERSION;
2360 clabel.mod_counter = raidPtr->mod_counter;
2361 clabel.serial_number = raidPtr->serial_number;
2362 clabel.row = srow;
2363 clabel.column = scol;
2364 clabel.num_rows = raidPtr->numRow;
2365 clabel.num_columns = raidPtr->numCol;
2366 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2367 clabel.status = rf_ds_optimal;
2368 raidwrite_component_label(
2369 raidPtr->Disks[0][sparecol].dev,
2370 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2371 &clabel);
2372 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2373 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2374 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2375 raidPtr->mod_counter);
2376 }
2377 }
2378 }
2379 /* printf("Component labels updated\n"); */
2380 }
2381
2382 void
2383 rf_ReconThread(req)
2384 struct rf_recon_req *req;
2385 {
2386 int s;
2387 RF_Raid_t *raidPtr;
2388
2389 s = splbio();
2390 raidPtr = (RF_Raid_t *) req->raidPtr;
2391 raidPtr->recon_in_progress = 1;
2392
2393 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2394 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2395
2396 /* XXX get rid of this! we don't need it at all.. */
2397 RF_Free(req, sizeof(*req));
2398
2399 raidPtr->recon_in_progress = 0;
2400 splx(s);
2401
2402 /* That's all... */
2403 kthread_exit(0); /* does not return */
2404 }
2405
2406 void
2407 rf_RewriteParityThread(raidPtr)
2408 RF_Raid_t *raidPtr;
2409 {
2410 int retcode;
2411 int s;
2412
2413 raidPtr->parity_rewrite_in_progress = 1;
2414 s = splbio();
2415 retcode = rf_RewriteParity(raidPtr);
2416 splx(s);
2417 if (retcode) {
2418 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2419 } else {
2420 /* set the clean bit! If we shutdown correctly,
2421 the clean bit on each component label will get
2422 set */
2423 raidPtr->parity_good = RF_RAID_CLEAN;
2424 }
2425 raidPtr->parity_rewrite_in_progress = 0;
2426
2427 /* That's all... */
2428 kthread_exit(0); /* does not return */
2429 }
2430
2431
2432 void
2433 rf_CopybackThread(raidPtr)
2434 RF_Raid_t *raidPtr;
2435 {
2436 int s;
2437
2438 raidPtr->copyback_in_progress = 1;
2439 s = splbio();
2440 rf_CopybackReconstructedData(raidPtr);
2441 splx(s);
2442 raidPtr->copyback_in_progress = 0;
2443
2444 /* That's all... */
2445 kthread_exit(0); /* does not return */
2446 }
2447
2448
2449 void
2450 rf_ReconstructInPlaceThread(req)
2451 struct rf_recon_req *req;
2452 {
2453 int retcode;
2454 int s;
2455 RF_Raid_t *raidPtr;
2456
2457 s = splbio();
2458 raidPtr = req->raidPtr;
2459 raidPtr->recon_in_progress = 1;
2460 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2461 RF_Free(req, sizeof(*req));
2462 raidPtr->recon_in_progress = 0;
2463 splx(s);
2464
2465 /* That's all... */
2466 kthread_exit(0); /* does not return */
2467 }
2468
2469 void
2470 rf_mountroot_hook(dev)
2471 struct device *dev;
2472 {
2473 #if 1
2474 printf("rf_mountroot_hook called for %s\n",dev->dv_xname);
2475 #endif
2476 if (boothowto & RB_ASKNAME) {
2477 /* We don't auto-config... */
2478 } else {
2479 /* They didn't ask, and we found something bootable... */
2480 /* XXX pretend for now.. */
2481 if (raidautoconfig) {
2482 rootspec = raid_rooty;
2483 }
2484 }
2485 }
2486
2487
2488 RF_AutoConfig_t *
2489 rf_find_raid_components()
2490 {
2491 struct devnametobdevmaj *dtobdm;
2492 struct vnode *vp;
2493 struct disklabel label;
2494 struct device *dv;
2495 char *cd_name;
2496 dev_t dev;
2497 int error;
2498 int i;
2499 int good_one;
2500 RF_ComponentLabel_t *clabel;
2501 RF_AutoConfig_t *ac_list;
2502 RF_AutoConfig_t *ac;
2503
2504
2505 /* initialize the AutoConfig list */
2506 ac_list = NULL;
2507
2508 if (raidautoconfig) {
2509
2510 /* we begin by trolling through *all* the devices on the system */
2511
2512 for (dv = alldevs.tqh_first; dv != NULL;
2513 dv = dv->dv_list.tqe_next) {
2514
2515 /* we are only interested in disks... */
2516 if (dv->dv_class != DV_DISK)
2517 continue;
2518
2519 /* we don't care about floppies... */
2520 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2521 continue;
2522 }
2523
2524 /* need to find the device_name_to_block_device_major stuff */
2525 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2526 dtobdm = dev_name2blk;
2527 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2528 dtobdm++;
2529 }
2530
2531 /* get a vnode for the raw partition of this disk */
2532
2533 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2534 if (bdevvp(dev, &vp))
2535 panic("RAID can't alloc vnode");
2536
2537 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2538
2539 if (error) {
2540 /* "Who cares." Continue looking
2541 for something that exists*/
2542 vput(vp);
2543 continue;
2544 }
2545
2546 /* Ok, the disk exists. Go get the disklabel. */
2547 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2548 FREAD, NOCRED, 0);
2549 if (error) {
2550 /*
2551 * XXX can't happen - open() would
2552 * have errored out (or faked up one)
2553 */
2554 printf("can't get label for dev %s%c (%d)!?!?\n",
2555 dv->dv_xname, 'a' + RAW_PART, error);
2556 }
2557
2558 /* don't need this any more. We'll allocate it again
2559 a little later if we really do... */
2560 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2561 vput(vp);
2562
2563 for (i=0; i < label.d_npartitions; i++) {
2564 /* We only support partitions marked as RAID */
2565 if (label.d_partitions[i].p_fstype != FS_RAID)
2566 continue;
2567
2568 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2569 if (bdevvp(dev, &vp))
2570 panic("RAID can't alloc vnode");
2571
2572 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2573 if (error) {
2574 /* Whatever... */
2575 vput(vp);
2576 continue;
2577 }
2578
2579 good_one = 0;
2580
2581 clabel = (RF_ComponentLabel_t *)
2582 malloc(sizeof(RF_ComponentLabel_t),
2583 M_RAIDFRAME, M_NOWAIT);
2584 if (clabel == NULL) {
2585 /* XXX CLEANUP HERE */
2586 printf("RAID auto config: out of memory!\n");
2587 return(NULL); /* XXX probably should panic? */
2588 }
2589
2590 if (!raidread_component_label(dev, vp, clabel)) {
2591 /* Got the label. Does it look reasonable? */
2592 if (rf_reasonable_label(clabel) &&
2593 (clabel->partitionSize ==
2594 label.d_partitions[i].p_size)) {
2595 #if DEBUG
2596 printf("Component on: %s%c: %d\n",
2597 dv->dv_xname, 'a'+i,
2598 label.d_partitions[i].p_size);
2599 print_component_label(clabel);
2600 #endif
2601 /* if it's reasonable, add it,
2602 else ignore it. */
2603 ac = (RF_AutoConfig_t *)
2604 malloc(sizeof(RF_AutoConfig_t),
2605 M_RAIDFRAME,
2606 M_NOWAIT);
2607 if (ac == NULL) {
2608 /* XXX should panic?? */
2609 return(NULL);
2610 }
2611
2612 sprintf(ac->devname, "%s%c",
2613 dv->dv_xname, 'a'+i);
2614 ac->dev = dev;
2615 ac->vp = vp;
2616 ac->clabel = clabel;
2617 ac->next = ac_list;
2618 ac_list = ac;
2619 good_one = 1;
2620 }
2621 }
2622 if (!good_one) {
2623 /* cleanup */
2624 free(clabel, M_RAIDFRAME);
2625 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2626 vput(vp);
2627 }
2628 }
2629 }
2630 }
2631 return(ac_list);
2632 }
2633
2634 static int
2635 rf_reasonable_label(clabel)
2636 RF_ComponentLabel_t *clabel;
2637 {
2638
2639 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2640 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2641 ((clabel->clean == RF_RAID_CLEAN) ||
2642 (clabel->clean == RF_RAID_DIRTY)) &&
2643 clabel->row >=0 &&
2644 clabel->column >= 0 &&
2645 clabel->num_rows > 0 &&
2646 clabel->num_columns > 0 &&
2647 clabel->row < clabel->num_rows &&
2648 clabel->column < clabel->num_columns &&
2649 clabel->blockSize > 0 &&
2650 clabel->numBlocks > 0) {
2651 /* label looks reasonable enough... */
2652 return(1);
2653 }
2654 return(0);
2655 }
2656
2657
2658 void
2659 print_component_label(clabel)
2660 RF_ComponentLabel_t *clabel;
2661 {
2662 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2663 clabel->row, clabel->column,
2664 clabel->num_rows, clabel->num_columns);
2665 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2666 clabel->version, clabel->serial_number,
2667 clabel->mod_counter);
2668 printf(" Clean: %s Status: %d\n",
2669 clabel->clean ? "Yes" : "No", clabel->status );
2670 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2671 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2672 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2673 (char) clabel->parityConfig, clabel->blockSize,
2674 clabel->numBlocks);
2675 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2676 printf(" Last configured as: raid%d\n", clabel->last_unit );
2677 printf(" Config order: %d\n", clabel->config_order);
2678
2679 }
2680
2681 RF_ConfigSet_t *
2682 rf_create_auto_sets(ac_list)
2683 RF_AutoConfig_t *ac_list;
2684 {
2685 RF_AutoConfig_t *ac;
2686 RF_ConfigSet_t *config_sets;
2687 RF_ConfigSet_t *cset;
2688 RF_AutoConfig_t *ac_next;
2689
2690
2691 config_sets = NULL;
2692
2693 /* Go through the AutoConfig list, and figure out which components
2694 belong to what sets. */
2695 ac = ac_list;
2696 while(ac!=NULL) {
2697 /* we're going to putz with ac->next, so save it here
2698 for use at the end of the loop */
2699 ac_next = ac->next;
2700
2701 if (config_sets == NULL) {
2702 /* will need at least this one... */
2703 config_sets = (RF_ConfigSet_t *)
2704 malloc(sizeof(RF_ConfigSet_t),
2705 M_RAIDFRAME, M_NOWAIT);
2706 if (config_sets == NULL) {
2707 panic("rf_create_auto_sets: No memory!\n");
2708 }
2709 /* this one is easy :) */
2710 config_sets->ac = ac;
2711 config_sets->next = NULL;
2712 ac->next = NULL;
2713 } else {
2714 /* which set does this component fit into? */
2715 cset = config_sets;
2716 while(cset!=NULL) {
2717 if (rf_does_it_fit(cset, ac)) {
2718 /* looks like it matches */
2719 ac->next = cset->ac;
2720 cset->ac = ac;
2721 break;
2722 }
2723 cset = cset->next;
2724 }
2725 if (cset==NULL) {
2726 /* didn't find a match above... new set..*/
2727 cset = (RF_ConfigSet_t *)
2728 malloc(sizeof(RF_ConfigSet_t),
2729 M_RAIDFRAME, M_NOWAIT);
2730 if (cset == NULL) {
2731 panic("rf_create_auto_sets: No memory!\n");
2732 }
2733 cset->ac = ac;
2734 ac->next = NULL;
2735 cset->next = config_sets;
2736 config_sets = cset;
2737 }
2738 }
2739 ac = ac_next;
2740 }
2741
2742
2743 return(config_sets);
2744 }
2745
2746 static int
2747 rf_does_it_fit(cset, ac)
2748 RF_ConfigSet_t *cset;
2749 RF_AutoConfig_t *ac;
2750 {
2751 RF_ComponentLabel_t *clabel1, *clabel2;
2752
2753 /* If this one matches the *first* one in the set, that's good
2754 enough, since the other members of the set would have been
2755 through here too... */
2756
2757 clabel1 = cset->ac->clabel;
2758 clabel2 = ac->clabel;
2759 if ((clabel1->version == clabel2->version) &&
2760 (clabel1->serial_number == clabel2->serial_number) &&
2761 (clabel1->mod_counter == clabel2->mod_counter) &&
2762 (clabel1->num_rows == clabel2->num_rows) &&
2763 (clabel1->num_columns == clabel2->num_columns) &&
2764 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2765 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2766 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2767 (clabel1->parityConfig == clabel2->parityConfig) &&
2768 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2769 (clabel1->blockSize == clabel2->blockSize) &&
2770 (clabel1->numBlocks == clabel2->numBlocks) &&
2771 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2772 (clabel1->root_partition == clabel2->root_partition) &&
2773 (clabel1->last_unit == clabel2->last_unit) &&
2774 (clabel1->config_order == clabel2->config_order)) {
2775 /* if it get's here, it almost *has* to be a match */
2776 } else {
2777 /* it's not consistent with somebody in the set..
2778 punt */
2779 return(0);
2780 }
2781 /* all was fine.. it must fit... */
2782 return(1);
2783 }
2784
2785 #if 0
2786 int have_enough();
2787 int
2788 have_enough()
2789 {
2790 /* check to see that we have enough 'live' components
2791 of this set. If so, we can configure it if necessary */
2792
2793 }
2794 #endif
2795
2796 void
2797 rf_create_configuration(ac,config,raidPtr)
2798 RF_AutoConfig_t *ac;
2799 RF_Config_t *config;
2800 RF_Raid_t *raidPtr;
2801 {
2802 RF_ComponentLabel_t *clabel;
2803
2804 clabel = ac->clabel;
2805
2806 /* 1. Fill in the common stuff */
2807 config->numRow = clabel->num_rows;
2808 config->numCol = clabel->num_columns;
2809 config->numSpare = 0; /* XXX should this be set here? */
2810 config->sectPerSU = clabel->sectPerSU;
2811 config->SUsPerPU = clabel->SUsPerPU;
2812 config->SUsPerRU = clabel->SUsPerRU;
2813 config->parityConfig = clabel->parityConfig;
2814 /* XXX... */
2815 strcpy(config->diskQueueType,"fifo");
2816 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
2817 config->layoutSpecificSize = 0; /* XXX ?? */
2818
2819 while(ac!=NULL) {
2820 /* row/col values will be in range due to the checks
2821 in reasonable_label() */
2822 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
2823 ac->devname);
2824 ac = ac->next;
2825 }
2826
2827 }
2828
2829 int
2830 rf_set_autoconfig(raidPtr, new_value)
2831 RF_Raid_t *raidPtr;
2832 int new_value;
2833 {
2834 RF_ComponentLabel_t clabel;
2835 struct vnode *vp;
2836 dev_t dev;
2837 int row, column;
2838
2839 for(row=0; row<raidPtr->numRow; row++) {
2840 for(column=0; column<raidPtr->numCol; column++) {
2841 dev = raidPtr->Disks[row][column].dev;
2842 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2843 raidread_component_label(dev, vp, &clabel);
2844 clabel.autoconfigure = new_value;
2845 raidwrite_component_label(dev, vp, &clabel);
2846 }
2847 }
2848 return(new_value);
2849 }
2850
2851 int
2852 rf_set_rootpartition(raidPtr, new_value)
2853 RF_Raid_t *raidPtr;
2854 int new_value;
2855 {
2856 RF_ComponentLabel_t clabel;
2857 struct vnode *vp;
2858 dev_t dev;
2859 int row, column;
2860
2861 for(row=0; row<raidPtr->numRow; row++) {
2862 for(column=0; column<raidPtr->numCol; column++) {
2863 dev = raidPtr->Disks[row][column].dev;
2864 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2865 raidread_component_label(dev, vp, &clabel);
2866 clabel.root_partition = new_value;
2867 raidwrite_component_label(dev, vp, &clabel);
2868 }
2869 }
2870 return(new_value);
2871 }
2872
2873 void
2874 rf_release_all_vps(cset)
2875 RF_ConfigSet_t *cset;
2876 {
2877 RF_AutoConfig_t *ac;
2878
2879 ac = cset->ac;
2880 while(ac!=NULL) {
2881 /* Close the vp, and give it back */
2882 if (ac->vp) {
2883 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
2884 vput(ac->vp);
2885 }
2886 ac = ac->next;
2887 }
2888 }
2889
2890
2891 void
2892 rf_cleanup_config_set(cset)
2893 RF_ConfigSet_t *cset;
2894 {
2895 RF_AutoConfig_t *ac;
2896 RF_AutoConfig_t *next_ac;
2897
2898 ac = cset->ac;
2899 while(ac!=NULL) {
2900 next_ac = ac->next;
2901 /* nuke the label */
2902 free(ac->clabel, M_RAIDFRAME);
2903 /* cleanup the config structure */
2904 free(ac, M_RAIDFRAME);
2905 /* "next.." */
2906 ac = next_ac;
2907 }
2908 /* and, finally, nuke the config set */
2909 free(cset, M_RAIDFRAME);
2910 }
2911
2912
2913 void
2914 raid_init_component_label(raidPtr, clabel)
2915 RF_Raid_t *raidPtr;
2916 RF_ComponentLabel_t *clabel;
2917 {
2918 /* current version number */
2919 clabel->version = RF_COMPONENT_LABEL_VERSION;
2920 clabel->serial_number = clabel->serial_number;
2921 clabel->mod_counter = raidPtr->mod_counter;
2922 clabel->num_rows = raidPtr->numRow;
2923 clabel->num_columns = raidPtr->numCol;
2924 clabel->clean = RF_RAID_DIRTY; /* not clean */
2925 clabel->status = rf_ds_optimal; /* "It's good!" */
2926
2927 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
2928 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
2929 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
2930 /* XXX not portable */
2931 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
2932 /* XXX THIS SHOULD BE SET RIGHT!! */
2933 clabel->maxOutstanding = 100;
2934 clabel->autoconfigure = 0;
2935 clabel->root_partition = 0;
2936 clabel->last_unit = raidPtr->raidid;
2937 }
2938