rf_netbsdkintf.c revision 1.53 1 /* $NetBSD: rf_netbsdkintf.c,v 1.53 2000/02/22 23:13:15 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_copyback.h"
141 #include "rf_dag.h"
142 #include "rf_dagflags.h"
143 #include "rf_diskqueue.h"
144 #include "rf_acctrace.h"
145 #include "rf_etimer.h"
146 #include "rf_general.h"
147 #include "rf_debugMem.h"
148 #include "rf_kintf.h"
149 #include "rf_options.h"
150 #include "rf_driver.h"
151 #include "rf_parityscan.h"
152 #include "rf_debugprint.h"
153 #include "rf_threadstuff.h"
154
/*
 * Verbosity knob for db1_printf(): messages are emitted only when this is
 * greater than zero, and only in kernels built with DEBUG.
 */
int rf_kdebug_level = 0;

/*
 * db1_printf((fmt, ...)) -- conditional debug printf.
 *
 * Note the doubled parentheses: the single macro argument is a complete,
 * parenthesized printf() argument list.
 *
 * Both variants expand to exactly one do { } while (0) statement.  That
 * keeps the DEBUG variant from capturing an "else" that belongs to the
 * caller, and keeps the non-DEBUG variant from leaving a stray ';' that
 * would break an unbraced if/else around the macro.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else /* DEBUG */
#define db1_printf(a) do { } while (0)
#endif /* DEBUG */
162
163 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
164
165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
166
167 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
168 * spare table */
169 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
170 * installation process */
171
172 /* prototypes */
173 static void KernelWakeupFunc(struct buf * bp);
174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
175 dev_t dev, RF_SectorNum_t startSect,
176 RF_SectorCount_t numSect, caddr_t buf,
177 void (*cbFunc) (struct buf *), void *cbArg,
178 int logBytesPerSector, struct proc * b_proc);
179 static int raidinit __P((dev_t, RF_Raid_t *, int));
180
181 void raidattach __P((int));
182 int raidsize __P((dev_t));
183 int raidopen __P((dev_t, int, int, struct proc *));
184 int raidclose __P((dev_t, int, int, struct proc *));
185 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
186 int raidwrite __P((dev_t, struct uio *, int));
187 int raidread __P((dev_t, struct uio *, int));
188 void raidstrategy __P((struct buf *));
189 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
190
191 /*
192 * Pilfered from ccd.c
193 */
194
195 struct raidbuf {
196 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
197 struct buf *rf_obp; /* ptr. to original I/O buf */
198 int rf_flags; /* misc. flags */
199 RF_DiskQueueData_t *req;/* the request that this was part of.. */
200 };
201
202
203 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
204 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
205
206 /* XXX Not sure if the following should be replacing the raidPtrs above,
207 or if it should be used in conjunction with that...
208 Note: Don't use sc_dev until the raidinit(0,_,_) call in
209 rf_auto_config_set() actually passes in a real dev_t! */
210
211 struct raid_softc {
212 int sc_flags; /* flags */
213 int sc_cflags; /* configuration flags */
214 size_t sc_size; /* size of the raid device */
215 dev_t sc_dev; /* our device.. */
216 char sc_xname[20]; /* XXX external name */
217 struct disk sc_dkdev; /* generic disk device info */
218 struct pool sc_cbufpool; /* component buffer pool */
219 struct buf_queue buf_queue; /* used for the device queue */
220 };
221 /* sc_flags */
222 #define RAIDF_INITED 0x01 /* unit has been initialized */
223 #define RAIDF_WLABEL 0x02 /* label area is writable */
224 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
225 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
226 #define RAIDF_LOCKED 0x80 /* unit is locked */
227
228 #define raidunit(x) DISKUNIT(x)
229 int numraid = 0;
230
231 /*
232 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
233 * Be aware that large numbers can allow the driver to consume a lot of
234 * kernel memory, especially on writes, and in degraded mode reads.
235 *
236 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
237 * a single 64K write will typically require 64K for the old data,
238 * 64K for the old parity, and 64K for the new parity, for a total
239 * of 192K (if the parity buffer is not re-used immediately).
240 * Even if it is used immediately, that's still 128K, which when multiplied
241 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
242 *
243 * Now in degraded mode, for example, a 64K read on the above setup may
244 * require data reconstruction, which will require *all* of the 4 remaining
245 * disks to participate -- 4 * 32K/disk == 128K again.
246 */
247
/*
 * Default number of simultaneous I/Os allowed per RAID device.  The
 * #ifndef guard lets a build define its own value first.
 */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING 6
#endif

/*
 * Device number of the raw partition on the same major/unit as `dev'.
 */
#define RAIDLABELDEV(dev) \
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
254
255 /* declared here, and made public, for the benefit of KVM stuff.. */
256 struct raid_softc *raid_softc;
257
258 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
259 struct disklabel *));
260 static void raidgetdisklabel __P((dev_t));
261 static void raidmakedisklabel __P((struct raid_softc *));
262
263 static int raidlock __P((struct raid_softc *));
264 static void raidunlock __P((struct raid_softc *));
265
266 static void rf_markalldirty __P((RF_Raid_t *));
267 void rf_mountroot_hook __P((struct device *));
268
269 struct device *raidrootdev;
270 struct cfdata cf_raidrootdev;
271 struct cfdriver cfdrv;
272 /* XXX these should be moved up */
273 #include "rf_configure.h"
274 #include <sys/reboot.h>
275
276 void rf_ReconThread __P((struct rf_recon_req *));
277 /* XXX what I want is: */
278 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
279 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
280 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
281 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
282 void rf_buildroothack __P((void *));
283
284 RF_AutoConfig_t *rf_find_raid_components __P((void));
285 void print_component_label __P((RF_ComponentLabel_t *));
286 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
287 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
288 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
289 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
290 RF_Raid_t *));
291 int rf_set_autoconfig __P((RF_Raid_t *, int));
292 int rf_set_rootpartition __P((RF_Raid_t *, int));
293 void rf_release_all_vps __P((RF_ConfigSet_t *));
294 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
295 int rf_have_enough_components __P((RF_ConfigSet_t *));
296 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
297
/*
 * Autoconfiguration switch (debugging, mostly).  While this is 0,
 * raidattach() does not look for RAID components and no autoconfiguration
 * takes place.
 */
static int raidautoconfig = 0;

/* XXX ugly hack. */
const char *raid_rooty = "raid0";

/* Pointed at a raidrootdev[] entry by rf_buildroothack(). */
extern struct device *booted_device;
303
304 void
305 raidattach(num)
306 int num;
307 {
308 int raidID;
309 int i, rc;
310 RF_AutoConfig_t *ac_list; /* autoconfig list */
311 RF_ConfigSet_t *config_sets;
312
313 #ifdef DEBUG
314 printf("raidattach: Asked for %d units\n", num);
315 #endif
316
317 if (num <= 0) {
318 #ifdef DIAGNOSTIC
319 panic("raidattach: count <= 0");
320 #endif
321 return;
322 }
323 /* This is where all the initialization stuff gets done. */
324
325 numraid = num;
326
327 /* Make some space for requested number of units... */
328
329 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
330 if (raidPtrs == NULL) {
331 panic("raidPtrs is NULL!!\n");
332 }
333
334 rc = rf_mutex_init(&rf_sparet_wait_mutex);
335 if (rc) {
336 RF_PANIC();
337 }
338
339 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
340
341 for (i = 0; i < numraid; i++)
342 raidPtrs[i] = NULL;
343 rc = rf_BootRaidframe();
344 if (rc == 0)
345 printf("Kernelized RAIDframe activated\n");
346 else
347 panic("Serious error booting RAID!!\n");
348
349 /* put together some datastructures like the CCD device does.. This
350 * lets us lock the device and what-not when it gets opened. */
351
352 raid_softc = (struct raid_softc *)
353 malloc(num * sizeof(struct raid_softc),
354 M_RAIDFRAME, M_NOWAIT);
355 if (raid_softc == NULL) {
356 printf("WARNING: no memory for RAIDframe driver\n");
357 return;
358 }
359
360 bzero(raid_softc, num * sizeof(struct raid_softc));
361
362 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
363 M_RAIDFRAME, M_NOWAIT);
364 if (raidrootdev == NULL) {
365 panic("No memory for RAIDframe driver!!?!?!\n");
366 }
367
368 for (raidID = 0; raidID < num; raidID++) {
369 BUFQ_INIT(&raid_softc[raidID].buf_queue);
370
371 raidrootdev[raidID].dv_class = DV_DISK;
372 raidrootdev[raidID].dv_cfdata = NULL;
373 raidrootdev[raidID].dv_unit = raidID;
374 raidrootdev[raidID].dv_parent = NULL;
375 raidrootdev[raidID].dv_flags = 0;
376 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
377
378 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
379 (RF_Raid_t *));
380 if (raidPtrs[raidID] == NULL) {
381 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
382 numraid = raidID;
383 return;
384 }
385 }
386
387 if (raidautoconfig) {
388 /* 1. locate all RAID components on the system */
389
390 #if DEBUG
391 printf("Searching for raid components...\n");
392 #endif
393 ac_list = rf_find_raid_components();
394
395 /* 2. sort them into their respective sets */
396
397 config_sets = rf_create_auto_sets(ac_list);
398
399 /* 3. evaluate each set and configure the valid ones
400 This gets done in rf_buildroothack() */
401
402 /* schedule the creation of the thread to do the
403 "/ on RAID" stuff */
404
405 kthread_create(rf_buildroothack,config_sets);
406
407 /* 4. make sure we get our mud.. I mean root.. hooks in.. */
408 /* XXXX pick raid0 for now... and this should be only done
409 if we find something that's bootable!!! */
410 #if 0
411 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
412 #endif
413 if (boothowto & RB_ASKNAME) {
414 /* We don't auto-config... */
415 } else {
416 /* They didn't ask, and we found something bootable... */
417 /* XXX pretend for now.. */
418 #if 0
419 booted_device = &raidrootdev[0];
420 #endif
421 }
422 }
423
424 }
425
426 void
427 rf_buildroothack(arg)
428 void *arg;
429 {
430 RF_ConfigSet_t *config_sets = arg;
431 RF_ConfigSet_t *cset;
432 RF_ConfigSet_t *next_cset;
433 int retcode;
434 int raidID;
435 int rootID;
436 int num_root;
437
438 num_root = 0;
439 cset = config_sets;
440 while(cset != NULL ) {
441 next_cset = cset->next;
442 if (rf_have_enough_components(cset) &&
443 cset->ac->clabel->autoconfigure==1) {
444 retcode = rf_auto_config_set(cset,&raidID);
445 if (!retcode) {
446 if (cset->rootable) {
447 rootID = raidID;
448 num_root++;
449 }
450 } else {
451 /* The autoconfig didn't work :( */
452 #if DEBUG
453 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
454 #endif
455 rf_release_all_vps(cset);
456 #if DEBUG
457 printf("Done cleanup\n");
458 #endif
459 }
460 } else {
461 /* we're not autoconfiguring this set...
462 release the associated resources */
463 #if DEBUG
464 printf("Releasing vp's\n");
465 #endif
466 rf_release_all_vps(cset);
467 #if DEBUG
468 printf("Done.\n");
469 #endif
470 }
471 /* cleanup */
472 #if DEBUG
473 printf("Cleaning up config set\n");
474 #endif
475 rf_cleanup_config_set(cset);
476 #if DEBUG
477 printf("Done cleanup\n");
478 #endif
479 cset = next_cset;
480 }
481 if (boothowto & RB_ASKNAME) {
482 /* We don't auto-config... */
483 } else {
484 /* They didn't ask, and we found something bootable... */
485 /* XXX pretend for now.. */
486 if (num_root == 1) {
487 #if 1
488 booted_device = &raidrootdev[rootID];
489 #endif
490 } else if (num_root > 1) {
491 /* we can't guess.. require the user to answer... */
492 boothowto |= RB_ASKNAME;
493 }
494 }
495 }
496
497
498 int
499 raidsize(dev)
500 dev_t dev;
501 {
502 struct raid_softc *rs;
503 struct disklabel *lp;
504 int part, unit, omask, size;
505
506 unit = raidunit(dev);
507 if (unit >= numraid)
508 return (-1);
509 rs = &raid_softc[unit];
510
511 if ((rs->sc_flags & RAIDF_INITED) == 0)
512 return (-1);
513
514 part = DISKPART(dev);
515 omask = rs->sc_dkdev.dk_openmask & (1 << part);
516 lp = rs->sc_dkdev.dk_label;
517
518 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
519 return (-1);
520
521 if (lp->d_partitions[part].p_fstype != FS_SWAP)
522 size = -1;
523 else
524 size = lp->d_partitions[part].p_size *
525 (lp->d_secsize / DEV_BSIZE);
526
527 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
528 return (-1);
529
530 return (size);
531
532 }
533
534 int
535 raiddump(dev, blkno, va, size)
536 dev_t dev;
537 daddr_t blkno;
538 caddr_t va;
539 size_t size;
540 {
541 /* Not implemented. */
542 return ENXIO;
543 }
544 /* ARGSUSED */
545 int
546 raidopen(dev, flags, fmt, p)
547 dev_t dev;
548 int flags, fmt;
549 struct proc *p;
550 {
551 int unit = raidunit(dev);
552 struct raid_softc *rs;
553 struct disklabel *lp;
554 int part, pmask;
555 int error = 0;
556
557 if (unit >= numraid)
558 return (ENXIO);
559 rs = &raid_softc[unit];
560
561 if ((error = raidlock(rs)) != 0)
562 return (error);
563 lp = rs->sc_dkdev.dk_label;
564
565 part = DISKPART(dev);
566 pmask = (1 << part);
567
568 db1_printf(("Opening raid device number: %d partition: %d\n",
569 unit, part));
570
571
572 if ((rs->sc_flags & RAIDF_INITED) &&
573 (rs->sc_dkdev.dk_openmask == 0))
574 raidgetdisklabel(dev);
575
576 /* make sure that this partition exists */
577
578 if (part != RAW_PART) {
579 db1_printf(("Not a raw partition..\n"));
580 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
581 ((part >= lp->d_npartitions) ||
582 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
583 error = ENXIO;
584 raidunlock(rs);
585 db1_printf(("Bailing out...\n"));
586 return (error);
587 }
588 }
589 /* Prevent this unit from being unconfigured while open. */
590 switch (fmt) {
591 case S_IFCHR:
592 rs->sc_dkdev.dk_copenmask |= pmask;
593 break;
594
595 case S_IFBLK:
596 rs->sc_dkdev.dk_bopenmask |= pmask;
597 break;
598 }
599
600 if ((rs->sc_dkdev.dk_openmask == 0) &&
601 ((rs->sc_flags & RAIDF_INITED) != 0)) {
602 /* First one... mark things as dirty... Note that we *MUST*
603 have done a configure before this. I DO NOT WANT TO BE
604 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
605 THAT THEY BELONG TOGETHER!!!!! */
606 /* XXX should check to see if we're only open for reading
607 here... If so, we needn't do this, but then need some
608 other way of keeping track of what's happened.. */
609
610 rf_markalldirty( raidPtrs[unit] );
611 }
612
613
614 rs->sc_dkdev.dk_openmask =
615 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
616
617 raidunlock(rs);
618
619 return (error);
620
621
622 }
623 /* ARGSUSED */
624 int
625 raidclose(dev, flags, fmt, p)
626 dev_t dev;
627 int flags, fmt;
628 struct proc *p;
629 {
630 int unit = raidunit(dev);
631 struct raid_softc *rs;
632 int error = 0;
633 int part;
634
635 if (unit >= numraid)
636 return (ENXIO);
637 rs = &raid_softc[unit];
638
639 if ((error = raidlock(rs)) != 0)
640 return (error);
641
642 part = DISKPART(dev);
643
644 /* ...that much closer to allowing unconfiguration... */
645 switch (fmt) {
646 case S_IFCHR:
647 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
648 break;
649
650 case S_IFBLK:
651 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
652 break;
653 }
654 rs->sc_dkdev.dk_openmask =
655 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
656
657 if ((rs->sc_dkdev.dk_openmask == 0) &&
658 ((rs->sc_flags & RAIDF_INITED) != 0)) {
659 /* Last one... device is not unconfigured yet.
660 Device shutdown has taken care of setting the
661 clean bits if RAIDF_INITED is not set
662 mark things as clean... */
663 rf_update_component_labels( raidPtrs[unit] );
664 }
665
666 raidunlock(rs);
667 return (0);
668
669 }
670
671 void
672 raidstrategy(bp)
673 register struct buf *bp;
674 {
675 register int s;
676
677 unsigned int raidID = raidunit(bp->b_dev);
678 RF_Raid_t *raidPtr;
679 struct raid_softc *rs = &raid_softc[raidID];
680 struct disklabel *lp;
681 int wlabel;
682
683 if ((rs->sc_flags & RAIDF_INITED) ==0) {
684 bp->b_error = ENXIO;
685 bp->b_flags = B_ERROR;
686 bp->b_resid = bp->b_bcount;
687 biodone(bp);
688 return;
689 }
690 if (raidID >= numraid || !raidPtrs[raidID]) {
691 bp->b_error = ENODEV;
692 bp->b_flags |= B_ERROR;
693 bp->b_resid = bp->b_bcount;
694 biodone(bp);
695 return;
696 }
697 raidPtr = raidPtrs[raidID];
698 if (!raidPtr->valid) {
699 bp->b_error = ENODEV;
700 bp->b_flags |= B_ERROR;
701 bp->b_resid = bp->b_bcount;
702 biodone(bp);
703 return;
704 }
705 if (bp->b_bcount == 0) {
706 db1_printf(("b_bcount is zero..\n"));
707 biodone(bp);
708 return;
709 }
710 lp = rs->sc_dkdev.dk_label;
711
712 /*
713 * Do bounds checking and adjust transfer. If there's an
714 * error, the bounds check will flag that for us.
715 */
716
717 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
718 if (DISKPART(bp->b_dev) != RAW_PART)
719 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
720 db1_printf(("Bounds check failed!!:%d %d\n",
721 (int) bp->b_blkno, (int) wlabel));
722 biodone(bp);
723 return;
724 }
725 s = splbio();
726
727 bp->b_resid = 0;
728
729 /* stuff it onto our queue */
730 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
731
732 raidstart(raidPtrs[raidID]);
733
734 splx(s);
735 }
736 /* ARGSUSED */
737 int
738 raidread(dev, uio, flags)
739 dev_t dev;
740 struct uio *uio;
741 int flags;
742 {
743 int unit = raidunit(dev);
744 struct raid_softc *rs;
745 int part;
746
747 if (unit >= numraid)
748 return (ENXIO);
749 rs = &raid_softc[unit];
750
751 if ((rs->sc_flags & RAIDF_INITED) == 0)
752 return (ENXIO);
753 part = DISKPART(dev);
754
755 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
756
757 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
758
759 }
760 /* ARGSUSED */
761 int
762 raidwrite(dev, uio, flags)
763 dev_t dev;
764 struct uio *uio;
765 int flags;
766 {
767 int unit = raidunit(dev);
768 struct raid_softc *rs;
769
770 if (unit >= numraid)
771 return (ENXIO);
772 rs = &raid_softc[unit];
773
774 if ((rs->sc_flags & RAIDF_INITED) == 0)
775 return (ENXIO);
776 db1_printf(("raidwrite\n"));
777 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
778
779 }
780
781 int
782 raidioctl(dev, cmd, data, flag, p)
783 dev_t dev;
784 u_long cmd;
785 caddr_t data;
786 int flag;
787 struct proc *p;
788 {
789 int unit = raidunit(dev);
790 int error = 0;
791 int part, pmask;
792 struct raid_softc *rs;
793 RF_Config_t *k_cfg, *u_cfg;
794 RF_Raid_t *raidPtr;
795 RF_RaidDisk_t *diskPtr;
796 RF_AccTotals_t *totals;
797 RF_DeviceConfig_t *d_cfg, **ucfgp;
798 u_char *specific_buf;
799 int retcode = 0;
800 int row;
801 int column;
802 struct rf_recon_req *rrcopy, *rr;
803 RF_ComponentLabel_t *clabel;
804 RF_ComponentLabel_t ci_label;
805 RF_ComponentLabel_t **clabel_ptr;
806 RF_SingleComponent_t *sparePtr,*componentPtr;
807 RF_SingleComponent_t hot_spare;
808 RF_SingleComponent_t component;
809 int i, j, d;
810
811 if (unit >= numraid)
812 return (ENXIO);
813 rs = &raid_softc[unit];
814 raidPtr = raidPtrs[unit];
815
816 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
817 (int) DISKPART(dev), (int) unit, (int) cmd));
818
819 /* Must be open for writes for these commands... */
820 switch (cmd) {
821 case DIOCSDINFO:
822 case DIOCWDINFO:
823 case DIOCWLABEL:
824 if ((flag & FWRITE) == 0)
825 return (EBADF);
826 }
827
828 /* Must be initialized for these... */
829 switch (cmd) {
830 case DIOCGDINFO:
831 case DIOCSDINFO:
832 case DIOCWDINFO:
833 case DIOCGPART:
834 case DIOCWLABEL:
835 case DIOCGDEFLABEL:
836 case RAIDFRAME_SHUTDOWN:
837 case RAIDFRAME_REWRITEPARITY:
838 case RAIDFRAME_GET_INFO:
839 case RAIDFRAME_RESET_ACCTOTALS:
840 case RAIDFRAME_GET_ACCTOTALS:
841 case RAIDFRAME_KEEP_ACCTOTALS:
842 case RAIDFRAME_GET_SIZE:
843 case RAIDFRAME_FAIL_DISK:
844 case RAIDFRAME_COPYBACK:
845 case RAIDFRAME_CHECK_RECON_STATUS:
846 case RAIDFRAME_GET_COMPONENT_LABEL:
847 case RAIDFRAME_SET_COMPONENT_LABEL:
848 case RAIDFRAME_ADD_HOT_SPARE:
849 case RAIDFRAME_REMOVE_HOT_SPARE:
850 case RAIDFRAME_INIT_LABELS:
851 case RAIDFRAME_REBUILD_IN_PLACE:
852 case RAIDFRAME_CHECK_PARITY:
853 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
854 case RAIDFRAME_CHECK_COPYBACK_STATUS:
855 case RAIDFRAME_SET_AUTOCONFIG:
856 case RAIDFRAME_SET_ROOT:
857 if ((rs->sc_flags & RAIDF_INITED) == 0)
858 return (ENXIO);
859 }
860
861 switch (cmd) {
862
863 /* configure the system */
864 case RAIDFRAME_CONFIGURE:
865
866 if (raidPtr->valid) {
867 /* There is a valid RAID set running on this unit! */
868 printf("raid%d: Device already configured!\n",unit);
869 }
870
871 /* copy-in the configuration information */
872 /* data points to a pointer to the configuration structure */
873
874 u_cfg = *((RF_Config_t **) data);
875 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
876 if (k_cfg == NULL) {
877 return (ENOMEM);
878 }
879 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
880 sizeof(RF_Config_t));
881 if (retcode) {
882 RF_Free(k_cfg, sizeof(RF_Config_t));
883 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
884 retcode));
885 return (retcode);
886 }
887 /* allocate a buffer for the layout-specific data, and copy it
888 * in */
889 if (k_cfg->layoutSpecificSize) {
890 if (k_cfg->layoutSpecificSize > 10000) {
891 /* sanity check */
892 RF_Free(k_cfg, sizeof(RF_Config_t));
893 return (EINVAL);
894 }
895 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
896 (u_char *));
897 if (specific_buf == NULL) {
898 RF_Free(k_cfg, sizeof(RF_Config_t));
899 return (ENOMEM);
900 }
901 retcode = copyin(k_cfg->layoutSpecific,
902 (caddr_t) specific_buf,
903 k_cfg->layoutSpecificSize);
904 if (retcode) {
905 RF_Free(k_cfg, sizeof(RF_Config_t));
906 RF_Free(specific_buf,
907 k_cfg->layoutSpecificSize);
908 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
909 retcode));
910 return (retcode);
911 }
912 } else
913 specific_buf = NULL;
914 k_cfg->layoutSpecific = specific_buf;
915
916 /* should do some kind of sanity check on the configuration.
917 * Store the sum of all the bytes in the last byte? */
918
919 /* configure the system */
920
921 /*
922 * Clear the entire RAID descriptor, just to make sure
923 * there is no stale data left in the case of a
924 * reconfiguration
925 */
926 bzero((char *) raidPtr, sizeof(RF_Raid_t));
927 raidPtr->raidid = unit;
928
929 retcode = rf_Configure(raidPtr, k_cfg, NULL);
930
931 if (retcode == 0) {
932
933 /* allow this many simultaneous IO's to
934 this RAID device */
935 raidPtr->openings = RAIDOUTSTANDING;
936
937 retcode = raidinit(dev, raidPtr, unit);
938 rf_markalldirty( raidPtr );
939 }
940 /* free the buffers. No return code here. */
941 if (k_cfg->layoutSpecificSize) {
942 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
943 }
944 RF_Free(k_cfg, sizeof(RF_Config_t));
945
946 return (retcode);
947
948 /* shutdown the system */
949 case RAIDFRAME_SHUTDOWN:
950
951 if ((error = raidlock(rs)) != 0)
952 return (error);
953
954 /*
955 * If somebody has a partition mounted, we shouldn't
956 * shutdown.
957 */
958
959 part = DISKPART(dev);
960 pmask = (1 << part);
961 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
962 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
963 (rs->sc_dkdev.dk_copenmask & pmask))) {
964 raidunlock(rs);
965 return (EBUSY);
966 }
967
968 retcode = rf_Shutdown(raidPtr);
969
970 pool_destroy(&rs->sc_cbufpool);
971
972 /* It's no longer initialized... */
973 rs->sc_flags &= ~RAIDF_INITED;
974
975 /* Detach the disk. */
976 disk_detach(&rs->sc_dkdev);
977
978 raidunlock(rs);
979
980 return (retcode);
981 case RAIDFRAME_GET_COMPONENT_LABEL:
982 clabel_ptr = (RF_ComponentLabel_t **) data;
983 /* need to read the component label for the disk indicated
984 by row,column in clabel */
985
986 /* For practice, let's get it directly fromdisk, rather
987 than from the in-core copy */
988 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
989 (RF_ComponentLabel_t *));
990 if (clabel == NULL)
991 return (ENOMEM);
992
993 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
994
995 retcode = copyin( *clabel_ptr, clabel,
996 sizeof(RF_ComponentLabel_t));
997
998 if (retcode) {
999 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1000 return(retcode);
1001 }
1002
1003 row = clabel->row;
1004 column = clabel->column;
1005
1006 if ((row < 0) || (row >= raidPtr->numRow) ||
1007 (column < 0) || (column >= raidPtr->numCol)) {
1008 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1009 return(EINVAL);
1010 }
1011
1012 raidread_component_label(raidPtr->Disks[row][column].dev,
1013 raidPtr->raid_cinfo[row][column].ci_vp,
1014 clabel );
1015
1016 retcode = copyout((caddr_t) clabel,
1017 (caddr_t) *clabel_ptr,
1018 sizeof(RF_ComponentLabel_t));
1019 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1020 return (retcode);
1021
1022 case RAIDFRAME_SET_COMPONENT_LABEL:
1023 clabel = (RF_ComponentLabel_t *) data;
1024
1025 /* XXX check the label for valid stuff... */
1026 /* Note that some things *should not* get modified --
1027 the user should be re-initing the labels instead of
1028 trying to patch things.
1029 */
1030
1031 printf("Got component label:\n");
1032 printf("Version: %d\n",clabel->version);
1033 printf("Serial Number: %d\n",clabel->serial_number);
1034 printf("Mod counter: %d\n",clabel->mod_counter);
1035 printf("Row: %d\n", clabel->row);
1036 printf("Column: %d\n", clabel->column);
1037 printf("Num Rows: %d\n", clabel->num_rows);
1038 printf("Num Columns: %d\n", clabel->num_columns);
1039 printf("Clean: %d\n", clabel->clean);
1040 printf("Status: %d\n", clabel->status);
1041
1042 row = clabel->row;
1043 column = clabel->column;
1044
1045 if ((row < 0) || (row >= raidPtr->numRow) ||
1046 (column < 0) || (column >= raidPtr->numCol)) {
1047 return(EINVAL);
1048 }
1049
1050 /* XXX this isn't allowed to do anything for now :-) */
1051
1052 /* XXX and before it is, we need to fill in the rest
1053 of the fields!?!?!?! */
1054 #if 0
1055 raidwrite_component_label(
1056 raidPtr->Disks[row][column].dev,
1057 raidPtr->raid_cinfo[row][column].ci_vp,
1058 clabel );
1059 #endif
1060 return (0);
1061
1062 case RAIDFRAME_INIT_LABELS:
1063 clabel = (RF_ComponentLabel_t *) data;
1064 /*
1065 we only want the serial number from
1066 the above. We get all the rest of the information
1067 from the config that was used to create this RAID
1068 set.
1069 */
1070
1071 raidPtr->serial_number = clabel->serial_number;
1072
1073 raid_init_component_label(raidPtr, &ci_label);
1074 ci_label.serial_number = clabel->serial_number;
1075
1076 for(row=0;row<raidPtr->numRow;row++) {
1077 ci_label.row = row;
1078 for(column=0;column<raidPtr->numCol;column++) {
1079 diskPtr = &raidPtr->Disks[row][column];
1080 ci_label.blockSize = diskPtr->blockSize;
1081 ci_label.numBlocks = diskPtr->numBlocks;
1082 ci_label.partitionSize = diskPtr->partitionSize;
1083 ci_label.column = column;
1084 raidwrite_component_label(
1085 raidPtr->Disks[row][column].dev,
1086 raidPtr->raid_cinfo[row][column].ci_vp,
1087 &ci_label );
1088 }
1089 }
1090
1091 return (retcode);
1092 case RAIDFRAME_SET_AUTOCONFIG:
1093 d = rf_set_autoconfig(raidPtr, *data);
1094 printf("New autoconfig value is: %d\n", d);
1095 *data = d;
1096 return (retcode);
1097
1098 case RAIDFRAME_SET_ROOT:
1099 d = rf_set_rootpartition(raidPtr, *data);
1100 printf("New rootpartition value is: %d\n", d);
1101 *data = d;
1102 return (retcode);
1103
1104 /* initialize all parity */
1105 case RAIDFRAME_REWRITEPARITY:
1106
1107 if (raidPtr->Layout.map->faultsTolerated == 0) {
1108 /* Parity for RAID 0 is trivially correct */
1109 raidPtr->parity_good = RF_RAID_CLEAN;
1110 return(0);
1111 }
1112
1113 if (raidPtr->parity_rewrite_in_progress == 1) {
1114 /* Re-write is already in progress! */
1115 return(EINVAL);
1116 }
1117
1118 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1119 rf_RewriteParityThread,
1120 raidPtr,"raid_parity");
1121 return (retcode);
1122
1123
1124 case RAIDFRAME_ADD_HOT_SPARE:
1125 sparePtr = (RF_SingleComponent_t *) data;
1126 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1127 printf("Adding spare\n");
1128 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1129 return(retcode);
1130
1131 case RAIDFRAME_REMOVE_HOT_SPARE:
1132 return(retcode);
1133
1134 case RAIDFRAME_REBUILD_IN_PLACE:
1135
1136 if (raidPtr->Layout.map->faultsTolerated == 0) {
1137 /* Can't do this on a RAID 0!! */
1138 return(EINVAL);
1139 }
1140
1141 if (raidPtr->recon_in_progress == 1) {
1142 /* a reconstruct is already in progress! */
1143 return(EINVAL);
1144 }
1145
1146 componentPtr = (RF_SingleComponent_t *) data;
1147 memcpy( &component, componentPtr,
1148 sizeof(RF_SingleComponent_t));
1149 row = component.row;
1150 column = component.column;
1151 printf("Rebuild: %d %d\n",row, column);
1152 if ((row < 0) || (row >= raidPtr->numRow) ||
1153 (column < 0) || (column >= raidPtr->numCol)) {
1154 return(EINVAL);
1155 }
1156
1157 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1158 if (rrcopy == NULL)
1159 return(ENOMEM);
1160
1161 rrcopy->raidPtr = (void *) raidPtr;
1162 rrcopy->row = row;
1163 rrcopy->col = column;
1164
1165 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1166 rf_ReconstructInPlaceThread,
1167 rrcopy,"raid_reconip");
1168 return(retcode);
1169
1170 case RAIDFRAME_GET_INFO:
1171 if (!raidPtr->valid)
1172 return (ENODEV);
1173 ucfgp = (RF_DeviceConfig_t **) data;
1174 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1175 (RF_DeviceConfig_t *));
1176 if (d_cfg == NULL)
1177 return (ENOMEM);
1178 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1179 d_cfg->rows = raidPtr->numRow;
1180 d_cfg->cols = raidPtr->numCol;
1181 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1182 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1183 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1184 return (ENOMEM);
1185 }
1186 d_cfg->nspares = raidPtr->numSpare;
1187 if (d_cfg->nspares >= RF_MAX_DISKS) {
1188 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1189 return (ENOMEM);
1190 }
1191 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1192 d = 0;
1193 for (i = 0; i < d_cfg->rows; i++) {
1194 for (j = 0; j < d_cfg->cols; j++) {
1195 d_cfg->devs[d] = raidPtr->Disks[i][j];
1196 d++;
1197 }
1198 }
1199 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1200 d_cfg->spares[i] = raidPtr->Disks[0][j];
1201 }
1202 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1203 sizeof(RF_DeviceConfig_t));
1204 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1205
1206 return (retcode);
1207
1208 case RAIDFRAME_CHECK_PARITY:
1209 *(int *) data = raidPtr->parity_good;
1210 return (0);
1211
1212 case RAIDFRAME_RESET_ACCTOTALS:
1213 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1214 return (0);
1215
1216 case RAIDFRAME_GET_ACCTOTALS:
1217 totals = (RF_AccTotals_t *) data;
1218 *totals = raidPtr->acc_totals;
1219 return (0);
1220
1221 case RAIDFRAME_KEEP_ACCTOTALS:
1222 raidPtr->keep_acc_totals = *(int *)data;
1223 return (0);
1224
1225 case RAIDFRAME_GET_SIZE:
1226 *(int *) data = raidPtr->totalSectors;
1227 return (0);
1228
1229 /* fail a disk & optionally start reconstruction */
1230 case RAIDFRAME_FAIL_DISK:
1231
1232 if (raidPtr->Layout.map->faultsTolerated == 0) {
1233 /* Can't do this on a RAID 0!! */
1234 return(EINVAL);
1235 }
1236
1237 rr = (struct rf_recon_req *) data;
1238
1239 if (rr->row < 0 || rr->row >= raidPtr->numRow
1240 || rr->col < 0 || rr->col >= raidPtr->numCol)
1241 return (EINVAL);
1242
1243 printf("raid%d: Failing the disk: row: %d col: %d\n",
1244 unit, rr->row, rr->col);
1245
1246 /* make a copy of the recon request so that we don't rely on
1247 * the user's buffer */
1248 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1249 if (rrcopy == NULL)
1250 return(ENOMEM);
1251 bcopy(rr, rrcopy, sizeof(*rr));
1252 rrcopy->raidPtr = (void *) raidPtr;
1253
1254 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1255 rf_ReconThread,
1256 rrcopy,"raid_recon");
1257 return (0);
1258
1259 /* invoke a copyback operation after recon on whatever disk
1260 * needs it, if any */
1261 case RAIDFRAME_COPYBACK:
1262
1263 if (raidPtr->Layout.map->faultsTolerated == 0) {
1264 /* This makes no sense on a RAID 0!! */
1265 return(EINVAL);
1266 }
1267
1268 if (raidPtr->copyback_in_progress == 1) {
1269 /* Copyback is already in progress! */
1270 return(EINVAL);
1271 }
1272
1273 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1274 rf_CopybackThread,
1275 raidPtr,"raid_copyback");
1276 return (retcode);
1277
1278 /* return the percentage completion of reconstruction */
1279 case RAIDFRAME_CHECK_RECON_STATUS:
1280 if (raidPtr->Layout.map->faultsTolerated == 0) {
1281 /* This makes no sense on a RAID 0 */
1282 return(EINVAL);
1283 }
1284 row = 0; /* XXX we only consider a single row... */
1285 if (raidPtr->status[row] != rf_rs_reconstructing)
1286 *(int *) data = 100;
1287 else
1288 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1289 return (0);
1290
1291 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1292 if (raidPtr->Layout.map->faultsTolerated == 0) {
1293 /* This makes no sense on a RAID 0 */
1294 return(EINVAL);
1295 }
1296 if (raidPtr->parity_rewrite_in_progress == 1) {
1297 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1298 } else {
1299 *(int *) data = 100;
1300 }
1301 return (0);
1302
1303 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1304 if (raidPtr->Layout.map->faultsTolerated == 0) {
1305 /* This makes no sense on a RAID 0 */
1306 return(EINVAL);
1307 }
1308 if (raidPtr->copyback_in_progress == 1) {
1309 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1310 raidPtr->Layout.numStripe;
1311 } else {
1312 *(int *) data = 100;
1313 }
1314 return (0);
1315
1316
1317 /* the sparetable daemon calls this to wait for the kernel to
1318 * need a spare table. this ioctl does not return until a
1319 * spare table is needed. XXX -- calling mpsleep here in the
1320 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1321 * -- I should either compute the spare table in the kernel,
1322 * or have a different -- XXX XXX -- interface (a different
1323 * character device) for delivering the table -- XXX */
1324 #if 0
1325 case RAIDFRAME_SPARET_WAIT:
1326 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1327 while (!rf_sparet_wait_queue)
1328 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1329 waitreq = rf_sparet_wait_queue;
1330 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1331 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1332
1333 /* structure assignment */
1334 *((RF_SparetWait_t *) data) = *waitreq;
1335
1336 RF_Free(waitreq, sizeof(*waitreq));
1337 return (0);
1338
1339 /* wakes up a process waiting on SPARET_WAIT and puts an error
1340 * code in it that will cause the dameon to exit */
1341 case RAIDFRAME_ABORT_SPARET_WAIT:
1342 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1343 waitreq->fcol = -1;
1344 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1345 waitreq->next = rf_sparet_wait_queue;
1346 rf_sparet_wait_queue = waitreq;
1347 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1348 wakeup(&rf_sparet_wait_queue);
1349 return (0);
1350
1351 /* used by the spare table daemon to deliver a spare table
1352 * into the kernel */
1353 case RAIDFRAME_SEND_SPARET:
1354
1355 /* install the spare table */
1356 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1357
1358 /* respond to the requestor. the return status of the spare
1359 * table installation is passed in the "fcol" field */
1360 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1361 waitreq->fcol = retcode;
1362 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1363 waitreq->next = rf_sparet_resp_queue;
1364 rf_sparet_resp_queue = waitreq;
1365 wakeup(&rf_sparet_resp_queue);
1366 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1367
1368 return (retcode);
1369 #endif
1370
1371 default:
1372 break; /* fall through to the os-specific code below */
1373
1374 }
1375
1376 if (!raidPtr->valid)
1377 return (EINVAL);
1378
1379 /*
1380 * Add support for "regular" device ioctls here.
1381 */
1382
1383 switch (cmd) {
1384 case DIOCGDINFO:
1385 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1386 break;
1387
1388 case DIOCGPART:
1389 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1390 ((struct partinfo *) data)->part =
1391 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1392 break;
1393
1394 case DIOCWDINFO:
1395 case DIOCSDINFO:
1396 if ((error = raidlock(rs)) != 0)
1397 return (error);
1398
1399 rs->sc_flags |= RAIDF_LABELLING;
1400
1401 error = setdisklabel(rs->sc_dkdev.dk_label,
1402 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1403 if (error == 0) {
1404 if (cmd == DIOCWDINFO)
1405 error = writedisklabel(RAIDLABELDEV(dev),
1406 raidstrategy, rs->sc_dkdev.dk_label,
1407 rs->sc_dkdev.dk_cpulabel);
1408 }
1409 rs->sc_flags &= ~RAIDF_LABELLING;
1410
1411 raidunlock(rs);
1412
1413 if (error)
1414 return (error);
1415 break;
1416
1417 case DIOCWLABEL:
1418 if (*(int *) data != 0)
1419 rs->sc_flags |= RAIDF_WLABEL;
1420 else
1421 rs->sc_flags &= ~RAIDF_WLABEL;
1422 break;
1423
1424 case DIOCGDEFLABEL:
1425 raidgetdefaultlabel(raidPtr, rs,
1426 (struct disklabel *) data);
1427 break;
1428
1429 default:
1430 retcode = ENOTTY;
1431 }
1432 return (retcode);
1433
1434 }
1435
1436
1437 /* raidinit -- complete the rest of the initialization for the
1438 RAIDframe device. */
1439
1440
1441 static int
1442 raidinit(dev, raidPtr, unit)
1443 dev_t dev;
1444 RF_Raid_t *raidPtr;
1445 int unit;
1446 {
1447 int retcode;
1448 struct raid_softc *rs;
1449
1450 retcode = 0;
1451
1452 rs = &raid_softc[unit];
1453 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1454 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1455
1456
1457 /* XXX should check return code first... */
1458 rs->sc_flags |= RAIDF_INITED;
1459
1460 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1461
1462 rs->sc_dkdev.dk_name = rs->sc_xname;
1463
1464 /* disk_attach actually creates space for the CPU disklabel, among
1465 * other things, so it's critical to call this *BEFORE* we try putzing
1466 * with disklabels. */
1467
1468 disk_attach(&rs->sc_dkdev);
1469
1470 /* XXX There may be a weird interaction here between this, and
1471 * protectedSectors, as used in RAIDframe. */
1472
1473 rs->sc_size = raidPtr->totalSectors;
1474 rs->sc_dev = dev;
1475
1476 return (retcode);
1477 }
1478
1479 /* wake up the daemon & tell it to get us a spare table
1480 * XXX
1481 * the entries in the queues should be tagged with the raidPtr
1482 * so that in the extremely rare case that two recons happen at once,
1483 * we know for which device were requesting a spare table
1484 * XXX
1485 *
1486 * XXX This code is not currently used. GO
1487 */
1488 int
1489 rf_GetSpareTableFromDaemon(req)
1490 RF_SparetWait_t *req;
1491 {
1492 int retcode;
1493
1494 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1495 req->next = rf_sparet_wait_queue;
1496 rf_sparet_wait_queue = req;
1497 wakeup(&rf_sparet_wait_queue);
1498
1499 /* mpsleep unlocks the mutex */
1500 while (!rf_sparet_resp_queue) {
1501 tsleep(&rf_sparet_resp_queue, PRIBIO,
1502 "raidframe getsparetable", 0);
1503 }
1504 req = rf_sparet_resp_queue;
1505 rf_sparet_resp_queue = req->next;
1506 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1507
1508 retcode = req->fcol;
1509 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1510 * alloc'd */
1511 return (retcode);
1512 }
1513
1514 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1515 * bp & passes it down.
1516 * any calls originating in the kernel must use non-blocking I/O
1517 * do some extra sanity checking to return "appropriate" error values for
1518 * certain conditions (to make some standard utilities work)
1519 *
1520 * Formerly known as: rf_DoAccessKernel
1521 */
1522 void
1523 raidstart(raidPtr)
1524 RF_Raid_t *raidPtr;
1525 {
1526 RF_SectorCount_t num_blocks, pb, sum;
1527 RF_RaidAddr_t raid_addr;
1528 int retcode;
1529 struct partition *pp;
1530 daddr_t blocknum;
1531 int unit;
1532 struct raid_softc *rs;
1533 int do_async;
1534 struct buf *bp;
1535
1536 unit = raidPtr->raidid;
1537 rs = &raid_softc[unit];
1538
1539 /* Check to see if we're at the limit... */
1540 RF_LOCK_MUTEX(raidPtr->mutex);
1541 while (raidPtr->openings > 0) {
1542 RF_UNLOCK_MUTEX(raidPtr->mutex);
1543
1544 /* get the next item, if any, from the queue */
1545 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1546 /* nothing more to do */
1547 return;
1548 }
1549 BUFQ_REMOVE(&rs->buf_queue, bp);
1550
1551 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1552 * partition.. Need to make it absolute to the underlying
1553 * device.. */
1554
1555 blocknum = bp->b_blkno;
1556 if (DISKPART(bp->b_dev) != RAW_PART) {
1557 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1558 blocknum += pp->p_offset;
1559 }
1560
1561 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1562 (int) blocknum));
1563
1564 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1565 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1566
1567 /* *THIS* is where we adjust what block we're going to...
1568 * but DO NOT TOUCH bp->b_blkno!!! */
1569 raid_addr = blocknum;
1570
1571 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1572 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1573 sum = raid_addr + num_blocks + pb;
1574 if (1 || rf_debugKernelAccess) {
1575 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1576 (int) raid_addr, (int) sum, (int) num_blocks,
1577 (int) pb, (int) bp->b_resid));
1578 }
1579 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1580 || (sum < num_blocks) || (sum < pb)) {
1581 bp->b_error = ENOSPC;
1582 bp->b_flags |= B_ERROR;
1583 bp->b_resid = bp->b_bcount;
1584 biodone(bp);
1585 RF_LOCK_MUTEX(raidPtr->mutex);
1586 continue;
1587 }
1588 /*
1589 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1590 */
1591
1592 if (bp->b_bcount & raidPtr->sectorMask) {
1593 bp->b_error = EINVAL;
1594 bp->b_flags |= B_ERROR;
1595 bp->b_resid = bp->b_bcount;
1596 biodone(bp);
1597 RF_LOCK_MUTEX(raidPtr->mutex);
1598 continue;
1599
1600 }
1601 db1_printf(("Calling DoAccess..\n"));
1602
1603
1604 RF_LOCK_MUTEX(raidPtr->mutex);
1605 raidPtr->openings--;
1606 RF_UNLOCK_MUTEX(raidPtr->mutex);
1607
1608 /*
1609 * Everything is async.
1610 */
1611 do_async = 1;
1612
1613 /* don't ever condition on bp->b_flags & B_WRITE.
1614 * always condition on B_READ instead */
1615
1616 /* XXX we're still at splbio() here... do we *really*
1617 need to be? */
1618
1619
1620 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1621 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1622 do_async, raid_addr, num_blocks,
1623 bp->b_un.b_addr, bp, NULL, NULL,
1624 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1625
1626
1627 RF_LOCK_MUTEX(raidPtr->mutex);
1628 }
1629 RF_UNLOCK_MUTEX(raidPtr->mutex);
1630 }
1631
1632
1633
1634
1635 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1636
1637 int
1638 rf_DispatchKernelIO(queue, req)
1639 RF_DiskQueue_t *queue;
1640 RF_DiskQueueData_t *req;
1641 {
1642 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1643 struct buf *bp;
1644 struct raidbuf *raidbp = NULL;
1645 struct raid_softc *rs;
1646 int unit;
1647 int s;
1648
1649 s=0;
1650 /* s = splbio();*/ /* want to test this */
1651 /* XXX along with the vnode, we also need the softc associated with
1652 * this device.. */
1653
1654 req->queue = queue;
1655
1656 unit = queue->raidPtr->raidid;
1657
1658 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1659
1660 if (unit >= numraid) {
1661 printf("Invalid unit number: %d %d\n", unit, numraid);
1662 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1663 }
1664 rs = &raid_softc[unit];
1665
1666 /* XXX is this the right place? */
1667 disk_busy(&rs->sc_dkdev);
1668
1669 bp = req->bp;
1670 #if 1
1671 /* XXX when there is a physical disk failure, someone is passing us a
1672 * buffer that contains old stuff!! Attempt to deal with this problem
1673 * without taking a performance hit... (not sure where the real bug
1674 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1675
1676 if (bp->b_flags & B_ERROR) {
1677 bp->b_flags &= ~B_ERROR;
1678 }
1679 if (bp->b_error != 0) {
1680 bp->b_error = 0;
1681 }
1682 #endif
1683 raidbp = RAIDGETBUF(rs);
1684
1685 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1686
1687 /*
1688 * context for raidiodone
1689 */
1690 raidbp->rf_obp = bp;
1691 raidbp->req = req;
1692
1693 LIST_INIT(&raidbp->rf_buf.b_dep);
1694
1695 switch (req->type) {
1696 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1697 /* XXX need to do something extra here.. */
1698 /* I'm leaving this in, as I've never actually seen it used,
1699 * and I'd like folks to report it... GO */
1700 printf(("WAKEUP CALLED\n"));
1701 queue->numOutstanding++;
1702
1703 /* XXX need to glue the original buffer into this?? */
1704
1705 KernelWakeupFunc(&raidbp->rf_buf);
1706 break;
1707
1708 case RF_IO_TYPE_READ:
1709 case RF_IO_TYPE_WRITE:
1710
1711 if (req->tracerec) {
1712 RF_ETIMER_START(req->tracerec->timer);
1713 }
1714 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1715 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1716 req->sectorOffset, req->numSector,
1717 req->buf, KernelWakeupFunc, (void *) req,
1718 queue->raidPtr->logBytesPerSector, req->b_proc);
1719
1720 if (rf_debugKernelAccess) {
1721 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1722 (long) bp->b_blkno));
1723 }
1724 queue->numOutstanding++;
1725 queue->last_deq_sector = req->sectorOffset;
1726 /* acc wouldn't have been let in if there were any pending
1727 * reqs at any other priority */
1728 queue->curPriority = req->priority;
1729
1730 db1_printf(("Going for %c to unit %d row %d col %d\n",
1731 req->type, unit, queue->row, queue->col));
1732 db1_printf(("sector %d count %d (%d bytes) %d\n",
1733 (int) req->sectorOffset, (int) req->numSector,
1734 (int) (req->numSector <<
1735 queue->raidPtr->logBytesPerSector),
1736 (int) queue->raidPtr->logBytesPerSector));
1737 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1738 raidbp->rf_buf.b_vp->v_numoutput++;
1739 }
1740 VOP_STRATEGY(&raidbp->rf_buf);
1741
1742 break;
1743
1744 default:
1745 panic("bad req->type in rf_DispatchKernelIO");
1746 }
1747 db1_printf(("Exiting from DispatchKernelIO\n"));
1748 /* splx(s); */ /* want to test this */
1749 return (0);
1750 }
1751 /* this is the callback function associated with a I/O invoked from
1752 kernel code.
1753 */
1754 static void
1755 KernelWakeupFunc(vbp)
1756 struct buf *vbp;
1757 {
1758 RF_DiskQueueData_t *req = NULL;
1759 RF_DiskQueue_t *queue;
1760 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1761 struct buf *bp;
1762 struct raid_softc *rs;
1763 int unit;
1764 register int s;
1765
1766 s = splbio();
1767 db1_printf(("recovering the request queue:\n"));
1768 req = raidbp->req;
1769
1770 bp = raidbp->rf_obp;
1771
1772 queue = (RF_DiskQueue_t *) req->queue;
1773
1774 if (raidbp->rf_buf.b_flags & B_ERROR) {
1775 bp->b_flags |= B_ERROR;
1776 bp->b_error = raidbp->rf_buf.b_error ?
1777 raidbp->rf_buf.b_error : EIO;
1778 }
1779
1780 /* XXX methinks this could be wrong... */
1781 #if 1
1782 bp->b_resid = raidbp->rf_buf.b_resid;
1783 #endif
1784
1785 if (req->tracerec) {
1786 RF_ETIMER_STOP(req->tracerec->timer);
1787 RF_ETIMER_EVAL(req->tracerec->timer);
1788 RF_LOCK_MUTEX(rf_tracing_mutex);
1789 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1790 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1791 req->tracerec->num_phys_ios++;
1792 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1793 }
1794 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1795
1796 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1797
1798
1799 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1800 * ballistic, and mark the component as hosed... */
1801
1802 if (bp->b_flags & B_ERROR) {
1803 /* Mark the disk as dead */
1804 /* but only mark it once... */
1805 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1806 rf_ds_optimal) {
1807 printf("raid%d: IO Error. Marking %s as failed.\n",
1808 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1809 queue->raidPtr->Disks[queue->row][queue->col].status =
1810 rf_ds_failed;
1811 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1812 queue->raidPtr->numFailures++;
1813 /* XXX here we should bump the version number for each component, and write that data out */
1814 } else { /* Disk is already dead... */
1815 /* printf("Disk already marked as dead!\n"); */
1816 }
1817
1818 }
1819
1820 rs = &raid_softc[unit];
1821 RAIDPUTBUF(rs, raidbp);
1822
1823
1824 if (bp->b_resid == 0) {
1825 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1826 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1827 }
1828
1829 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1830 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1831
1832 splx(s);
1833 }
1834
1835
1836
1837 /*
1838 * initialize a buf structure for doing an I/O in the kernel.
1839 */
1840 static void
1841 InitBP(
1842 struct buf * bp,
1843 struct vnode * b_vp,
1844 unsigned rw_flag,
1845 dev_t dev,
1846 RF_SectorNum_t startSect,
1847 RF_SectorCount_t numSect,
1848 caddr_t buf,
1849 void (*cbFunc) (struct buf *),
1850 void *cbArg,
1851 int logBytesPerSector,
1852 struct proc * b_proc)
1853 {
1854 /* bp->b_flags = B_PHYS | rw_flag; */
1855 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1856 bp->b_bcount = numSect << logBytesPerSector;
1857 bp->b_bufsize = bp->b_bcount;
1858 bp->b_error = 0;
1859 bp->b_dev = dev;
1860 bp->b_un.b_addr = buf;
1861 bp->b_blkno = startSect;
1862 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1863 if (bp->b_bcount == 0) {
1864 panic("bp->b_bcount is zero in InitBP!!\n");
1865 }
1866 bp->b_proc = b_proc;
1867 bp->b_iodone = cbFunc;
1868 bp->b_vp = b_vp;
1869
1870 }
1871
1872 static void
1873 raidgetdefaultlabel(raidPtr, rs, lp)
1874 RF_Raid_t *raidPtr;
1875 struct raid_softc *rs;
1876 struct disklabel *lp;
1877 {
1878 db1_printf(("Building a default label...\n"));
1879 bzero(lp, sizeof(*lp));
1880
1881 /* fabricate a label... */
1882 lp->d_secperunit = raidPtr->totalSectors;
1883 lp->d_secsize = raidPtr->bytesPerSector;
1884 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1885 lp->d_ntracks = 1;
1886 lp->d_ncylinders = raidPtr->totalSectors /
1887 (lp->d_nsectors * lp->d_ntracks);
1888 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1889
1890 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1891 lp->d_type = DTYPE_RAID;
1892 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1893 lp->d_rpm = 3600;
1894 lp->d_interleave = 1;
1895 lp->d_flags = 0;
1896
1897 lp->d_partitions[RAW_PART].p_offset = 0;
1898 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1899 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1900 lp->d_npartitions = RAW_PART + 1;
1901
1902 lp->d_magic = DISKMAGIC;
1903 lp->d_magic2 = DISKMAGIC;
1904 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1905
1906 }
1907 /*
1908 * Read the disklabel from the raid device. If one is not present, fake one
1909 * up.
1910 */
1911 static void
1912 raidgetdisklabel(dev)
1913 dev_t dev;
1914 {
1915 int unit = raidunit(dev);
1916 struct raid_softc *rs = &raid_softc[unit];
1917 char *errstring;
1918 struct disklabel *lp = rs->sc_dkdev.dk_label;
1919 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1920 RF_Raid_t *raidPtr;
1921
1922 db1_printf(("Getting the disklabel...\n"));
1923
1924 bzero(clp, sizeof(*clp));
1925
1926 raidPtr = raidPtrs[unit];
1927
1928 raidgetdefaultlabel(raidPtr, rs, lp);
1929
1930 /*
1931 * Call the generic disklabel extraction routine.
1932 */
1933 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1934 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1935 if (errstring)
1936 raidmakedisklabel(rs);
1937 else {
1938 int i;
1939 struct partition *pp;
1940
1941 /*
1942 * Sanity check whether the found disklabel is valid.
1943 *
1944 * This is necessary since total size of the raid device
1945 * may vary when an interleave is changed even though exactly
1946 * same componets are used, and old disklabel may used
1947 * if that is found.
1948 */
1949 if (lp->d_secperunit != rs->sc_size)
1950 printf("WARNING: %s: "
1951 "total sector size in disklabel (%d) != "
1952 "the size of raid (%ld)\n", rs->sc_xname,
1953 lp->d_secperunit, (long) rs->sc_size);
1954 for (i = 0; i < lp->d_npartitions; i++) {
1955 pp = &lp->d_partitions[i];
1956 if (pp->p_offset + pp->p_size > rs->sc_size)
1957 printf("WARNING: %s: end of partition `%c' "
1958 "exceeds the size of raid (%ld)\n",
1959 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1960 }
1961 }
1962
1963 }
1964 /*
1965 * Take care of things one might want to take care of in the event
1966 * that a disklabel isn't present.
1967 */
1968 static void
1969 raidmakedisklabel(rs)
1970 struct raid_softc *rs;
1971 {
1972 struct disklabel *lp = rs->sc_dkdev.dk_label;
1973 db1_printf(("Making a label..\n"));
1974
1975 /*
1976 * For historical reasons, if there's no disklabel present
1977 * the raw partition must be marked FS_BSDFFS.
1978 */
1979
1980 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1981
1982 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1983
1984 lp->d_checksum = dkcksum(lp);
1985 }
1986 /*
1987 * Lookup the provided name in the filesystem. If the file exists,
1988 * is a valid block device, and isn't being used by anyone else,
1989 * set *vpp to the file's vnode.
1990 * You'll find the original of this in ccd.c
1991 */
1992 int
1993 raidlookup(path, p, vpp)
1994 char *path;
1995 struct proc *p;
1996 struct vnode **vpp; /* result */
1997 {
1998 struct nameidata nd;
1999 struct vnode *vp;
2000 struct vattr va;
2001 int error;
2002
2003 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2004 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2005 #ifdef DEBUG
2006 printf("RAIDframe: vn_open returned %d\n", error);
2007 #endif
2008 return (error);
2009 }
2010 vp = nd.ni_vp;
2011 if (vp->v_usecount > 1) {
2012 VOP_UNLOCK(vp, 0);
2013 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2014 return (EBUSY);
2015 }
2016 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2017 VOP_UNLOCK(vp, 0);
2018 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2019 return (error);
2020 }
2021 /* XXX: eventually we should handle VREG, too. */
2022 if (va.va_type != VBLK) {
2023 VOP_UNLOCK(vp, 0);
2024 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2025 return (ENOTBLK);
2026 }
2027 VOP_UNLOCK(vp, 0);
2028 *vpp = vp;
2029 return (0);
2030 }
2031 /*
2032 * Wait interruptibly for an exclusive lock.
2033 *
2034 * XXX
2035 * Several drivers do this; it should be abstracted and made MP-safe.
2036 * (Hmm... where have we seen this warning before :-> GO )
2037 */
2038 static int
2039 raidlock(rs)
2040 struct raid_softc *rs;
2041 {
2042 int error;
2043
2044 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2045 rs->sc_flags |= RAIDF_WANTED;
2046 if ((error =
2047 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2048 return (error);
2049 }
2050 rs->sc_flags |= RAIDF_LOCKED;
2051 return (0);
2052 }
2053 /*
2054 * Unlock and wake up any waiters.
2055 */
2056 static void
2057 raidunlock(rs)
2058 struct raid_softc *rs;
2059 {
2060
2061 rs->sc_flags &= ~RAIDF_LOCKED;
2062 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2063 rs->sc_flags &= ~RAIDF_WANTED;
2064 wakeup(rs);
2065 }
2066 }
2067
2068
/*
 * Position and length, both in bytes, of the area that the component
 * label read/write routines transfer on each component.
 */
#define RF_COMPONENT_INFO_OFFSET	16384
#define RF_COMPONENT_INFO_SIZE		1024
2071
2072 int
2073 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2074 {
2075 RF_ComponentLabel_t clabel;
2076 raidread_component_label(dev, b_vp, &clabel);
2077 clabel.mod_counter = mod_counter;
2078 clabel.clean = RF_RAID_CLEAN;
2079 raidwrite_component_label(dev, b_vp, &clabel);
2080 return(0);
2081 }
2082
2083
2084 int
2085 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2086 {
2087 RF_ComponentLabel_t clabel;
2088 raidread_component_label(dev, b_vp, &clabel);
2089 clabel.mod_counter = mod_counter;
2090 clabel.clean = RF_RAID_DIRTY;
2091 raidwrite_component_label(dev, b_vp, &clabel);
2092 return(0);
2093 }
2094
2095 /* ARGSUSED */
2096 int
2097 raidread_component_label(dev, b_vp, clabel)
2098 dev_t dev;
2099 struct vnode *b_vp;
2100 RF_ComponentLabel_t *clabel;
2101 {
2102 struct buf *bp;
2103 int error;
2104
2105 /* XXX should probably ensure that we don't try to do this if
2106 someone has changed rf_protected_sectors. */
2107
2108 /* get a block of the appropriate size... */
2109 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2110 bp->b_dev = dev;
2111
2112 /* get our ducks in a row for the read */
2113 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2114 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2115 bp->b_flags = B_BUSY | B_READ;
2116 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2117
2118 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2119
2120 error = biowait(bp);
2121
2122 if (!error) {
2123 memcpy(clabel, bp->b_un.b_addr,
2124 sizeof(RF_ComponentLabel_t));
2125 #if 0
2126 print_component_label( clabel );
2127 #endif
2128 } else {
2129 #if 0
2130 printf("Failed to read RAID component label!\n");
2131 #endif
2132 }
2133
2134 bp->b_flags = B_INVAL | B_AGE;
2135 brelse(bp);
2136 return(error);
2137 }
2138 /* ARGSUSED */
2139 int
2140 raidwrite_component_label(dev, b_vp, clabel)
2141 dev_t dev;
2142 struct vnode *b_vp;
2143 RF_ComponentLabel_t *clabel;
2144 {
2145 struct buf *bp;
2146 int error;
2147
2148 /* get a block of the appropriate size... */
2149 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2150 bp->b_dev = dev;
2151
2152 /* get our ducks in a row for the write */
2153 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2154 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2155 bp->b_flags = B_BUSY | B_WRITE;
2156 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2157
2158 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2159
2160 memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
2161
2162 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2163 error = biowait(bp);
2164 bp->b_flags = B_INVAL | B_AGE;
2165 brelse(bp);
2166 if (error) {
2167 #if 1
2168 printf("Failed to write RAID component info!\n");
2169 #endif
2170 }
2171
2172 return(error);
2173 }
2174
2175 void
2176 rf_markalldirty( raidPtr )
2177 RF_Raid_t *raidPtr;
2178 {
2179 RF_ComponentLabel_t clabel;
2180 int r,c;
2181
2182 raidPtr->mod_counter++;
2183 for (r = 0; r < raidPtr->numRow; r++) {
2184 for (c = 0; c < raidPtr->numCol; c++) {
2185 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2186 raidread_component_label(
2187 raidPtr->Disks[r][c].dev,
2188 raidPtr->raid_cinfo[r][c].ci_vp,
2189 &clabel);
2190 if (clabel.status == rf_ds_spared) {
2191 /* XXX do something special...
2192 but whatever you do, don't
2193 try to access it!! */
2194 } else {
2195 #if 0
2196 clabel.status =
2197 raidPtr->Disks[r][c].status;
2198 raidwrite_component_label(
2199 raidPtr->Disks[r][c].dev,
2200 raidPtr->raid_cinfo[r][c].ci_vp,
2201 &clabel);
2202 #endif
2203 raidmarkdirty(
2204 raidPtr->Disks[r][c].dev,
2205 raidPtr->raid_cinfo[r][c].ci_vp,
2206 raidPtr->mod_counter);
2207 }
2208 }
2209 }
2210 }
2211 /* printf("Component labels marked dirty.\n"); */
2212 #if 0
2213 for( c = 0; c < raidPtr->numSpare ; c++) {
2214 sparecol = raidPtr->numCol + c;
2215 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2216 /*
2217
2218 XXX this is where we get fancy and map this spare
2219 into it's correct spot in the array.
2220
2221 */
2222 /*
2223
2224 we claim this disk is "optimal" if it's
2225 rf_ds_used_spare, as that means it should be
2226 directly substitutable for the disk it replaced.
2227 We note that too...
2228
2229 */
2230
2231 for(i=0;i<raidPtr->numRow;i++) {
2232 for(j=0;j<raidPtr->numCol;j++) {
2233 if ((raidPtr->Disks[i][j].spareRow ==
2234 r) &&
2235 (raidPtr->Disks[i][j].spareCol ==
2236 sparecol)) {
2237 srow = r;
2238 scol = sparecol;
2239 break;
2240 }
2241 }
2242 }
2243
2244 raidread_component_label(
2245 raidPtr->Disks[r][sparecol].dev,
2246 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2247 &clabel);
2248 /* make sure status is noted */
2249 clabel.version = RF_COMPONENT_LABEL_VERSION;
2250 clabel.mod_counter = raidPtr->mod_counter;
2251 clabel.serial_number = raidPtr->serial_number;
2252 clabel.row = srow;
2253 clabel.column = scol;
2254 clabel.num_rows = raidPtr->numRow;
2255 clabel.num_columns = raidPtr->numCol;
2256 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2257 clabel.status = rf_ds_optimal;
2258 raidwrite_component_label(
2259 raidPtr->Disks[r][sparecol].dev,
2260 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2261 &clabel);
2262 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2263 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2264 }
2265 }
2266
2267 #endif
2268 }
2269
2270
2271 void
2272 rf_update_component_labels( raidPtr )
2273 RF_Raid_t *raidPtr;
2274 {
2275 RF_ComponentLabel_t clabel;
2276 int sparecol;
2277 int r,c;
2278 int i,j;
2279 int srow, scol;
2280
2281 srow = -1;
2282 scol = -1;
2283
2284 /* XXX should do extra checks to make sure things really are clean,
2285 rather than blindly setting the clean bit... */
2286
2287 raidPtr->mod_counter++;
2288
2289 for (r = 0; r < raidPtr->numRow; r++) {
2290 for (c = 0; c < raidPtr->numCol; c++) {
2291 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2292 raidread_component_label(
2293 raidPtr->Disks[r][c].dev,
2294 raidPtr->raid_cinfo[r][c].ci_vp,
2295 &clabel);
2296 /* make sure status is noted */
2297 clabel.status = rf_ds_optimal;
2298 raidwrite_component_label(
2299 raidPtr->Disks[r][c].dev,
2300 raidPtr->raid_cinfo[r][c].ci_vp,
2301 &clabel);
2302 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2303 raidmarkclean(
2304 raidPtr->Disks[r][c].dev,
2305 raidPtr->raid_cinfo[r][c].ci_vp,
2306 raidPtr->mod_counter);
2307 }
2308 }
2309 /* else we don't touch it.. */
2310 #if 0
2311 else if (raidPtr->Disks[r][c].status !=
2312 rf_ds_failed) {
2313 raidread_component_label(
2314 raidPtr->Disks[r][c].dev,
2315 raidPtr->raid_cinfo[r][c].ci_vp,
2316 &clabel);
2317 /* make sure status is noted */
2318 clabel.status =
2319 raidPtr->Disks[r][c].status;
2320 raidwrite_component_label(
2321 raidPtr->Disks[r][c].dev,
2322 raidPtr->raid_cinfo[r][c].ci_vp,
2323 &clabel);
2324 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2325 raidmarkclean(
2326 raidPtr->Disks[r][c].dev,
2327 raidPtr->raid_cinfo[r][c].ci_vp,
2328 raidPtr->mod_counter);
2329 }
2330 }
2331 #endif
2332 }
2333 }
2334
2335 for( c = 0; c < raidPtr->numSpare ; c++) {
2336 sparecol = raidPtr->numCol + c;
2337 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2338 /*
2339
2340 we claim this disk is "optimal" if it's
2341 rf_ds_used_spare, as that means it should be
2342 directly substitutable for the disk it replaced.
2343 We note that too...
2344
2345 */
2346
2347 for(i=0;i<raidPtr->numRow;i++) {
2348 for(j=0;j<raidPtr->numCol;j++) {
2349 if ((raidPtr->Disks[i][j].spareRow ==
2350 0) &&
2351 (raidPtr->Disks[i][j].spareCol ==
2352 sparecol)) {
2353 srow = i;
2354 scol = j;
2355 break;
2356 }
2357 }
2358 }
2359
2360 raidread_component_label(
2361 raidPtr->Disks[0][sparecol].dev,
2362 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2363 &clabel);
2364 /* make sure status is noted */
2365 clabel.version = RF_COMPONENT_LABEL_VERSION;
2366 clabel.mod_counter = raidPtr->mod_counter;
2367 clabel.serial_number = raidPtr->serial_number;
2368 clabel.row = srow;
2369 clabel.column = scol;
2370 clabel.num_rows = raidPtr->numRow;
2371 clabel.num_columns = raidPtr->numCol;
2372 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2373 clabel.status = rf_ds_optimal;
2374 raidwrite_component_label(
2375 raidPtr->Disks[0][sparecol].dev,
2376 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2377 &clabel);
2378 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2379 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2380 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2381 raidPtr->mod_counter);
2382 }
2383 }
2384 }
2385 /* printf("Component labels updated\n"); */
2386 }
2387
2388 void
2389 rf_ReconThread(req)
2390 struct rf_recon_req *req;
2391 {
2392 int s;
2393 RF_Raid_t *raidPtr;
2394
2395 s = splbio();
2396 raidPtr = (RF_Raid_t *) req->raidPtr;
2397 raidPtr->recon_in_progress = 1;
2398
2399 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2400 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2401
2402 /* XXX get rid of this! we don't need it at all.. */
2403 RF_Free(req, sizeof(*req));
2404
2405 raidPtr->recon_in_progress = 0;
2406 splx(s);
2407
2408 /* That's all... */
2409 kthread_exit(0); /* does not return */
2410 }
2411
2412 void
2413 rf_RewriteParityThread(raidPtr)
2414 RF_Raid_t *raidPtr;
2415 {
2416 int retcode;
2417 int s;
2418
2419 raidPtr->parity_rewrite_in_progress = 1;
2420 s = splbio();
2421 retcode = rf_RewriteParity(raidPtr);
2422 splx(s);
2423 if (retcode) {
2424 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2425 } else {
2426 /* set the clean bit! If we shutdown correctly,
2427 the clean bit on each component label will get
2428 set */
2429 raidPtr->parity_good = RF_RAID_CLEAN;
2430 }
2431 raidPtr->parity_rewrite_in_progress = 0;
2432
2433 /* That's all... */
2434 kthread_exit(0); /* does not return */
2435 }
2436
2437
2438 void
2439 rf_CopybackThread(raidPtr)
2440 RF_Raid_t *raidPtr;
2441 {
2442 int s;
2443
2444 raidPtr->copyback_in_progress = 1;
2445 s = splbio();
2446 rf_CopybackReconstructedData(raidPtr);
2447 splx(s);
2448 raidPtr->copyback_in_progress = 0;
2449
2450 /* That's all... */
2451 kthread_exit(0); /* does not return */
2452 }
2453
2454
2455 void
2456 rf_ReconstructInPlaceThread(req)
2457 struct rf_recon_req *req;
2458 {
2459 int retcode;
2460 int s;
2461 RF_Raid_t *raidPtr;
2462
2463 s = splbio();
2464 raidPtr = req->raidPtr;
2465 raidPtr->recon_in_progress = 1;
2466 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2467 RF_Free(req, sizeof(*req));
2468 raidPtr->recon_in_progress = 0;
2469 splx(s);
2470
2471 /* That's all... */
2472 kthread_exit(0); /* does not return */
2473 }
2474
2475 void
2476 rf_mountroot_hook(dev)
2477 struct device *dev;
2478 {
2479 #if 1
2480 printf("rf_mountroot_hook called for %s\n",dev->dv_xname);
2481 #endif
2482 if (boothowto & RB_ASKNAME) {
2483 /* We don't auto-config... */
2484 } else {
2485 /* They didn't ask, and we found something bootable... */
2486 /* XXX pretend for now.. */
2487 if (raidautoconfig) {
2488 rootspec = raid_rooty;
2489 }
2490 }
2491 }
2492
2493
2494 RF_AutoConfig_t *
2495 rf_find_raid_components()
2496 {
2497 struct devnametobdevmaj *dtobdm;
2498 struct vnode *vp;
2499 struct disklabel label;
2500 struct device *dv;
2501 char *cd_name;
2502 dev_t dev;
2503 int error;
2504 int i;
2505 int good_one;
2506 RF_ComponentLabel_t *clabel;
2507 RF_AutoConfig_t *ac_list;
2508 RF_AutoConfig_t *ac;
2509
2510
2511 /* initialize the AutoConfig list */
2512 ac_list = NULL;
2513
2514 if (raidautoconfig) {
2515
2516 /* we begin by trolling through *all* the devices on the system */
2517
2518 for (dv = alldevs.tqh_first; dv != NULL;
2519 dv = dv->dv_list.tqe_next) {
2520
2521 /* we are only interested in disks... */
2522 if (dv->dv_class != DV_DISK)
2523 continue;
2524
2525 /* we don't care about floppies... */
2526 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2527 continue;
2528 }
2529
2530 /* need to find the device_name_to_block_device_major stuff */
2531 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2532 dtobdm = dev_name2blk;
2533 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2534 dtobdm++;
2535 }
2536
2537 /* get a vnode for the raw partition of this disk */
2538
2539 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2540 if (bdevvp(dev, &vp))
2541 panic("RAID can't alloc vnode");
2542
2543 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2544
2545 if (error) {
2546 /* "Who cares." Continue looking
2547 for something that exists*/
2548 vput(vp);
2549 continue;
2550 }
2551
2552 /* Ok, the disk exists. Go get the disklabel. */
2553 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2554 FREAD, NOCRED, 0);
2555 if (error) {
2556 /*
2557 * XXX can't happen - open() would
2558 * have errored out (or faked up one)
2559 */
2560 printf("can't get label for dev %s%c (%d)!?!?\n",
2561 dv->dv_xname, 'a' + RAW_PART, error);
2562 }
2563
2564 /* don't need this any more. We'll allocate it again
2565 a little later if we really do... */
2566 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2567 vput(vp);
2568
2569 for (i=0; i < label.d_npartitions; i++) {
2570 /* We only support partitions marked as RAID */
2571 if (label.d_partitions[i].p_fstype != FS_RAID)
2572 continue;
2573
2574 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2575 if (bdevvp(dev, &vp))
2576 panic("RAID can't alloc vnode");
2577
2578 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2579 if (error) {
2580 /* Whatever... */
2581 vput(vp);
2582 continue;
2583 }
2584
2585 good_one = 0;
2586
2587 clabel = (RF_ComponentLabel_t *)
2588 malloc(sizeof(RF_ComponentLabel_t),
2589 M_RAIDFRAME, M_NOWAIT);
2590 if (clabel == NULL) {
2591 /* XXX CLEANUP HERE */
2592 printf("RAID auto config: out of memory!\n");
2593 return(NULL); /* XXX probably should panic? */
2594 }
2595
2596 if (!raidread_component_label(dev, vp, clabel)) {
2597 /* Got the label. Does it look reasonable? */
2598 if (rf_reasonable_label(clabel) &&
2599 (clabel->partitionSize ==
2600 label.d_partitions[i].p_size)) {
2601 #if DEBUG
2602 printf("Component on: %s%c: %d\n",
2603 dv->dv_xname, 'a'+i,
2604 label.d_partitions[i].p_size);
2605 print_component_label(clabel);
2606 #endif
2607 /* if it's reasonable, add it,
2608 else ignore it. */
2609 ac = (RF_AutoConfig_t *)
2610 malloc(sizeof(RF_AutoConfig_t),
2611 M_RAIDFRAME,
2612 M_NOWAIT);
2613 if (ac == NULL) {
2614 /* XXX should panic?? */
2615 return(NULL);
2616 }
2617
2618 sprintf(ac->devname, "%s%c",
2619 dv->dv_xname, 'a'+i);
2620 ac->dev = dev;
2621 ac->vp = vp;
2622 ac->clabel = clabel;
2623 ac->next = ac_list;
2624 ac_list = ac;
2625 good_one = 1;
2626 }
2627 }
2628 if (!good_one) {
2629 /* cleanup */
2630 free(clabel, M_RAIDFRAME);
2631 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2632 vput(vp);
2633 }
2634 }
2635 }
2636 }
2637 return(ac_list);
2638 }
2639
2640 static int
2641 rf_reasonable_label(clabel)
2642 RF_ComponentLabel_t *clabel;
2643 {
2644
2645 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2646 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2647 ((clabel->clean == RF_RAID_CLEAN) ||
2648 (clabel->clean == RF_RAID_DIRTY)) &&
2649 clabel->row >=0 &&
2650 clabel->column >= 0 &&
2651 clabel->num_rows > 0 &&
2652 clabel->num_columns > 0 &&
2653 clabel->row < clabel->num_rows &&
2654 clabel->column < clabel->num_columns &&
2655 clabel->blockSize > 0 &&
2656 clabel->numBlocks > 0) {
2657 /* label looks reasonable enough... */
2658 return(1);
2659 }
2660 return(0);
2661 }
2662
2663
2664 void
2665 print_component_label(clabel)
2666 RF_ComponentLabel_t *clabel;
2667 {
2668 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2669 clabel->row, clabel->column,
2670 clabel->num_rows, clabel->num_columns);
2671 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2672 clabel->version, clabel->serial_number,
2673 clabel->mod_counter);
2674 printf(" Clean: %s Status: %d\n",
2675 clabel->clean ? "Yes" : "No", clabel->status );
2676 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2677 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2678 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2679 (char) clabel->parityConfig, clabel->blockSize,
2680 clabel->numBlocks);
2681 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2682 printf(" Last configured as: raid%d\n", clabel->last_unit );
2683 #if 0
2684 printf(" Config order: %d\n", clabel->config_order);
2685 #endif
2686
2687 }
2688
2689 RF_ConfigSet_t *
2690 rf_create_auto_sets(ac_list)
2691 RF_AutoConfig_t *ac_list;
2692 {
2693 RF_AutoConfig_t *ac;
2694 RF_ConfigSet_t *config_sets;
2695 RF_ConfigSet_t *cset;
2696 RF_AutoConfig_t *ac_next;
2697
2698
2699 config_sets = NULL;
2700
2701 /* Go through the AutoConfig list, and figure out which components
2702 belong to what sets. */
2703 ac = ac_list;
2704 while(ac!=NULL) {
2705 /* we're going to putz with ac->next, so save it here
2706 for use at the end of the loop */
2707 ac_next = ac->next;
2708
2709 if (config_sets == NULL) {
2710 /* will need at least this one... */
2711 config_sets = (RF_ConfigSet_t *)
2712 malloc(sizeof(RF_ConfigSet_t),
2713 M_RAIDFRAME, M_NOWAIT);
2714 if (config_sets == NULL) {
2715 panic("rf_create_auto_sets: No memory!\n");
2716 }
2717 /* this one is easy :) */
2718 config_sets->ac = ac;
2719 config_sets->next = NULL;
2720 config_sets->rootable = 0;
2721 ac->next = NULL;
2722 } else {
2723 /* which set does this component fit into? */
2724 cset = config_sets;
2725 while(cset!=NULL) {
2726 if (rf_does_it_fit(cset, ac)) {
2727 /* looks like it matches */
2728 ac->next = cset->ac;
2729 cset->ac = ac;
2730 break;
2731 }
2732 cset = cset->next;
2733 }
2734 if (cset==NULL) {
2735 /* didn't find a match above... new set..*/
2736 cset = (RF_ConfigSet_t *)
2737 malloc(sizeof(RF_ConfigSet_t),
2738 M_RAIDFRAME, M_NOWAIT);
2739 if (cset == NULL) {
2740 panic("rf_create_auto_sets: No memory!\n");
2741 }
2742 cset->ac = ac;
2743 ac->next = NULL;
2744 cset->next = config_sets;
2745 cset->rootable = 0;
2746 config_sets = cset;
2747 }
2748 }
2749 ac = ac_next;
2750 }
2751
2752
2753 return(config_sets);
2754 }
2755
2756 static int
2757 rf_does_it_fit(cset, ac)
2758 RF_ConfigSet_t *cset;
2759 RF_AutoConfig_t *ac;
2760 {
2761 RF_ComponentLabel_t *clabel1, *clabel2;
2762
2763 /* If this one matches the *first* one in the set, that's good
2764 enough, since the other members of the set would have been
2765 through here too... */
2766
2767 clabel1 = cset->ac->clabel;
2768 clabel2 = ac->clabel;
2769 if ((clabel1->version == clabel2->version) &&
2770 (clabel1->serial_number == clabel2->serial_number) &&
2771 (clabel1->mod_counter == clabel2->mod_counter) &&
2772 (clabel1->num_rows == clabel2->num_rows) &&
2773 (clabel1->num_columns == clabel2->num_columns) &&
2774 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2775 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2776 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2777 (clabel1->parityConfig == clabel2->parityConfig) &&
2778 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2779 (clabel1->blockSize == clabel2->blockSize) &&
2780 (clabel1->numBlocks == clabel2->numBlocks) &&
2781 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2782 (clabel1->root_partition == clabel2->root_partition) &&
2783 (clabel1->last_unit == clabel2->last_unit) &&
2784 (clabel1->config_order == clabel2->config_order)) {
2785 /* if it get's here, it almost *has* to be a match */
2786 } else {
2787 /* it's not consistent with somebody in the set..
2788 punt */
2789 return(0);
2790 }
2791 /* all was fine.. it must fit... */
2792 return(1);
2793 }
2794
2795 int
2796 rf_have_enough_components(cset)
2797 RF_ConfigSet_t *cset;
2798 {
2799 RF_AutoConfig_t *ac;
2800 RF_AutoConfig_t *auto_config;
2801 RF_ComponentLabel_t *clabel;
2802 int r,c;
2803 int num_rows;
2804 int num_cols;
2805 int num_missing;
2806
2807 /* check to see that we have enough 'live' components
2808 of this set. If so, we can configure it if necessary */
2809
2810 num_rows = cset->ac->clabel->num_rows;
2811 num_cols = cset->ac->clabel->num_columns;
2812
2813 /* XXX Check for duplicate components!?!?!? */
2814
2815 num_missing = 0;
2816 auto_config = cset->ac;
2817
2818 for(r=0; r<num_rows; r++) {
2819 for(c=0; c<num_cols; c++) {
2820 ac = auto_config;
2821 while(ac!=NULL) {
2822 if (ac->clabel==NULL) {
2823 /* big-time bad news. */
2824 goto fail;
2825 }
2826 if ((ac->clabel->row == r) &&
2827 (ac->clabel->column == c)) {
2828 /* it's this one... */
2829 #if DEBUG
2830 printf("Found: %s at %d,%d\n",
2831 ac->devname,r,c);
2832 #endif
2833 break;
2834 }
2835 ac=ac->next;
2836 }
2837 if (ac==NULL) {
2838 /* Didn't find one here! */
2839 num_missing++;
2840 }
2841 }
2842 }
2843
2844 clabel = cset->ac->clabel;
2845
2846 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
2847 ((clabel->parityConfig == '1') && (num_missing > 1)) ||
2848 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
2849 ((clabel->parityConfig == '5') && (num_missing > 1))) {
2850 /* XXX this needs to be made *much* more general */
2851 /* Too many failures */
2852 return(0);
2853 }
2854 /* otherwise, all is well, and we've got enough to take a kick
2855 at autoconfiguring this set */
2856 return(1);
2857 fail:
2858 return(0);
2859
2860 }
2861
2862 void
2863 rf_create_configuration(ac,config,raidPtr)
2864 RF_AutoConfig_t *ac;
2865 RF_Config_t *config;
2866 RF_Raid_t *raidPtr;
2867 {
2868 RF_ComponentLabel_t *clabel;
2869
2870 clabel = ac->clabel;
2871
2872 /* 1. Fill in the common stuff */
2873 config->numRow = clabel->num_rows;
2874 config->numCol = clabel->num_columns;
2875 config->numSpare = 0; /* XXX should this be set here? */
2876 config->sectPerSU = clabel->sectPerSU;
2877 config->SUsPerPU = clabel->SUsPerPU;
2878 config->SUsPerRU = clabel->SUsPerRU;
2879 config->parityConfig = clabel->parityConfig;
2880 /* XXX... */
2881 strcpy(config->diskQueueType,"fifo");
2882 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
2883 config->layoutSpecificSize = 0; /* XXX ?? */
2884
2885 while(ac!=NULL) {
2886 /* row/col values will be in range due to the checks
2887 in reasonable_label() */
2888 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
2889 ac->devname);
2890 ac = ac->next;
2891 }
2892
2893 }
2894
2895 int
2896 rf_set_autoconfig(raidPtr, new_value)
2897 RF_Raid_t *raidPtr;
2898 int new_value;
2899 {
2900 RF_ComponentLabel_t clabel;
2901 struct vnode *vp;
2902 dev_t dev;
2903 int row, column;
2904
2905 for(row=0; row<raidPtr->numRow; row++) {
2906 for(column=0; column<raidPtr->numCol; column++) {
2907 dev = raidPtr->Disks[row][column].dev;
2908 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2909 raidread_component_label(dev, vp, &clabel);
2910 clabel.autoconfigure = new_value;
2911 raidwrite_component_label(dev, vp, &clabel);
2912 }
2913 }
2914 return(new_value);
2915 }
2916
2917 int
2918 rf_set_rootpartition(raidPtr, new_value)
2919 RF_Raid_t *raidPtr;
2920 int new_value;
2921 {
2922 RF_ComponentLabel_t clabel;
2923 struct vnode *vp;
2924 dev_t dev;
2925 int row, column;
2926
2927 for(row=0; row<raidPtr->numRow; row++) {
2928 for(column=0; column<raidPtr->numCol; column++) {
2929 dev = raidPtr->Disks[row][column].dev;
2930 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2931 raidread_component_label(dev, vp, &clabel);
2932 clabel.root_partition = new_value;
2933 raidwrite_component_label(dev, vp, &clabel);
2934 }
2935 }
2936 return(new_value);
2937 }
2938
2939 void
2940 rf_release_all_vps(cset)
2941 RF_ConfigSet_t *cset;
2942 {
2943 RF_AutoConfig_t *ac;
2944
2945 ac = cset->ac;
2946 while(ac!=NULL) {
2947 /* Close the vp, and give it back */
2948 if (ac->vp) {
2949 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
2950 vput(ac->vp);
2951 }
2952 ac = ac->next;
2953 }
2954 }
2955
2956
2957 void
2958 rf_cleanup_config_set(cset)
2959 RF_ConfigSet_t *cset;
2960 {
2961 RF_AutoConfig_t *ac;
2962 RF_AutoConfig_t *next_ac;
2963
2964 ac = cset->ac;
2965 while(ac!=NULL) {
2966 next_ac = ac->next;
2967 /* nuke the label */
2968 free(ac->clabel, M_RAIDFRAME);
2969 /* cleanup the config structure */
2970 free(ac, M_RAIDFRAME);
2971 /* "next.." */
2972 ac = next_ac;
2973 }
2974 /* and, finally, nuke the config set */
2975 free(cset, M_RAIDFRAME);
2976 }
2977
2978
2979 void
2980 raid_init_component_label(raidPtr, clabel)
2981 RF_Raid_t *raidPtr;
2982 RF_ComponentLabel_t *clabel;
2983 {
2984 /* current version number */
2985 clabel->version = RF_COMPONENT_LABEL_VERSION;
2986 clabel->serial_number = clabel->serial_number;
2987 clabel->mod_counter = raidPtr->mod_counter;
2988 clabel->num_rows = raidPtr->numRow;
2989 clabel->num_columns = raidPtr->numCol;
2990 clabel->clean = RF_RAID_DIRTY; /* not clean */
2991 clabel->status = rf_ds_optimal; /* "It's good!" */
2992
2993 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
2994 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
2995 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
2996 /* XXX not portable */
2997 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
2998 /* XXX THIS SHOULD BE SET RIGHT!! */
2999 clabel->maxOutstanding = 100;
3000 clabel->autoconfigure = 0;
3001 clabel->root_partition = 0;
3002 clabel->last_unit = raidPtr->raidid;
3003 clabel->config_order = 0;
3004 }
3005
3006 int
3007 rf_auto_config_set(cset,unit)
3008 RF_ConfigSet_t *cset;
3009 int *unit;
3010 {
3011 RF_Raid_t *raidPtr;
3012 RF_Config_t *config;
3013 int raidID;
3014 int retcode;
3015
3016 printf("Starting autoconfigure on raid%d\n",raidID);
3017
3018 retcode = 0;
3019 *unit = -1;
3020
3021 /* 1. Create a config structure */
3022
3023 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3024 M_RAIDFRAME,
3025 M_NOWAIT);
3026 if (config==NULL) {
3027 printf("Out of mem!?!?\n");
3028 /* XXX do something more intelligent here. */
3029 return(1);
3030 }
3031 /* XXX raidID needs to be set correctly.. */
3032
3033 /*
3034 2. Figure out what RAID ID this one is supposed to live at
3035 See if we can get the same RAID dev that it was configured
3036 on last time..
3037 */
3038
3039 raidID = cset->ac->clabel->last_unit;
3040 if ((raidID < 0) || (raidID >= numraid)) {
3041 /* let's not wander off into lala land. */
3042 raidID = numraid - 1;
3043 }
3044 if (raidPtrs[raidID]->valid != 0) {
3045
3046 /*
3047 Nope... Go looking for an alternative...
3048 Start high so we don't immediately use raid0 if that's
3049 not taken.
3050 */
3051
3052 for(raidID = numraid; raidID >= 0; raidID--) {
3053 if (raidPtrs[raidID]->valid == 0) {
3054 /* can use this one! */
3055 break;
3056 }
3057 }
3058 }
3059
3060 if (raidID < 0) {
3061 /* punt... */
3062 printf("Unable to auto configure this set!\n");
3063 printf("(Out of RAID devs!)\n");
3064 return(1);
3065 }
3066
3067 raidPtr = raidPtrs[raidID];
3068
3069 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3070 raidPtr->raidid = raidID;
3071 raidPtr->openings = RAIDOUTSTANDING;
3072
3073 /* 3. Build the configuration structure */
3074 rf_create_configuration(cset->ac, config, raidPtr);
3075
3076 /* 4. Do the configuration */
3077 retcode = rf_Configure(raidPtr, config, cset->ac);
3078
3079 if (retcode == 0) {
3080 #if DEBUG
3081 printf("Calling raidinit()\n");
3082 #endif
3083 /* XXX the 0 below is bogus! */
3084 retcode = raidinit(0, raidPtrs[raidID], raidID);
3085 if (retcode) {
3086 printf("init returned: %d\n",retcode);
3087 }
3088 rf_markalldirty( raidPtrs[raidID] );
3089 if (cset->ac->clabel->root_partition==1) {
3090 /* everything configured just fine. Make a note
3091 that this set is eligible to be root. */
3092 cset->rootable = 1;
3093 }
3094 }
3095
3096 /* 5. Cleanup */
3097 free(config, M_RAIDFRAME);
3098
3099 *unit = raidID;
3100 return(retcode);
3101 }
3102