rf_netbsdkintf.c revision 1.58 1 /* $NetBSD: rf_netbsdkintf.c,v 1.58 2000/02/25 02:21:12 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_copyback.h"
141 #include "rf_dag.h"
142 #include "rf_dagflags.h"
143 #include "rf_diskqueue.h"
144 #include "rf_acctrace.h"
145 #include "rf_etimer.h"
146 #include "rf_general.h"
147 #include "rf_debugMem.h"
148 #include "rf_kintf.h"
149 #include "rf_options.h"
150 #include "rf_driver.h"
151 #include "rf_parityscan.h"
152 #include "rf_debugprint.h"
153 #include "rf_threadstuff.h"
154
/*
 * Debug verbosity for the kernel interface.  In DEBUG kernels
 * db1_printf() produces output only while this is greater than zero.
 */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.  The printf argument
 * list must be wrapped in an extra pair of parentheses.
 *
 * Both variants expand to exactly one do/while(0) statement.  That lets
 * "db1_printf((...));" be used as the body of an unbraced if/else: the
 * DEBUG form can no longer capture a following "else", and the non-DEBUG
 * form no longer leaves a "{ };" pair that detaches it.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
162
163 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
164
165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
166
167 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
168 * spare table */
169 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
170 * installation process */
171
172 /* prototypes */
173 static void KernelWakeupFunc(struct buf * bp);
174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
175 dev_t dev, RF_SectorNum_t startSect,
176 RF_SectorCount_t numSect, caddr_t buf,
177 void (*cbFunc) (struct buf *), void *cbArg,
178 int logBytesPerSector, struct proc * b_proc);
179 static int raidinit __P((dev_t, RF_Raid_t *, int));
180
181 void raidattach __P((int));
182 int raidsize __P((dev_t));
183 int raidopen __P((dev_t, int, int, struct proc *));
184 int raidclose __P((dev_t, int, int, struct proc *));
185 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
186 int raidwrite __P((dev_t, struct uio *, int));
187 int raidread __P((dev_t, struct uio *, int));
188 void raidstrategy __P((struct buf *));
189 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
190
191 /*
192 * Pilfered from ccd.c
193 */
194
195 struct raidbuf {
196 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
197 struct buf *rf_obp; /* ptr. to original I/O buf */
198 int rf_flags; /* misc. flags */
199 RF_DiskQueueData_t *req;/* the request that this was part of.. */
200 };
201
202
203 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
204 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
205
206 /* XXX Not sure if the following should be replacing the raidPtrs above,
207 or if it should be used in conjunction with that...
208 Note: Don't use sc_dev until the raidinit(0,_,_) call in
209 rf_auto_config_set() actually passes in a real dev_t! */
210
211 struct raid_softc {
212 int sc_flags; /* flags */
213 int sc_cflags; /* configuration flags */
214 size_t sc_size; /* size of the raid device */
215 dev_t sc_dev; /* our device.. */
216 char sc_xname[20]; /* XXX external name */
217 struct disk sc_dkdev; /* generic disk device info */
218 struct pool sc_cbufpool; /* component buffer pool */
219 struct buf_queue buf_queue; /* used for the device queue */
220 };
/* Bits stored in raid_softc.sc_flags. */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Unit number encoded in a dev_t. */
#define raidunit(x)	DISKUNIT(x)

/* Number of usable units; the entry points reject unit >= numraid. */
int     numraid = 0;
230
231 /*
232 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
233 * Be aware that large numbers can allow the driver to consume a lot of
234 * kernel memory, especially on writes, and in degraded mode reads.
235 *
236 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
237 * a single 64K write will typically require 64K for the old data,
238 * 64K for the old parity, and 64K for the new parity, for a total
239 * of 192K (if the parity buffer is not re-used immediately).
240 * Even if it is used immediately, that's still 128K, which when multiplied
241 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
242 *
243 * Now in degraded mode, for example, a 64K read on the above setup may
244 * require data reconstruction, which will require *all* of the 4 remaining
245 * disks to participate -- 4 * 32K/disk == 128K again.
246 */
247
/*
 * Number of simultaneous I/Os allowed per RAID device.  A value supplied
 * at build time takes precedence over this default.
 */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING	6
#endif

/* dev_t naming the RAW_PART partition of the unit that `dev' refers to. */
#define RAIDLABELDEV(dev) \
	(MAKEDISKDEV(major(dev), raidunit(dev), RAW_PART))
254
255 /* declared here, and made public, for the benefit of KVM stuff.. */
256 struct raid_softc *raid_softc;
257
258 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
259 struct disklabel *));
260 static void raidgetdisklabel __P((dev_t));
261 static void raidmakedisklabel __P((struct raid_softc *));
262
263 static int raidlock __P((struct raid_softc *));
264 static void raidunlock __P((struct raid_softc *));
265
266 static void rf_markalldirty __P((RF_Raid_t *));
267 void rf_mountroot_hook __P((struct device *));
268
269 struct device *raidrootdev;
270 struct cfdata cf_raidrootdev;
271 struct cfdriver cfdrv;
272 /* XXX these should be moved up */
273 #include "rf_configure.h"
274 #include <sys/reboot.h>
275
276 void rf_ReconThread __P((struct rf_recon_req *));
277 /* XXX what I want is: */
278 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
279 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
280 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
281 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
282 void rf_buildroothack __P((void *));
283
284 RF_AutoConfig_t *rf_find_raid_components __P((void));
285 void print_component_label __P((RF_ComponentLabel_t *));
286 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
287 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
288 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
289 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
290 RF_Raid_t *));
291 int rf_set_autoconfig __P((RF_Raid_t *, int));
292 int rf_set_rootpartition __P((RF_Raid_t *, int));
293 void rf_release_all_vps __P((RF_ConfigSet_t *));
294 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
295 int rf_have_enough_components __P((RF_ConfigSet_t *));
296 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
297
/*
 * Autoconfiguration switch, mostly for debugging: while it is 0,
 * raidattach() does not attempt any autoconfiguration.
 */
static int raidautoconfig = 0;

/* XXX ugly hack. */
const char *raid_rooty = "raid0";

extern struct device *booted_device;
303
304 void
305 raidattach(num)
306 int num;
307 {
308 int raidID;
309 int i, rc;
310 RF_AutoConfig_t *ac_list; /* autoconfig list */
311 RF_ConfigSet_t *config_sets;
312
313 #ifdef DEBUG
314 printf("raidattach: Asked for %d units\n", num);
315 #endif
316
317 if (num <= 0) {
318 #ifdef DIAGNOSTIC
319 panic("raidattach: count <= 0");
320 #endif
321 return;
322 }
323 /* This is where all the initialization stuff gets done. */
324
325 numraid = num;
326
327 /* Make some space for requested number of units... */
328
329 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
330 if (raidPtrs == NULL) {
331 panic("raidPtrs is NULL!!\n");
332 }
333
334 rc = rf_mutex_init(&rf_sparet_wait_mutex);
335 if (rc) {
336 RF_PANIC();
337 }
338
339 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
340
341 for (i = 0; i < num; i++)
342 raidPtrs[i] = NULL;
343 rc = rf_BootRaidframe();
344 if (rc == 0)
345 printf("Kernelized RAIDframe activated\n");
346 else
347 panic("Serious error booting RAID!!\n");
348
349 /* put together some datastructures like the CCD device does.. This
350 * lets us lock the device and what-not when it gets opened. */
351
352 raid_softc = (struct raid_softc *)
353 malloc(num * sizeof(struct raid_softc),
354 M_RAIDFRAME, M_NOWAIT);
355 if (raid_softc == NULL) {
356 printf("WARNING: no memory for RAIDframe driver\n");
357 return;
358 }
359
360 bzero(raid_softc, num * sizeof(struct raid_softc));
361
362 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
363 M_RAIDFRAME, M_NOWAIT);
364 if (raidrootdev == NULL) {
365 panic("No memory for RAIDframe driver!!?!?!\n");
366 }
367
368 for (raidID = 0; raidID < num; raidID++) {
369 BUFQ_INIT(&raid_softc[raidID].buf_queue);
370
371 raidrootdev[raidID].dv_class = DV_DISK;
372 raidrootdev[raidID].dv_cfdata = NULL;
373 raidrootdev[raidID].dv_unit = raidID;
374 raidrootdev[raidID].dv_parent = NULL;
375 raidrootdev[raidID].dv_flags = 0;
376 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
377
378 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
379 (RF_Raid_t *));
380 if (raidPtrs[raidID] == NULL) {
381 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
382 numraid = raidID;
383 return;
384 }
385 }
386
387 if (raidautoconfig) {
388 /* 1. locate all RAID components on the system */
389
390 #if DEBUG
391 printf("Searching for raid components...\n");
392 #endif
393 ac_list = rf_find_raid_components();
394
395 /* 2. sort them into their respective sets */
396
397 config_sets = rf_create_auto_sets(ac_list);
398
399 /* 3. evaluate each set and configure the valid ones
400 This gets done in rf_buildroothack() */
401
402 /* schedule the creation of the thread to do the
403 "/ on RAID" stuff */
404
405 kthread_create(rf_buildroothack,config_sets);
406
407 /* 4. make sure we get our mud.. I mean root.. hooks in.. */
408 /* XXXX pick raid0 for now... and this should be only done
409 if we find something that's bootable!!! */
410 #if 0
411 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
412 #endif
413 if (boothowto & RB_ASKNAME) {
414 /* We don't auto-config... */
415 } else {
416 /* They didn't ask, and we found something bootable... */
417 /* XXX pretend for now.. */
418 #if 0
419 booted_device = &raidrootdev[0];
420 #endif
421 }
422 }
423
424 }
425
426 void
427 rf_buildroothack(arg)
428 void *arg;
429 {
430 RF_ConfigSet_t *config_sets = arg;
431 RF_ConfigSet_t *cset;
432 RF_ConfigSet_t *next_cset;
433 int retcode;
434 int raidID;
435 int rootID;
436 int num_root;
437
438 num_root = 0;
439 cset = config_sets;
440 while(cset != NULL ) {
441 next_cset = cset->next;
442 if (rf_have_enough_components(cset) &&
443 cset->ac->clabel->autoconfigure==1) {
444 retcode = rf_auto_config_set(cset,&raidID);
445 if (!retcode) {
446 if (cset->rootable) {
447 rootID = raidID;
448 num_root++;
449 }
450 } else {
451 /* The autoconfig didn't work :( */
452 #if DEBUG
453 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
454 #endif
455 rf_release_all_vps(cset);
456 #if DEBUG
457 printf("Done cleanup\n");
458 #endif
459 }
460 } else {
461 /* we're not autoconfiguring this set...
462 release the associated resources */
463 #if DEBUG
464 printf("Releasing vp's\n");
465 #endif
466 rf_release_all_vps(cset);
467 #if DEBUG
468 printf("Done.\n");
469 #endif
470 }
471 /* cleanup */
472 #if DEBUG
473 printf("Cleaning up config set\n");
474 #endif
475 rf_cleanup_config_set(cset);
476 #if DEBUG
477 printf("Done cleanup\n");
478 #endif
479 cset = next_cset;
480 }
481 if (boothowto & RB_ASKNAME) {
482 /* We don't auto-config... */
483 } else {
484 /* They didn't ask, and we found something bootable... */
485 /* XXX pretend for now.. */
486 if (num_root == 1) {
487 #if 1
488 booted_device = &raidrootdev[rootID];
489 #endif
490 } else if (num_root > 1) {
491 /* we can't guess.. require the user to answer... */
492 boothowto |= RB_ASKNAME;
493 }
494 }
495 }
496
497
498 int
499 raidsize(dev)
500 dev_t dev;
501 {
502 struct raid_softc *rs;
503 struct disklabel *lp;
504 int part, unit, omask, size;
505
506 unit = raidunit(dev);
507 if (unit >= numraid)
508 return (-1);
509 rs = &raid_softc[unit];
510
511 if ((rs->sc_flags & RAIDF_INITED) == 0)
512 return (-1);
513
514 part = DISKPART(dev);
515 omask = rs->sc_dkdev.dk_openmask & (1 << part);
516 lp = rs->sc_dkdev.dk_label;
517
518 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
519 return (-1);
520
521 if (lp->d_partitions[part].p_fstype != FS_SWAP)
522 size = -1;
523 else
524 size = lp->d_partitions[part].p_size *
525 (lp->d_secsize / DEV_BSIZE);
526
527 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
528 return (-1);
529
530 return (size);
531
532 }
533
534 int
535 raiddump(dev, blkno, va, size)
536 dev_t dev;
537 daddr_t blkno;
538 caddr_t va;
539 size_t size;
540 {
541 /* Not implemented. */
542 return ENXIO;
543 }
544 /* ARGSUSED */
545 int
546 raidopen(dev, flags, fmt, p)
547 dev_t dev;
548 int flags, fmt;
549 struct proc *p;
550 {
551 int unit = raidunit(dev);
552 struct raid_softc *rs;
553 struct disklabel *lp;
554 int part, pmask;
555 int error = 0;
556
557 if (unit >= numraid)
558 return (ENXIO);
559 rs = &raid_softc[unit];
560
561 if ((error = raidlock(rs)) != 0)
562 return (error);
563 lp = rs->sc_dkdev.dk_label;
564
565 part = DISKPART(dev);
566 pmask = (1 << part);
567
568 db1_printf(("Opening raid device number: %d partition: %d\n",
569 unit, part));
570
571
572 if ((rs->sc_flags & RAIDF_INITED) &&
573 (rs->sc_dkdev.dk_openmask == 0))
574 raidgetdisklabel(dev);
575
576 /* make sure that this partition exists */
577
578 if (part != RAW_PART) {
579 db1_printf(("Not a raw partition..\n"));
580 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
581 ((part >= lp->d_npartitions) ||
582 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
583 error = ENXIO;
584 raidunlock(rs);
585 db1_printf(("Bailing out...\n"));
586 return (error);
587 }
588 }
589 /* Prevent this unit from being unconfigured while open. */
590 switch (fmt) {
591 case S_IFCHR:
592 rs->sc_dkdev.dk_copenmask |= pmask;
593 break;
594
595 case S_IFBLK:
596 rs->sc_dkdev.dk_bopenmask |= pmask;
597 break;
598 }
599
600 if ((rs->sc_dkdev.dk_openmask == 0) &&
601 ((rs->sc_flags & RAIDF_INITED) != 0)) {
602 /* First one... mark things as dirty... Note that we *MUST*
603 have done a configure before this. I DO NOT WANT TO BE
604 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
605 THAT THEY BELONG TOGETHER!!!!! */
606 /* XXX should check to see if we're only open for reading
607 here... If so, we needn't do this, but then need some
608 other way of keeping track of what's happened.. */
609
610 rf_markalldirty( raidPtrs[unit] );
611 }
612
613
614 rs->sc_dkdev.dk_openmask =
615 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
616
617 raidunlock(rs);
618
619 return (error);
620
621
622 }
623 /* ARGSUSED */
624 int
625 raidclose(dev, flags, fmt, p)
626 dev_t dev;
627 int flags, fmt;
628 struct proc *p;
629 {
630 int unit = raidunit(dev);
631 struct raid_softc *rs;
632 int error = 0;
633 int part;
634
635 if (unit >= numraid)
636 return (ENXIO);
637 rs = &raid_softc[unit];
638
639 if ((error = raidlock(rs)) != 0)
640 return (error);
641
642 part = DISKPART(dev);
643
644 /* ...that much closer to allowing unconfiguration... */
645 switch (fmt) {
646 case S_IFCHR:
647 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
648 break;
649
650 case S_IFBLK:
651 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
652 break;
653 }
654 rs->sc_dkdev.dk_openmask =
655 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
656
657 if ((rs->sc_dkdev.dk_openmask == 0) &&
658 ((rs->sc_flags & RAIDF_INITED) != 0)) {
659 /* Last one... device is not unconfigured yet.
660 Device shutdown has taken care of setting the
661 clean bits if RAIDF_INITED is not set
662 mark things as clean... */
663 #ifdef DEBUG
664 printf("Last one on raid%d. Updating status.\n",unit);
665 #endif
666 rf_update_component_labels( raidPtrs[unit] );
667 }
668
669 raidunlock(rs);
670 return (0);
671
672 }
673
674 void
675 raidstrategy(bp)
676 register struct buf *bp;
677 {
678 register int s;
679
680 unsigned int raidID = raidunit(bp->b_dev);
681 RF_Raid_t *raidPtr;
682 struct raid_softc *rs = &raid_softc[raidID];
683 struct disklabel *lp;
684 int wlabel;
685
686 if ((rs->sc_flags & RAIDF_INITED) ==0) {
687 bp->b_error = ENXIO;
688 bp->b_flags = B_ERROR;
689 bp->b_resid = bp->b_bcount;
690 biodone(bp);
691 return;
692 }
693 if (raidID >= numraid || !raidPtrs[raidID]) {
694 bp->b_error = ENODEV;
695 bp->b_flags |= B_ERROR;
696 bp->b_resid = bp->b_bcount;
697 biodone(bp);
698 return;
699 }
700 raidPtr = raidPtrs[raidID];
701 if (!raidPtr->valid) {
702 bp->b_error = ENODEV;
703 bp->b_flags |= B_ERROR;
704 bp->b_resid = bp->b_bcount;
705 biodone(bp);
706 return;
707 }
708 if (bp->b_bcount == 0) {
709 db1_printf(("b_bcount is zero..\n"));
710 biodone(bp);
711 return;
712 }
713 lp = rs->sc_dkdev.dk_label;
714
715 /*
716 * Do bounds checking and adjust transfer. If there's an
717 * error, the bounds check will flag that for us.
718 */
719
720 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
721 if (DISKPART(bp->b_dev) != RAW_PART)
722 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
723 db1_printf(("Bounds check failed!!:%d %d\n",
724 (int) bp->b_blkno, (int) wlabel));
725 biodone(bp);
726 return;
727 }
728 s = splbio();
729
730 bp->b_resid = 0;
731
732 /* stuff it onto our queue */
733 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
734
735 raidstart(raidPtrs[raidID]);
736
737 splx(s);
738 }
739 /* ARGSUSED */
740 int
741 raidread(dev, uio, flags)
742 dev_t dev;
743 struct uio *uio;
744 int flags;
745 {
746 int unit = raidunit(dev);
747 struct raid_softc *rs;
748 int part;
749
750 if (unit >= numraid)
751 return (ENXIO);
752 rs = &raid_softc[unit];
753
754 if ((rs->sc_flags & RAIDF_INITED) == 0)
755 return (ENXIO);
756 part = DISKPART(dev);
757
758 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
759
760 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
761
762 }
763 /* ARGSUSED */
764 int
765 raidwrite(dev, uio, flags)
766 dev_t dev;
767 struct uio *uio;
768 int flags;
769 {
770 int unit = raidunit(dev);
771 struct raid_softc *rs;
772
773 if (unit >= numraid)
774 return (ENXIO);
775 rs = &raid_softc[unit];
776
777 if ((rs->sc_flags & RAIDF_INITED) == 0)
778 return (ENXIO);
779 db1_printf(("raidwrite\n"));
780 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
781
782 }
783
784 int
785 raidioctl(dev, cmd, data, flag, p)
786 dev_t dev;
787 u_long cmd;
788 caddr_t data;
789 int flag;
790 struct proc *p;
791 {
792 int unit = raidunit(dev);
793 int error = 0;
794 int part, pmask;
795 struct raid_softc *rs;
796 RF_Config_t *k_cfg, *u_cfg;
797 RF_Raid_t *raidPtr;
798 RF_RaidDisk_t *diskPtr;
799 RF_AccTotals_t *totals;
800 RF_DeviceConfig_t *d_cfg, **ucfgp;
801 u_char *specific_buf;
802 int retcode = 0;
803 int row;
804 int column;
805 struct rf_recon_req *rrcopy, *rr;
806 RF_ComponentLabel_t *clabel;
807 RF_ComponentLabel_t ci_label;
808 RF_ComponentLabel_t **clabel_ptr;
809 RF_SingleComponent_t *sparePtr,*componentPtr;
810 RF_SingleComponent_t hot_spare;
811 RF_SingleComponent_t component;
812 int i, j, d;
813
814 if (unit >= numraid)
815 return (ENXIO);
816 rs = &raid_softc[unit];
817 raidPtr = raidPtrs[unit];
818
819 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
820 (int) DISKPART(dev), (int) unit, (int) cmd));
821
822 /* Must be open for writes for these commands... */
823 switch (cmd) {
824 case DIOCSDINFO:
825 case DIOCWDINFO:
826 case DIOCWLABEL:
827 if ((flag & FWRITE) == 0)
828 return (EBADF);
829 }
830
831 /* Must be initialized for these... */
832 switch (cmd) {
833 case DIOCGDINFO:
834 case DIOCSDINFO:
835 case DIOCWDINFO:
836 case DIOCGPART:
837 case DIOCWLABEL:
838 case DIOCGDEFLABEL:
839 case RAIDFRAME_SHUTDOWN:
840 case RAIDFRAME_REWRITEPARITY:
841 case RAIDFRAME_GET_INFO:
842 case RAIDFRAME_RESET_ACCTOTALS:
843 case RAIDFRAME_GET_ACCTOTALS:
844 case RAIDFRAME_KEEP_ACCTOTALS:
845 case RAIDFRAME_GET_SIZE:
846 case RAIDFRAME_FAIL_DISK:
847 case RAIDFRAME_COPYBACK:
848 case RAIDFRAME_CHECK_RECON_STATUS:
849 case RAIDFRAME_GET_COMPONENT_LABEL:
850 case RAIDFRAME_SET_COMPONENT_LABEL:
851 case RAIDFRAME_ADD_HOT_SPARE:
852 case RAIDFRAME_REMOVE_HOT_SPARE:
853 case RAIDFRAME_INIT_LABELS:
854 case RAIDFRAME_REBUILD_IN_PLACE:
855 case RAIDFRAME_CHECK_PARITY:
856 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
857 case RAIDFRAME_CHECK_COPYBACK_STATUS:
858 case RAIDFRAME_SET_AUTOCONFIG:
859 case RAIDFRAME_SET_ROOT:
860 if ((rs->sc_flags & RAIDF_INITED) == 0)
861 return (ENXIO);
862 }
863
864 switch (cmd) {
865
866 /* configure the system */
867 case RAIDFRAME_CONFIGURE:
868
869 if (raidPtr->valid) {
870 /* There is a valid RAID set running on this unit! */
871 printf("raid%d: Device already configured!\n",unit);
872 }
873
874 /* copy-in the configuration information */
875 /* data points to a pointer to the configuration structure */
876
877 u_cfg = *((RF_Config_t **) data);
878 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
879 if (k_cfg == NULL) {
880 return (ENOMEM);
881 }
882 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
883 sizeof(RF_Config_t));
884 if (retcode) {
885 RF_Free(k_cfg, sizeof(RF_Config_t));
886 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
887 retcode));
888 return (retcode);
889 }
890 /* allocate a buffer for the layout-specific data, and copy it
891 * in */
892 if (k_cfg->layoutSpecificSize) {
893 if (k_cfg->layoutSpecificSize > 10000) {
894 /* sanity check */
895 RF_Free(k_cfg, sizeof(RF_Config_t));
896 return (EINVAL);
897 }
898 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
899 (u_char *));
900 if (specific_buf == NULL) {
901 RF_Free(k_cfg, sizeof(RF_Config_t));
902 return (ENOMEM);
903 }
904 retcode = copyin(k_cfg->layoutSpecific,
905 (caddr_t) specific_buf,
906 k_cfg->layoutSpecificSize);
907 if (retcode) {
908 RF_Free(k_cfg, sizeof(RF_Config_t));
909 RF_Free(specific_buf,
910 k_cfg->layoutSpecificSize);
911 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
912 retcode));
913 return (retcode);
914 }
915 } else
916 specific_buf = NULL;
917 k_cfg->layoutSpecific = specific_buf;
918
919 /* should do some kind of sanity check on the configuration.
920 * Store the sum of all the bytes in the last byte? */
921
922 /* configure the system */
923
924 /*
925 * Clear the entire RAID descriptor, just to make sure
926 * there is no stale data left in the case of a
927 * reconfiguration
928 */
929 bzero((char *) raidPtr, sizeof(RF_Raid_t));
930 raidPtr->raidid = unit;
931
932 retcode = rf_Configure(raidPtr, k_cfg, NULL);
933
934 if (retcode == 0) {
935
936 /* allow this many simultaneous IO's to
937 this RAID device */
938 raidPtr->openings = RAIDOUTSTANDING;
939
940 retcode = raidinit(dev, raidPtr, unit);
941 rf_markalldirty( raidPtr );
942 }
943 /* free the buffers. No return code here. */
944 if (k_cfg->layoutSpecificSize) {
945 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
946 }
947 RF_Free(k_cfg, sizeof(RF_Config_t));
948
949 return (retcode);
950
951 /* shutdown the system */
952 case RAIDFRAME_SHUTDOWN:
953
954 if ((error = raidlock(rs)) != 0)
955 return (error);
956
957 /*
958 * If somebody has a partition mounted, we shouldn't
959 * shutdown.
960 */
961
962 part = DISKPART(dev);
963 pmask = (1 << part);
964 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
965 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
966 (rs->sc_dkdev.dk_copenmask & pmask))) {
967 raidunlock(rs);
968 return (EBUSY);
969 }
970
971 retcode = rf_Shutdown(raidPtr);
972
973 pool_destroy(&rs->sc_cbufpool);
974
975 /* It's no longer initialized... */
976 rs->sc_flags &= ~RAIDF_INITED;
977
978 /* Detach the disk. */
979 disk_detach(&rs->sc_dkdev);
980
981 raidunlock(rs);
982
983 return (retcode);
984 case RAIDFRAME_GET_COMPONENT_LABEL:
985 clabel_ptr = (RF_ComponentLabel_t **) data;
986 /* need to read the component label for the disk indicated
987 by row,column in clabel */
988
989 /* For practice, let's get it directly from disk, rather
990 than from the in-core copy */
991 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
992 (RF_ComponentLabel_t *));
993 if (clabel == NULL)
994 return (ENOMEM);
995
996 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
997
998 retcode = copyin( *clabel_ptr, clabel,
999 sizeof(RF_ComponentLabel_t));
1000
1001 if (retcode) {
1002 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1003 return(retcode);
1004 }
1005
1006 row = clabel->row;
1007 column = clabel->column;
1008
1009 if ((row < 0) || (row >= raidPtr->numRow) ||
1010 (column < 0) || (column >= raidPtr->numCol)) {
1011 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1012 return(EINVAL);
1013 }
1014
1015 raidread_component_label(raidPtr->Disks[row][column].dev,
1016 raidPtr->raid_cinfo[row][column].ci_vp,
1017 clabel );
1018
1019 retcode = copyout((caddr_t) clabel,
1020 (caddr_t) *clabel_ptr,
1021 sizeof(RF_ComponentLabel_t));
1022 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1023 return (retcode);
1024
1025 case RAIDFRAME_SET_COMPONENT_LABEL:
1026 clabel = (RF_ComponentLabel_t *) data;
1027
1028 /* XXX check the label for valid stuff... */
1029 /* Note that some things *should not* get modified --
1030 the user should be re-initing the labels instead of
1031 trying to patch things.
1032 */
1033
1034 printf("Got component label:\n");
1035 printf("Version: %d\n",clabel->version);
1036 printf("Serial Number: %d\n",clabel->serial_number);
1037 printf("Mod counter: %d\n",clabel->mod_counter);
1038 printf("Row: %d\n", clabel->row);
1039 printf("Column: %d\n", clabel->column);
1040 printf("Num Rows: %d\n", clabel->num_rows);
1041 printf("Num Columns: %d\n", clabel->num_columns);
1042 printf("Clean: %d\n", clabel->clean);
1043 printf("Status: %d\n", clabel->status);
1044
1045 row = clabel->row;
1046 column = clabel->column;
1047
1048 if ((row < 0) || (row >= raidPtr->numRow) ||
1049 (column < 0) || (column >= raidPtr->numCol)) {
1050 return(EINVAL);
1051 }
1052
1053 /* XXX this isn't allowed to do anything for now :-) */
1054
1055 /* XXX and before it is, we need to fill in the rest
1056 of the fields!?!?!?! */
1057 #if 0
1058 raidwrite_component_label(
1059 raidPtr->Disks[row][column].dev,
1060 raidPtr->raid_cinfo[row][column].ci_vp,
1061 clabel );
1062 #endif
1063 return (0);
1064
1065 case RAIDFRAME_INIT_LABELS:
1066 clabel = (RF_ComponentLabel_t *) data;
1067 /*
1068 we only want the serial number from
1069 the above. We get all the rest of the information
1070 from the config that was used to create this RAID
1071 set.
1072 */
1073
1074 raidPtr->serial_number = clabel->serial_number;
1075
1076 raid_init_component_label(raidPtr, &ci_label);
1077 ci_label.serial_number = clabel->serial_number;
1078
1079 for(row=0;row<raidPtr->numRow;row++) {
1080 ci_label.row = row;
1081 for(column=0;column<raidPtr->numCol;column++) {
1082 diskPtr = &raidPtr->Disks[row][column];
1083 ci_label.partitionSize = diskPtr->partitionSize;
1084 ci_label.column = column;
1085 raidwrite_component_label(
1086 raidPtr->Disks[row][column].dev,
1087 raidPtr->raid_cinfo[row][column].ci_vp,
1088 &ci_label );
1089 }
1090 }
1091
1092 return (retcode);
1093 case RAIDFRAME_SET_AUTOCONFIG:
1094 d = rf_set_autoconfig(raidPtr, *data);
1095 printf("New autoconfig value is: %d\n", d);
1096 *data = d;
1097 return (retcode);
1098
1099 case RAIDFRAME_SET_ROOT:
1100 d = rf_set_rootpartition(raidPtr, *data);
1101 printf("New rootpartition value is: %d\n", d);
1102 *data = d;
1103 return (retcode);
1104
1105 /* initialize all parity */
1106 case RAIDFRAME_REWRITEPARITY:
1107
1108 if (raidPtr->Layout.map->faultsTolerated == 0) {
1109 /* Parity for RAID 0 is trivially correct */
1110 raidPtr->parity_good = RF_RAID_CLEAN;
1111 return(0);
1112 }
1113
1114 if (raidPtr->parity_rewrite_in_progress == 1) {
1115 /* Re-write is already in progress! */
1116 return(EINVAL);
1117 }
1118
1119 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1120 rf_RewriteParityThread,
1121 raidPtr,"raid_parity");
1122 return (retcode);
1123
1124
1125 case RAIDFRAME_ADD_HOT_SPARE:
1126 sparePtr = (RF_SingleComponent_t *) data;
1127 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1128 printf("Adding spare\n");
1129 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1130 return(retcode);
1131
1132 case RAIDFRAME_REMOVE_HOT_SPARE:
1133 return(retcode);
1134
1135 case RAIDFRAME_REBUILD_IN_PLACE:
1136
1137 if (raidPtr->Layout.map->faultsTolerated == 0) {
1138 /* Can't do this on a RAID 0!! */
1139 return(EINVAL);
1140 }
1141
1142 if (raidPtr->recon_in_progress == 1) {
1143 /* a reconstruct is already in progress! */
1144 return(EINVAL);
1145 }
1146
1147 componentPtr = (RF_SingleComponent_t *) data;
1148 memcpy( &component, componentPtr,
1149 sizeof(RF_SingleComponent_t));
1150 row = component.row;
1151 column = component.column;
1152 printf("Rebuild: %d %d\n",row, column);
1153 if ((row < 0) || (row >= raidPtr->numRow) ||
1154 (column < 0) || (column >= raidPtr->numCol)) {
1155 return(EINVAL);
1156 }
1157
1158 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1159 if (rrcopy == NULL)
1160 return(ENOMEM);
1161
1162 rrcopy->raidPtr = (void *) raidPtr;
1163 rrcopy->row = row;
1164 rrcopy->col = column;
1165
1166 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1167 rf_ReconstructInPlaceThread,
1168 rrcopy,"raid_reconip");
1169 return(retcode);
1170
1171 case RAIDFRAME_GET_INFO:
1172 if (!raidPtr->valid)
1173 return (ENODEV);
1174 ucfgp = (RF_DeviceConfig_t **) data;
1175 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1176 (RF_DeviceConfig_t *));
1177 if (d_cfg == NULL)
1178 return (ENOMEM);
1179 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1180 d_cfg->rows = raidPtr->numRow;
1181 d_cfg->cols = raidPtr->numCol;
1182 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1183 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1184 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1185 return (ENOMEM);
1186 }
1187 d_cfg->nspares = raidPtr->numSpare;
1188 if (d_cfg->nspares >= RF_MAX_DISKS) {
1189 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1190 return (ENOMEM);
1191 }
1192 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1193 d = 0;
1194 for (i = 0; i < d_cfg->rows; i++) {
1195 for (j = 0; j < d_cfg->cols; j++) {
1196 d_cfg->devs[d] = raidPtr->Disks[i][j];
1197 d++;
1198 }
1199 }
1200 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1201 d_cfg->spares[i] = raidPtr->Disks[0][j];
1202 }
1203 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1204 sizeof(RF_DeviceConfig_t));
1205 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1206
1207 return (retcode);
1208
1209 case RAIDFRAME_CHECK_PARITY:
1210 *(int *) data = raidPtr->parity_good;
1211 return (0);
1212
1213 case RAIDFRAME_RESET_ACCTOTALS:
1214 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1215 return (0);
1216
1217 case RAIDFRAME_GET_ACCTOTALS:
1218 totals = (RF_AccTotals_t *) data;
1219 *totals = raidPtr->acc_totals;
1220 return (0);
1221
1222 case RAIDFRAME_KEEP_ACCTOTALS:
1223 raidPtr->keep_acc_totals = *(int *)data;
1224 return (0);
1225
1226 case RAIDFRAME_GET_SIZE:
1227 *(int *) data = raidPtr->totalSectors;
1228 return (0);
1229
1230 /* fail a disk & optionally start reconstruction */
1231 case RAIDFRAME_FAIL_DISK:
1232
1233 if (raidPtr->Layout.map->faultsTolerated == 0) {
1234 /* Can't do this on a RAID 0!! */
1235 return(EINVAL);
1236 }
1237
1238 rr = (struct rf_recon_req *) data;
1239
1240 if (rr->row < 0 || rr->row >= raidPtr->numRow
1241 || rr->col < 0 || rr->col >= raidPtr->numCol)
1242 return (EINVAL);
1243
1244 printf("raid%d: Failing the disk: row: %d col: %d\n",
1245 unit, rr->row, rr->col);
1246
1247 /* make a copy of the recon request so that we don't rely on
1248 * the user's buffer */
1249 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1250 if (rrcopy == NULL)
1251 return(ENOMEM);
1252 bcopy(rr, rrcopy, sizeof(*rr));
1253 rrcopy->raidPtr = (void *) raidPtr;
1254
1255 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1256 rf_ReconThread,
1257 rrcopy,"raid_recon");
1258 return (0);
1259
1260 /* invoke a copyback operation after recon on whatever disk
1261 * needs it, if any */
1262 case RAIDFRAME_COPYBACK:
1263
1264 if (raidPtr->Layout.map->faultsTolerated == 0) {
1265 /* This makes no sense on a RAID 0!! */
1266 return(EINVAL);
1267 }
1268
1269 if (raidPtr->copyback_in_progress == 1) {
1270 /* Copyback is already in progress! */
1271 return(EINVAL);
1272 }
1273
1274 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1275 rf_CopybackThread,
1276 raidPtr,"raid_copyback");
1277 return (retcode);
1278
1279 /* return the percentage completion of reconstruction */
1280 case RAIDFRAME_CHECK_RECON_STATUS:
1281 if (raidPtr->Layout.map->faultsTolerated == 0) {
1282 /* This makes no sense on a RAID 0 */
1283 return(EINVAL);
1284 }
1285 row = 0; /* XXX we only consider a single row... */
1286 if (raidPtr->status[row] != rf_rs_reconstructing)
1287 *(int *) data = 100;
1288 else
1289 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1290 return (0);
1291
1292 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1293 if (raidPtr->Layout.map->faultsTolerated == 0) {
1294 /* This makes no sense on a RAID 0 */
1295 return(EINVAL);
1296 }
1297 if (raidPtr->parity_rewrite_in_progress == 1) {
1298 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1299 } else {
1300 *(int *) data = 100;
1301 }
1302 return (0);
1303
1304 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1305 if (raidPtr->Layout.map->faultsTolerated == 0) {
1306 /* This makes no sense on a RAID 0 */
1307 return(EINVAL);
1308 }
1309 if (raidPtr->copyback_in_progress == 1) {
1310 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1311 raidPtr->Layout.numStripe;
1312 } else {
1313 *(int *) data = 100;
1314 }
1315 return (0);
1316
1317
1318 /* the sparetable daemon calls this to wait for the kernel to
1319 * need a spare table. this ioctl does not return until a
1320 * spare table is needed. XXX -- calling mpsleep here in the
1321 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1322 * -- I should either compute the spare table in the kernel,
1323 * or have a different -- XXX XXX -- interface (a different
1324 * character device) for delivering the table -- XXX */
1325 #if 0
1326 case RAIDFRAME_SPARET_WAIT:
1327 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1328 while (!rf_sparet_wait_queue)
1329 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1330 waitreq = rf_sparet_wait_queue;
1331 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1332 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1333
1334 /* structure assignment */
1335 *((RF_SparetWait_t *) data) = *waitreq;
1336
1337 RF_Free(waitreq, sizeof(*waitreq));
1338 return (0);
1339
1340 /* wakes up a process waiting on SPARET_WAIT and puts an error
1341 * code in it that will cause the dameon to exit */
1342 case RAIDFRAME_ABORT_SPARET_WAIT:
1343 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1344 waitreq->fcol = -1;
1345 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1346 waitreq->next = rf_sparet_wait_queue;
1347 rf_sparet_wait_queue = waitreq;
1348 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1349 wakeup(&rf_sparet_wait_queue);
1350 return (0);
1351
1352 /* used by the spare table daemon to deliver a spare table
1353 * into the kernel */
1354 case RAIDFRAME_SEND_SPARET:
1355
1356 /* install the spare table */
1357 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1358
1359 /* respond to the requestor. the return status of the spare
1360 * table installation is passed in the "fcol" field */
1361 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1362 waitreq->fcol = retcode;
1363 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1364 waitreq->next = rf_sparet_resp_queue;
1365 rf_sparet_resp_queue = waitreq;
1366 wakeup(&rf_sparet_resp_queue);
1367 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1368
1369 return (retcode);
1370 #endif
1371
1372 default:
1373 break; /* fall through to the os-specific code below */
1374
1375 }
1376
1377 if (!raidPtr->valid)
1378 return (EINVAL);
1379
1380 /*
1381 * Add support for "regular" device ioctls here.
1382 */
1383
1384 switch (cmd) {
1385 case DIOCGDINFO:
1386 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1387 break;
1388
1389 case DIOCGPART:
1390 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1391 ((struct partinfo *) data)->part =
1392 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1393 break;
1394
1395 case DIOCWDINFO:
1396 case DIOCSDINFO:
1397 if ((error = raidlock(rs)) != 0)
1398 return (error);
1399
1400 rs->sc_flags |= RAIDF_LABELLING;
1401
1402 error = setdisklabel(rs->sc_dkdev.dk_label,
1403 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1404 if (error == 0) {
1405 if (cmd == DIOCWDINFO)
1406 error = writedisklabel(RAIDLABELDEV(dev),
1407 raidstrategy, rs->sc_dkdev.dk_label,
1408 rs->sc_dkdev.dk_cpulabel);
1409 }
1410 rs->sc_flags &= ~RAIDF_LABELLING;
1411
1412 raidunlock(rs);
1413
1414 if (error)
1415 return (error);
1416 break;
1417
1418 case DIOCWLABEL:
1419 if (*(int *) data != 0)
1420 rs->sc_flags |= RAIDF_WLABEL;
1421 else
1422 rs->sc_flags &= ~RAIDF_WLABEL;
1423 break;
1424
1425 case DIOCGDEFLABEL:
1426 raidgetdefaultlabel(raidPtr, rs,
1427 (struct disklabel *) data);
1428 break;
1429
1430 default:
1431 retcode = ENOTTY;
1432 }
1433 return (retcode);
1434
1435 }
1436
1437
1438 /* raidinit -- complete the rest of the initialization for the
1439 RAIDframe device. */
1440
1441
1442 static int
1443 raidinit(dev, raidPtr, unit)
1444 dev_t dev;
1445 RF_Raid_t *raidPtr;
1446 int unit;
1447 {
1448 int retcode;
1449 struct raid_softc *rs;
1450
1451 retcode = 0;
1452
1453 rs = &raid_softc[unit];
1454 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1455 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1456
1457
1458 /* XXX should check return code first... */
1459 rs->sc_flags |= RAIDF_INITED;
1460
1461 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1462
1463 rs->sc_dkdev.dk_name = rs->sc_xname;
1464
1465 /* disk_attach actually creates space for the CPU disklabel, among
1466 * other things, so it's critical to call this *BEFORE* we try putzing
1467 * with disklabels. */
1468
1469 disk_attach(&rs->sc_dkdev);
1470
1471 /* XXX There may be a weird interaction here between this, and
1472 * protectedSectors, as used in RAIDframe. */
1473
1474 rs->sc_size = raidPtr->totalSectors;
1475 rs->sc_dev = dev;
1476
1477 return (retcode);
1478 }
1479
1480 /* wake up the daemon & tell it to get us a spare table
1481 * XXX
1482 * the entries in the queues should be tagged with the raidPtr
1483 * so that in the extremely rare case that two recons happen at once,
1484 * we know for which device were requesting a spare table
1485 * XXX
1486 *
1487 * XXX This code is not currently used. GO
1488 */
1489 int
1490 rf_GetSpareTableFromDaemon(req)
1491 RF_SparetWait_t *req;
1492 {
1493 int retcode;
1494
1495 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1496 req->next = rf_sparet_wait_queue;
1497 rf_sparet_wait_queue = req;
1498 wakeup(&rf_sparet_wait_queue);
1499
1500 /* mpsleep unlocks the mutex */
1501 while (!rf_sparet_resp_queue) {
1502 tsleep(&rf_sparet_resp_queue, PRIBIO,
1503 "raidframe getsparetable", 0);
1504 }
1505 req = rf_sparet_resp_queue;
1506 rf_sparet_resp_queue = req->next;
1507 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1508
1509 retcode = req->fcol;
1510 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1511 * alloc'd */
1512 return (retcode);
1513 }
1514
1515 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1516 * bp & passes it down.
1517 * any calls originating in the kernel must use non-blocking I/O
1518 * do some extra sanity checking to return "appropriate" error values for
1519 * certain conditions (to make some standard utilities work)
1520 *
1521 * Formerly known as: rf_DoAccessKernel
1522 */
1523 void
1524 raidstart(raidPtr)
1525 RF_Raid_t *raidPtr;
1526 {
1527 RF_SectorCount_t num_blocks, pb, sum;
1528 RF_RaidAddr_t raid_addr;
1529 int retcode;
1530 struct partition *pp;
1531 daddr_t blocknum;
1532 int unit;
1533 struct raid_softc *rs;
1534 int do_async;
1535 struct buf *bp;
1536
1537 unit = raidPtr->raidid;
1538 rs = &raid_softc[unit];
1539
1540 /* quick check to see if anything has died recently */
1541 RF_LOCK_MUTEX(raidPtr->mutex);
1542 if (raidPtr->numNewFailures > 0) {
1543 rf_update_component_labels(raidPtr);
1544 raidPtr->numNewFailures--;
1545 }
1546 RF_UNLOCK_MUTEX(raidPtr->mutex);
1547
1548 /* Check to see if we're at the limit... */
1549 RF_LOCK_MUTEX(raidPtr->mutex);
1550 while (raidPtr->openings > 0) {
1551 RF_UNLOCK_MUTEX(raidPtr->mutex);
1552
1553 /* get the next item, if any, from the queue */
1554 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1555 /* nothing more to do */
1556 return;
1557 }
1558 BUFQ_REMOVE(&rs->buf_queue, bp);
1559
1560 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1561 * partition.. Need to make it absolute to the underlying
1562 * device.. */
1563
1564 blocknum = bp->b_blkno;
1565 if (DISKPART(bp->b_dev) != RAW_PART) {
1566 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1567 blocknum += pp->p_offset;
1568 }
1569
1570 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1571 (int) blocknum));
1572
1573 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1574 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1575
1576 /* *THIS* is where we adjust what block we're going to...
1577 * but DO NOT TOUCH bp->b_blkno!!! */
1578 raid_addr = blocknum;
1579
1580 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1581 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1582 sum = raid_addr + num_blocks + pb;
1583 if (1 || rf_debugKernelAccess) {
1584 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1585 (int) raid_addr, (int) sum, (int) num_blocks,
1586 (int) pb, (int) bp->b_resid));
1587 }
1588 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1589 || (sum < num_blocks) || (sum < pb)) {
1590 bp->b_error = ENOSPC;
1591 bp->b_flags |= B_ERROR;
1592 bp->b_resid = bp->b_bcount;
1593 biodone(bp);
1594 RF_LOCK_MUTEX(raidPtr->mutex);
1595 continue;
1596 }
1597 /*
1598 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1599 */
1600
1601 if (bp->b_bcount & raidPtr->sectorMask) {
1602 bp->b_error = EINVAL;
1603 bp->b_flags |= B_ERROR;
1604 bp->b_resid = bp->b_bcount;
1605 biodone(bp);
1606 RF_LOCK_MUTEX(raidPtr->mutex);
1607 continue;
1608
1609 }
1610 db1_printf(("Calling DoAccess..\n"));
1611
1612
1613 RF_LOCK_MUTEX(raidPtr->mutex);
1614 raidPtr->openings--;
1615 RF_UNLOCK_MUTEX(raidPtr->mutex);
1616
1617 /*
1618 * Everything is async.
1619 */
1620 do_async = 1;
1621
1622 /* don't ever condition on bp->b_flags & B_WRITE.
1623 * always condition on B_READ instead */
1624
1625 /* XXX we're still at splbio() here... do we *really*
1626 need to be? */
1627
1628
1629 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1630 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1631 do_async, raid_addr, num_blocks,
1632 bp->b_un.b_addr, bp, NULL, NULL,
1633 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1634
1635
1636 RF_LOCK_MUTEX(raidPtr->mutex);
1637 }
1638 RF_UNLOCK_MUTEX(raidPtr->mutex);
1639 }
1640
1641
1642
1643
1644 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1645
1646 int
1647 rf_DispatchKernelIO(queue, req)
1648 RF_DiskQueue_t *queue;
1649 RF_DiskQueueData_t *req;
1650 {
1651 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1652 struct buf *bp;
1653 struct raidbuf *raidbp = NULL;
1654 struct raid_softc *rs;
1655 int unit;
1656 int s;
1657
1658 s=0;
1659 /* s = splbio();*/ /* want to test this */
1660 /* XXX along with the vnode, we also need the softc associated with
1661 * this device.. */
1662
1663 req->queue = queue;
1664
1665 unit = queue->raidPtr->raidid;
1666
1667 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1668
1669 if (unit >= numraid) {
1670 printf("Invalid unit number: %d %d\n", unit, numraid);
1671 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1672 }
1673 rs = &raid_softc[unit];
1674
1675 /* XXX is this the right place? */
1676 disk_busy(&rs->sc_dkdev);
1677
1678 bp = req->bp;
1679 #if 1
1680 /* XXX when there is a physical disk failure, someone is passing us a
1681 * buffer that contains old stuff!! Attempt to deal with this problem
1682 * without taking a performance hit... (not sure where the real bug
1683 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1684
1685 if (bp->b_flags & B_ERROR) {
1686 bp->b_flags &= ~B_ERROR;
1687 }
1688 if (bp->b_error != 0) {
1689 bp->b_error = 0;
1690 }
1691 #endif
1692 raidbp = RAIDGETBUF(rs);
1693
1694 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1695
1696 /*
1697 * context for raidiodone
1698 */
1699 raidbp->rf_obp = bp;
1700 raidbp->req = req;
1701
1702 LIST_INIT(&raidbp->rf_buf.b_dep);
1703
1704 switch (req->type) {
1705 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1706 /* XXX need to do something extra here.. */
1707 /* I'm leaving this in, as I've never actually seen it used,
1708 * and I'd like folks to report it... GO */
1709 printf(("WAKEUP CALLED\n"));
1710 queue->numOutstanding++;
1711
1712 /* XXX need to glue the original buffer into this?? */
1713
1714 KernelWakeupFunc(&raidbp->rf_buf);
1715 break;
1716
1717 case RF_IO_TYPE_READ:
1718 case RF_IO_TYPE_WRITE:
1719
1720 if (req->tracerec) {
1721 RF_ETIMER_START(req->tracerec->timer);
1722 }
1723 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1724 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1725 req->sectorOffset, req->numSector,
1726 req->buf, KernelWakeupFunc, (void *) req,
1727 queue->raidPtr->logBytesPerSector, req->b_proc);
1728
1729 if (rf_debugKernelAccess) {
1730 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1731 (long) bp->b_blkno));
1732 }
1733 queue->numOutstanding++;
1734 queue->last_deq_sector = req->sectorOffset;
1735 /* acc wouldn't have been let in if there were any pending
1736 * reqs at any other priority */
1737 queue->curPriority = req->priority;
1738
1739 db1_printf(("Going for %c to unit %d row %d col %d\n",
1740 req->type, unit, queue->row, queue->col));
1741 db1_printf(("sector %d count %d (%d bytes) %d\n",
1742 (int) req->sectorOffset, (int) req->numSector,
1743 (int) (req->numSector <<
1744 queue->raidPtr->logBytesPerSector),
1745 (int) queue->raidPtr->logBytesPerSector));
1746 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1747 raidbp->rf_buf.b_vp->v_numoutput++;
1748 }
1749 VOP_STRATEGY(&raidbp->rf_buf);
1750
1751 break;
1752
1753 default:
1754 panic("bad req->type in rf_DispatchKernelIO");
1755 }
1756 db1_printf(("Exiting from DispatchKernelIO\n"));
1757 /* splx(s); */ /* want to test this */
1758 return (0);
1759 }
1760 /* this is the callback function associated with a I/O invoked from
1761 kernel code.
1762 */
1763 static void
1764 KernelWakeupFunc(vbp)
1765 struct buf *vbp;
1766 {
1767 RF_DiskQueueData_t *req = NULL;
1768 RF_DiskQueue_t *queue;
1769 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1770 struct buf *bp;
1771 struct raid_softc *rs;
1772 int unit;
1773 register int s;
1774
1775 s = splbio();
1776 db1_printf(("recovering the request queue:\n"));
1777 req = raidbp->req;
1778
1779 bp = raidbp->rf_obp;
1780
1781 queue = (RF_DiskQueue_t *) req->queue;
1782
1783 if (raidbp->rf_buf.b_flags & B_ERROR) {
1784 bp->b_flags |= B_ERROR;
1785 bp->b_error = raidbp->rf_buf.b_error ?
1786 raidbp->rf_buf.b_error : EIO;
1787 }
1788
1789 /* XXX methinks this could be wrong... */
1790 #if 1
1791 bp->b_resid = raidbp->rf_buf.b_resid;
1792 #endif
1793
1794 if (req->tracerec) {
1795 RF_ETIMER_STOP(req->tracerec->timer);
1796 RF_ETIMER_EVAL(req->tracerec->timer);
1797 RF_LOCK_MUTEX(rf_tracing_mutex);
1798 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1799 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1800 req->tracerec->num_phys_ios++;
1801 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1802 }
1803 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1804
1805 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1806
1807
1808 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1809 * ballistic, and mark the component as hosed... */
1810
1811 if (bp->b_flags & B_ERROR) {
1812 /* Mark the disk as dead */
1813 /* but only mark it once... */
1814 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1815 rf_ds_optimal) {
1816 printf("raid%d: IO Error. Marking %s as failed.\n",
1817 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1818 queue->raidPtr->Disks[queue->row][queue->col].status =
1819 rf_ds_failed;
1820 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1821 queue->raidPtr->numFailures++;
1822 queue->raidPtr->numNewFailures++;
1823 /* XXX here we should bump the version number for each component, and write that data out */
1824 } else { /* Disk is already dead... */
1825 /* printf("Disk already marked as dead!\n"); */
1826 }
1827
1828 }
1829
1830 rs = &raid_softc[unit];
1831 RAIDPUTBUF(rs, raidbp);
1832
1833
1834 if (bp->b_resid == 0) {
1835 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1836 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1837 }
1838
1839 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1840 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1841
1842 splx(s);
1843 }
1844
1845
1846
1847 /*
1848 * initialize a buf structure for doing an I/O in the kernel.
1849 */
1850 static void
1851 InitBP(
1852 struct buf * bp,
1853 struct vnode * b_vp,
1854 unsigned rw_flag,
1855 dev_t dev,
1856 RF_SectorNum_t startSect,
1857 RF_SectorCount_t numSect,
1858 caddr_t buf,
1859 void (*cbFunc) (struct buf *),
1860 void *cbArg,
1861 int logBytesPerSector,
1862 struct proc * b_proc)
1863 {
1864 /* bp->b_flags = B_PHYS | rw_flag; */
1865 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1866 bp->b_bcount = numSect << logBytesPerSector;
1867 bp->b_bufsize = bp->b_bcount;
1868 bp->b_error = 0;
1869 bp->b_dev = dev;
1870 bp->b_un.b_addr = buf;
1871 bp->b_blkno = startSect;
1872 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1873 if (bp->b_bcount == 0) {
1874 panic("bp->b_bcount is zero in InitBP!!\n");
1875 }
1876 bp->b_proc = b_proc;
1877 bp->b_iodone = cbFunc;
1878 bp->b_vp = b_vp;
1879
1880 }
1881
1882 static void
1883 raidgetdefaultlabel(raidPtr, rs, lp)
1884 RF_Raid_t *raidPtr;
1885 struct raid_softc *rs;
1886 struct disklabel *lp;
1887 {
1888 db1_printf(("Building a default label...\n"));
1889 bzero(lp, sizeof(*lp));
1890
1891 /* fabricate a label... */
1892 lp->d_secperunit = raidPtr->totalSectors;
1893 lp->d_secsize = raidPtr->bytesPerSector;
1894 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1895 lp->d_ntracks = 1;
1896 lp->d_ncylinders = raidPtr->totalSectors /
1897 (lp->d_nsectors * lp->d_ntracks);
1898 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1899
1900 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1901 lp->d_type = DTYPE_RAID;
1902 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1903 lp->d_rpm = 3600;
1904 lp->d_interleave = 1;
1905 lp->d_flags = 0;
1906
1907 lp->d_partitions[RAW_PART].p_offset = 0;
1908 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1909 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1910 lp->d_npartitions = RAW_PART + 1;
1911
1912 lp->d_magic = DISKMAGIC;
1913 lp->d_magic2 = DISKMAGIC;
1914 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1915
1916 }
1917 /*
1918 * Read the disklabel from the raid device. If one is not present, fake one
1919 * up.
1920 */
1921 static void
1922 raidgetdisklabel(dev)
1923 dev_t dev;
1924 {
1925 int unit = raidunit(dev);
1926 struct raid_softc *rs = &raid_softc[unit];
1927 char *errstring;
1928 struct disklabel *lp = rs->sc_dkdev.dk_label;
1929 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1930 RF_Raid_t *raidPtr;
1931
1932 db1_printf(("Getting the disklabel...\n"));
1933
1934 bzero(clp, sizeof(*clp));
1935
1936 raidPtr = raidPtrs[unit];
1937
1938 raidgetdefaultlabel(raidPtr, rs, lp);
1939
1940 /*
1941 * Call the generic disklabel extraction routine.
1942 */
1943 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1944 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1945 if (errstring)
1946 raidmakedisklabel(rs);
1947 else {
1948 int i;
1949 struct partition *pp;
1950
1951 /*
1952 * Sanity check whether the found disklabel is valid.
1953 *
1954 * This is necessary since total size of the raid device
1955 * may vary when an interleave is changed even though exactly
1956 * same componets are used, and old disklabel may used
1957 * if that is found.
1958 */
1959 if (lp->d_secperunit != rs->sc_size)
1960 printf("WARNING: %s: "
1961 "total sector size in disklabel (%d) != "
1962 "the size of raid (%ld)\n", rs->sc_xname,
1963 lp->d_secperunit, (long) rs->sc_size);
1964 for (i = 0; i < lp->d_npartitions; i++) {
1965 pp = &lp->d_partitions[i];
1966 if (pp->p_offset + pp->p_size > rs->sc_size)
1967 printf("WARNING: %s: end of partition `%c' "
1968 "exceeds the size of raid (%ld)\n",
1969 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1970 }
1971 }
1972
1973 }
1974 /*
1975 * Take care of things one might want to take care of in the event
1976 * that a disklabel isn't present.
1977 */
1978 static void
1979 raidmakedisklabel(rs)
1980 struct raid_softc *rs;
1981 {
1982 struct disklabel *lp = rs->sc_dkdev.dk_label;
1983 db1_printf(("Making a label..\n"));
1984
1985 /*
1986 * For historical reasons, if there's no disklabel present
1987 * the raw partition must be marked FS_BSDFFS.
1988 */
1989
1990 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1991
1992 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1993
1994 lp->d_checksum = dkcksum(lp);
1995 }
1996 /*
1997 * Lookup the provided name in the filesystem. If the file exists,
1998 * is a valid block device, and isn't being used by anyone else,
1999 * set *vpp to the file's vnode.
2000 * You'll find the original of this in ccd.c
2001 */
2002 int
2003 raidlookup(path, p, vpp)
2004 char *path;
2005 struct proc *p;
2006 struct vnode **vpp; /* result */
2007 {
2008 struct nameidata nd;
2009 struct vnode *vp;
2010 struct vattr va;
2011 int error;
2012
2013 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2014 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2015 #ifdef DEBUG
2016 printf("RAIDframe: vn_open returned %d\n", error);
2017 #endif
2018 return (error);
2019 }
2020 vp = nd.ni_vp;
2021 if (vp->v_usecount > 1) {
2022 VOP_UNLOCK(vp, 0);
2023 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2024 return (EBUSY);
2025 }
2026 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2027 VOP_UNLOCK(vp, 0);
2028 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2029 return (error);
2030 }
2031 /* XXX: eventually we should handle VREG, too. */
2032 if (va.va_type != VBLK) {
2033 VOP_UNLOCK(vp, 0);
2034 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2035 return (ENOTBLK);
2036 }
2037 VOP_UNLOCK(vp, 0);
2038 *vpp = vp;
2039 return (0);
2040 }
2041 /*
2042 * Wait interruptibly for an exclusive lock.
2043 *
2044 * XXX
2045 * Several drivers do this; it should be abstracted and made MP-safe.
2046 * (Hmm... where have we seen this warning before :-> GO )
2047 */
2048 static int
2049 raidlock(rs)
2050 struct raid_softc *rs;
2051 {
2052 int error;
2053
2054 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2055 rs->sc_flags |= RAIDF_WANTED;
2056 if ((error =
2057 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2058 return (error);
2059 }
2060 rs->sc_flags |= RAIDF_LOCKED;
2061 return (0);
2062 }
2063 /*
2064 * Unlock and wake up any waiters.
2065 */
2066 static void
2067 raidunlock(rs)
2068 struct raid_softc *rs;
2069 {
2070
2071 rs->sc_flags &= ~RAIDF_LOCKED;
2072 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2073 rs->sc_flags &= ~RAIDF_WANTED;
2074 wakeup(rs);
2075 }
2076 }
2077
2078
/* Byte offset and byte size used when reading/writing a component label. */
#define RF_COMPONENT_INFO_OFFSET	(16 * 1024)	/* bytes */
#define RF_COMPONENT_INFO_SIZE		(1 * 1024)	/* bytes */
2081
2082 int
2083 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2084 {
2085 RF_ComponentLabel_t clabel;
2086 raidread_component_label(dev, b_vp, &clabel);
2087 clabel.mod_counter = mod_counter;
2088 clabel.clean = RF_RAID_CLEAN;
2089 raidwrite_component_label(dev, b_vp, &clabel);
2090 return(0);
2091 }
2092
2093
2094 int
2095 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2096 {
2097 RF_ComponentLabel_t clabel;
2098 raidread_component_label(dev, b_vp, &clabel);
2099 clabel.mod_counter = mod_counter;
2100 clabel.clean = RF_RAID_DIRTY;
2101 raidwrite_component_label(dev, b_vp, &clabel);
2102 return(0);
2103 }
2104
2105 /* ARGSUSED */
2106 int
2107 raidread_component_label(dev, b_vp, clabel)
2108 dev_t dev;
2109 struct vnode *b_vp;
2110 RF_ComponentLabel_t *clabel;
2111 {
2112 struct buf *bp;
2113 int error;
2114
2115 /* XXX should probably ensure that we don't try to do this if
2116 someone has changed rf_protected_sectors. */
2117
2118 /* get a block of the appropriate size... */
2119 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2120 bp->b_dev = dev;
2121
2122 /* get our ducks in a row for the read */
2123 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2124 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2125 bp->b_flags = B_BUSY | B_READ;
2126 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2127
2128 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2129
2130 error = biowait(bp);
2131
2132 if (!error) {
2133 memcpy(clabel, bp->b_un.b_addr,
2134 sizeof(RF_ComponentLabel_t));
2135 #if 0
2136 print_component_label( clabel );
2137 #endif
2138 } else {
2139 #if 0
2140 printf("Failed to read RAID component label!\n");
2141 #endif
2142 }
2143
2144 bp->b_flags = B_INVAL | B_AGE;
2145 brelse(bp);
2146 return(error);
2147 }
2148 /* ARGSUSED */
2149 int
2150 raidwrite_component_label(dev, b_vp, clabel)
2151 dev_t dev;
2152 struct vnode *b_vp;
2153 RF_ComponentLabel_t *clabel;
2154 {
2155 struct buf *bp;
2156 int error;
2157
2158 /* get a block of the appropriate size... */
2159 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2160 bp->b_dev = dev;
2161
2162 /* get our ducks in a row for the write */
2163 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2164 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2165 bp->b_flags = B_BUSY | B_WRITE;
2166 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2167
2168 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2169
2170 memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
2171
2172 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2173 error = biowait(bp);
2174 bp->b_flags = B_INVAL | B_AGE;
2175 brelse(bp);
2176 if (error) {
2177 #if 1
2178 printf("Failed to write RAID component info!\n");
2179 #endif
2180 }
2181
2182 return(error);
2183 }
2184
2185 void
2186 rf_markalldirty( raidPtr )
2187 RF_Raid_t *raidPtr;
2188 {
2189 RF_ComponentLabel_t clabel;
2190 int r,c;
2191
2192 raidPtr->mod_counter++;
2193 for (r = 0; r < raidPtr->numRow; r++) {
2194 for (c = 0; c < raidPtr->numCol; c++) {
2195 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2196 raidread_component_label(
2197 raidPtr->Disks[r][c].dev,
2198 raidPtr->raid_cinfo[r][c].ci_vp,
2199 &clabel);
2200 if (clabel.status == rf_ds_spared) {
2201 /* XXX do something special...
2202 but whatever you do, don't
2203 try to access it!! */
2204 } else {
2205 #if 0
2206 clabel.status =
2207 raidPtr->Disks[r][c].status;
2208 raidwrite_component_label(
2209 raidPtr->Disks[r][c].dev,
2210 raidPtr->raid_cinfo[r][c].ci_vp,
2211 &clabel);
2212 #endif
2213 raidmarkdirty(
2214 raidPtr->Disks[r][c].dev,
2215 raidPtr->raid_cinfo[r][c].ci_vp,
2216 raidPtr->mod_counter);
2217 }
2218 }
2219 }
2220 }
2221 /* printf("Component labels marked dirty.\n"); */
2222 #if 0
2223 for( c = 0; c < raidPtr->numSpare ; c++) {
2224 sparecol = raidPtr->numCol + c;
2225 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2226 /*
2227
2228 XXX this is where we get fancy and map this spare
2229 into it's correct spot in the array.
2230
2231 */
2232 /*
2233
2234 we claim this disk is "optimal" if it's
2235 rf_ds_used_spare, as that means it should be
2236 directly substitutable for the disk it replaced.
2237 We note that too...
2238
2239 */
2240
2241 for(i=0;i<raidPtr->numRow;i++) {
2242 for(j=0;j<raidPtr->numCol;j++) {
2243 if ((raidPtr->Disks[i][j].spareRow ==
2244 r) &&
2245 (raidPtr->Disks[i][j].spareCol ==
2246 sparecol)) {
2247 srow = r;
2248 scol = sparecol;
2249 break;
2250 }
2251 }
2252 }
2253
2254 raidread_component_label(
2255 raidPtr->Disks[r][sparecol].dev,
2256 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2257 &clabel);
2258 /* make sure status is noted */
2259 clabel.version = RF_COMPONENT_LABEL_VERSION;
2260 clabel.mod_counter = raidPtr->mod_counter;
2261 clabel.serial_number = raidPtr->serial_number;
2262 clabel.row = srow;
2263 clabel.column = scol;
2264 clabel.num_rows = raidPtr->numRow;
2265 clabel.num_columns = raidPtr->numCol;
2266 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2267 clabel.status = rf_ds_optimal;
2268 raidwrite_component_label(
2269 raidPtr->Disks[r][sparecol].dev,
2270 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2271 &clabel);
2272 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2273 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2274 }
2275 }
2276
2277 #endif
2278 }
2279
2280
2281 void
2282 rf_update_component_labels( raidPtr )
2283 RF_Raid_t *raidPtr;
2284 {
2285 RF_ComponentLabel_t clabel;
2286 int sparecol;
2287 int r,c;
2288 int i,j;
2289 int srow, scol;
2290
2291 srow = -1;
2292 scol = -1;
2293
2294 /* XXX should do extra checks to make sure things really are clean,
2295 rather than blindly setting the clean bit... */
2296
2297 raidPtr->mod_counter++;
2298
2299 for (r = 0; r < raidPtr->numRow; r++) {
2300 for (c = 0; c < raidPtr->numCol; c++) {
2301 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2302 raidread_component_label(
2303 raidPtr->Disks[r][c].dev,
2304 raidPtr->raid_cinfo[r][c].ci_vp,
2305 &clabel);
2306 /* make sure status is noted */
2307 clabel.status = rf_ds_optimal;
2308 /* bump the counter */
2309 clabel.mod_counter++;
2310 #if 0
2311 /* note where this set was configured last */
2312 clabel.last_unit = raidPtr->raidid;
2313 #endif
2314 #if DEBUG
2315 if (raidPtr->mod_counter !=
2316 clabel.mod_counter) {
2317 printf("raid%d: mod_counter for row: %d col: %d not in sync\n", raidPtr->raidid, r, c);
2318 }
2319 #endif
2320
2321 raidwrite_component_label(
2322 raidPtr->Disks[r][c].dev,
2323 raidPtr->raid_cinfo[r][c].ci_vp,
2324 &clabel);
2325 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2326 raidmarkclean(
2327 raidPtr->Disks[r][c].dev,
2328 raidPtr->raid_cinfo[r][c].ci_vp,
2329 raidPtr->mod_counter);
2330 }
2331 }
2332 /* else we don't touch it.. */
2333 #if 0
2334 else if (raidPtr->Disks[r][c].status !=
2335 rf_ds_failed) {
2336 raidread_component_label(
2337 raidPtr->Disks[r][c].dev,
2338 raidPtr->raid_cinfo[r][c].ci_vp,
2339 &clabel);
2340 /* make sure status is noted */
2341 clabel.status =
2342 raidPtr->Disks[r][c].status;
2343 raidwrite_component_label(
2344 raidPtr->Disks[r][c].dev,
2345 raidPtr->raid_cinfo[r][c].ci_vp,
2346 &clabel);
2347 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2348 raidmarkclean(
2349 raidPtr->Disks[r][c].dev,
2350 raidPtr->raid_cinfo[r][c].ci_vp,
2351 raidPtr->mod_counter);
2352 }
2353 }
2354 #endif
2355 }
2356 }
2357
2358 for( c = 0; c < raidPtr->numSpare ; c++) {
2359 sparecol = raidPtr->numCol + c;
2360 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2361 /*
2362
2363 we claim this disk is "optimal" if it's
2364 rf_ds_used_spare, as that means it should be
2365 directly substitutable for the disk it replaced.
2366 We note that too...
2367
2368 */
2369
2370 for(i=0;i<raidPtr->numRow;i++) {
2371 for(j=0;j<raidPtr->numCol;j++) {
2372 if ((raidPtr->Disks[i][j].spareRow ==
2373 0) &&
2374 (raidPtr->Disks[i][j].spareCol ==
2375 sparecol)) {
2376 srow = i;
2377 scol = j;
2378 break;
2379 }
2380 }
2381 }
2382
2383 /* XXX shouldn't *really* need this... */
2384 raidread_component_label(
2385 raidPtr->Disks[0][sparecol].dev,
2386 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2387 &clabel);
2388 /* make sure status is noted */
2389
2390 raid_init_component_label(raidPtr, &clabel);
2391
2392 clabel.mod_counter = raidPtr->mod_counter;
2393 clabel.row = srow;
2394 clabel.column = scol;
2395 clabel.status = rf_ds_optimal;
2396
2397 raidwrite_component_label(
2398 raidPtr->Disks[0][sparecol].dev,
2399 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2400 &clabel);
2401 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2402 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2403 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2404 raidPtr->mod_counter);
2405 }
2406 }
2407 }
2408 /* printf("Component labels updated\n"); */
2409 }
2410
2411 void
2412 rf_ReconThread(req)
2413 struct rf_recon_req *req;
2414 {
2415 int s;
2416 RF_Raid_t *raidPtr;
2417
2418 s = splbio();
2419 raidPtr = (RF_Raid_t *) req->raidPtr;
2420 raidPtr->recon_in_progress = 1;
2421
2422 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2423 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2424
2425 /* XXX get rid of this! we don't need it at all.. */
2426 RF_Free(req, sizeof(*req));
2427
2428 raidPtr->recon_in_progress = 0;
2429 splx(s);
2430
2431 /* That's all... */
2432 kthread_exit(0); /* does not return */
2433 }
2434
2435 void
2436 rf_RewriteParityThread(raidPtr)
2437 RF_Raid_t *raidPtr;
2438 {
2439 int retcode;
2440 int s;
2441
2442 raidPtr->parity_rewrite_in_progress = 1;
2443 s = splbio();
2444 retcode = rf_RewriteParity(raidPtr);
2445 splx(s);
2446 if (retcode) {
2447 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2448 } else {
2449 /* set the clean bit! If we shutdown correctly,
2450 the clean bit on each component label will get
2451 set */
2452 raidPtr->parity_good = RF_RAID_CLEAN;
2453 }
2454 raidPtr->parity_rewrite_in_progress = 0;
2455
2456 /* That's all... */
2457 kthread_exit(0); /* does not return */
2458 }
2459
2460
2461 void
2462 rf_CopybackThread(raidPtr)
2463 RF_Raid_t *raidPtr;
2464 {
2465 int s;
2466
2467 raidPtr->copyback_in_progress = 1;
2468 s = splbio();
2469 rf_CopybackReconstructedData(raidPtr);
2470 splx(s);
2471 raidPtr->copyback_in_progress = 0;
2472
2473 /* That's all... */
2474 kthread_exit(0); /* does not return */
2475 }
2476
2477
2478 void
2479 rf_ReconstructInPlaceThread(req)
2480 struct rf_recon_req *req;
2481 {
2482 int retcode;
2483 int s;
2484 RF_Raid_t *raidPtr;
2485
2486 s = splbio();
2487 raidPtr = req->raidPtr;
2488 raidPtr->recon_in_progress = 1;
2489 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2490 RF_Free(req, sizeof(*req));
2491 raidPtr->recon_in_progress = 0;
2492 splx(s);
2493
2494 /* That's all... */
2495 kthread_exit(0); /* does not return */
2496 }
2497
2498 void
2499 rf_mountroot_hook(dev)
2500 struct device *dev;
2501 {
2502 #if 1
2503 printf("rf_mountroot_hook called for %s\n",dev->dv_xname);
2504 #endif
2505 if (boothowto & RB_ASKNAME) {
2506 /* We don't auto-config... */
2507 } else {
2508 /* They didn't ask, and we found something bootable... */
2509 /* XXX pretend for now.. */
2510 if (raidautoconfig) {
2511 rootspec = raid_rooty;
2512 }
2513 }
2514 }
2515
2516
2517 RF_AutoConfig_t *
2518 rf_find_raid_components()
2519 {
2520 struct devnametobdevmaj *dtobdm;
2521 struct vnode *vp;
2522 struct disklabel label;
2523 struct device *dv;
2524 char *cd_name;
2525 dev_t dev;
2526 int error;
2527 int i;
2528 int good_one;
2529 RF_ComponentLabel_t *clabel;
2530 RF_AutoConfig_t *ac_list;
2531 RF_AutoConfig_t *ac;
2532
2533
2534 /* initialize the AutoConfig list */
2535 ac_list = NULL;
2536
2537 if (raidautoconfig) {
2538
2539 /* we begin by trolling through *all* the devices on the system */
2540
2541 for (dv = alldevs.tqh_first; dv != NULL;
2542 dv = dv->dv_list.tqe_next) {
2543
2544 /* we are only interested in disks... */
2545 if (dv->dv_class != DV_DISK)
2546 continue;
2547
2548 /* we don't care about floppies... */
2549 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2550 continue;
2551 }
2552
2553 /* need to find the device_name_to_block_device_major stuff */
2554 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2555 dtobdm = dev_name2blk;
2556 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2557 dtobdm++;
2558 }
2559
2560 /* get a vnode for the raw partition of this disk */
2561
2562 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2563 if (bdevvp(dev, &vp))
2564 panic("RAID can't alloc vnode");
2565
2566 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2567
2568 if (error) {
2569 /* "Who cares." Continue looking
2570 for something that exists*/
2571 vput(vp);
2572 continue;
2573 }
2574
2575 /* Ok, the disk exists. Go get the disklabel. */
2576 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2577 FREAD, NOCRED, 0);
2578 if (error) {
2579 /*
2580 * XXX can't happen - open() would
2581 * have errored out (or faked up one)
2582 */
2583 printf("can't get label for dev %s%c (%d)!?!?\n",
2584 dv->dv_xname, 'a' + RAW_PART, error);
2585 }
2586
2587 /* don't need this any more. We'll allocate it again
2588 a little later if we really do... */
2589 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2590 vput(vp);
2591
2592 for (i=0; i < label.d_npartitions; i++) {
2593 /* We only support partitions marked as RAID */
2594 if (label.d_partitions[i].p_fstype != FS_RAID)
2595 continue;
2596
2597 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2598 if (bdevvp(dev, &vp))
2599 panic("RAID can't alloc vnode");
2600
2601 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2602 if (error) {
2603 /* Whatever... */
2604 vput(vp);
2605 continue;
2606 }
2607
2608 good_one = 0;
2609
2610 clabel = (RF_ComponentLabel_t *)
2611 malloc(sizeof(RF_ComponentLabel_t),
2612 M_RAIDFRAME, M_NOWAIT);
2613 if (clabel == NULL) {
2614 /* XXX CLEANUP HERE */
2615 printf("RAID auto config: out of memory!\n");
2616 return(NULL); /* XXX probably should panic? */
2617 }
2618
2619 if (!raidread_component_label(dev, vp, clabel)) {
2620 /* Got the label. Does it look reasonable? */
2621 if (rf_reasonable_label(clabel) &&
2622 (clabel->partitionSize <=
2623 label.d_partitions[i].p_size)) {
2624 #if DEBUG
2625 printf("Component on: %s%c: %d\n",
2626 dv->dv_xname, 'a'+i,
2627 label.d_partitions[i].p_size);
2628 print_component_label(clabel);
2629 #endif
2630 /* if it's reasonable, add it,
2631 else ignore it. */
2632 ac = (RF_AutoConfig_t *)
2633 malloc(sizeof(RF_AutoConfig_t),
2634 M_RAIDFRAME,
2635 M_NOWAIT);
2636 if (ac == NULL) {
2637 /* XXX should panic?? */
2638 return(NULL);
2639 }
2640
2641 sprintf(ac->devname, "%s%c",
2642 dv->dv_xname, 'a'+i);
2643 ac->dev = dev;
2644 ac->vp = vp;
2645 ac->clabel = clabel;
2646 ac->next = ac_list;
2647 ac_list = ac;
2648 good_one = 1;
2649 }
2650 }
2651 if (!good_one) {
2652 /* cleanup */
2653 free(clabel, M_RAIDFRAME);
2654 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2655 vput(vp);
2656 }
2657 }
2658 }
2659 }
2660 return(ac_list);
2661 }
2662
2663 static int
2664 rf_reasonable_label(clabel)
2665 RF_ComponentLabel_t *clabel;
2666 {
2667
2668 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2669 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2670 ((clabel->clean == RF_RAID_CLEAN) ||
2671 (clabel->clean == RF_RAID_DIRTY)) &&
2672 clabel->row >=0 &&
2673 clabel->column >= 0 &&
2674 clabel->num_rows > 0 &&
2675 clabel->num_columns > 0 &&
2676 clabel->row < clabel->num_rows &&
2677 clabel->column < clabel->num_columns &&
2678 clabel->blockSize > 0 &&
2679 clabel->numBlocks > 0) {
2680 /* label looks reasonable enough... */
2681 return(1);
2682 }
2683 return(0);
2684 }
2685
2686
2687 void
2688 print_component_label(clabel)
2689 RF_ComponentLabel_t *clabel;
2690 {
2691 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2692 clabel->row, clabel->column,
2693 clabel->num_rows, clabel->num_columns);
2694 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2695 clabel->version, clabel->serial_number,
2696 clabel->mod_counter);
2697 printf(" Clean: %s Status: %d\n",
2698 clabel->clean ? "Yes" : "No", clabel->status );
2699 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2700 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2701 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2702 (char) clabel->parityConfig, clabel->blockSize,
2703 clabel->numBlocks);
2704 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2705 printf(" Last configured as: raid%d\n", clabel->last_unit );
2706 #if 0
2707 printf(" Config order: %d\n", clabel->config_order);
2708 #endif
2709
2710 }
2711
2712 RF_ConfigSet_t *
2713 rf_create_auto_sets(ac_list)
2714 RF_AutoConfig_t *ac_list;
2715 {
2716 RF_AutoConfig_t *ac;
2717 RF_ConfigSet_t *config_sets;
2718 RF_ConfigSet_t *cset;
2719 RF_AutoConfig_t *ac_next;
2720
2721
2722 config_sets = NULL;
2723
2724 /* Go through the AutoConfig list, and figure out which components
2725 belong to what sets. */
2726 ac = ac_list;
2727 while(ac!=NULL) {
2728 /* we're going to putz with ac->next, so save it here
2729 for use at the end of the loop */
2730 ac_next = ac->next;
2731
2732 if (config_sets == NULL) {
2733 /* will need at least this one... */
2734 config_sets = (RF_ConfigSet_t *)
2735 malloc(sizeof(RF_ConfigSet_t),
2736 M_RAIDFRAME, M_NOWAIT);
2737 if (config_sets == NULL) {
2738 panic("rf_create_auto_sets: No memory!\n");
2739 }
2740 /* this one is easy :) */
2741 config_sets->ac = ac;
2742 config_sets->next = NULL;
2743 config_sets->rootable = 0;
2744 ac->next = NULL;
2745 } else {
2746 /* which set does this component fit into? */
2747 cset = config_sets;
2748 while(cset!=NULL) {
2749 if (rf_does_it_fit(cset, ac)) {
2750 /* looks like it matches */
2751 ac->next = cset->ac;
2752 cset->ac = ac;
2753 break;
2754 }
2755 cset = cset->next;
2756 }
2757 if (cset==NULL) {
2758 /* didn't find a match above... new set..*/
2759 cset = (RF_ConfigSet_t *)
2760 malloc(sizeof(RF_ConfigSet_t),
2761 M_RAIDFRAME, M_NOWAIT);
2762 if (cset == NULL) {
2763 panic("rf_create_auto_sets: No memory!\n");
2764 }
2765 cset->ac = ac;
2766 ac->next = NULL;
2767 cset->next = config_sets;
2768 cset->rootable = 0;
2769 config_sets = cset;
2770 }
2771 }
2772 ac = ac_next;
2773 }
2774
2775
2776 return(config_sets);
2777 }
2778
2779 static int
2780 rf_does_it_fit(cset, ac)
2781 RF_ConfigSet_t *cset;
2782 RF_AutoConfig_t *ac;
2783 {
2784 RF_ComponentLabel_t *clabel1, *clabel2;
2785
2786 /* If this one matches the *first* one in the set, that's good
2787 enough, since the other members of the set would have been
2788 through here too... */
2789 /* note that we are not checking partitionSize here.. */
2790
2791 clabel1 = cset->ac->clabel;
2792 clabel2 = ac->clabel;
2793 if ((clabel1->version == clabel2->version) &&
2794 (clabel1->serial_number == clabel2->serial_number) &&
2795 (clabel1->mod_counter == clabel2->mod_counter) &&
2796 (clabel1->num_rows == clabel2->num_rows) &&
2797 (clabel1->num_columns == clabel2->num_columns) &&
2798 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2799 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2800 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2801 (clabel1->parityConfig == clabel2->parityConfig) &&
2802 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2803 (clabel1->blockSize == clabel2->blockSize) &&
2804 (clabel1->numBlocks == clabel2->numBlocks) &&
2805 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2806 (clabel1->root_partition == clabel2->root_partition) &&
2807 (clabel1->last_unit == clabel2->last_unit) &&
2808 (clabel1->config_order == clabel2->config_order)) {
2809 /* if it get's here, it almost *has* to be a match */
2810 } else {
2811 /* it's not consistent with somebody in the set..
2812 punt */
2813 return(0);
2814 }
2815 /* all was fine.. it must fit... */
2816 return(1);
2817 }
2818
2819 int
2820 rf_have_enough_components(cset)
2821 RF_ConfigSet_t *cset;
2822 {
2823 RF_AutoConfig_t *ac;
2824 RF_AutoConfig_t *auto_config;
2825 RF_ComponentLabel_t *clabel;
2826 int r,c;
2827 int num_rows;
2828 int num_cols;
2829 int num_missing;
2830
2831 /* check to see that we have enough 'live' components
2832 of this set. If so, we can configure it if necessary */
2833
2834 num_rows = cset->ac->clabel->num_rows;
2835 num_cols = cset->ac->clabel->num_columns;
2836
2837 /* XXX Check for duplicate components!?!?!? */
2838
2839 num_missing = 0;
2840 auto_config = cset->ac;
2841
2842 for(r=0; r<num_rows; r++) {
2843 for(c=0; c<num_cols; c++) {
2844 ac = auto_config;
2845 while(ac!=NULL) {
2846 if (ac->clabel==NULL) {
2847 /* big-time bad news. */
2848 goto fail;
2849 }
2850 if ((ac->clabel->row == r) &&
2851 (ac->clabel->column == c)) {
2852 /* it's this one... */
2853 #if DEBUG
2854 printf("Found: %s at %d,%d\n",
2855 ac->devname,r,c);
2856 #endif
2857 break;
2858 }
2859 ac=ac->next;
2860 }
2861 if (ac==NULL) {
2862 /* Didn't find one here! */
2863 num_missing++;
2864 }
2865 }
2866 }
2867
2868 clabel = cset->ac->clabel;
2869
2870 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
2871 ((clabel->parityConfig == '1') && (num_missing > 1)) ||
2872 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
2873 ((clabel->parityConfig == '5') && (num_missing > 1))) {
2874 /* XXX this needs to be made *much* more general */
2875 /* Too many failures */
2876 return(0);
2877 }
2878 /* otherwise, all is well, and we've got enough to take a kick
2879 at autoconfiguring this set */
2880 return(1);
2881 fail:
2882 return(0);
2883
2884 }
2885
2886 void
2887 rf_create_configuration(ac,config,raidPtr)
2888 RF_AutoConfig_t *ac;
2889 RF_Config_t *config;
2890 RF_Raid_t *raidPtr;
2891 {
2892 RF_ComponentLabel_t *clabel;
2893
2894 clabel = ac->clabel;
2895
2896 /* 1. Fill in the common stuff */
2897 config->numRow = clabel->num_rows;
2898 config->numCol = clabel->num_columns;
2899 config->numSpare = 0; /* XXX should this be set here? */
2900 config->sectPerSU = clabel->sectPerSU;
2901 config->SUsPerPU = clabel->SUsPerPU;
2902 config->SUsPerRU = clabel->SUsPerRU;
2903 config->parityConfig = clabel->parityConfig;
2904 /* XXX... */
2905 strcpy(config->diskQueueType,"fifo");
2906 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
2907 config->layoutSpecificSize = 0; /* XXX ?? */
2908
2909 while(ac!=NULL) {
2910 /* row/col values will be in range due to the checks
2911 in reasonable_label() */
2912 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
2913 ac->devname);
2914 ac = ac->next;
2915 }
2916
2917 }
2918
2919 int
2920 rf_set_autoconfig(raidPtr, new_value)
2921 RF_Raid_t *raidPtr;
2922 int new_value;
2923 {
2924 RF_ComponentLabel_t clabel;
2925 struct vnode *vp;
2926 dev_t dev;
2927 int row, column;
2928
2929 raidPtr->autoconfigure = new_value;
2930 for(row=0; row<raidPtr->numRow; row++) {
2931 for(column=0; column<raidPtr->numCol; column++) {
2932 dev = raidPtr->Disks[row][column].dev;
2933 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2934 raidread_component_label(dev, vp, &clabel);
2935 clabel.autoconfigure = new_value;
2936 raidwrite_component_label(dev, vp, &clabel);
2937 }
2938 }
2939 return(new_value);
2940 }
2941
2942 int
2943 rf_set_rootpartition(raidPtr, new_value)
2944 RF_Raid_t *raidPtr;
2945 int new_value;
2946 {
2947 RF_ComponentLabel_t clabel;
2948 struct vnode *vp;
2949 dev_t dev;
2950 int row, column;
2951
2952 raidPtr->root_partition = new_value;
2953 for(row=0; row<raidPtr->numRow; row++) {
2954 for(column=0; column<raidPtr->numCol; column++) {
2955 dev = raidPtr->Disks[row][column].dev;
2956 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2957 raidread_component_label(dev, vp, &clabel);
2958 clabel.root_partition = new_value;
2959 raidwrite_component_label(dev, vp, &clabel);
2960 }
2961 }
2962 return(new_value);
2963 }
2964
2965 void
2966 rf_release_all_vps(cset)
2967 RF_ConfigSet_t *cset;
2968 {
2969 RF_AutoConfig_t *ac;
2970
2971 ac = cset->ac;
2972 while(ac!=NULL) {
2973 /* Close the vp, and give it back */
2974 if (ac->vp) {
2975 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
2976 vput(ac->vp);
2977 }
2978 ac = ac->next;
2979 }
2980 }
2981
2982
2983 void
2984 rf_cleanup_config_set(cset)
2985 RF_ConfigSet_t *cset;
2986 {
2987 RF_AutoConfig_t *ac;
2988 RF_AutoConfig_t *next_ac;
2989
2990 ac = cset->ac;
2991 while(ac!=NULL) {
2992 next_ac = ac->next;
2993 /* nuke the label */
2994 free(ac->clabel, M_RAIDFRAME);
2995 /* cleanup the config structure */
2996 free(ac, M_RAIDFRAME);
2997 /* "next.." */
2998 ac = next_ac;
2999 }
3000 /* and, finally, nuke the config set */
3001 free(cset, M_RAIDFRAME);
3002 }
3003
3004
3005 void
3006 raid_init_component_label(raidPtr, clabel)
3007 RF_Raid_t *raidPtr;
3008 RF_ComponentLabel_t *clabel;
3009 {
3010 /* current version number */
3011 clabel->version = RF_COMPONENT_LABEL_VERSION;
3012 clabel->serial_number = raidPtr->serial_number;
3013 clabel->mod_counter = raidPtr->mod_counter;
3014 clabel->num_rows = raidPtr->numRow;
3015 clabel->num_columns = raidPtr->numCol;
3016 clabel->clean = RF_RAID_DIRTY; /* not clean */
3017 clabel->status = rf_ds_optimal; /* "It's good!" */
3018
3019 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3020 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3021 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3022
3023 clabel->blockSize = raidPtr->bytesPerSector;
3024 clabel->numBlocks = raidPtr->sectorsPerDisk;
3025
3026 /* XXX not portable */
3027 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3028 clabel->maxOutstanding = raidPtr->maxOutstanding;
3029 clabel->autoconfigure = raidPtr->autoconfigure;
3030 clabel->root_partition = raidPtr->root_partition;
3031 clabel->last_unit = raidPtr->raidid;
3032 clabel->config_order = raidPtr->config_order;
3033 }
3034
3035 int
3036 rf_auto_config_set(cset,unit)
3037 RF_ConfigSet_t *cset;
3038 int *unit;
3039 {
3040 RF_Raid_t *raidPtr;
3041 RF_Config_t *config;
3042 int raidID;
3043 int retcode;
3044
3045 printf("Starting autoconfigure on raid%d\n",raidID);
3046
3047 retcode = 0;
3048 *unit = -1;
3049
3050 /* 1. Create a config structure */
3051
3052 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3053 M_RAIDFRAME,
3054 M_NOWAIT);
3055 if (config==NULL) {
3056 printf("Out of mem!?!?\n");
3057 /* XXX do something more intelligent here. */
3058 return(1);
3059 }
3060 /* XXX raidID needs to be set correctly.. */
3061
3062 /*
3063 2. Figure out what RAID ID this one is supposed to live at
3064 See if we can get the same RAID dev that it was configured
3065 on last time..
3066 */
3067
3068 raidID = cset->ac->clabel->last_unit;
3069 if ((raidID < 0) || (raidID >= numraid)) {
3070 /* let's not wander off into lala land. */
3071 raidID = numraid - 1;
3072 }
3073 if (raidPtrs[raidID]->valid != 0) {
3074
3075 /*
3076 Nope... Go looking for an alternative...
3077 Start high so we don't immediately use raid0 if that's
3078 not taken.
3079 */
3080
3081 for(raidID = numraid; raidID >= 0; raidID--) {
3082 if (raidPtrs[raidID]->valid == 0) {
3083 /* can use this one! */
3084 break;
3085 }
3086 }
3087 }
3088
3089 if (raidID < 0) {
3090 /* punt... */
3091 printf("Unable to auto configure this set!\n");
3092 printf("(Out of RAID devs!)\n");
3093 return(1);
3094 }
3095
3096 raidPtr = raidPtrs[raidID];
3097
3098 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3099 raidPtr->raidid = raidID;
3100 raidPtr->openings = RAIDOUTSTANDING;
3101
3102 /* 3. Build the configuration structure */
3103 rf_create_configuration(cset->ac, config, raidPtr);
3104
3105 /* 4. Do the configuration */
3106 retcode = rf_Configure(raidPtr, config, cset->ac);
3107
3108 if (retcode == 0) {
3109 #if DEBUG
3110 printf("Calling raidinit()\n");
3111 #endif
3112 /* XXX the 0 below is bogus! */
3113 retcode = raidinit(0, raidPtrs[raidID], raidID);
3114 if (retcode) {
3115 printf("init returned: %d\n",retcode);
3116 }
3117 rf_markalldirty( raidPtrs[raidID] );
3118 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3119 if (cset->ac->clabel->root_partition==1) {
3120 /* everything configured just fine. Make a note
3121 that this set is eligible to be root. */
3122 cset->rootable = 1;
3123 /* XXX do this here? */
3124 raidPtrs[raidID]->root_partition = 1;
3125 }
3126 }
3127
3128 /* 5. Cleanup */
3129 free(config, M_RAIDFRAME);
3130
3131 *unit = raidID;
3132 return(retcode);
3133 }
3134