rf_netbsdkintf.c revision 1.59 1 /* $NetBSD: rf_netbsdkintf.c,v 1.59 2000/02/25 02:42:30 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_copyback.h"
141 #include "rf_dag.h"
142 #include "rf_dagflags.h"
143 #include "rf_diskqueue.h"
144 #include "rf_acctrace.h"
145 #include "rf_etimer.h"
146 #include "rf_general.h"
147 #include "rf_debugMem.h"
148 #include "rf_kintf.h"
149 #include "rf_options.h"
150 #include "rf_driver.h"
151 #include "rf_parityscan.h"
152 #include "rf_debugprint.h"
153 #include "rf_threadstuff.h"
154
/*
 * Debug verbosity for this file.  db1_printf() only produces output when
 * the kernel is built with DEBUG and this is greater than zero.
 */
int rf_kdebug_level = 0;

/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.  The argument is a
 * complete, parenthesised printf argument list (hence the doubled
 * parentheses at call sites).
 *
 * Both variants expand to exactly one statement wrapped in
 * do { } while (0), so the macro is safe as the body of an unbraced
 * if/else: a bare "if (...) printf a" would capture a following "else",
 * and a bare "{ }" followed by ";" would terminate the enclosing "if".
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
162
163 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
164
165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
166
167 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
168 * spare table */
169 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
170 * installation process */
171
172 /* prototypes */
173 static void KernelWakeupFunc(struct buf * bp);
174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
175 dev_t dev, RF_SectorNum_t startSect,
176 RF_SectorCount_t numSect, caddr_t buf,
177 void (*cbFunc) (struct buf *), void *cbArg,
178 int logBytesPerSector, struct proc * b_proc);
179 static void raidinit __P((RF_Raid_t *));
180
181 void raidattach __P((int));
182 int raidsize __P((dev_t));
183 int raidopen __P((dev_t, int, int, struct proc *));
184 int raidclose __P((dev_t, int, int, struct proc *));
185 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
186 int raidwrite __P((dev_t, struct uio *, int));
187 int raidread __P((dev_t, struct uio *, int));
188 void raidstrategy __P((struct buf *));
189 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
190
191 /*
192 * Pilfered from ccd.c
193 */
194
195 struct raidbuf {
196 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
197 struct buf *rf_obp; /* ptr. to original I/O buf */
198 int rf_flags; /* misc. flags */
199 RF_DiskQueueData_t *req;/* the request that this was part of.. */
200 };
201
202
203 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
204 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
205
206 /* XXX Not sure if the following should be replacing the raidPtrs above,
207 or if it should be used in conjunction with that...
208 */
209
210 struct raid_softc {
211 int sc_flags; /* flags */
212 int sc_cflags; /* configuration flags */
213 size_t sc_size; /* size of the raid device */
214 char sc_xname[20]; /* XXX external name */
215 struct disk sc_dkdev; /* generic disk device info */
216 struct pool sc_cbufpool; /* component buffer pool */
217 struct buf_queue buf_queue; /* used for the device queue */
218 };
219 /* sc_flags */
220 #define RAIDF_INITED 0x01 /* unit has been initialized */
221 #define RAIDF_WLABEL 0x02 /* label area is writable */
222 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
223 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
224 #define RAIDF_LOCKED 0x80 /* unit is locked */
225
226 #define raidunit(x) DISKUNIT(x)
227 int numraid = 0;
228
229 /*
230 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
231 * Be aware that large numbers can allow the driver to consume a lot of
232 * kernel memory, especially on writes, and in degraded mode reads.
233 *
234 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
235 * a single 64K write will typically require 64K for the old data,
236 * 64K for the old parity, and 64K for the new parity, for a total
237 * of 192K (if the parity buffer is not re-used immediately).
238 * Even if it is used immediately, that's still 128K, which when multiplied
239 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
240 *
241 * Now in degraded mode, for example, a 64K read on the above setup may
242 * require data reconstruction, which will require *all* of the 4 remaining
243 * disks to participate -- 4 * 32K/disk == 128K again.
244 */
245
/* Default number of simultaneous I/Os per RAID device; may be overridden
 * by defining RAIDOUTSTANDING at build time. */
#if !defined(RAIDOUTSTANDING)
#define RAIDOUTSTANDING	6
#endif

/* Device number of the raw partition of the unit that `dev' belongs to. */
#define RAIDLABELDEV(dev) \
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
252
253 /* declared here, and made public, for the benefit of KVM stuff.. */
254 struct raid_softc *raid_softc;
255
256 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
257 struct disklabel *));
258 static void raidgetdisklabel __P((dev_t));
259 static void raidmakedisklabel __P((struct raid_softc *));
260
261 static int raidlock __P((struct raid_softc *));
262 static void raidunlock __P((struct raid_softc *));
263
264 static void rf_markalldirty __P((RF_Raid_t *));
265 void rf_mountroot_hook __P((struct device *));
266
267 struct device *raidrootdev;
268 struct cfdata cf_raidrootdev;
269 struct cfdriver cfdrv;
270 /* XXX these should be moved up */
271 #include "rf_configure.h"
272 #include <sys/reboot.h>
273
274 void rf_ReconThread __P((struct rf_recon_req *));
275 /* XXX what I want is: */
276 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
277 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
278 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
279 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
280 void rf_buildroothack __P((void *));
281
282 RF_AutoConfig_t *rf_find_raid_components __P((void));
283 void print_component_label __P((RF_ComponentLabel_t *));
284 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
285 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
286 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
287 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
288 RF_Raid_t *));
289 int rf_set_autoconfig __P((RF_Raid_t *, int));
290 int rf_set_rootpartition __P((RF_Raid_t *, int));
291 void rf_release_all_vps __P((RF_ConfigSet_t *));
292 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
293 int rf_have_enough_components __P((RF_ConfigSet_t *));
294 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
295
296 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
297 allow autoconfig to take place */
298 /* XXX ugly hack. */
299 const char *raid_rooty = "raid0";
300 extern struct device *booted_device;
301
302 void
303 raidattach(num)
304 int num;
305 {
306 int raidID;
307 int i, rc;
308 RF_AutoConfig_t *ac_list; /* autoconfig list */
309 RF_ConfigSet_t *config_sets;
310
311 #ifdef DEBUG
312 printf("raidattach: Asked for %d units\n", num);
313 #endif
314
315 if (num <= 0) {
316 #ifdef DIAGNOSTIC
317 panic("raidattach: count <= 0");
318 #endif
319 return;
320 }
321 /* This is where all the initialization stuff gets done. */
322
323 numraid = num;
324
325 /* Make some space for requested number of units... */
326
327 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
328 if (raidPtrs == NULL) {
329 panic("raidPtrs is NULL!!\n");
330 }
331
332 rc = rf_mutex_init(&rf_sparet_wait_mutex);
333 if (rc) {
334 RF_PANIC();
335 }
336
337 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
338
339 for (i = 0; i < num; i++)
340 raidPtrs[i] = NULL;
341 rc = rf_BootRaidframe();
342 if (rc == 0)
343 printf("Kernelized RAIDframe activated\n");
344 else
345 panic("Serious error booting RAID!!\n");
346
347 /* put together some datastructures like the CCD device does.. This
348 * lets us lock the device and what-not when it gets opened. */
349
350 raid_softc = (struct raid_softc *)
351 malloc(num * sizeof(struct raid_softc),
352 M_RAIDFRAME, M_NOWAIT);
353 if (raid_softc == NULL) {
354 printf("WARNING: no memory for RAIDframe driver\n");
355 return;
356 }
357
358 bzero(raid_softc, num * sizeof(struct raid_softc));
359
360 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
361 M_RAIDFRAME, M_NOWAIT);
362 if (raidrootdev == NULL) {
363 panic("No memory for RAIDframe driver!!?!?!\n");
364 }
365
366 for (raidID = 0; raidID < num; raidID++) {
367 BUFQ_INIT(&raid_softc[raidID].buf_queue);
368
369 raidrootdev[raidID].dv_class = DV_DISK;
370 raidrootdev[raidID].dv_cfdata = NULL;
371 raidrootdev[raidID].dv_unit = raidID;
372 raidrootdev[raidID].dv_parent = NULL;
373 raidrootdev[raidID].dv_flags = 0;
374 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
375
376 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
377 (RF_Raid_t *));
378 if (raidPtrs[raidID] == NULL) {
379 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
380 numraid = raidID;
381 return;
382 }
383 }
384
385 if (raidautoconfig) {
386 /* 1. locate all RAID components on the system */
387
388 #if DEBUG
389 printf("Searching for raid components...\n");
390 #endif
391 ac_list = rf_find_raid_components();
392
393 /* 2. sort them into their respective sets */
394
395 config_sets = rf_create_auto_sets(ac_list);
396
397 /* 3. evaluate each set and configure the valid ones
398 This gets done in rf_buildroothack() */
399
400 /* schedule the creation of the thread to do the
401 "/ on RAID" stuff */
402
403 kthread_create(rf_buildroothack,config_sets);
404
405 /* 4. make sure we get our mud.. I mean root.. hooks in.. */
406 /* XXXX pick raid0 for now... and this should be only done
407 if we find something that's bootable!!! */
408 #if 0
409 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
410 #endif
411 if (boothowto & RB_ASKNAME) {
412 /* We don't auto-config... */
413 } else {
414 /* They didn't ask, and we found something bootable... */
415 /* XXX pretend for now.. */
416 #if 0
417 booted_device = &raidrootdev[0];
418 #endif
419 }
420 }
421
422 }
423
424 void
425 rf_buildroothack(arg)
426 void *arg;
427 {
428 RF_ConfigSet_t *config_sets = arg;
429 RF_ConfigSet_t *cset;
430 RF_ConfigSet_t *next_cset;
431 int retcode;
432 int raidID;
433 int rootID;
434 int num_root;
435
436 num_root = 0;
437 cset = config_sets;
438 while(cset != NULL ) {
439 next_cset = cset->next;
440 if (rf_have_enough_components(cset) &&
441 cset->ac->clabel->autoconfigure==1) {
442 retcode = rf_auto_config_set(cset,&raidID);
443 if (!retcode) {
444 if (cset->rootable) {
445 rootID = raidID;
446 num_root++;
447 }
448 } else {
449 /* The autoconfig didn't work :( */
450 #if DEBUG
451 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
452 #endif
453 rf_release_all_vps(cset);
454 #if DEBUG
455 printf("Done cleanup\n");
456 #endif
457 }
458 } else {
459 /* we're not autoconfiguring this set...
460 release the associated resources */
461 #if DEBUG
462 printf("Releasing vp's\n");
463 #endif
464 rf_release_all_vps(cset);
465 #if DEBUG
466 printf("Done.\n");
467 #endif
468 }
469 /* cleanup */
470 #if DEBUG
471 printf("Cleaning up config set\n");
472 #endif
473 rf_cleanup_config_set(cset);
474 #if DEBUG
475 printf("Done cleanup\n");
476 #endif
477 cset = next_cset;
478 }
479 if (boothowto & RB_ASKNAME) {
480 /* We don't auto-config... */
481 } else {
482 /* They didn't ask, and we found something bootable... */
483 /* XXX pretend for now.. */
484 if (num_root == 1) {
485 #if 1
486 booted_device = &raidrootdev[rootID];
487 #endif
488 } else if (num_root > 1) {
489 /* we can't guess.. require the user to answer... */
490 boothowto |= RB_ASKNAME;
491 }
492 }
493 }
494
495
496 int
497 raidsize(dev)
498 dev_t dev;
499 {
500 struct raid_softc *rs;
501 struct disklabel *lp;
502 int part, unit, omask, size;
503
504 unit = raidunit(dev);
505 if (unit >= numraid)
506 return (-1);
507 rs = &raid_softc[unit];
508
509 if ((rs->sc_flags & RAIDF_INITED) == 0)
510 return (-1);
511
512 part = DISKPART(dev);
513 omask = rs->sc_dkdev.dk_openmask & (1 << part);
514 lp = rs->sc_dkdev.dk_label;
515
516 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
517 return (-1);
518
519 if (lp->d_partitions[part].p_fstype != FS_SWAP)
520 size = -1;
521 else
522 size = lp->d_partitions[part].p_size *
523 (lp->d_secsize / DEV_BSIZE);
524
525 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
526 return (-1);
527
528 return (size);
529
530 }
531
532 int
533 raiddump(dev, blkno, va, size)
534 dev_t dev;
535 daddr_t blkno;
536 caddr_t va;
537 size_t size;
538 {
539 /* Not implemented. */
540 return ENXIO;
541 }
542 /* ARGSUSED */
543 int
544 raidopen(dev, flags, fmt, p)
545 dev_t dev;
546 int flags, fmt;
547 struct proc *p;
548 {
549 int unit = raidunit(dev);
550 struct raid_softc *rs;
551 struct disklabel *lp;
552 int part, pmask;
553 int error = 0;
554
555 if (unit >= numraid)
556 return (ENXIO);
557 rs = &raid_softc[unit];
558
559 if ((error = raidlock(rs)) != 0)
560 return (error);
561 lp = rs->sc_dkdev.dk_label;
562
563 part = DISKPART(dev);
564 pmask = (1 << part);
565
566 db1_printf(("Opening raid device number: %d partition: %d\n",
567 unit, part));
568
569
570 if ((rs->sc_flags & RAIDF_INITED) &&
571 (rs->sc_dkdev.dk_openmask == 0))
572 raidgetdisklabel(dev);
573
574 /* make sure that this partition exists */
575
576 if (part != RAW_PART) {
577 db1_printf(("Not a raw partition..\n"));
578 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
579 ((part >= lp->d_npartitions) ||
580 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
581 error = ENXIO;
582 raidunlock(rs);
583 db1_printf(("Bailing out...\n"));
584 return (error);
585 }
586 }
587 /* Prevent this unit from being unconfigured while open. */
588 switch (fmt) {
589 case S_IFCHR:
590 rs->sc_dkdev.dk_copenmask |= pmask;
591 break;
592
593 case S_IFBLK:
594 rs->sc_dkdev.dk_bopenmask |= pmask;
595 break;
596 }
597
598 if ((rs->sc_dkdev.dk_openmask == 0) &&
599 ((rs->sc_flags & RAIDF_INITED) != 0)) {
600 /* First one... mark things as dirty... Note that we *MUST*
601 have done a configure before this. I DO NOT WANT TO BE
602 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
603 THAT THEY BELONG TOGETHER!!!!! */
604 /* XXX should check to see if we're only open for reading
605 here... If so, we needn't do this, but then need some
606 other way of keeping track of what's happened.. */
607
608 rf_markalldirty( raidPtrs[unit] );
609 }
610
611
612 rs->sc_dkdev.dk_openmask =
613 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
614
615 raidunlock(rs);
616
617 return (error);
618
619
620 }
621 /* ARGSUSED */
622 int
623 raidclose(dev, flags, fmt, p)
624 dev_t dev;
625 int flags, fmt;
626 struct proc *p;
627 {
628 int unit = raidunit(dev);
629 struct raid_softc *rs;
630 int error = 0;
631 int part;
632
633 if (unit >= numraid)
634 return (ENXIO);
635 rs = &raid_softc[unit];
636
637 if ((error = raidlock(rs)) != 0)
638 return (error);
639
640 part = DISKPART(dev);
641
642 /* ...that much closer to allowing unconfiguration... */
643 switch (fmt) {
644 case S_IFCHR:
645 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
646 break;
647
648 case S_IFBLK:
649 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
650 break;
651 }
652 rs->sc_dkdev.dk_openmask =
653 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
654
655 if ((rs->sc_dkdev.dk_openmask == 0) &&
656 ((rs->sc_flags & RAIDF_INITED) != 0)) {
657 /* Last one... device is not unconfigured yet.
658 Device shutdown has taken care of setting the
659 clean bits if RAIDF_INITED is not set
660 mark things as clean... */
661 #ifdef DEBUG
662 printf("Last one on raid%d. Updating status.\n",unit);
663 #endif
664 rf_update_component_labels( raidPtrs[unit] );
665 }
666
667 raidunlock(rs);
668 return (0);
669
670 }
671
672 void
673 raidstrategy(bp)
674 register struct buf *bp;
675 {
676 register int s;
677
678 unsigned int raidID = raidunit(bp->b_dev);
679 RF_Raid_t *raidPtr;
680 struct raid_softc *rs = &raid_softc[raidID];
681 struct disklabel *lp;
682 int wlabel;
683
684 if ((rs->sc_flags & RAIDF_INITED) ==0) {
685 bp->b_error = ENXIO;
686 bp->b_flags = B_ERROR;
687 bp->b_resid = bp->b_bcount;
688 biodone(bp);
689 return;
690 }
691 if (raidID >= numraid || !raidPtrs[raidID]) {
692 bp->b_error = ENODEV;
693 bp->b_flags |= B_ERROR;
694 bp->b_resid = bp->b_bcount;
695 biodone(bp);
696 return;
697 }
698 raidPtr = raidPtrs[raidID];
699 if (!raidPtr->valid) {
700 bp->b_error = ENODEV;
701 bp->b_flags |= B_ERROR;
702 bp->b_resid = bp->b_bcount;
703 biodone(bp);
704 return;
705 }
706 if (bp->b_bcount == 0) {
707 db1_printf(("b_bcount is zero..\n"));
708 biodone(bp);
709 return;
710 }
711 lp = rs->sc_dkdev.dk_label;
712
713 /*
714 * Do bounds checking and adjust transfer. If there's an
715 * error, the bounds check will flag that for us.
716 */
717
718 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
719 if (DISKPART(bp->b_dev) != RAW_PART)
720 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
721 db1_printf(("Bounds check failed!!:%d %d\n",
722 (int) bp->b_blkno, (int) wlabel));
723 biodone(bp);
724 return;
725 }
726 s = splbio();
727
728 bp->b_resid = 0;
729
730 /* stuff it onto our queue */
731 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
732
733 raidstart(raidPtrs[raidID]);
734
735 splx(s);
736 }
737 /* ARGSUSED */
738 int
739 raidread(dev, uio, flags)
740 dev_t dev;
741 struct uio *uio;
742 int flags;
743 {
744 int unit = raidunit(dev);
745 struct raid_softc *rs;
746 int part;
747
748 if (unit >= numraid)
749 return (ENXIO);
750 rs = &raid_softc[unit];
751
752 if ((rs->sc_flags & RAIDF_INITED) == 0)
753 return (ENXIO);
754 part = DISKPART(dev);
755
756 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
757
758 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
759
760 }
761 /* ARGSUSED */
762 int
763 raidwrite(dev, uio, flags)
764 dev_t dev;
765 struct uio *uio;
766 int flags;
767 {
768 int unit = raidunit(dev);
769 struct raid_softc *rs;
770
771 if (unit >= numraid)
772 return (ENXIO);
773 rs = &raid_softc[unit];
774
775 if ((rs->sc_flags & RAIDF_INITED) == 0)
776 return (ENXIO);
777 db1_printf(("raidwrite\n"));
778 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
779
780 }
781
782 int
783 raidioctl(dev, cmd, data, flag, p)
784 dev_t dev;
785 u_long cmd;
786 caddr_t data;
787 int flag;
788 struct proc *p;
789 {
790 int unit = raidunit(dev);
791 int error = 0;
792 int part, pmask;
793 struct raid_softc *rs;
794 RF_Config_t *k_cfg, *u_cfg;
795 RF_Raid_t *raidPtr;
796 RF_RaidDisk_t *diskPtr;
797 RF_AccTotals_t *totals;
798 RF_DeviceConfig_t *d_cfg, **ucfgp;
799 u_char *specific_buf;
800 int retcode = 0;
801 int row;
802 int column;
803 struct rf_recon_req *rrcopy, *rr;
804 RF_ComponentLabel_t *clabel;
805 RF_ComponentLabel_t ci_label;
806 RF_ComponentLabel_t **clabel_ptr;
807 RF_SingleComponent_t *sparePtr,*componentPtr;
808 RF_SingleComponent_t hot_spare;
809 RF_SingleComponent_t component;
810 int i, j, d;
811
812 if (unit >= numraid)
813 return (ENXIO);
814 rs = &raid_softc[unit];
815 raidPtr = raidPtrs[unit];
816
817 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
818 (int) DISKPART(dev), (int) unit, (int) cmd));
819
820 /* Must be open for writes for these commands... */
821 switch (cmd) {
822 case DIOCSDINFO:
823 case DIOCWDINFO:
824 case DIOCWLABEL:
825 if ((flag & FWRITE) == 0)
826 return (EBADF);
827 }
828
829 /* Must be initialized for these... */
830 switch (cmd) {
831 case DIOCGDINFO:
832 case DIOCSDINFO:
833 case DIOCWDINFO:
834 case DIOCGPART:
835 case DIOCWLABEL:
836 case DIOCGDEFLABEL:
837 case RAIDFRAME_SHUTDOWN:
838 case RAIDFRAME_REWRITEPARITY:
839 case RAIDFRAME_GET_INFO:
840 case RAIDFRAME_RESET_ACCTOTALS:
841 case RAIDFRAME_GET_ACCTOTALS:
842 case RAIDFRAME_KEEP_ACCTOTALS:
843 case RAIDFRAME_GET_SIZE:
844 case RAIDFRAME_FAIL_DISK:
845 case RAIDFRAME_COPYBACK:
846 case RAIDFRAME_CHECK_RECON_STATUS:
847 case RAIDFRAME_GET_COMPONENT_LABEL:
848 case RAIDFRAME_SET_COMPONENT_LABEL:
849 case RAIDFRAME_ADD_HOT_SPARE:
850 case RAIDFRAME_REMOVE_HOT_SPARE:
851 case RAIDFRAME_INIT_LABELS:
852 case RAIDFRAME_REBUILD_IN_PLACE:
853 case RAIDFRAME_CHECK_PARITY:
854 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
855 case RAIDFRAME_CHECK_COPYBACK_STATUS:
856 case RAIDFRAME_SET_AUTOCONFIG:
857 case RAIDFRAME_SET_ROOT:
858 if ((rs->sc_flags & RAIDF_INITED) == 0)
859 return (ENXIO);
860 }
861
862 switch (cmd) {
863
864 /* configure the system */
865 case RAIDFRAME_CONFIGURE:
866
867 if (raidPtr->valid) {
868 /* There is a valid RAID set running on this unit! */
869 printf("raid%d: Device already configured!\n",unit);
870 }
871
872 /* copy-in the configuration information */
873 /* data points to a pointer to the configuration structure */
874
875 u_cfg = *((RF_Config_t **) data);
876 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
877 if (k_cfg == NULL) {
878 return (ENOMEM);
879 }
880 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
881 sizeof(RF_Config_t));
882 if (retcode) {
883 RF_Free(k_cfg, sizeof(RF_Config_t));
884 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
885 retcode));
886 return (retcode);
887 }
888 /* allocate a buffer for the layout-specific data, and copy it
889 * in */
890 if (k_cfg->layoutSpecificSize) {
891 if (k_cfg->layoutSpecificSize > 10000) {
892 /* sanity check */
893 RF_Free(k_cfg, sizeof(RF_Config_t));
894 return (EINVAL);
895 }
896 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
897 (u_char *));
898 if (specific_buf == NULL) {
899 RF_Free(k_cfg, sizeof(RF_Config_t));
900 return (ENOMEM);
901 }
902 retcode = copyin(k_cfg->layoutSpecific,
903 (caddr_t) specific_buf,
904 k_cfg->layoutSpecificSize);
905 if (retcode) {
906 RF_Free(k_cfg, sizeof(RF_Config_t));
907 RF_Free(specific_buf,
908 k_cfg->layoutSpecificSize);
909 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
910 retcode));
911 return (retcode);
912 }
913 } else
914 specific_buf = NULL;
915 k_cfg->layoutSpecific = specific_buf;
916
917 /* should do some kind of sanity check on the configuration.
918 * Store the sum of all the bytes in the last byte? */
919
920 /* configure the system */
921
922 /*
923 * Clear the entire RAID descriptor, just to make sure
924 * there is no stale data left in the case of a
925 * reconfiguration
926 */
927 bzero((char *) raidPtr, sizeof(RF_Raid_t));
928 raidPtr->raidid = unit;
929
930 retcode = rf_Configure(raidPtr, k_cfg, NULL);
931
932 if (retcode == 0) {
933
934 /* allow this many simultaneous IO's to
935 this RAID device */
936 raidPtr->openings = RAIDOUTSTANDING;
937
938 raidinit(raidPtr);
939 rf_markalldirty(raidPtr);
940 }
941 /* free the buffers. No return code here. */
942 if (k_cfg->layoutSpecificSize) {
943 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
944 }
945 RF_Free(k_cfg, sizeof(RF_Config_t));
946
947 return (retcode);
948
949 /* shutdown the system */
950 case RAIDFRAME_SHUTDOWN:
951
952 if ((error = raidlock(rs)) != 0)
953 return (error);
954
955 /*
956 * If somebody has a partition mounted, we shouldn't
957 * shutdown.
958 */
959
960 part = DISKPART(dev);
961 pmask = (1 << part);
962 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
963 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
964 (rs->sc_dkdev.dk_copenmask & pmask))) {
965 raidunlock(rs);
966 return (EBUSY);
967 }
968
969 retcode = rf_Shutdown(raidPtr);
970
971 pool_destroy(&rs->sc_cbufpool);
972
973 /* It's no longer initialized... */
974 rs->sc_flags &= ~RAIDF_INITED;
975
976 /* Detach the disk. */
977 disk_detach(&rs->sc_dkdev);
978
979 raidunlock(rs);
980
981 return (retcode);
982 case RAIDFRAME_GET_COMPONENT_LABEL:
983 clabel_ptr = (RF_ComponentLabel_t **) data;
984 /* need to read the component label for the disk indicated
985 by row,column in clabel */
986
987 /* For practice, let's get it directly fromdisk, rather
988 than from the in-core copy */
989 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
990 (RF_ComponentLabel_t *));
991 if (clabel == NULL)
992 return (ENOMEM);
993
994 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
995
996 retcode = copyin( *clabel_ptr, clabel,
997 sizeof(RF_ComponentLabel_t));
998
999 if (retcode) {
1000 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1001 return(retcode);
1002 }
1003
1004 row = clabel->row;
1005 column = clabel->column;
1006
1007 if ((row < 0) || (row >= raidPtr->numRow) ||
1008 (column < 0) || (column >= raidPtr->numCol)) {
1009 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1010 return(EINVAL);
1011 }
1012
1013 raidread_component_label(raidPtr->Disks[row][column].dev,
1014 raidPtr->raid_cinfo[row][column].ci_vp,
1015 clabel );
1016
1017 retcode = copyout((caddr_t) clabel,
1018 (caddr_t) *clabel_ptr,
1019 sizeof(RF_ComponentLabel_t));
1020 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1021 return (retcode);
1022
1023 case RAIDFRAME_SET_COMPONENT_LABEL:
1024 clabel = (RF_ComponentLabel_t *) data;
1025
1026 /* XXX check the label for valid stuff... */
1027 /* Note that some things *should not* get modified --
1028 the user should be re-initing the labels instead of
1029 trying to patch things.
1030 */
1031
1032 printf("Got component label:\n");
1033 printf("Version: %d\n",clabel->version);
1034 printf("Serial Number: %d\n",clabel->serial_number);
1035 printf("Mod counter: %d\n",clabel->mod_counter);
1036 printf("Row: %d\n", clabel->row);
1037 printf("Column: %d\n", clabel->column);
1038 printf("Num Rows: %d\n", clabel->num_rows);
1039 printf("Num Columns: %d\n", clabel->num_columns);
1040 printf("Clean: %d\n", clabel->clean);
1041 printf("Status: %d\n", clabel->status);
1042
1043 row = clabel->row;
1044 column = clabel->column;
1045
1046 if ((row < 0) || (row >= raidPtr->numRow) ||
1047 (column < 0) || (column >= raidPtr->numCol)) {
1048 return(EINVAL);
1049 }
1050
1051 /* XXX this isn't allowed to do anything for now :-) */
1052
1053 /* XXX and before it is, we need to fill in the rest
1054 of the fields!?!?!?! */
1055 #if 0
1056 raidwrite_component_label(
1057 raidPtr->Disks[row][column].dev,
1058 raidPtr->raid_cinfo[row][column].ci_vp,
1059 clabel );
1060 #endif
1061 return (0);
1062
1063 case RAIDFRAME_INIT_LABELS:
1064 clabel = (RF_ComponentLabel_t *) data;
1065 /*
1066 we only want the serial number from
1067 the above. We get all the rest of the information
1068 from the config that was used to create this RAID
1069 set.
1070 */
1071
1072 raidPtr->serial_number = clabel->serial_number;
1073
1074 raid_init_component_label(raidPtr, &ci_label);
1075 ci_label.serial_number = clabel->serial_number;
1076
1077 for(row=0;row<raidPtr->numRow;row++) {
1078 ci_label.row = row;
1079 for(column=0;column<raidPtr->numCol;column++) {
1080 diskPtr = &raidPtr->Disks[row][column];
1081 ci_label.partitionSize = diskPtr->partitionSize;
1082 ci_label.column = column;
1083 raidwrite_component_label(
1084 raidPtr->Disks[row][column].dev,
1085 raidPtr->raid_cinfo[row][column].ci_vp,
1086 &ci_label );
1087 }
1088 }
1089
1090 return (retcode);
1091 case RAIDFRAME_SET_AUTOCONFIG:
1092 d = rf_set_autoconfig(raidPtr, *data);
1093 printf("New autoconfig value is: %d\n", d);
1094 *data = d;
1095 return (retcode);
1096
1097 case RAIDFRAME_SET_ROOT:
1098 d = rf_set_rootpartition(raidPtr, *data);
1099 printf("New rootpartition value is: %d\n", d);
1100 *data = d;
1101 return (retcode);
1102
1103 /* initialize all parity */
1104 case RAIDFRAME_REWRITEPARITY:
1105
1106 if (raidPtr->Layout.map->faultsTolerated == 0) {
1107 /* Parity for RAID 0 is trivially correct */
1108 raidPtr->parity_good = RF_RAID_CLEAN;
1109 return(0);
1110 }
1111
1112 if (raidPtr->parity_rewrite_in_progress == 1) {
1113 /* Re-write is already in progress! */
1114 return(EINVAL);
1115 }
1116
1117 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1118 rf_RewriteParityThread,
1119 raidPtr,"raid_parity");
1120 return (retcode);
1121
1122
1123 case RAIDFRAME_ADD_HOT_SPARE:
1124 sparePtr = (RF_SingleComponent_t *) data;
1125 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1126 printf("Adding spare\n");
1127 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1128 return(retcode);
1129
1130 case RAIDFRAME_REMOVE_HOT_SPARE:
1131 return(retcode);
1132
1133 case RAIDFRAME_REBUILD_IN_PLACE:
1134
1135 if (raidPtr->Layout.map->faultsTolerated == 0) {
1136 /* Can't do this on a RAID 0!! */
1137 return(EINVAL);
1138 }
1139
1140 if (raidPtr->recon_in_progress == 1) {
1141 /* a reconstruct is already in progress! */
1142 return(EINVAL);
1143 }
1144
1145 componentPtr = (RF_SingleComponent_t *) data;
1146 memcpy( &component, componentPtr,
1147 sizeof(RF_SingleComponent_t));
1148 row = component.row;
1149 column = component.column;
1150 printf("Rebuild: %d %d\n",row, column);
1151 if ((row < 0) || (row >= raidPtr->numRow) ||
1152 (column < 0) || (column >= raidPtr->numCol)) {
1153 return(EINVAL);
1154 }
1155
1156 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1157 if (rrcopy == NULL)
1158 return(ENOMEM);
1159
1160 rrcopy->raidPtr = (void *) raidPtr;
1161 rrcopy->row = row;
1162 rrcopy->col = column;
1163
1164 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1165 rf_ReconstructInPlaceThread,
1166 rrcopy,"raid_reconip");
1167 return(retcode);
1168
1169 case RAIDFRAME_GET_INFO:
1170 if (!raidPtr->valid)
1171 return (ENODEV);
1172 ucfgp = (RF_DeviceConfig_t **) data;
1173 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1174 (RF_DeviceConfig_t *));
1175 if (d_cfg == NULL)
1176 return (ENOMEM);
1177 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1178 d_cfg->rows = raidPtr->numRow;
1179 d_cfg->cols = raidPtr->numCol;
1180 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1181 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1182 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1183 return (ENOMEM);
1184 }
1185 d_cfg->nspares = raidPtr->numSpare;
1186 if (d_cfg->nspares >= RF_MAX_DISKS) {
1187 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1188 return (ENOMEM);
1189 }
1190 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1191 d = 0;
1192 for (i = 0; i < d_cfg->rows; i++) {
1193 for (j = 0; j < d_cfg->cols; j++) {
1194 d_cfg->devs[d] = raidPtr->Disks[i][j];
1195 d++;
1196 }
1197 }
1198 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1199 d_cfg->spares[i] = raidPtr->Disks[0][j];
1200 }
1201 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1202 sizeof(RF_DeviceConfig_t));
1203 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1204
1205 return (retcode);
1206
1207 case RAIDFRAME_CHECK_PARITY:
1208 *(int *) data = raidPtr->parity_good;
1209 return (0);
1210
1211 case RAIDFRAME_RESET_ACCTOTALS:
1212 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1213 return (0);
1214
1215 case RAIDFRAME_GET_ACCTOTALS:
1216 totals = (RF_AccTotals_t *) data;
1217 *totals = raidPtr->acc_totals;
1218 return (0);
1219
1220 case RAIDFRAME_KEEP_ACCTOTALS:
1221 raidPtr->keep_acc_totals = *(int *)data;
1222 return (0);
1223
1224 case RAIDFRAME_GET_SIZE:
1225 *(int *) data = raidPtr->totalSectors;
1226 return (0);
1227
1228 /* fail a disk & optionally start reconstruction */
1229 case RAIDFRAME_FAIL_DISK:
1230
1231 if (raidPtr->Layout.map->faultsTolerated == 0) {
1232 /* Can't do this on a RAID 0!! */
1233 return(EINVAL);
1234 }
1235
1236 rr = (struct rf_recon_req *) data;
1237
1238 if (rr->row < 0 || rr->row >= raidPtr->numRow
1239 || rr->col < 0 || rr->col >= raidPtr->numCol)
1240 return (EINVAL);
1241
1242 printf("raid%d: Failing the disk: row: %d col: %d\n",
1243 unit, rr->row, rr->col);
1244
1245 /* make a copy of the recon request so that we don't rely on
1246 * the user's buffer */
1247 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1248 if (rrcopy == NULL)
1249 return(ENOMEM);
1250 bcopy(rr, rrcopy, sizeof(*rr));
1251 rrcopy->raidPtr = (void *) raidPtr;
1252
1253 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1254 rf_ReconThread,
1255 rrcopy,"raid_recon");
1256 return (0);
1257
1258 /* invoke a copyback operation after recon on whatever disk
1259 * needs it, if any */
1260 case RAIDFRAME_COPYBACK:
1261
1262 if (raidPtr->Layout.map->faultsTolerated == 0) {
1263 /* This makes no sense on a RAID 0!! */
1264 return(EINVAL);
1265 }
1266
1267 if (raidPtr->copyback_in_progress == 1) {
1268 /* Copyback is already in progress! */
1269 return(EINVAL);
1270 }
1271
1272 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1273 rf_CopybackThread,
1274 raidPtr,"raid_copyback");
1275 return (retcode);
1276
1277 /* return the percentage completion of reconstruction */
1278 case RAIDFRAME_CHECK_RECON_STATUS:
1279 if (raidPtr->Layout.map->faultsTolerated == 0) {
1280 /* This makes no sense on a RAID 0 */
1281 return(EINVAL);
1282 }
1283 row = 0; /* XXX we only consider a single row... */
1284 if (raidPtr->status[row] != rf_rs_reconstructing)
1285 *(int *) data = 100;
1286 else
1287 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1288 return (0);
1289
1290 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1291 if (raidPtr->Layout.map->faultsTolerated == 0) {
1292 /* This makes no sense on a RAID 0 */
1293 return(EINVAL);
1294 }
1295 if (raidPtr->parity_rewrite_in_progress == 1) {
1296 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1297 } else {
1298 *(int *) data = 100;
1299 }
1300 return (0);
1301
1302 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1303 if (raidPtr->Layout.map->faultsTolerated == 0) {
1304 /* This makes no sense on a RAID 0 */
1305 return(EINVAL);
1306 }
1307 if (raidPtr->copyback_in_progress == 1) {
1308 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1309 raidPtr->Layout.numStripe;
1310 } else {
1311 *(int *) data = 100;
1312 }
1313 return (0);
1314
1315
1316 /* the sparetable daemon calls this to wait for the kernel to
1317 * need a spare table. this ioctl does not return until a
1318 * spare table is needed. XXX -- calling mpsleep here in the
1319 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1320 * -- I should either compute the spare table in the kernel,
1321 * or have a different -- XXX XXX -- interface (a different
1322 * character device) for delivering the table -- XXX */
1323 #if 0
1324 case RAIDFRAME_SPARET_WAIT:
1325 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1326 while (!rf_sparet_wait_queue)
1327 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1328 waitreq = rf_sparet_wait_queue;
1329 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1330 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1331
1332 /* structure assignment */
1333 *((RF_SparetWait_t *) data) = *waitreq;
1334
1335 RF_Free(waitreq, sizeof(*waitreq));
1336 return (0);
1337
1338 /* wakes up a process waiting on SPARET_WAIT and puts an error
1339 * code in it that will cause the dameon to exit */
1340 case RAIDFRAME_ABORT_SPARET_WAIT:
1341 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1342 waitreq->fcol = -1;
1343 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1344 waitreq->next = rf_sparet_wait_queue;
1345 rf_sparet_wait_queue = waitreq;
1346 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1347 wakeup(&rf_sparet_wait_queue);
1348 return (0);
1349
1350 /* used by the spare table daemon to deliver a spare table
1351 * into the kernel */
1352 case RAIDFRAME_SEND_SPARET:
1353
1354 /* install the spare table */
1355 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1356
1357 /* respond to the requestor. the return status of the spare
1358 * table installation is passed in the "fcol" field */
1359 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1360 waitreq->fcol = retcode;
1361 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1362 waitreq->next = rf_sparet_resp_queue;
1363 rf_sparet_resp_queue = waitreq;
1364 wakeup(&rf_sparet_resp_queue);
1365 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1366
1367 return (retcode);
1368 #endif
1369
1370 default:
1371 break; /* fall through to the os-specific code below */
1372
1373 }
1374
1375 if (!raidPtr->valid)
1376 return (EINVAL);
1377
1378 /*
1379 * Add support for "regular" device ioctls here.
1380 */
1381
1382 switch (cmd) {
1383 case DIOCGDINFO:
1384 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1385 break;
1386
1387 case DIOCGPART:
1388 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1389 ((struct partinfo *) data)->part =
1390 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1391 break;
1392
1393 case DIOCWDINFO:
1394 case DIOCSDINFO:
1395 if ((error = raidlock(rs)) != 0)
1396 return (error);
1397
1398 rs->sc_flags |= RAIDF_LABELLING;
1399
1400 error = setdisklabel(rs->sc_dkdev.dk_label,
1401 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1402 if (error == 0) {
1403 if (cmd == DIOCWDINFO)
1404 error = writedisklabel(RAIDLABELDEV(dev),
1405 raidstrategy, rs->sc_dkdev.dk_label,
1406 rs->sc_dkdev.dk_cpulabel);
1407 }
1408 rs->sc_flags &= ~RAIDF_LABELLING;
1409
1410 raidunlock(rs);
1411
1412 if (error)
1413 return (error);
1414 break;
1415
1416 case DIOCWLABEL:
1417 if (*(int *) data != 0)
1418 rs->sc_flags |= RAIDF_WLABEL;
1419 else
1420 rs->sc_flags &= ~RAIDF_WLABEL;
1421 break;
1422
1423 case DIOCGDEFLABEL:
1424 raidgetdefaultlabel(raidPtr, rs,
1425 (struct disklabel *) data);
1426 break;
1427
1428 default:
1429 retcode = ENOTTY;
1430 }
1431 return (retcode);
1432
1433 }
1434
1435
1436 /* raidinit -- complete the rest of the initialization for the
1437 RAIDframe device. */
1438
1439
1440 static void
1441 raidinit(raidPtr)
1442 RF_Raid_t *raidPtr;
1443 {
1444 struct raid_softc *rs;
1445 int unit;
1446
1447 unit = raidPtr->raidid;
1448
1449 rs = &raid_softc[unit];
1450 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1451 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1452
1453
1454 /* XXX should check return code first... */
1455 rs->sc_flags |= RAIDF_INITED;
1456
1457 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1458
1459 rs->sc_dkdev.dk_name = rs->sc_xname;
1460
1461 /* disk_attach actually creates space for the CPU disklabel, among
1462 * other things, so it's critical to call this *BEFORE* we try putzing
1463 * with disklabels. */
1464
1465 disk_attach(&rs->sc_dkdev);
1466
1467 /* XXX There may be a weird interaction here between this, and
1468 * protectedSectors, as used in RAIDframe. */
1469
1470 rs->sc_size = raidPtr->totalSectors;
1471
1472 }
1473
1474 /* wake up the daemon & tell it to get us a spare table
1475 * XXX
1476 * the entries in the queues should be tagged with the raidPtr
1477 * so that in the extremely rare case that two recons happen at once,
1478 * we know for which device were requesting a spare table
1479 * XXX
1480 *
1481 * XXX This code is not currently used. GO
1482 */
1483 int
1484 rf_GetSpareTableFromDaemon(req)
1485 RF_SparetWait_t *req;
1486 {
1487 int retcode;
1488
1489 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1490 req->next = rf_sparet_wait_queue;
1491 rf_sparet_wait_queue = req;
1492 wakeup(&rf_sparet_wait_queue);
1493
1494 /* mpsleep unlocks the mutex */
1495 while (!rf_sparet_resp_queue) {
1496 tsleep(&rf_sparet_resp_queue, PRIBIO,
1497 "raidframe getsparetable", 0);
1498 }
1499 req = rf_sparet_resp_queue;
1500 rf_sparet_resp_queue = req->next;
1501 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1502
1503 retcode = req->fcol;
1504 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1505 * alloc'd */
1506 return (retcode);
1507 }
1508
1509 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1510 * bp & passes it down.
1511 * any calls originating in the kernel must use non-blocking I/O
1512 * do some extra sanity checking to return "appropriate" error values for
1513 * certain conditions (to make some standard utilities work)
1514 *
1515 * Formerly known as: rf_DoAccessKernel
1516 */
1517 void
1518 raidstart(raidPtr)
1519 RF_Raid_t *raidPtr;
1520 {
1521 RF_SectorCount_t num_blocks, pb, sum;
1522 RF_RaidAddr_t raid_addr;
1523 int retcode;
1524 struct partition *pp;
1525 daddr_t blocknum;
1526 int unit;
1527 struct raid_softc *rs;
1528 int do_async;
1529 struct buf *bp;
1530
1531 unit = raidPtr->raidid;
1532 rs = &raid_softc[unit];
1533
1534 /* quick check to see if anything has died recently */
1535 RF_LOCK_MUTEX(raidPtr->mutex);
1536 if (raidPtr->numNewFailures > 0) {
1537 rf_update_component_labels(raidPtr);
1538 raidPtr->numNewFailures--;
1539 }
1540 RF_UNLOCK_MUTEX(raidPtr->mutex);
1541
1542 /* Check to see if we're at the limit... */
1543 RF_LOCK_MUTEX(raidPtr->mutex);
1544 while (raidPtr->openings > 0) {
1545 RF_UNLOCK_MUTEX(raidPtr->mutex);
1546
1547 /* get the next item, if any, from the queue */
1548 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1549 /* nothing more to do */
1550 return;
1551 }
1552 BUFQ_REMOVE(&rs->buf_queue, bp);
1553
1554 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1555 * partition.. Need to make it absolute to the underlying
1556 * device.. */
1557
1558 blocknum = bp->b_blkno;
1559 if (DISKPART(bp->b_dev) != RAW_PART) {
1560 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1561 blocknum += pp->p_offset;
1562 }
1563
1564 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1565 (int) blocknum));
1566
1567 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1568 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1569
1570 /* *THIS* is where we adjust what block we're going to...
1571 * but DO NOT TOUCH bp->b_blkno!!! */
1572 raid_addr = blocknum;
1573
1574 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1575 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1576 sum = raid_addr + num_blocks + pb;
1577 if (1 || rf_debugKernelAccess) {
1578 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1579 (int) raid_addr, (int) sum, (int) num_blocks,
1580 (int) pb, (int) bp->b_resid));
1581 }
1582 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1583 || (sum < num_blocks) || (sum < pb)) {
1584 bp->b_error = ENOSPC;
1585 bp->b_flags |= B_ERROR;
1586 bp->b_resid = bp->b_bcount;
1587 biodone(bp);
1588 RF_LOCK_MUTEX(raidPtr->mutex);
1589 continue;
1590 }
1591 /*
1592 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1593 */
1594
1595 if (bp->b_bcount & raidPtr->sectorMask) {
1596 bp->b_error = EINVAL;
1597 bp->b_flags |= B_ERROR;
1598 bp->b_resid = bp->b_bcount;
1599 biodone(bp);
1600 RF_LOCK_MUTEX(raidPtr->mutex);
1601 continue;
1602
1603 }
1604 db1_printf(("Calling DoAccess..\n"));
1605
1606
1607 RF_LOCK_MUTEX(raidPtr->mutex);
1608 raidPtr->openings--;
1609 RF_UNLOCK_MUTEX(raidPtr->mutex);
1610
1611 /*
1612 * Everything is async.
1613 */
1614 do_async = 1;
1615
1616 /* don't ever condition on bp->b_flags & B_WRITE.
1617 * always condition on B_READ instead */
1618
1619 /* XXX we're still at splbio() here... do we *really*
1620 need to be? */
1621
1622
1623 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1624 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1625 do_async, raid_addr, num_blocks,
1626 bp->b_un.b_addr, bp, NULL, NULL,
1627 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1628
1629
1630 RF_LOCK_MUTEX(raidPtr->mutex);
1631 }
1632 RF_UNLOCK_MUTEX(raidPtr->mutex);
1633 }
1634
1635
1636
1637
1638 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1639
1640 int
1641 rf_DispatchKernelIO(queue, req)
1642 RF_DiskQueue_t *queue;
1643 RF_DiskQueueData_t *req;
1644 {
1645 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1646 struct buf *bp;
1647 struct raidbuf *raidbp = NULL;
1648 struct raid_softc *rs;
1649 int unit;
1650 int s;
1651
1652 s=0;
1653 /* s = splbio();*/ /* want to test this */
1654 /* XXX along with the vnode, we also need the softc associated with
1655 * this device.. */
1656
1657 req->queue = queue;
1658
1659 unit = queue->raidPtr->raidid;
1660
1661 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1662
1663 if (unit >= numraid) {
1664 printf("Invalid unit number: %d %d\n", unit, numraid);
1665 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1666 }
1667 rs = &raid_softc[unit];
1668
1669 /* XXX is this the right place? */
1670 disk_busy(&rs->sc_dkdev);
1671
1672 bp = req->bp;
1673 #if 1
1674 /* XXX when there is a physical disk failure, someone is passing us a
1675 * buffer that contains old stuff!! Attempt to deal with this problem
1676 * without taking a performance hit... (not sure where the real bug
1677 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1678
1679 if (bp->b_flags & B_ERROR) {
1680 bp->b_flags &= ~B_ERROR;
1681 }
1682 if (bp->b_error != 0) {
1683 bp->b_error = 0;
1684 }
1685 #endif
1686 raidbp = RAIDGETBUF(rs);
1687
1688 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1689
1690 /*
1691 * context for raidiodone
1692 */
1693 raidbp->rf_obp = bp;
1694 raidbp->req = req;
1695
1696 LIST_INIT(&raidbp->rf_buf.b_dep);
1697
1698 switch (req->type) {
1699 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1700 /* XXX need to do something extra here.. */
1701 /* I'm leaving this in, as I've never actually seen it used,
1702 * and I'd like folks to report it... GO */
1703 printf(("WAKEUP CALLED\n"));
1704 queue->numOutstanding++;
1705
1706 /* XXX need to glue the original buffer into this?? */
1707
1708 KernelWakeupFunc(&raidbp->rf_buf);
1709 break;
1710
1711 case RF_IO_TYPE_READ:
1712 case RF_IO_TYPE_WRITE:
1713
1714 if (req->tracerec) {
1715 RF_ETIMER_START(req->tracerec->timer);
1716 }
1717 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1718 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1719 req->sectorOffset, req->numSector,
1720 req->buf, KernelWakeupFunc, (void *) req,
1721 queue->raidPtr->logBytesPerSector, req->b_proc);
1722
1723 if (rf_debugKernelAccess) {
1724 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1725 (long) bp->b_blkno));
1726 }
1727 queue->numOutstanding++;
1728 queue->last_deq_sector = req->sectorOffset;
1729 /* acc wouldn't have been let in if there were any pending
1730 * reqs at any other priority */
1731 queue->curPriority = req->priority;
1732
1733 db1_printf(("Going for %c to unit %d row %d col %d\n",
1734 req->type, unit, queue->row, queue->col));
1735 db1_printf(("sector %d count %d (%d bytes) %d\n",
1736 (int) req->sectorOffset, (int) req->numSector,
1737 (int) (req->numSector <<
1738 queue->raidPtr->logBytesPerSector),
1739 (int) queue->raidPtr->logBytesPerSector));
1740 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1741 raidbp->rf_buf.b_vp->v_numoutput++;
1742 }
1743 VOP_STRATEGY(&raidbp->rf_buf);
1744
1745 break;
1746
1747 default:
1748 panic("bad req->type in rf_DispatchKernelIO");
1749 }
1750 db1_printf(("Exiting from DispatchKernelIO\n"));
1751 /* splx(s); */ /* want to test this */
1752 return (0);
1753 }
1754 /* this is the callback function associated with a I/O invoked from
1755 kernel code.
1756 */
1757 static void
1758 KernelWakeupFunc(vbp)
1759 struct buf *vbp;
1760 {
1761 RF_DiskQueueData_t *req = NULL;
1762 RF_DiskQueue_t *queue;
1763 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1764 struct buf *bp;
1765 struct raid_softc *rs;
1766 int unit;
1767 register int s;
1768
1769 s = splbio();
1770 db1_printf(("recovering the request queue:\n"));
1771 req = raidbp->req;
1772
1773 bp = raidbp->rf_obp;
1774
1775 queue = (RF_DiskQueue_t *) req->queue;
1776
1777 if (raidbp->rf_buf.b_flags & B_ERROR) {
1778 bp->b_flags |= B_ERROR;
1779 bp->b_error = raidbp->rf_buf.b_error ?
1780 raidbp->rf_buf.b_error : EIO;
1781 }
1782
1783 /* XXX methinks this could be wrong... */
1784 #if 1
1785 bp->b_resid = raidbp->rf_buf.b_resid;
1786 #endif
1787
1788 if (req->tracerec) {
1789 RF_ETIMER_STOP(req->tracerec->timer);
1790 RF_ETIMER_EVAL(req->tracerec->timer);
1791 RF_LOCK_MUTEX(rf_tracing_mutex);
1792 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1793 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1794 req->tracerec->num_phys_ios++;
1795 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1796 }
1797 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1798
1799 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1800
1801
1802 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1803 * ballistic, and mark the component as hosed... */
1804
1805 if (bp->b_flags & B_ERROR) {
1806 /* Mark the disk as dead */
1807 /* but only mark it once... */
1808 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1809 rf_ds_optimal) {
1810 printf("raid%d: IO Error. Marking %s as failed.\n",
1811 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1812 queue->raidPtr->Disks[queue->row][queue->col].status =
1813 rf_ds_failed;
1814 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1815 queue->raidPtr->numFailures++;
1816 queue->raidPtr->numNewFailures++;
1817 /* XXX here we should bump the version number for each component, and write that data out */
1818 } else { /* Disk is already dead... */
1819 /* printf("Disk already marked as dead!\n"); */
1820 }
1821
1822 }
1823
1824 rs = &raid_softc[unit];
1825 RAIDPUTBUF(rs, raidbp);
1826
1827
1828 if (bp->b_resid == 0) {
1829 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1830 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1831 }
1832
1833 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1834 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1835
1836 splx(s);
1837 }
1838
1839
1840
1841 /*
1842 * initialize a buf structure for doing an I/O in the kernel.
1843 */
1844 static void
1845 InitBP(
1846 struct buf * bp,
1847 struct vnode * b_vp,
1848 unsigned rw_flag,
1849 dev_t dev,
1850 RF_SectorNum_t startSect,
1851 RF_SectorCount_t numSect,
1852 caddr_t buf,
1853 void (*cbFunc) (struct buf *),
1854 void *cbArg,
1855 int logBytesPerSector,
1856 struct proc * b_proc)
1857 {
1858 /* bp->b_flags = B_PHYS | rw_flag; */
1859 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1860 bp->b_bcount = numSect << logBytesPerSector;
1861 bp->b_bufsize = bp->b_bcount;
1862 bp->b_error = 0;
1863 bp->b_dev = dev;
1864 bp->b_un.b_addr = buf;
1865 bp->b_blkno = startSect;
1866 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1867 if (bp->b_bcount == 0) {
1868 panic("bp->b_bcount is zero in InitBP!!\n");
1869 }
1870 bp->b_proc = b_proc;
1871 bp->b_iodone = cbFunc;
1872 bp->b_vp = b_vp;
1873
1874 }
1875
1876 static void
1877 raidgetdefaultlabel(raidPtr, rs, lp)
1878 RF_Raid_t *raidPtr;
1879 struct raid_softc *rs;
1880 struct disklabel *lp;
1881 {
1882 db1_printf(("Building a default label...\n"));
1883 bzero(lp, sizeof(*lp));
1884
1885 /* fabricate a label... */
1886 lp->d_secperunit = raidPtr->totalSectors;
1887 lp->d_secsize = raidPtr->bytesPerSector;
1888 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1889 lp->d_ntracks = 1;
1890 lp->d_ncylinders = raidPtr->totalSectors /
1891 (lp->d_nsectors * lp->d_ntracks);
1892 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1893
1894 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1895 lp->d_type = DTYPE_RAID;
1896 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1897 lp->d_rpm = 3600;
1898 lp->d_interleave = 1;
1899 lp->d_flags = 0;
1900
1901 lp->d_partitions[RAW_PART].p_offset = 0;
1902 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1903 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1904 lp->d_npartitions = RAW_PART + 1;
1905
1906 lp->d_magic = DISKMAGIC;
1907 lp->d_magic2 = DISKMAGIC;
1908 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1909
1910 }
1911 /*
1912 * Read the disklabel from the raid device. If one is not present, fake one
1913 * up.
1914 */
1915 static void
1916 raidgetdisklabel(dev)
1917 dev_t dev;
1918 {
1919 int unit = raidunit(dev);
1920 struct raid_softc *rs = &raid_softc[unit];
1921 char *errstring;
1922 struct disklabel *lp = rs->sc_dkdev.dk_label;
1923 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1924 RF_Raid_t *raidPtr;
1925
1926 db1_printf(("Getting the disklabel...\n"));
1927
1928 bzero(clp, sizeof(*clp));
1929
1930 raidPtr = raidPtrs[unit];
1931
1932 raidgetdefaultlabel(raidPtr, rs, lp);
1933
1934 /*
1935 * Call the generic disklabel extraction routine.
1936 */
1937 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1938 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1939 if (errstring)
1940 raidmakedisklabel(rs);
1941 else {
1942 int i;
1943 struct partition *pp;
1944
1945 /*
1946 * Sanity check whether the found disklabel is valid.
1947 *
1948 * This is necessary since total size of the raid device
1949 * may vary when an interleave is changed even though exactly
1950 * same componets are used, and old disklabel may used
1951 * if that is found.
1952 */
1953 if (lp->d_secperunit != rs->sc_size)
1954 printf("WARNING: %s: "
1955 "total sector size in disklabel (%d) != "
1956 "the size of raid (%ld)\n", rs->sc_xname,
1957 lp->d_secperunit, (long) rs->sc_size);
1958 for (i = 0; i < lp->d_npartitions; i++) {
1959 pp = &lp->d_partitions[i];
1960 if (pp->p_offset + pp->p_size > rs->sc_size)
1961 printf("WARNING: %s: end of partition `%c' "
1962 "exceeds the size of raid (%ld)\n",
1963 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1964 }
1965 }
1966
1967 }
1968 /*
1969 * Take care of things one might want to take care of in the event
1970 * that a disklabel isn't present.
1971 */
1972 static void
1973 raidmakedisklabel(rs)
1974 struct raid_softc *rs;
1975 {
1976 struct disklabel *lp = rs->sc_dkdev.dk_label;
1977 db1_printf(("Making a label..\n"));
1978
1979 /*
1980 * For historical reasons, if there's no disklabel present
1981 * the raw partition must be marked FS_BSDFFS.
1982 */
1983
1984 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1985
1986 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1987
1988 lp->d_checksum = dkcksum(lp);
1989 }
1990 /*
1991 * Lookup the provided name in the filesystem. If the file exists,
1992 * is a valid block device, and isn't being used by anyone else,
1993 * set *vpp to the file's vnode.
1994 * You'll find the original of this in ccd.c
1995 */
1996 int
1997 raidlookup(path, p, vpp)
1998 char *path;
1999 struct proc *p;
2000 struct vnode **vpp; /* result */
2001 {
2002 struct nameidata nd;
2003 struct vnode *vp;
2004 struct vattr va;
2005 int error;
2006
2007 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2008 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2009 #ifdef DEBUG
2010 printf("RAIDframe: vn_open returned %d\n", error);
2011 #endif
2012 return (error);
2013 }
2014 vp = nd.ni_vp;
2015 if (vp->v_usecount > 1) {
2016 VOP_UNLOCK(vp, 0);
2017 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2018 return (EBUSY);
2019 }
2020 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2021 VOP_UNLOCK(vp, 0);
2022 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2023 return (error);
2024 }
2025 /* XXX: eventually we should handle VREG, too. */
2026 if (va.va_type != VBLK) {
2027 VOP_UNLOCK(vp, 0);
2028 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2029 return (ENOTBLK);
2030 }
2031 VOP_UNLOCK(vp, 0);
2032 *vpp = vp;
2033 return (0);
2034 }
2035 /*
2036 * Wait interruptibly for an exclusive lock.
2037 *
2038 * XXX
2039 * Several drivers do this; it should be abstracted and made MP-safe.
2040 * (Hmm... where have we seen this warning before :-> GO )
2041 */
2042 static int
2043 raidlock(rs)
2044 struct raid_softc *rs;
2045 {
2046 int error;
2047
2048 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2049 rs->sc_flags |= RAIDF_WANTED;
2050 if ((error =
2051 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2052 return (error);
2053 }
2054 rs->sc_flags |= RAIDF_LOCKED;
2055 return (0);
2056 }
2057 /*
2058 * Unlock and wake up any waiters.
2059 */
2060 static void
2061 raidunlock(rs)
2062 struct raid_softc *rs;
2063 {
2064
2065 rs->sc_flags &= ~RAIDF_LOCKED;
2066 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2067 rs->sc_flags &= ~RAIDF_WANTED;
2068 wakeup(rs);
2069 }
2070 }
2071
2072
/*
 * Byte offset and byte size of the transfer that the component label
 * read/write routines issue against a component device.
 */
#define	RF_COMPONENT_INFO_OFFSET	16384	/* bytes */
#define	RF_COMPONENT_INFO_SIZE		1024	/* bytes */
2075
2076 int
2077 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2078 {
2079 RF_ComponentLabel_t clabel;
2080 raidread_component_label(dev, b_vp, &clabel);
2081 clabel.mod_counter = mod_counter;
2082 clabel.clean = RF_RAID_CLEAN;
2083 raidwrite_component_label(dev, b_vp, &clabel);
2084 return(0);
2085 }
2086
2087
2088 int
2089 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2090 {
2091 RF_ComponentLabel_t clabel;
2092 raidread_component_label(dev, b_vp, &clabel);
2093 clabel.mod_counter = mod_counter;
2094 clabel.clean = RF_RAID_DIRTY;
2095 raidwrite_component_label(dev, b_vp, &clabel);
2096 return(0);
2097 }
2098
2099 /* ARGSUSED */
2100 int
2101 raidread_component_label(dev, b_vp, clabel)
2102 dev_t dev;
2103 struct vnode *b_vp;
2104 RF_ComponentLabel_t *clabel;
2105 {
2106 struct buf *bp;
2107 int error;
2108
2109 /* XXX should probably ensure that we don't try to do this if
2110 someone has changed rf_protected_sectors. */
2111
2112 /* get a block of the appropriate size... */
2113 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2114 bp->b_dev = dev;
2115
2116 /* get our ducks in a row for the read */
2117 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2118 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2119 bp->b_flags = B_BUSY | B_READ;
2120 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2121
2122 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2123
2124 error = biowait(bp);
2125
2126 if (!error) {
2127 memcpy(clabel, bp->b_un.b_addr,
2128 sizeof(RF_ComponentLabel_t));
2129 #if 0
2130 print_component_label( clabel );
2131 #endif
2132 } else {
2133 #if 0
2134 printf("Failed to read RAID component label!\n");
2135 #endif
2136 }
2137
2138 bp->b_flags = B_INVAL | B_AGE;
2139 brelse(bp);
2140 return(error);
2141 }
2142 /* ARGSUSED */
2143 int
2144 raidwrite_component_label(dev, b_vp, clabel)
2145 dev_t dev;
2146 struct vnode *b_vp;
2147 RF_ComponentLabel_t *clabel;
2148 {
2149 struct buf *bp;
2150 int error;
2151
2152 /* get a block of the appropriate size... */
2153 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2154 bp->b_dev = dev;
2155
2156 /* get our ducks in a row for the write */
2157 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2158 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2159 bp->b_flags = B_BUSY | B_WRITE;
2160 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2161
2162 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2163
2164 memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
2165
2166 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2167 error = biowait(bp);
2168 bp->b_flags = B_INVAL | B_AGE;
2169 brelse(bp);
2170 if (error) {
2171 #if 1
2172 printf("Failed to write RAID component info!\n");
2173 #endif
2174 }
2175
2176 return(error);
2177 }
2178
2179 void
2180 rf_markalldirty( raidPtr )
2181 RF_Raid_t *raidPtr;
2182 {
2183 RF_ComponentLabel_t clabel;
2184 int r,c;
2185
2186 raidPtr->mod_counter++;
2187 for (r = 0; r < raidPtr->numRow; r++) {
2188 for (c = 0; c < raidPtr->numCol; c++) {
2189 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2190 raidread_component_label(
2191 raidPtr->Disks[r][c].dev,
2192 raidPtr->raid_cinfo[r][c].ci_vp,
2193 &clabel);
2194 if (clabel.status == rf_ds_spared) {
2195 /* XXX do something special...
2196 but whatever you do, don't
2197 try to access it!! */
2198 } else {
2199 #if 0
2200 clabel.status =
2201 raidPtr->Disks[r][c].status;
2202 raidwrite_component_label(
2203 raidPtr->Disks[r][c].dev,
2204 raidPtr->raid_cinfo[r][c].ci_vp,
2205 &clabel);
2206 #endif
2207 raidmarkdirty(
2208 raidPtr->Disks[r][c].dev,
2209 raidPtr->raid_cinfo[r][c].ci_vp,
2210 raidPtr->mod_counter);
2211 }
2212 }
2213 }
2214 }
2215 /* printf("Component labels marked dirty.\n"); */
2216 #if 0
2217 for( c = 0; c < raidPtr->numSpare ; c++) {
2218 sparecol = raidPtr->numCol + c;
2219 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2220 /*
2221
2222 XXX this is where we get fancy and map this spare
2223 into it's correct spot in the array.
2224
2225 */
2226 /*
2227
2228 we claim this disk is "optimal" if it's
2229 rf_ds_used_spare, as that means it should be
2230 directly substitutable for the disk it replaced.
2231 We note that too...
2232
2233 */
2234
2235 for(i=0;i<raidPtr->numRow;i++) {
2236 for(j=0;j<raidPtr->numCol;j++) {
2237 if ((raidPtr->Disks[i][j].spareRow ==
2238 r) &&
2239 (raidPtr->Disks[i][j].spareCol ==
2240 sparecol)) {
2241 srow = r;
2242 scol = sparecol;
2243 break;
2244 }
2245 }
2246 }
2247
2248 raidread_component_label(
2249 raidPtr->Disks[r][sparecol].dev,
2250 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2251 &clabel);
2252 /* make sure status is noted */
2253 clabel.version = RF_COMPONENT_LABEL_VERSION;
2254 clabel.mod_counter = raidPtr->mod_counter;
2255 clabel.serial_number = raidPtr->serial_number;
2256 clabel.row = srow;
2257 clabel.column = scol;
2258 clabel.num_rows = raidPtr->numRow;
2259 clabel.num_columns = raidPtr->numCol;
2260 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2261 clabel.status = rf_ds_optimal;
2262 raidwrite_component_label(
2263 raidPtr->Disks[r][sparecol].dev,
2264 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2265 &clabel);
2266 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2267 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2268 }
2269 }
2270
2271 #endif
2272 }
2273
2274
2275 void
2276 rf_update_component_labels( raidPtr )
2277 RF_Raid_t *raidPtr;
2278 {
2279 RF_ComponentLabel_t clabel;
2280 int sparecol;
2281 int r,c;
2282 int i,j;
2283 int srow, scol;
2284
2285 srow = -1;
2286 scol = -1;
2287
2288 /* XXX should do extra checks to make sure things really are clean,
2289 rather than blindly setting the clean bit... */
2290
2291 raidPtr->mod_counter++;
2292
2293 for (r = 0; r < raidPtr->numRow; r++) {
2294 for (c = 0; c < raidPtr->numCol; c++) {
2295 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2296 raidread_component_label(
2297 raidPtr->Disks[r][c].dev,
2298 raidPtr->raid_cinfo[r][c].ci_vp,
2299 &clabel);
2300 /* make sure status is noted */
2301 clabel.status = rf_ds_optimal;
2302 /* bump the counter */
2303 clabel.mod_counter++;
2304 #if 0
2305 /* note where this set was configured last */
2306 clabel.last_unit = raidPtr->raidid;
2307 #endif
2308 #if DEBUG
2309 if (raidPtr->mod_counter !=
2310 clabel.mod_counter) {
2311 printf("raid%d: mod_counter for row: %d col: %d not in sync\n", raidPtr->raidid, r, c);
2312 }
2313 #endif
2314
2315 raidwrite_component_label(
2316 raidPtr->Disks[r][c].dev,
2317 raidPtr->raid_cinfo[r][c].ci_vp,
2318 &clabel);
2319 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2320 raidmarkclean(
2321 raidPtr->Disks[r][c].dev,
2322 raidPtr->raid_cinfo[r][c].ci_vp,
2323 raidPtr->mod_counter);
2324 }
2325 }
2326 /* else we don't touch it.. */
2327 #if 0
2328 else if (raidPtr->Disks[r][c].status !=
2329 rf_ds_failed) {
2330 raidread_component_label(
2331 raidPtr->Disks[r][c].dev,
2332 raidPtr->raid_cinfo[r][c].ci_vp,
2333 &clabel);
2334 /* make sure status is noted */
2335 clabel.status =
2336 raidPtr->Disks[r][c].status;
2337 raidwrite_component_label(
2338 raidPtr->Disks[r][c].dev,
2339 raidPtr->raid_cinfo[r][c].ci_vp,
2340 &clabel);
2341 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2342 raidmarkclean(
2343 raidPtr->Disks[r][c].dev,
2344 raidPtr->raid_cinfo[r][c].ci_vp,
2345 raidPtr->mod_counter);
2346 }
2347 }
2348 #endif
2349 }
2350 }
2351
2352 for( c = 0; c < raidPtr->numSpare ; c++) {
2353 sparecol = raidPtr->numCol + c;
2354 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2355 /*
2356
2357 we claim this disk is "optimal" if it's
2358 rf_ds_used_spare, as that means it should be
2359 directly substitutable for the disk it replaced.
2360 We note that too...
2361
2362 */
2363
2364 for(i=0;i<raidPtr->numRow;i++) {
2365 for(j=0;j<raidPtr->numCol;j++) {
2366 if ((raidPtr->Disks[i][j].spareRow ==
2367 0) &&
2368 (raidPtr->Disks[i][j].spareCol ==
2369 sparecol)) {
2370 srow = i;
2371 scol = j;
2372 break;
2373 }
2374 }
2375 }
2376
2377 /* XXX shouldn't *really* need this... */
2378 raidread_component_label(
2379 raidPtr->Disks[0][sparecol].dev,
2380 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2381 &clabel);
2382 /* make sure status is noted */
2383
2384 raid_init_component_label(raidPtr, &clabel);
2385
2386 clabel.mod_counter = raidPtr->mod_counter;
2387 clabel.row = srow;
2388 clabel.column = scol;
2389 clabel.status = rf_ds_optimal;
2390
2391 raidwrite_component_label(
2392 raidPtr->Disks[0][sparecol].dev,
2393 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2394 &clabel);
2395 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2396 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2397 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2398 raidPtr->mod_counter);
2399 }
2400 }
2401 }
2402 /* printf("Component labels updated\n"); */
2403 }
2404
2405 void
2406 rf_ReconThread(req)
2407 struct rf_recon_req *req;
2408 {
2409 int s;
2410 RF_Raid_t *raidPtr;
2411
2412 s = splbio();
2413 raidPtr = (RF_Raid_t *) req->raidPtr;
2414 raidPtr->recon_in_progress = 1;
2415
2416 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2417 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2418
2419 /* XXX get rid of this! we don't need it at all.. */
2420 RF_Free(req, sizeof(*req));
2421
2422 raidPtr->recon_in_progress = 0;
2423 splx(s);
2424
2425 /* That's all... */
2426 kthread_exit(0); /* does not return */
2427 }
2428
2429 void
2430 rf_RewriteParityThread(raidPtr)
2431 RF_Raid_t *raidPtr;
2432 {
2433 int retcode;
2434 int s;
2435
2436 raidPtr->parity_rewrite_in_progress = 1;
2437 s = splbio();
2438 retcode = rf_RewriteParity(raidPtr);
2439 splx(s);
2440 if (retcode) {
2441 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2442 } else {
2443 /* set the clean bit! If we shutdown correctly,
2444 the clean bit on each component label will get
2445 set */
2446 raidPtr->parity_good = RF_RAID_CLEAN;
2447 }
2448 raidPtr->parity_rewrite_in_progress = 0;
2449
2450 /* That's all... */
2451 kthread_exit(0); /* does not return */
2452 }
2453
2454
2455 void
2456 rf_CopybackThread(raidPtr)
2457 RF_Raid_t *raidPtr;
2458 {
2459 int s;
2460
2461 raidPtr->copyback_in_progress = 1;
2462 s = splbio();
2463 rf_CopybackReconstructedData(raidPtr);
2464 splx(s);
2465 raidPtr->copyback_in_progress = 0;
2466
2467 /* That's all... */
2468 kthread_exit(0); /* does not return */
2469 }
2470
2471
2472 void
2473 rf_ReconstructInPlaceThread(req)
2474 struct rf_recon_req *req;
2475 {
2476 int retcode;
2477 int s;
2478 RF_Raid_t *raidPtr;
2479
2480 s = splbio();
2481 raidPtr = req->raidPtr;
2482 raidPtr->recon_in_progress = 1;
2483 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2484 RF_Free(req, sizeof(*req));
2485 raidPtr->recon_in_progress = 0;
2486 splx(s);
2487
2488 /* That's all... */
2489 kthread_exit(0); /* does not return */
2490 }
2491
/*
 * rf_mountroot_hook --
 *	Placeholder; currently does nothing with `dev'.
 */
/* ARGSUSED */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* nothing to do */
	return;
}
2498
2499
2500 RF_AutoConfig_t *
2501 rf_find_raid_components()
2502 {
2503 struct devnametobdevmaj *dtobdm;
2504 struct vnode *vp;
2505 struct disklabel label;
2506 struct device *dv;
2507 char *cd_name;
2508 dev_t dev;
2509 int error;
2510 int i;
2511 int good_one;
2512 RF_ComponentLabel_t *clabel;
2513 RF_AutoConfig_t *ac_list;
2514 RF_AutoConfig_t *ac;
2515
2516
2517 /* initialize the AutoConfig list */
2518 ac_list = NULL;
2519
2520 if (raidautoconfig) {
2521
2522 /* we begin by trolling through *all* the devices on the system */
2523
2524 for (dv = alldevs.tqh_first; dv != NULL;
2525 dv = dv->dv_list.tqe_next) {
2526
2527 /* we are only interested in disks... */
2528 if (dv->dv_class != DV_DISK)
2529 continue;
2530
2531 /* we don't care about floppies... */
2532 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2533 continue;
2534 }
2535
2536 /* need to find the device_name_to_block_device_major stuff */
2537 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2538 dtobdm = dev_name2blk;
2539 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2540 dtobdm++;
2541 }
2542
2543 /* get a vnode for the raw partition of this disk */
2544
2545 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2546 if (bdevvp(dev, &vp))
2547 panic("RAID can't alloc vnode");
2548
2549 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2550
2551 if (error) {
2552 /* "Who cares." Continue looking
2553 for something that exists*/
2554 vput(vp);
2555 continue;
2556 }
2557
2558 /* Ok, the disk exists. Go get the disklabel. */
2559 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2560 FREAD, NOCRED, 0);
2561 if (error) {
2562 /*
2563 * XXX can't happen - open() would
2564 * have errored out (or faked up one)
2565 */
2566 printf("can't get label for dev %s%c (%d)!?!?\n",
2567 dv->dv_xname, 'a' + RAW_PART, error);
2568 }
2569
2570 /* don't need this any more. We'll allocate it again
2571 a little later if we really do... */
2572 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2573 vput(vp);
2574
2575 for (i=0; i < label.d_npartitions; i++) {
2576 /* We only support partitions marked as RAID */
2577 if (label.d_partitions[i].p_fstype != FS_RAID)
2578 continue;
2579
2580 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2581 if (bdevvp(dev, &vp))
2582 panic("RAID can't alloc vnode");
2583
2584 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2585 if (error) {
2586 /* Whatever... */
2587 vput(vp);
2588 continue;
2589 }
2590
2591 good_one = 0;
2592
2593 clabel = (RF_ComponentLabel_t *)
2594 malloc(sizeof(RF_ComponentLabel_t),
2595 M_RAIDFRAME, M_NOWAIT);
2596 if (clabel == NULL) {
2597 /* XXX CLEANUP HERE */
2598 printf("RAID auto config: out of memory!\n");
2599 return(NULL); /* XXX probably should panic? */
2600 }
2601
2602 if (!raidread_component_label(dev, vp, clabel)) {
2603 /* Got the label. Does it look reasonable? */
2604 if (rf_reasonable_label(clabel) &&
2605 (clabel->partitionSize <=
2606 label.d_partitions[i].p_size)) {
2607 #if DEBUG
2608 printf("Component on: %s%c: %d\n",
2609 dv->dv_xname, 'a'+i,
2610 label.d_partitions[i].p_size);
2611 print_component_label(clabel);
2612 #endif
2613 /* if it's reasonable, add it,
2614 else ignore it. */
2615 ac = (RF_AutoConfig_t *)
2616 malloc(sizeof(RF_AutoConfig_t),
2617 M_RAIDFRAME,
2618 M_NOWAIT);
2619 if (ac == NULL) {
2620 /* XXX should panic?? */
2621 return(NULL);
2622 }
2623
2624 sprintf(ac->devname, "%s%c",
2625 dv->dv_xname, 'a'+i);
2626 ac->dev = dev;
2627 ac->vp = vp;
2628 ac->clabel = clabel;
2629 ac->next = ac_list;
2630 ac_list = ac;
2631 good_one = 1;
2632 }
2633 }
2634 if (!good_one) {
2635 /* cleanup */
2636 free(clabel, M_RAIDFRAME);
2637 VOP_CLOSE(vp, FREAD, NOCRED, 0);
2638 vput(vp);
2639 }
2640 }
2641 }
2642 }
2643 return(ac_list);
2644 }
2645
2646 static int
2647 rf_reasonable_label(clabel)
2648 RF_ComponentLabel_t *clabel;
2649 {
2650
2651 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2652 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2653 ((clabel->clean == RF_RAID_CLEAN) ||
2654 (clabel->clean == RF_RAID_DIRTY)) &&
2655 clabel->row >=0 &&
2656 clabel->column >= 0 &&
2657 clabel->num_rows > 0 &&
2658 clabel->num_columns > 0 &&
2659 clabel->row < clabel->num_rows &&
2660 clabel->column < clabel->num_columns &&
2661 clabel->blockSize > 0 &&
2662 clabel->numBlocks > 0) {
2663 /* label looks reasonable enough... */
2664 return(1);
2665 }
2666 return(0);
2667 }
2668
2669
2670 void
2671 print_component_label(clabel)
2672 RF_ComponentLabel_t *clabel;
2673 {
2674 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2675 clabel->row, clabel->column,
2676 clabel->num_rows, clabel->num_columns);
2677 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2678 clabel->version, clabel->serial_number,
2679 clabel->mod_counter);
2680 printf(" Clean: %s Status: %d\n",
2681 clabel->clean ? "Yes" : "No", clabel->status );
2682 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2683 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2684 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2685 (char) clabel->parityConfig, clabel->blockSize,
2686 clabel->numBlocks);
2687 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2688 printf(" Last configured as: raid%d\n", clabel->last_unit );
2689 #if 0
2690 printf(" Config order: %d\n", clabel->config_order);
2691 #endif
2692
2693 }
2694
2695 RF_ConfigSet_t *
2696 rf_create_auto_sets(ac_list)
2697 RF_AutoConfig_t *ac_list;
2698 {
2699 RF_AutoConfig_t *ac;
2700 RF_ConfigSet_t *config_sets;
2701 RF_ConfigSet_t *cset;
2702 RF_AutoConfig_t *ac_next;
2703
2704
2705 config_sets = NULL;
2706
2707 /* Go through the AutoConfig list, and figure out which components
2708 belong to what sets. */
2709 ac = ac_list;
2710 while(ac!=NULL) {
2711 /* we're going to putz with ac->next, so save it here
2712 for use at the end of the loop */
2713 ac_next = ac->next;
2714
2715 if (config_sets == NULL) {
2716 /* will need at least this one... */
2717 config_sets = (RF_ConfigSet_t *)
2718 malloc(sizeof(RF_ConfigSet_t),
2719 M_RAIDFRAME, M_NOWAIT);
2720 if (config_sets == NULL) {
2721 panic("rf_create_auto_sets: No memory!\n");
2722 }
2723 /* this one is easy :) */
2724 config_sets->ac = ac;
2725 config_sets->next = NULL;
2726 config_sets->rootable = 0;
2727 ac->next = NULL;
2728 } else {
2729 /* which set does this component fit into? */
2730 cset = config_sets;
2731 while(cset!=NULL) {
2732 if (rf_does_it_fit(cset, ac)) {
2733 /* looks like it matches */
2734 ac->next = cset->ac;
2735 cset->ac = ac;
2736 break;
2737 }
2738 cset = cset->next;
2739 }
2740 if (cset==NULL) {
2741 /* didn't find a match above... new set..*/
2742 cset = (RF_ConfigSet_t *)
2743 malloc(sizeof(RF_ConfigSet_t),
2744 M_RAIDFRAME, M_NOWAIT);
2745 if (cset == NULL) {
2746 panic("rf_create_auto_sets: No memory!\n");
2747 }
2748 cset->ac = ac;
2749 ac->next = NULL;
2750 cset->next = config_sets;
2751 cset->rootable = 0;
2752 config_sets = cset;
2753 }
2754 }
2755 ac = ac_next;
2756 }
2757
2758
2759 return(config_sets);
2760 }
2761
2762 static int
2763 rf_does_it_fit(cset, ac)
2764 RF_ConfigSet_t *cset;
2765 RF_AutoConfig_t *ac;
2766 {
2767 RF_ComponentLabel_t *clabel1, *clabel2;
2768
2769 /* If this one matches the *first* one in the set, that's good
2770 enough, since the other members of the set would have been
2771 through here too... */
2772 /* note that we are not checking partitionSize here.. */
2773
2774 clabel1 = cset->ac->clabel;
2775 clabel2 = ac->clabel;
2776 if ((clabel1->version == clabel2->version) &&
2777 (clabel1->serial_number == clabel2->serial_number) &&
2778 (clabel1->mod_counter == clabel2->mod_counter) &&
2779 (clabel1->num_rows == clabel2->num_rows) &&
2780 (clabel1->num_columns == clabel2->num_columns) &&
2781 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2782 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2783 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2784 (clabel1->parityConfig == clabel2->parityConfig) &&
2785 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2786 (clabel1->blockSize == clabel2->blockSize) &&
2787 (clabel1->numBlocks == clabel2->numBlocks) &&
2788 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2789 (clabel1->root_partition == clabel2->root_partition) &&
2790 (clabel1->last_unit == clabel2->last_unit) &&
2791 (clabel1->config_order == clabel2->config_order)) {
2792 /* if it get's here, it almost *has* to be a match */
2793 } else {
2794 /* it's not consistent with somebody in the set..
2795 punt */
2796 return(0);
2797 }
2798 /* all was fine.. it must fit... */
2799 return(1);
2800 }
2801
2802 int
2803 rf_have_enough_components(cset)
2804 RF_ConfigSet_t *cset;
2805 {
2806 RF_AutoConfig_t *ac;
2807 RF_AutoConfig_t *auto_config;
2808 RF_ComponentLabel_t *clabel;
2809 int r,c;
2810 int num_rows;
2811 int num_cols;
2812 int num_missing;
2813
2814 /* check to see that we have enough 'live' components
2815 of this set. If so, we can configure it if necessary */
2816
2817 num_rows = cset->ac->clabel->num_rows;
2818 num_cols = cset->ac->clabel->num_columns;
2819
2820 /* XXX Check for duplicate components!?!?!? */
2821
2822 num_missing = 0;
2823 auto_config = cset->ac;
2824
2825 for(r=0; r<num_rows; r++) {
2826 for(c=0; c<num_cols; c++) {
2827 ac = auto_config;
2828 while(ac!=NULL) {
2829 if (ac->clabel==NULL) {
2830 /* big-time bad news. */
2831 goto fail;
2832 }
2833 if ((ac->clabel->row == r) &&
2834 (ac->clabel->column == c)) {
2835 /* it's this one... */
2836 #if DEBUG
2837 printf("Found: %s at %d,%d\n",
2838 ac->devname,r,c);
2839 #endif
2840 break;
2841 }
2842 ac=ac->next;
2843 }
2844 if (ac==NULL) {
2845 /* Didn't find one here! */
2846 num_missing++;
2847 }
2848 }
2849 }
2850
2851 clabel = cset->ac->clabel;
2852
2853 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
2854 ((clabel->parityConfig == '1') && (num_missing > 1)) ||
2855 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
2856 ((clabel->parityConfig == '5') && (num_missing > 1))) {
2857 /* XXX this needs to be made *much* more general */
2858 /* Too many failures */
2859 return(0);
2860 }
2861 /* otherwise, all is well, and we've got enough to take a kick
2862 at autoconfiguring this set */
2863 return(1);
2864 fail:
2865 return(0);
2866
2867 }
2868
2869 void
2870 rf_create_configuration(ac,config,raidPtr)
2871 RF_AutoConfig_t *ac;
2872 RF_Config_t *config;
2873 RF_Raid_t *raidPtr;
2874 {
2875 RF_ComponentLabel_t *clabel;
2876
2877 clabel = ac->clabel;
2878
2879 /* 1. Fill in the common stuff */
2880 config->numRow = clabel->num_rows;
2881 config->numCol = clabel->num_columns;
2882 config->numSpare = 0; /* XXX should this be set here? */
2883 config->sectPerSU = clabel->sectPerSU;
2884 config->SUsPerPU = clabel->SUsPerPU;
2885 config->SUsPerRU = clabel->SUsPerRU;
2886 config->parityConfig = clabel->parityConfig;
2887 /* XXX... */
2888 strcpy(config->diskQueueType,"fifo");
2889 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
2890 config->layoutSpecificSize = 0; /* XXX ?? */
2891
2892 while(ac!=NULL) {
2893 /* row/col values will be in range due to the checks
2894 in reasonable_label() */
2895 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
2896 ac->devname);
2897 ac = ac->next;
2898 }
2899
2900 }
2901
2902 int
2903 rf_set_autoconfig(raidPtr, new_value)
2904 RF_Raid_t *raidPtr;
2905 int new_value;
2906 {
2907 RF_ComponentLabel_t clabel;
2908 struct vnode *vp;
2909 dev_t dev;
2910 int row, column;
2911
2912 raidPtr->autoconfigure = new_value;
2913 for(row=0; row<raidPtr->numRow; row++) {
2914 for(column=0; column<raidPtr->numCol; column++) {
2915 dev = raidPtr->Disks[row][column].dev;
2916 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2917 raidread_component_label(dev, vp, &clabel);
2918 clabel.autoconfigure = new_value;
2919 raidwrite_component_label(dev, vp, &clabel);
2920 }
2921 }
2922 return(new_value);
2923 }
2924
2925 int
2926 rf_set_rootpartition(raidPtr, new_value)
2927 RF_Raid_t *raidPtr;
2928 int new_value;
2929 {
2930 RF_ComponentLabel_t clabel;
2931 struct vnode *vp;
2932 dev_t dev;
2933 int row, column;
2934
2935 raidPtr->root_partition = new_value;
2936 for(row=0; row<raidPtr->numRow; row++) {
2937 for(column=0; column<raidPtr->numCol; column++) {
2938 dev = raidPtr->Disks[row][column].dev;
2939 vp = raidPtr->raid_cinfo[row][column].ci_vp;
2940 raidread_component_label(dev, vp, &clabel);
2941 clabel.root_partition = new_value;
2942 raidwrite_component_label(dev, vp, &clabel);
2943 }
2944 }
2945 return(new_value);
2946 }
2947
2948 void
2949 rf_release_all_vps(cset)
2950 RF_ConfigSet_t *cset;
2951 {
2952 RF_AutoConfig_t *ac;
2953
2954 ac = cset->ac;
2955 while(ac!=NULL) {
2956 /* Close the vp, and give it back */
2957 if (ac->vp) {
2958 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
2959 vput(ac->vp);
2960 }
2961 ac = ac->next;
2962 }
2963 }
2964
2965
2966 void
2967 rf_cleanup_config_set(cset)
2968 RF_ConfigSet_t *cset;
2969 {
2970 RF_AutoConfig_t *ac;
2971 RF_AutoConfig_t *next_ac;
2972
2973 ac = cset->ac;
2974 while(ac!=NULL) {
2975 next_ac = ac->next;
2976 /* nuke the label */
2977 free(ac->clabel, M_RAIDFRAME);
2978 /* cleanup the config structure */
2979 free(ac, M_RAIDFRAME);
2980 /* "next.." */
2981 ac = next_ac;
2982 }
2983 /* and, finally, nuke the config set */
2984 free(cset, M_RAIDFRAME);
2985 }
2986
2987
2988 void
2989 raid_init_component_label(raidPtr, clabel)
2990 RF_Raid_t *raidPtr;
2991 RF_ComponentLabel_t *clabel;
2992 {
2993 /* current version number */
2994 clabel->version = RF_COMPONENT_LABEL_VERSION;
2995 clabel->serial_number = raidPtr->serial_number;
2996 clabel->mod_counter = raidPtr->mod_counter;
2997 clabel->num_rows = raidPtr->numRow;
2998 clabel->num_columns = raidPtr->numCol;
2999 clabel->clean = RF_RAID_DIRTY; /* not clean */
3000 clabel->status = rf_ds_optimal; /* "It's good!" */
3001
3002 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3003 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3004 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3005
3006 clabel->blockSize = raidPtr->bytesPerSector;
3007 clabel->numBlocks = raidPtr->sectorsPerDisk;
3008
3009 /* XXX not portable */
3010 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3011 clabel->maxOutstanding = raidPtr->maxOutstanding;
3012 clabel->autoconfigure = raidPtr->autoconfigure;
3013 clabel->root_partition = raidPtr->root_partition;
3014 clabel->last_unit = raidPtr->raidid;
3015 clabel->config_order = raidPtr->config_order;
3016 }
3017
3018 int
3019 rf_auto_config_set(cset,unit)
3020 RF_ConfigSet_t *cset;
3021 int *unit;
3022 {
3023 RF_Raid_t *raidPtr;
3024 RF_Config_t *config;
3025 int raidID;
3026 int retcode;
3027
3028 printf("Starting autoconfigure on raid%d\n",raidID);
3029
3030 retcode = 0;
3031 *unit = -1;
3032
3033 /* 1. Create a config structure */
3034
3035 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3036 M_RAIDFRAME,
3037 M_NOWAIT);
3038 if (config==NULL) {
3039 printf("Out of mem!?!?\n");
3040 /* XXX do something more intelligent here. */
3041 return(1);
3042 }
3043 /* XXX raidID needs to be set correctly.. */
3044
3045 /*
3046 2. Figure out what RAID ID this one is supposed to live at
3047 See if we can get the same RAID dev that it was configured
3048 on last time..
3049 */
3050
3051 raidID = cset->ac->clabel->last_unit;
3052 if ((raidID < 0) || (raidID >= numraid)) {
3053 /* let's not wander off into lala land. */
3054 raidID = numraid - 1;
3055 }
3056 if (raidPtrs[raidID]->valid != 0) {
3057
3058 /*
3059 Nope... Go looking for an alternative...
3060 Start high so we don't immediately use raid0 if that's
3061 not taken.
3062 */
3063
3064 for(raidID = numraid; raidID >= 0; raidID--) {
3065 if (raidPtrs[raidID]->valid == 0) {
3066 /* can use this one! */
3067 break;
3068 }
3069 }
3070 }
3071
3072 if (raidID < 0) {
3073 /* punt... */
3074 printf("Unable to auto configure this set!\n");
3075 printf("(Out of RAID devs!)\n");
3076 return(1);
3077 }
3078
3079 raidPtr = raidPtrs[raidID];
3080
3081 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3082 raidPtr->raidid = raidID;
3083 raidPtr->openings = RAIDOUTSTANDING;
3084
3085 /* 3. Build the configuration structure */
3086 rf_create_configuration(cset->ac, config, raidPtr);
3087
3088 /* 4. Do the configuration */
3089 retcode = rf_Configure(raidPtr, config, cset->ac);
3090
3091 if (retcode == 0) {
3092 #if DEBUG
3093 printf("Calling raidinit()\n");
3094 #endif
3095 /* XXX the 0 below is bogus! */
3096 raidinit(raidPtrs[raidID]);
3097
3098 rf_markalldirty(raidPtrs[raidID]);
3099 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3100 if (cset->ac->clabel->root_partition==1) {
3101 /* everything configured just fine. Make a note
3102 that this set is eligible to be root. */
3103 cset->rootable = 1;
3104 /* XXX do this here? */
3105 raidPtrs[raidID]->root_partition = 1;
3106 }
3107 }
3108
3109 /* 5. Cleanup */
3110 free(config, M_RAIDFRAME);
3111
3112 *unit = raidID;
3113 return(retcode);
3114 }
3115